Update Linux to v5.4.2

Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 7068f34..fbd6832 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -1,17 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # For a description of the syntax of this configuration file,
-# see Documentation/kbuild/kconfig-language.txt.
+# see Documentation/kbuild/kconfig-language.rst.
 #
 
 config NDS32
-        def_bool y
+	def_bool y
+	select ARCH_32BIT_OFF_T
+	select ARCH_HAS_DMA_PREP_COHERENT
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_WANT_FRAME_POINTERS if FTRACE
 	select CLKSRC_MMIO
 	select CLONE_BACKWARDS
 	select COMMON_CLK
-	select DMA_NONCOHERENT_OPS
+	select DMA_DIRECT_REMAP
 	select GENERIC_ATOMIC64
 	select GENERIC_CPU_DEVICES
 	select GENERIC_CLOCKEVENTS
@@ -29,14 +32,14 @@
 	select HANDLE_DOMAIN_IRQ
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_DEBUG_KMEMLEAK
-	select HAVE_MEMBLOCK
+	select HAVE_EXIT_THREAD
 	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_PERF_EVENTS
 	select IRQ_DOMAIN
 	select LOCKDEP_SUPPORT
 	select MODULES_USE_ELF_RELA
 	select OF
 	select OF_EARLY_FLATTREE
-	select NO_BOOTMEM
 	select NO_IOPORT_MAP
 	select RTC_LIB
 	select THREAD_INFO_IN_TASK
@@ -51,23 +54,20 @@
 	def_bool y
 
 config GENERIC_CSUM
-        def_bool y
+	def_bool y
 
 config GENERIC_HWEIGHT
-        def_bool y
+	def_bool y
 
 config GENERIC_LOCKBREAK
-        def_bool y
-	depends on PREEMPT
-
-config RWSEM_GENERIC_SPINLOCK
 	def_bool y
+	depends on PREEMPT
 
 config TRACE_IRQFLAGS_SUPPORT
 	def_bool y
 
 config STACKTRACE_SUPPORT
-        def_bool y
+	def_bool y
 
 config FIX_EARLYCON_MEM
 	def_bool y
@@ -82,11 +82,11 @@
 	default 1
 
 config MMU
-        def_bool y
+	def_bool y
 
 config NDS32_BUILTIN_DTB
-        string "Builtin DTB"
-        default ""
+	string "Builtin DTB"
+	default ""
 	help
 	  User can use it to specify the dts of the SoC
 endmenu
@@ -94,3 +94,13 @@
 menu "Kernel Features"
 source "kernel/Kconfig.hz"
 endmenu
+
+menu "Power management options"
+config SYS_SUPPORTS_APM_EMULATION
+	bool
+
+config ARCH_SUSPEND_POSSIBLE
+	def_bool y
+
+source "kernel/power/Kconfig"
+endmenu
diff --git a/arch/nds32/Kconfig.cpu b/arch/nds32/Kconfig.cpu
index b8c8984..f80a4ab 100644
--- a/arch/nds32/Kconfig.cpu
+++ b/arch/nds32/Kconfig.cpu
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 comment "Processor Features"
 
 config CPU_BIG_ENDIAN
@@ -7,6 +8,40 @@
 	bool "Little endian"
 	default y
 
+config FPU
+	bool "FPU support"
+	default n
+	help
+	  If FPU ISA is used in user space, this configuration shall be Y to
+          enable required support in kerenl such as fpu context switch and
+          fpu exception handler.
+
+	  If no FPU ISA is used in user space, say N.
+
+config LAZY_FPU
+	bool "lazy FPU support"
+	depends on FPU
+	default y
+	help
+	  Say Y here to enable the lazy FPU scheme. The lazy FPU scheme can
+          enhance system performance by reducing the context switch
+	  frequency of the FPU register.
+
+	  For nomal case, say Y.
+
+config SUPPORT_DENORMAL_ARITHMETIC
+	bool "Denormal arithmetic support"
+	depends on FPU
+	default n
+	help
+	  Say Y here to enable arithmetic of denormalized number. Enabling
+	  this feature can enhance the precision for tininess number.
+	  However, performance loss in float pointe calculations is
+	  possibly significant due to additional FPU exception.
+
+	  If the calculated tolerance for tininess number is not critical,
+	  say N to prevent performance loss.
+
 config HWZOL
 	bool "hardware zero overhead loop support"
 	depends on CPU_D10 || CPU_D15
@@ -143,6 +178,13 @@
 	  Say Y here to enable L2 cache if your SoC are integrated with L2CC.
 	  If unsure, say N.
 
+config HW_PRE
+	bool "Enable hardware prefetcher"
+	default y
+	help
+	  Say Y here to enable hardware prefetcher feature.
+	  Only when CPU_VER.REV >= 0x09 can support.
+
 menu "Memory configuration"
 
 choice
diff --git a/arch/nds32/Kconfig.debug b/arch/nds32/Kconfig.debug
index 22a162c..295942f 100644
--- a/arch/nds32/Kconfig.debug
+++ b/arch/nds32/Kconfig.debug
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
 # dummy file, do not delete
diff --git a/arch/nds32/Makefile b/arch/nds32/Makefile
index 3509fac..ccdca71 100644
--- a/arch/nds32/Makefile
+++ b/arch/nds32/Makefile
@@ -1,14 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0-only
 LDFLAGS_vmlinux	:= --no-undefined -X
 OBJCOPYFLAGS	:= -O binary -R .note -R .note.gnu.build-id -R .comment -S
 
-KBUILD_DEFCONFIG := defconfig
-
-comma = ,
-
 ifdef CONFIG_FUNCTION_TRACER
 arch-y += -malways-save-lp -mno-relax
 endif
 
+# Avoid generating FPU instructions
+arch-y  += -mno-ext-fpu-sp -mno-ext-fpu-dp -mfloat-abi=soft
+
 KBUILD_CFLAGS	+= $(call cc-option, -mno-sched-prolog-epilog)
 KBUILD_CFLAGS	+= -mcmodel=large
 
@@ -26,6 +26,7 @@
 
 # If we have a machine-specific directory, then include it in the build.
 core-y				+= arch/nds32/kernel/ arch/nds32/mm/
+core-$(CONFIG_FPU)              += arch/nds32/math-emu/
 libs-y				+= arch/nds32/lib/
 
 ifneq '$(CONFIG_NDS32_BUILTIN_DTB)' '""'
@@ -47,9 +48,7 @@
 endif
 
 boot := arch/nds32/boot
-core-$(BUILTIN_DTB) += $(boot)/dts/
-
-.PHONY: FORCE
+core-y += $(boot)/dts/
 
 Image: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
@@ -63,9 +62,6 @@
 vdso_prepare: prepare0
 	$(Q)$(MAKE) $(build)=arch/nds32/kernel/vdso include/generated/vdso-offsets.h
 
-CLEAN_FILES += include/asm-nds32/constants.h*
-
-# We use MRPROPER_FILES and CLEAN_FILES now
 archclean:
 	$(Q)$(MAKE) $(clean)=$(boot)
 
diff --git a/arch/nds32/boot/Makefile b/arch/nds32/boot/Makefile
index 3f9b86f..c4cc0c2 100644
--- a/arch/nds32/boot/Makefile
+++ b/arch/nds32/boot/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 targets := Image Image.gz
 
 $(obj)/Image: vmlinux FORCE
diff --git a/arch/nds32/boot/dts/Makefile b/arch/nds32/boot/dts/Makefile
index d31faa8..fff8ade 100644
--- a/arch/nds32/boot/dts/Makefile
+++ b/arch/nds32/boot/dts/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 ifneq '$(CONFIG_NDS32_BUILTIN_DTB)' '""'
 BUILTIN_DTB := $(patsubst "%",%,$(CONFIG_NDS32_BUILTIN_DTB)).dtb.o
 else
diff --git a/arch/nds32/boot/dts/ae3xx.dts b/arch/nds32/boot/dts/ae3xx.dts
index bb39749..16a9f54 100644
--- a/arch/nds32/boot/dts/ae3xx.dts
+++ b/arch/nds32/boot/dts/ae3xx.dts
@@ -82,4 +82,9 @@
 			interrupts = <18>;
 		};
 	};
+
+	pmu {
+		compatible = "andestech,nds32v3-pmu";
+		interrupts= <13>;
+	};
 };
diff --git a/arch/nds32/configs/defconfig b/arch/nds32/configs/defconfig
index 2546d87..40313a6 100644
--- a/arch/nds32/configs/defconfig
+++ b/arch/nds32/configs/defconfig
@@ -74,7 +74,7 @@
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
-CONFIG_EXT4_ENCRYPTION=y
+CONFIG_FS_ENCRYPTION=y
 CONFIG_FUSE_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
@@ -92,6 +92,7 @@
 CONFIG_DEBUG_INFO_DWARF4=y
 CONFIG_GDB_SCRIPTS=y
 CONFIG_READABLE_ASM=y
+CONFIG_HEADERS_INSTALL=y
 CONFIG_HEADERS_CHECK=y
 CONFIG_DEBUG_SECTION_MISMATCH=y
 CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild
index dbc4e54..77eae62 100644
--- a/arch/nds32/include/asm/Kbuild
+++ b/arch/nds32/include/asm/Kbuild
@@ -1,32 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0
 generic-y += asm-offsets.h
 generic-y += atomic.h
 generic-y += bitops.h
-generic-y += bitsperlong.h
-generic-y += bpf_perf_event.h
 generic-y += bug.h
 generic-y += bugs.h
 generic-y += checksum.h
-generic-y += clkdev.h
 generic-y += cmpxchg.h
-generic-y += cmpxchg-local.h
 generic-y += compat.h
-generic-y += cputime.h
 generic-y += device.h
 generic-y += div64.h
 generic-y += dma.h
 generic-y += dma-mapping.h
 generic-y += emergency-restart.h
-generic-y += errno.h
 generic-y += exec.h
 generic-y += export.h
 generic-y += fb.h
-generic-y += fcntl.h
-generic-y += ftrace.h
 generic-y += gpio.h
 generic-y += hardirq.h
 generic-y += hw_irq.h
-generic-y += ioctl.h
-generic-y += ioctls.h
 generic-y += irq.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
@@ -34,20 +25,16 @@
 generic-y += kmap_types.h
 generic-y += kprobes.h
 generic-y += kvm_para.h
-generic-y += limits.h
 generic-y += local.h
+generic-y += local64.h
 generic-y += mm-arch-hooks.h
-generic-y += mman.h
+generic-y += mmiowb.h
 generic-y += parport.h
 generic-y += pci.h
 generic-y += percpu.h
 generic-y += preempt.h
 generic-y += sections.h
-generic-y += segment.h
 generic-y += serial.h
-generic-y += shmbuf.h
-generic-y += sizes.h
-generic-y += stat.h
 generic-y += switch_to.h
 generic-y += timex.h
 generic-y += topology.h
diff --git a/arch/nds32/include/asm/assembler.h b/arch/nds32/include/asm/assembler.h
index c385578..5e7c569 100644
--- a/arch/nds32/include/asm/assembler.h
+++ b/arch/nds32/include/asm/assembler.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __NDS32_ASSEMBLER_H__
diff --git a/arch/nds32/include/asm/barrier.h b/arch/nds32/include/asm/barrier.h
index faafc37..1641317 100644
--- a/arch/nds32/include/asm/barrier.h
+++ b/arch/nds32/include/asm/barrier.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __NDS32_ASM_BARRIER_H
diff --git a/arch/nds32/include/asm/bitfield.h b/arch/nds32/include/asm/bitfield.h
index 8e84fc3..b02a58e 100644
--- a/arch/nds32/include/asm/bitfield.h
+++ b/arch/nds32/include/asm/bitfield.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __NDS32_BITFIELD_H__
@@ -251,6 +251,11 @@
 #define ITYPE_mskSTYPE		( 0xF  << ITYPE_offSTYPE )
 #define ITYPE_mskCPID		( 0x3  << ITYPE_offCPID )
 
+/* Additional definitions of ITYPE register for FPU */
+#define FPU_DISABLE_EXCEPTION	(0x1  << ITYPE_offSTYPE)
+#define FPU_EXCEPTION		(0x2  << ITYPE_offSTYPE)
+#define FPU_CPID		0	/* FPU Co-Processor ID is 0 */
+
 #define NDS32_VECTOR_mskNONEXCEPTION	0x78
 #define NDS32_VECTOR_offEXCEPTION	8
 #define NDS32_VECTOR_offINTERRUPT	9
@@ -692,8 +697,8 @@
 #define PFM_CTL_offKU1		13	/* Enable user mode event counting for PFMC1 */
 #define PFM_CTL_offKU2		14	/* Enable user mode event counting for PFMC2 */
 #define PFM_CTL_offSEL0		15	/* The event selection for PFMC0 */
-#define PFM_CTL_offSEL1		21	/* The event selection for PFMC1 */
-#define PFM_CTL_offSEL2		27	/* The event selection for PFMC2 */
+#define PFM_CTL_offSEL1		16	/* The event selection for PFMC1 */
+#define PFM_CTL_offSEL2		22	/* The event selection for PFMC2 */
 /* bit 28:31 reserved */
 
 #define PFM_CTL_mskEN0		( 0x01  << PFM_CTL_offEN0 )
@@ -735,14 +740,20 @@
 #define N13MISC_CTL_offRTP	1	/* Disable Return Target Predictor */
 #define N13MISC_CTL_offPTEPF	2	/* Disable HPTWK L2 PTE pefetch */
 #define N13MISC_CTL_offSP_SHADOW_EN	4	/* Enable shadow stack pointers */
+#define MISC_CTL_offHWPRE      11      /* Enable HardWare PREFETCH */
 /* bit 6, 9:31 reserved */
 
 #define N13MISC_CTL_makBTB	( 0x1  << N13MISC_CTL_offBTB )
 #define N13MISC_CTL_makRTP	( 0x1  << N13MISC_CTL_offRTP )
 #define N13MISC_CTL_makPTEPF	( 0x1  << N13MISC_CTL_offPTEPF )
 #define N13MISC_CTL_makSP_SHADOW_EN	( 0x1  << N13MISC_CTL_offSP_SHADOW_EN )
+#define MISC_CTL_makHWPRE_EN     ( 0x1  << MISC_CTL_offHWPRE )
 
+#ifdef CONFIG_HW_PRE
+#define MISC_init	(N13MISC_CTL_makBTB|N13MISC_CTL_makRTP|N13MISC_CTL_makSP_SHADOW_EN|MISC_CTL_makHWPRE_EN)
+#else
 #define MISC_init	(N13MISC_CTL_makBTB|N13MISC_CTL_makRTP|N13MISC_CTL_makSP_SHADOW_EN)
+#endif
 
 /******************************************************************************
  * PRUSR_ACC_CTL (Privileged Resource User Access Control Registers)
@@ -926,6 +937,7 @@
 #define FPCSR_mskDNIT           ( 0x1  << FPCSR_offDNIT )
 #define FPCSR_mskRIT		( 0x1  << FPCSR_offRIT )
 #define FPCSR_mskALL		(FPCSR_mskIVO | FPCSR_mskDBZ | FPCSR_mskOVF | FPCSR_mskUDF | FPCSR_mskIEX)
+#define FPCSR_mskALLE_NO_UDF_IEXE (FPCSR_mskIVOE | FPCSR_mskDBZE | FPCSR_mskOVFE)
 #define FPCSR_mskALLE		(FPCSR_mskIVOE | FPCSR_mskDBZE | FPCSR_mskOVFE | FPCSR_mskUDFE | FPCSR_mskIEXE)
 #define FPCSR_mskALLT           (FPCSR_mskIVOT | FPCSR_mskDBZT | FPCSR_mskOVFT | FPCSR_mskUDFT | FPCSR_mskIEXT |FPCSR_mskDNIT | FPCSR_mskRIT)
 
@@ -946,6 +958,15 @@
 #define FPCFG_mskIMVER		( 0x1F  << FPCFG_offIMVER )
 #define FPCFG_mskAVER		( 0x1F  << FPCFG_offAVER )
 
+/* 8 Single precision or 4 double precision registers are available */
+#define SP8_DP4_reg		0
+/* 16 Single precision or 8 double precision registers are available */
+#define SP16_DP8_reg		1
+/* 32 Single precision or 16 double precision registers are available */
+#define SP32_DP16_reg		2
+/* 32 Single precision or 32 double precision registers are available */
+#define SP32_DP32_reg		3
+
 /******************************************************************************
  * fucpr: FUCOP_CTL (FPU and Coprocessor Enable Control Register)
  *****************************************************************************/
diff --git a/arch/nds32/include/asm/cache.h b/arch/nds32/include/asm/cache.h
index 347db48..fc3c41b 100644
--- a/arch/nds32/include/asm/cache.h
+++ b/arch/nds32/include/asm/cache.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __NDS32_CACHE_H__
diff --git a/arch/nds32/include/asm/cache_info.h b/arch/nds32/include/asm/cache_info.h
index 38ec458..e89d807 100644
--- a/arch/nds32/include/asm/cache_info.h
+++ b/arch/nds32/include/asm/cache_info.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 struct cache_info {
diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h
index 8b26198..d9ac7e6 100644
--- a/arch/nds32/include/asm/cacheflush.h
+++ b/arch/nds32/include/asm/cacheflush.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __NDS32_CACHEFLUSH_H__
diff --git a/arch/nds32/include/asm/current.h b/arch/nds32/include/asm/current.h
index b4dcd22..65d3009 100644
--- a/arch/nds32/include/asm/current.h
+++ b/arch/nds32/include/asm/current.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef _ASM_NDS32_CURRENT_H
diff --git a/arch/nds32/include/asm/delay.h b/arch/nds32/include/asm/delay.h
index 519ba97..56ea389 100644
--- a/arch/nds32/include/asm/delay.h
+++ b/arch/nds32/include/asm/delay.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __NDS32_DELAY_H__
diff --git a/arch/nds32/include/asm/elf.h b/arch/nds32/include/asm/elf.h
index f5f9cf7..1c8e56d 100644
--- a/arch/nds32/include/asm/elf.h
+++ b/arch/nds32/include/asm/elf.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __ASMNDS32_ELF_H
@@ -9,14 +9,14 @@
  */
 
 #include <asm/ptrace.h>
+#include <asm/fpu.h>
+#include <linux/elf-em.h>
 
 typedef unsigned long elf_greg_t;
 typedef unsigned long elf_freg_t[3];
 
 extern unsigned int elf_hwcap;
 
-#define EM_NDS32			167
-
 #define R_NDS32_NONE			0
 #define R_NDS32_16_RELA			19
 #define R_NDS32_32_RELA			20
@@ -159,8 +159,18 @@
 
 #endif
 
+
+#if IS_ENABLED(CONFIG_FPU)
+#define FPU_AUX_ENT	NEW_AUX_ENT(AT_FPUCW, FPCSR_INIT)
+#else
+#define FPU_AUX_ENT	NEW_AUX_ENT(AT_IGNORE, 0)
+#endif
+
 #define ARCH_DLINFO						\
 do {								\
+	/* Optional FPU initialization */			\
+	FPU_AUX_ENT;						\
+								\
 	NEW_AUX_ENT(AT_SYSINFO_EHDR,				\
 		    (elf_addr_t)current->mm->context.vdso);	\
 } while (0)
diff --git a/arch/nds32/include/asm/fixmap.h b/arch/nds32/include/asm/fixmap.h
index 0e60e15..5a4bf11 100644
--- a/arch/nds32/include/asm/fixmap.h
+++ b/arch/nds32/include/asm/fixmap.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __ASM_NDS32_FIXMAP_H
diff --git a/arch/nds32/include/asm/fpu.h b/arch/nds32/include/asm/fpu.h
new file mode 100644
index 0000000..8294ed4
--- /dev/null
+++ b/arch/nds32/include/asm/fpu.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2005-2018 Andes Technology Corporation */
+
+#ifndef __ASM_NDS32_FPU_H
+#define __ASM_NDS32_FPU_H
+
+#if IS_ENABLED(CONFIG_FPU)
+#ifndef __ASSEMBLY__
+#include <linux/sched/task_stack.h>
+#include <linux/preempt.h>
+#include <asm/ptrace.h>
+
+extern bool has_fpu;
+
+extern void save_fpu(struct task_struct *__tsk);
+extern void load_fpu(const struct fpu_struct *fpregs);
+extern bool do_fpu_exception(unsigned int subtype, struct pt_regs *regs);
+extern int do_fpuemu(struct pt_regs *regs, struct fpu_struct *fpu);
+
+#define test_tsk_fpu(regs)	(regs->fucop_ctl & FUCOP_CTL_mskCP0EN)
+
+/*
+ * Initially load the FPU with signalling NANS.  This bit pattern
+ * has the property that no matter whether considered as single or as
+ * double precision, it still represents a signalling NAN.
+ */
+
+#define sNAN64    0xFFFFFFFFFFFFFFFFULL
+#define sNAN32    0xFFFFFFFFUL
+
+#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
+/*
+ * Denormalized number is unsupported by nds32 FPU. Hence the operation
+ * is treated as underflow cases when the final result is a denormalized
+ * number. To enhance precision, underflow exception trap should be
+ * enabled by default and kerenl will re-execute it by fpu emulator
+ * when getting underflow exception.
+ */
+#define FPCSR_INIT  (FPCSR_mskUDFE | FPCSR_mskIEXE)
+#else
+#define FPCSR_INIT  0x0UL
+#endif
+
+extern const struct fpu_struct init_fpuregs;
+
+static inline void disable_ptreg_fpu(struct pt_regs *regs)
+{
+	regs->fucop_ctl &= ~FUCOP_CTL_mskCP0EN;
+}
+
+static inline void enable_ptreg_fpu(struct pt_regs *regs)
+{
+	regs->fucop_ctl |= FUCOP_CTL_mskCP0EN;
+}
+
+static inline void enable_fpu(void)
+{
+	unsigned long fucop_ctl;
+
+	fucop_ctl = __nds32__mfsr(NDS32_SR_FUCOP_CTL) | FUCOP_CTL_mskCP0EN;
+	__nds32__mtsr(fucop_ctl, NDS32_SR_FUCOP_CTL);
+	__nds32__isb();
+}
+
+static inline void disable_fpu(void)
+{
+	unsigned long fucop_ctl;
+
+	fucop_ctl = __nds32__mfsr(NDS32_SR_FUCOP_CTL) & ~FUCOP_CTL_mskCP0EN;
+	__nds32__mtsr(fucop_ctl, NDS32_SR_FUCOP_CTL);
+	__nds32__isb();
+}
+
+static inline void lose_fpu(void)
+{
+	preempt_disable();
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+	if (last_task_used_math == current) {
+		last_task_used_math = NULL;
+#else
+	if (test_tsk_fpu(task_pt_regs(current))) {
+#endif
+		save_fpu(current);
+	}
+	disable_ptreg_fpu(task_pt_regs(current));
+	preempt_enable();
+}
+
+static inline void own_fpu(void)
+{
+	preempt_disable();
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+	if (last_task_used_math != current) {
+		if (last_task_used_math != NULL)
+			save_fpu(last_task_used_math);
+		load_fpu(&current->thread.fpu);
+		last_task_used_math = current;
+	}
+#else
+	if (!test_tsk_fpu(task_pt_regs(current))) {
+		load_fpu(&current->thread.fpu);
+	}
+#endif
+	enable_ptreg_fpu(task_pt_regs(current));
+	preempt_enable();
+}
+
+#if !IS_ENABLED(CONFIG_LAZY_FPU)
+static inline void unlazy_fpu(struct task_struct *tsk)
+{
+	preempt_disable();
+	if (test_tsk_fpu(task_pt_regs(tsk)))
+		save_fpu(tsk);
+	preempt_enable();
+}
+#endif /* !CONFIG_LAZY_FPU */
+static inline void clear_fpu(struct pt_regs *regs)
+{
+	preempt_disable();
+	if (test_tsk_fpu(regs))
+		disable_ptreg_fpu(regs);
+	preempt_enable();
+}
+#endif /* CONFIG_FPU */
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_NDS32_FPU_H */
diff --git a/arch/nds32/include/asm/fpuemu.h b/arch/nds32/include/asm/fpuemu.h
new file mode 100644
index 0000000..63e7ef5
--- /dev/null
+++ b/arch/nds32/include/asm/fpuemu.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2005-2018 Andes Technology Corporation */
+
+#ifndef __ARCH_NDS32_FPUEMU_H
+#define __ARCH_NDS32_FPUEMU_H
+
+/*
+ * single precision
+ */
+
+void fadds(void *ft, void *fa, void *fb);
+void fsubs(void *ft, void *fa, void *fb);
+void fmuls(void *ft, void *fa, void *fb);
+void fdivs(void *ft, void *fa, void *fb);
+void fs2d(void *ft, void *fa);
+void fs2si(void *ft, void *fa);
+void fs2si_z(void *ft, void *fa);
+void fs2ui(void *ft, void *fa);
+void fs2ui_z(void *ft, void *fa);
+void fsi2s(void *ft, void *fa);
+void fui2s(void *ft, void *fa);
+void fsqrts(void *ft, void *fa);
+void fnegs(void *ft, void *fa);
+int fcmps(void *ft, void *fa, void *fb, int cop);
+
+/*
+ * double precision
+ */
+void faddd(void *ft, void *fa, void *fb);
+void fsubd(void *ft, void *fa, void *fb);
+void fmuld(void *ft, void *fa, void *fb);
+void fdivd(void *ft, void *fa, void *fb);
+void fsqrtd(void *ft, void *fa);
+void fd2s(void *ft, void *fa);
+void fd2si(void *ft, void *fa);
+void fd2si_z(void *ft, void *fa);
+void fd2ui(void *ft, void *fa);
+void fd2ui_z(void *ft, void *fa);
+void fsi2d(void *ft, void *fa);
+void fui2d(void *ft, void *fa);
+void fnegd(void *ft, void *fa);
+int fcmpd(void *ft, void *fa, void *fb, int cop);
+
+#endif /* __ARCH_NDS32_FPUEMU_H */
diff --git a/arch/nds32/include/asm/futex.h b/arch/nds32/include/asm/futex.h
index cb6cb91..5213c65 100644
--- a/arch/nds32/include/asm/futex.h
+++ b/arch/nds32/include/asm/futex.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __NDS32_FUTEX_H__
@@ -40,7 +40,7 @@
 	int ret = 0;
 	u32 val, tmp, flags;
 
-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+	if (!access_ok(uaddr, sizeof(u32)))
 		return -EFAULT;
 
 	smp_mb();
diff --git a/arch/nds32/include/asm/highmem.h b/arch/nds32/include/asm/highmem.h
index 425d546..b3a82c9 100644
--- a/arch/nds32/include/asm/highmem.h
+++ b/arch/nds32/include/asm/highmem.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef _ASM_HIGHMEM_H
diff --git a/arch/nds32/include/asm/io.h b/arch/nds32/include/asm/io.h
index 71cd226..16f2623 100644
--- a/arch/nds32/include/asm/io.h
+++ b/arch/nds32/include/asm/io.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __ASM_NDS32_IO_H
@@ -55,8 +55,6 @@
 #define __iormb()               rmb()
 #define __iowmb()               wmb()
 
-#define mmiowb()        __asm__ __volatile__ ("msync all" : : : "memory");
-
 /*
  * {read,write}{b,w,l,q}_relaxed() are like the regular version, but
  * are not guaranteed to provide ordering against spinlocks or memory
diff --git a/arch/nds32/include/asm/irqflags.h b/arch/nds32/include/asm/irqflags.h
index 2bfd00f..fb45ec4 100644
--- a/arch/nds32/include/asm/irqflags.h
+++ b/arch/nds32/include/asm/irqflags.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #include <asm/nds32.h>
diff --git a/arch/nds32/include/asm/l2_cache.h b/arch/nds32/include/asm/l2_cache.h
index 37dd5ef..3ea48e1 100644
--- a/arch/nds32/include/asm/l2_cache.h
+++ b/arch/nds32/include/asm/l2_cache.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef L2_CACHE_H
diff --git a/arch/nds32/include/asm/linkage.h b/arch/nds32/include/asm/linkage.h
index e708c8b..a696469 100644
--- a/arch/nds32/include/asm/linkage.h
+++ b/arch/nds32/include/asm/linkage.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __ASM_LINKAGE_H
diff --git a/arch/nds32/include/asm/memory.h b/arch/nds32/include/asm/memory.h
index 60efc72..940d328 100644
--- a/arch/nds32/include/asm/memory.h
+++ b/arch/nds32/include/asm/memory.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __ASM_NDS32_MEMORY_H
@@ -15,14 +15,6 @@
 #define PHYS_OFFSET     (0x0)
 #endif
 
-#ifndef __virt_to_bus
-#define __virt_to_bus	__virt_to_phys
-#endif
-
-#ifndef __bus_to_virt
-#define __bus_to_virt	__phys_to_virt
-#endif
-
 /*
  * TASK_SIZE - the maximum size of a user space task.
  * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area
diff --git a/arch/nds32/include/asm/mmu.h b/arch/nds32/include/asm/mmu.h
index 88b9ee8..89d63af 100644
--- a/arch/nds32/include/asm/mmu.h
+++ b/arch/nds32/include/asm/mmu.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __NDS32_MMU_H
diff --git a/arch/nds32/include/asm/mmu_context.h b/arch/nds32/include/asm/mmu_context.h
index fd7d13c..b8fd3d1 100644
--- a/arch/nds32/include/asm/mmu_context.h
+++ b/arch/nds32/include/asm/mmu_context.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __ASM_NDS32_MMU_CONTEXT_H
diff --git a/arch/nds32/include/asm/module.h b/arch/nds32/include/asm/module.h
index 16cf9c7..a3a08e9 100644
--- a/arch/nds32/include/asm/module.h
+++ b/arch/nds32/include/asm/module.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef _ASM_NDS32_MODULE_H
diff --git a/arch/nds32/include/asm/nds32.h b/arch/nds32/include/asm/nds32.h
index 68c3815..4994f6a 100644
--- a/arch/nds32/include/asm/nds32.h
+++ b/arch/nds32/include/asm/nds32.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef _ASM_NDS32_NDS32_H_
diff --git a/arch/nds32/include/asm/nds32_fpu_inst.h b/arch/nds32/include/asm/nds32_fpu_inst.h
new file mode 100644
index 0000000..1e4b86a
--- /dev/null
+++ b/arch/nds32/include/asm/nds32_fpu_inst.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2005-2018 Andes Technology Corporation */
+
+#ifndef __NDS32_FPU_INST_H
+#define __NDS32_FPU_INST_H
+
+#define cop0_op	0x35
+
+/*
+ * COP0 field of opcodes.
+ */
+#define fs1_op	0x0
+#define fs2_op  0x4
+#define fd1_op  0x8
+#define fd2_op  0xc
+
+/*
+ * FS1 opcode.
+ */
+enum fs1 {
+	fadds_op, fsubs_op, fcpynss_op, fcpyss_op,
+	fmadds_op, fmsubs_op, fcmovns_op, fcmovzs_op,
+	fnmadds_op, fnmsubs_op,
+	fmuls_op = 0xc, fdivs_op,
+	fs1_f2op_op = 0xf
+};
+
+/*
+ * FS1/F2OP opcode.
+ */
+enum fs1_f2 {
+	fs2d_op, fsqrts_op,
+	fui2s_op = 0x8, fsi2s_op = 0xc,
+	fs2ui_op = 0x10, fs2ui_z_op = 0x14,
+	fs2si_op = 0x18, fs2si_z_op = 0x1c
+};
+
+/*
+ * FS2 opcode.
+ */
+enum fs2 {
+	fcmpeqs_op, fcmpeqs_e_op, fcmplts_op, fcmplts_e_op,
+	fcmples_op, fcmples_e_op, fcmpuns_op, fcmpuns_e_op
+};
+
+/*
+ * FD1 opcode.
+ */
+enum fd1 {
+	faddd_op, fsubd_op, fcpynsd_op, fcpysd_op,
+	fmaddd_op, fmsubd_op, fcmovnd_op, fcmovzd_op,
+	fnmaddd_op, fnmsubd_op,
+	fmuld_op = 0xc, fdivd_op, fd1_f2op_op = 0xf
+};
+
+/*
+ * FD1/F2OP opcode.
+ */
+enum fd1_f2 {
+	fd2s_op, fsqrtd_op,
+	fui2d_op = 0x8, fsi2d_op = 0xc,
+	fd2ui_op = 0x10, fd2ui_z_op = 0x14,
+	fd2si_op = 0x18, fd2si_z_op = 0x1c
+};
+
+/*
+ * FD2 opcode.
+ */
+enum fd2 {
+	fcmpeqd_op, fcmpeqd_e_op, fcmpltd_op, fcmpltd_e_op,
+	fcmpled_op, fcmpled_e_op, fcmpund_op, fcmpund_e_op
+};
+
+#define NDS32Insn(x) x
+
+#define I_OPCODE_off			25
+#define NDS32Insn_OPCODE(x)		(NDS32Insn(x) >> I_OPCODE_off)
+
+#define I_OPCODE_offRt			20
+#define I_OPCODE_mskRt			(0x1fUL << I_OPCODE_offRt)
+#define NDS32Insn_OPCODE_Rt(x) \
+	((NDS32Insn(x) & I_OPCODE_mskRt) >> I_OPCODE_offRt)
+
+#define I_OPCODE_offRa			15
+#define I_OPCODE_mskRa			(0x1fUL << I_OPCODE_offRa)
+#define NDS32Insn_OPCODE_Ra(x) \
+	((NDS32Insn(x) & I_OPCODE_mskRa) >> I_OPCODE_offRa)
+
+#define I_OPCODE_offRb			10
+#define I_OPCODE_mskRb			(0x1fUL << I_OPCODE_offRb)
+#define NDS32Insn_OPCODE_Rb(x) \
+	((NDS32Insn(x) & I_OPCODE_mskRb) >> I_OPCODE_offRb)
+
+#define I_OPCODE_offbit1014		10
+#define I_OPCODE_mskbit1014		(0x1fUL << I_OPCODE_offbit1014)
+#define NDS32Insn_OPCODE_BIT1014(x) \
+	((NDS32Insn(x) & I_OPCODE_mskbit1014) >> I_OPCODE_offbit1014)
+
+#define I_OPCODE_offbit69		6
+#define I_OPCODE_mskbit69		(0xfUL << I_OPCODE_offbit69)
+#define NDS32Insn_OPCODE_BIT69(x) \
+	((NDS32Insn(x) & I_OPCODE_mskbit69) >> I_OPCODE_offbit69)
+
+#define I_OPCODE_offCOP0		0
+#define I_OPCODE_mskCOP0		(0x3fUL << I_OPCODE_offCOP0)
+#define NDS32Insn_OPCODE_COP0(x) \
+	((NDS32Insn(x) & I_OPCODE_mskCOP0) >> I_OPCODE_offCOP0)
+
+#endif /* __NDS32_FPU_INST_H */
diff --git a/arch/nds32/include/asm/page.h b/arch/nds32/include/asm/page.h
index 947f049..8feb1fa 100644
--- a/arch/nds32/include/asm/page.h
+++ b/arch/nds32/include/asm/page.h
@@ -1,5 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
- * SPDX-License-Identifier: GPL-2.0
  * Copyright (C) 2005-2017 Andes Technology Corporation
  */
 
diff --git a/arch/nds32/include/asm/perf_event.h b/arch/nds32/include/asm/perf_event.h
new file mode 100644
index 0000000..fcdff02
--- /dev/null
+++ b/arch/nds32/include/asm/perf_event.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2008-2018 Andes Technology Corporation */
+
+#ifndef __ASM_PERF_EVENT_H
+#define __ASM_PERF_EVENT_H
+
+/*
+ * This file is request by Perf,
+ * please refer to tools/perf/design.txt for more details
+ */
+struct pt_regs;
+unsigned long perf_instruction_pointer(struct pt_regs *regs);
+unsigned long perf_misc_flags(struct pt_regs *regs);
+#define perf_misc_flags(regs)   perf_misc_flags(regs)
+
+#endif
diff --git a/arch/nds32/include/asm/pgalloc.h b/arch/nds32/include/asm/pgalloc.h
index 2744886..37125e6 100644
--- a/arch/nds32/include/asm/pgalloc.h
+++ b/arch/nds32/include/asm/pgalloc.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef _ASMNDS32_PGALLOC_H
@@ -9,6 +9,9 @@
 #include <asm/tlbflush.h>
 #include <asm/proc-fns.h>
 
+#define __HAVE_ARCH_PTE_ALLOC_ONE
+#include <asm-generic/pgalloc.h>	/* for pte_{alloc,free}_one */
+
 /*
  * Since we have only two-level page tables, these are trivial
  */
@@ -20,25 +23,11 @@
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 extern void pgd_free(struct mm_struct *mm, pgd_t * pgd);
 
-#define check_pgt_cache()		do { } while (0)
-
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
-					  unsigned long addr)
-{
-	pte_t *pte;
-
-	pte =
-	    (pte_t *) __get_free_page(GFP_KERNEL | __GFP_RETRY_MAYFAIL |
-				      __GFP_ZERO);
-
-	return pte;
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr)
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
 	pgtable_t pte;
 
-	pte = alloc_pages(GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO, 0);
+	pte = __pte_alloc_one(mm, GFP_PGTABLE_USER);
 	if (pte)
 		cpu_dcache_wb_page((unsigned long)page_address(pte));
 
@@ -46,21 +35,6 @@
 }
 
 /*
- * Free one PTE table.
- */
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t * pte)
-{
-	if (pte) {
-		free_page((unsigned long)pte);
-	}
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-	__free_page(pte);
-}
-
-/*
  * Populate the pmdp entry with a pointer to the pte.  This pmd is part
  * of the mm address space.
  *
diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h
index 9f52db9..0588ec9 100644
--- a/arch/nds32/include/asm/pgtable.h
+++ b/arch/nds32/include/asm/pgtable.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef _ASMNDS32_PGTABLE_H
@@ -6,7 +6,7 @@
 
 #define __PAGETABLE_PMD_FOLDED 1
 #include <asm-generic/4level-fixup.h>
-#include <asm-generic/sizes.h>
+#include <linux/sizes.h>
 
 #include <asm/memory.h>
 #include <asm/nds32.h>
@@ -403,8 +403,6 @@
  * into virtual address `from'
  */
 
-#define pgtable_cache_init()       do { } while (0)
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASMNDS32_PGTABLE_H */
diff --git a/arch/nds32/include/asm/pmu.h b/arch/nds32/include/asm/pmu.h
new file mode 100644
index 0000000..e1ac0b0
--- /dev/null
+++ b/arch/nds32/include/asm/pmu.h
@@ -0,0 +1,386 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2008-2018 Andes Technology Corporation */
+
+#ifndef __ASM_PMU_H
+#define __ASM_PMU_H
+
+#include <linux/interrupt.h>
+#include <linux/perf_event.h>
+#include <asm/unistd.h>
+#include <asm/bitfield.h>
+
+/* Has special meaning for perf core implementation */
+#define HW_OP_UNSUPPORTED		0x0
+#define C(_x)				PERF_COUNT_HW_CACHE_##_x
+#define CACHE_OP_UNSUPPORTED		0x0
+
+/* Enough for both software and hardware defined events */
+#define SOFTWARE_EVENT_MASK		0xFF
+
+#define PFM_OFFSET_MAGIC_0		2	/* DO NOT START FROM 0 */
+#define PFM_OFFSET_MAGIC_1		(PFM_OFFSET_MAGIC_0 + 36)
+#define PFM_OFFSET_MAGIC_2		(PFM_OFFSET_MAGIC_1 + 36)
+
+enum { PFMC0, PFMC1, PFMC2, MAX_COUNTERS };
+
+u32 PFM_CTL_OVF[3] = { PFM_CTL_mskOVF0, PFM_CTL_mskOVF1,
+		       PFM_CTL_mskOVF2 };
+u32 PFM_CTL_EN[3] = { PFM_CTL_mskEN0, PFM_CTL_mskEN1,
+		      PFM_CTL_mskEN2 };
+u32 PFM_CTL_OFFSEL[3] = { PFM_CTL_offSEL0, PFM_CTL_offSEL1,
+			  PFM_CTL_offSEL2 };
+u32 PFM_CTL_IE[3] = { PFM_CTL_mskIE0, PFM_CTL_mskIE1, PFM_CTL_mskIE2 };
+u32 PFM_CTL_KS[3] = { PFM_CTL_mskKS0, PFM_CTL_mskKS1, PFM_CTL_mskKS2 };
+u32 PFM_CTL_KU[3] = { PFM_CTL_mskKU0, PFM_CTL_mskKU1, PFM_CTL_mskKU2 };
+u32 PFM_CTL_SEL[3] = { PFM_CTL_mskSEL0, PFM_CTL_mskSEL1, PFM_CTL_mskSEL2 };
+/*
+ * Perf Events' indices
+ */
+#define NDS32_IDX_CYCLE_COUNTER			0
+#define NDS32_IDX_COUNTER0			1
+#define NDS32_IDX_COUNTER1			2
+
+/* The events for a given PMU register set. */
+struct pmu_hw_events {
+	/*
+	 * The events that are active on the PMU for the given index.
+	 */
+	struct perf_event *events[MAX_COUNTERS];
+
+	/*
+	 * A 1 bit for an index indicates that the counter is being used for
+	 * an event. A 0 means that the counter can be used.
+	 */
+	unsigned long used_mask[BITS_TO_LONGS(MAX_COUNTERS)];
+
+	/*
+	 * Hardware lock to serialize accesses to PMU registers. Needed for the
+	 * read/modify/write sequences.
+	 */
+	raw_spinlock_t pmu_lock;
+};
+
+struct nds32_pmu {
+	struct pmu pmu;
+	cpumask_t active_irqs;
+	char *name;
+	 irqreturn_t (*handle_irq)(int irq_num, void *dev);
+	void (*enable)(struct perf_event *event);
+	void (*disable)(struct perf_event *event);
+	int (*get_event_idx)(struct pmu_hw_events *hw_events,
+			     struct perf_event *event);
+	int (*set_event_filter)(struct hw_perf_event *evt,
+				struct perf_event_attr *attr);
+	u32 (*read_counter)(struct perf_event *event);
+	void (*write_counter)(struct perf_event *event, u32 val);
+	void (*start)(struct nds32_pmu *nds32_pmu);
+	void (*stop)(struct nds32_pmu *nds32_pmu);
+	void (*reset)(void *data);
+	int (*request_irq)(struct nds32_pmu *nds32_pmu, irq_handler_t handler);
+	void (*free_irq)(struct nds32_pmu *nds32_pmu);
+	int (*map_event)(struct perf_event *event);
+	int num_events;
+	atomic_t active_events;
+	u64 max_period;
+	struct platform_device *plat_device;
+	struct pmu_hw_events *(*get_hw_events)(void);
+};
+
+#define to_nds32_pmu(p)			(container_of(p, struct nds32_pmu, pmu))
+
+int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type);
+
+u64 nds32_pmu_event_update(struct perf_event *event);
+
+int nds32_pmu_event_set_period(struct perf_event *event);
+
+/*
+ * Common NDS32 SPAv3 event types
+ *
+ * Note: An implementation may not be able to count all of these events
+ * but the encodings are considered to be `reserved' in the case that
+ * they are not available.
+ *
+ * SEL_TOTAL_CYCLES will add an offset is due to ZERO is defined as
+ * NOT_SUPPORTED EVENT mapping in generic perf code.
+ * You will need to deal it in the event writing implementation.
+ */
+enum spav3_counter_0_perf_types {
+	SPAV3_0_SEL_BASE = -1 + PFM_OFFSET_MAGIC_0,	/* counting symbol */
+	SPAV3_0_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_0,
+	SPAV3_0_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_0,
+	SPAV3_0_SEL_LAST	/* counting symbol */
+};
+
+enum spav3_counter_1_perf_types {
+	SPAV3_1_SEL_BASE = -1 + PFM_OFFSET_MAGIC_1,	/* counting symbol */
+	SPAV3_1_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_CONDITIONAL_BRANCH = 2 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_TAKEN_CONDITIONAL_BRANCH = 3 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_PREFETCH_INSTRUCTION = 4 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_RET_INST = 5 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_JR_INST = 6 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_JAL_JRAL_INST = 7 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_NOP_INST = 8 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_SCW_INST = 9 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_ISB_DSB_INST = 10 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_CCTL_INST = 11 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_TAKEN_INTERRUPTS = 12 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_LOADS_COMPLETED = 13 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_UITLB_ACCESS = 14 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_UDTLB_ACCESS = 15 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_MTLB_ACCESS = 16 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_CODE_CACHE_ACCESS = 17 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_DATA_DEPENDENCY_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_DATA_CACHE_MISS_STALL_CYCLES = 19 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_DATA_CACHE_ACCESS = 20 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS = 22 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS = 23 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_ILM_ACCESS = 24 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_LSU_BIU_CYCLES = 25 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_HPTWK_BIU_CYCLES = 26 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_DMA_BIU_CYCLES = 27 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_CODE_CACHE_FILL_BIU_CYCLES = 28 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_LEGAL_UNALIGN_DCACHE_ACCESS = 29 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_PUSH25 = 30 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_SYSCALLS_INST = 31 + PFM_OFFSET_MAGIC_1,
+	SPAV3_1_SEL_LAST	/* counting symbol */
+};
+
+enum spav3_counter_2_perf_types {
+	SPAV3_2_SEL_BASE = -1 + PFM_OFFSET_MAGIC_2,	/* counting symbol */
+	SPAV3_2_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_2,
+	SPAV3_2_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_2,
+	SPAV3_2_SEL_CONDITIONAL_BRANCH_MISPREDICT = 2 + PFM_OFFSET_MAGIC_2,
+	SPAV3_2_SEL_TAKEN_CONDITIONAL_BRANCH_MISPREDICT =
+	    3 + PFM_OFFSET_MAGIC_2,
+	SPAV3_2_SEL_PREFETCH_INSTRUCTION_CACHE_HIT = 4 + PFM_OFFSET_MAGIC_2,
+	SPAV3_1_SEL_RET_MISPREDICT = 5 + PFM_OFFSET_MAGIC_2,
+	SPAV3_1_SEL_IMMEDIATE_J_INST = 6 + PFM_OFFSET_MAGIC_2,
+	SPAV3_1_SEL_MULTIPLY_INST = 7 + PFM_OFFSET_MAGIC_2,
+	SPAV3_1_SEL_16_BIT_INST = 8 + PFM_OFFSET_MAGIC_2,
+	SPAV3_1_SEL_FAILED_SCW_INST = 9 + PFM_OFFSET_MAGIC_2,
+	SPAV3_1_SEL_LD_AFTER_ST_CONFLICT_REPLAYS = 10 + PFM_OFFSET_MAGIC_2,
+	SPAV3_1_SEL_TAKEN_EXCEPTIONS = 12 + PFM_OFFSET_MAGIC_2,
+	SPAV3_1_SEL_STORES_COMPLETED = 13 + PFM_OFFSET_MAGIC_2,
+	SPAV3_2_SEL_UITLB_MISS = 14 + PFM_OFFSET_MAGIC_2,
+	SPAV3_2_SEL_UDTLB_MISS = 15 + PFM_OFFSET_MAGIC_2,
+	SPAV3_2_SEL_MTLB_MISS = 16 + PFM_OFFSET_MAGIC_2,
+	SPAV3_2_SEL_CODE_CACHE_MISS = 17 + PFM_OFFSET_MAGIC_2,
+	SPAV3_1_SEL_EMPTY_INST_QUEUE_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_2,
+	SPAV3_1_SEL_DATA_WRITE_BACK = 19 + PFM_OFFSET_MAGIC_2,
+	SPAV3_2_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_2,
+	SPAV3_2_SEL_LOAD_DATA_CACHE_MISS = 22 + PFM_OFFSET_MAGIC_2,
+	SPAV3_2_SEL_STORE_DATA_CACHE_MISS = 23 + PFM_OFFSET_MAGIC_2,
+	SPAV3_1_SEL_DLM_ACCESS = 24 + PFM_OFFSET_MAGIC_2,
+	SPAV3_1_SEL_LSU_BIU_REQUEST = 25 + PFM_OFFSET_MAGIC_2,
+	SPAV3_1_SEL_HPTWK_BIU_REQUEST = 26 + PFM_OFFSET_MAGIC_2,
+	SPAV3_1_SEL_DMA_BIU_REQUEST = 27 + PFM_OFFSET_MAGIC_2,
+	SPAV3_1_SEL_CODE_CACHE_FILL_BIU_REQUEST = 28 + PFM_OFFSET_MAGIC_2,
+	SPAV3_1_SEL_EXTERNAL_EVENTS = 29 + PFM_OFFSET_MAGIC_2,
+	SPAV3_1_SEL_POP25 = 30 + PFM_OFFSET_MAGIC_2,
+	SPAV3_2_SEL_LAST	/* counting symbol */
+};
+
+/* Get converted event counter index */
+static inline int get_converted_event_idx(unsigned long event)
+{
+	int idx;
+
+	if ((event) > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST) {
+		idx = 0;
+	} else if ((event) > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST) {
+		idx = 1;
+	} else if ((event) > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST) {
+		idx = 2;
+	} else {
+		pr_err("GET_CONVERTED_EVENT_IDX PFM counter range error\n");
+		return -EPERM;
+	}
+
+	return idx;
+}
+
+/* Get converted hardware event number */
+static inline u32 get_converted_evet_hw_num(u32 event)
+{
+	if (event > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST)
+		event -= PFM_OFFSET_MAGIC_0;
+	else if (event > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST)
+		event -= PFM_OFFSET_MAGIC_1;
+	else if (event > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST)
+		event -= PFM_OFFSET_MAGIC_2;
+	else if (event != 0)
+		pr_err("GET_CONVERTED_EVENT_HW_NUM PFM counter range error\n");
+
+	return event;
+}
+
+/*
+ * NDS32 HW events mapping
+ *
+ * The hardware events that we support. We do support cache operations but
+ * we have harvard caches and no way to combine instruction and data
+ * accesses/misses in hardware.
+ */
+static const unsigned int nds32_pfm_perf_map[PERF_COUNT_HW_MAX] = {
+	[PERF_COUNT_HW_CPU_CYCLES] = SPAV3_0_SEL_TOTAL_CYCLES,
+	[PERF_COUNT_HW_INSTRUCTIONS] = SPAV3_1_SEL_COMPLETED_INSTRUCTION,
+	[PERF_COUNT_HW_CACHE_REFERENCES] = SPAV3_1_SEL_DATA_CACHE_ACCESS,
+	[PERF_COUNT_HW_CACHE_MISSES] = SPAV3_2_SEL_DATA_CACHE_MISS,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_BRANCH_MISSES] = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
+	[PERF_COUNT_HW_REF_CPU_CYCLES] = HW_OP_UNSUPPORTED
+};
+
+static const unsigned int nds32_pfm_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
+	[PERF_COUNT_HW_CACHE_OP_MAX]
+	[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+	[C(L1D)] = {
+		    [C(OP_READ)] = {
+				    [C(RESULT_ACCESS)] =
+				    SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS,
+				    [C(RESULT_MISS)] =
+				    SPAV3_2_SEL_LOAD_DATA_CACHE_MISS,
+				    },
+		    [C(OP_WRITE)] = {
+				     [C(RESULT_ACCESS)] =
+				     SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS,
+				     [C(RESULT_MISS)] =
+				     SPAV3_2_SEL_STORE_DATA_CACHE_MISS,
+				     },
+		    [C(OP_PREFETCH)] = {
+					[C(RESULT_ACCESS)] =
+						CACHE_OP_UNSUPPORTED,
+					[C(RESULT_MISS)] =
+						CACHE_OP_UNSUPPORTED,
+					},
+		    },
+	[C(L1I)] = {
+		    [C(OP_READ)] = {
+				    [C(RESULT_ACCESS)] =
+				    SPAV3_1_SEL_CODE_CACHE_ACCESS,
+				    [C(RESULT_MISS)] =
+				    SPAV3_2_SEL_CODE_CACHE_MISS,
+				    },
+		    [C(OP_WRITE)] = {
+				     [C(RESULT_ACCESS)] =
+				     SPAV3_1_SEL_CODE_CACHE_ACCESS,
+				     [C(RESULT_MISS)] =
+				     SPAV3_2_SEL_CODE_CACHE_MISS,
+				     },
+		    [C(OP_PREFETCH)] = {
+					[C(RESULT_ACCESS)] =
+					CACHE_OP_UNSUPPORTED,
+					[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+					},
+		    },
+	/* TODO: L2CC */
+	[C(LL)] = {
+		   [C(OP_READ)] = {
+				   [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+				   [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+				   },
+		   [C(OP_WRITE)] = {
+				    [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+				    [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+				    },
+		   [C(OP_PREFETCH)] = {
+				       [C(RESULT_ACCESS)] =
+				       CACHE_OP_UNSUPPORTED,
+				       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+				       },
+		   },
+	/* NDS32 PMU does not support TLB read/write hit/miss,
+	 * However, it can count access/miss, which mixed with read and write.
+	 * Therefore, only READ counter will use it.
+	 * We do as possible as we can.
+	 */
+	[C(DTLB)] = {
+		     [C(OP_READ)] = {
+				     [C(RESULT_ACCESS)] =
+					SPAV3_1_SEL_UDTLB_ACCESS,
+				     [C(RESULT_MISS)] =
+					SPAV3_2_SEL_UDTLB_MISS,
+				     },
+		     [C(OP_WRITE)] = {
+				      [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+				      [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+				      },
+		     [C(OP_PREFETCH)] = {
+					 [C(RESULT_ACCESS)] =
+					 CACHE_OP_UNSUPPORTED,
+					 [C(RESULT_MISS)] =
+					 CACHE_OP_UNSUPPORTED,
+					 },
+		     },
+	[C(ITLB)] = {
+		     [C(OP_READ)] = {
+				     [C(RESULT_ACCESS)] =
+					SPAV3_1_SEL_UITLB_ACCESS,
+				     [C(RESULT_MISS)] =
+					SPAV3_2_SEL_UITLB_MISS,
+				     },
+		     [C(OP_WRITE)] = {
+				      [C(RESULT_ACCESS)] =
+					CACHE_OP_UNSUPPORTED,
+				      [C(RESULT_MISS)] =
+					CACHE_OP_UNSUPPORTED,
+				      },
+		     [C(OP_PREFETCH)] = {
+					 [C(RESULT_ACCESS)] =
+						CACHE_OP_UNSUPPORTED,
+					 [C(RESULT_MISS)] =
+						CACHE_OP_UNSUPPORTED,
+					 },
+		     },
+	[C(BPU)] = {		/* What is BPU? */
+		    [C(OP_READ)] = {
+				    [C(RESULT_ACCESS)] =
+					CACHE_OP_UNSUPPORTED,
+				    [C(RESULT_MISS)] =
+					CACHE_OP_UNSUPPORTED,
+				    },
+		    [C(OP_WRITE)] = {
+				     [C(RESULT_ACCESS)] =
+					CACHE_OP_UNSUPPORTED,
+				     [C(RESULT_MISS)] =
+					CACHE_OP_UNSUPPORTED,
+				     },
+		    [C(OP_PREFETCH)] = {
+					[C(RESULT_ACCESS)] =
+						CACHE_OP_UNSUPPORTED,
+					[C(RESULT_MISS)] =
+						CACHE_OP_UNSUPPORTED,
+					},
+		    },
+	[C(NODE)] = {		/* What is NODE? */
+		     [C(OP_READ)] = {
+				     [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+				     [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+				     },
+		     [C(OP_WRITE)] = {
+				      [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+				      [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+				      },
+		     [C(OP_PREFETCH)] = {
+					 [C(RESULT_ACCESS)] =
+						CACHE_OP_UNSUPPORTED,
+					 [C(RESULT_MISS)] =
+						CACHE_OP_UNSUPPORTED,
+					 },
+		     },
+};
+
+int nds32_pmu_map_event(struct perf_event *event,
+			const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
+			const unsigned int (*cache_map)[PERF_COUNT_HW_CACHE_MAX]
+			[PERF_COUNT_HW_CACHE_OP_MAX]
+			[PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask);
+
+#endif /* __ASM_PMU_H */
diff --git a/arch/nds32/include/asm/proc-fns.h b/arch/nds32/include/asm/proc-fns.h
index bedc4f5..27c617f 100644
--- a/arch/nds32/include/asm/proc-fns.h
+++ b/arch/nds32/include/asm/proc-fns.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __NDS32_PROCFNS_H__
diff --git a/arch/nds32/include/asm/processor.h b/arch/nds32/include/asm/processor.h
index 9c83caf..b82369c 100644
--- a/arch/nds32/include/asm/processor.h
+++ b/arch/nds32/include/asm/processor.h
@@ -1,15 +1,9 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __ASM_NDS32_PROCESSOR_H
 #define __ASM_NDS32_PROCESSOR_H
 
-/*
- * Default implementation of macro that returns current
- * instruction pointer ("program counter").
- */
-#define current_text_addr() ({ __label__ _l; _l: &&_l;})
-
 #ifdef __KERNEL__
 
 #include <asm/ptrace.h>
@@ -41,6 +35,8 @@
 	unsigned long address;
 	unsigned long trap_no;
 	unsigned long error_code;
+
+	struct fpu_struct fpu;
 };
 
 #define INIT_THREAD  {	}
@@ -78,6 +74,11 @@
 
 /* Free all resources held by a thread. */
 #define release_thread(thread) do { } while(0)
+#if IS_ENABLED(CONFIG_FPU)
+#if !IS_ENABLED(CONFIG_UNLAZU_FPU)
+extern struct task_struct *last_task_used_math;
+#endif
+#endif
 
 /* Prepare to copy thread state - unlazy all lazy status */
 #define prepare_to_copy(tsk)	do { } while (0)
diff --git a/arch/nds32/include/asm/ptrace.h b/arch/nds32/include/asm/ptrace.h
index c453883..919ee22 100644
--- a/arch/nds32/include/asm/ptrace.h
+++ b/arch/nds32/include/asm/ptrace.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __ASM_NDS32_PTRACE_H
diff --git a/arch/nds32/include/asm/sfp-machine.h b/arch/nds32/include/asm/sfp-machine.h
new file mode 100644
index 0000000..b1a5caa
--- /dev/null
+++ b/arch/nds32/include/asm/sfp-machine.h
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2005-2018 Andes Technology Corporation */
+
+#include <asm/bitfield.h>
+
+#define _FP_W_TYPE_SIZE		32
+#define _FP_W_TYPE		unsigned long
+#define _FP_WS_TYPE		signed long
+#define _FP_I_TYPE		long
+
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
+#define _FP_MUL_MEAT_S(R, X, Y)				\
+	_FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S, R, X, Y, umul_ppmm)
+#define _FP_MUL_MEAT_D(R, X, Y)				\
+	_FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D, R, X, Y, umul_ppmm)
+#define _FP_MUL_MEAT_Q(R, X, Y)				\
+	_FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q, R, X, Y, umul_ppmm)
+
+#define _FP_MUL_MEAT_DW_S(R, X, Y)			\
+	_FP_MUL_MEAT_DW_1_wide(_FP_WFRACBITS_S, R, X, Y, umul_ppmm)
+#define _FP_MUL_MEAT_DW_D(R, X, Y)			\
+	_FP_MUL_MEAT_DW_2_wide(_FP_WFRACBITS_D, R, X, Y, umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R, X, Y)	_FP_DIV_MEAT_1_udiv_norm(S, R, X, Y)
+#define _FP_DIV_MEAT_D(R, X, Y)	_FP_DIV_MEAT_2_udiv(D, R, X, Y)
+
+#define _FP_NANFRAC_S		((_FP_QNANBIT_S << 1) - 1)
+#define _FP_NANFRAC_D		((_FP_QNANBIT_D << 1) - 1), -1
+#define _FP_NANFRAC_Q		((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1
+#define _FP_NANSIGN_S		0
+#define _FP_NANSIGN_D		0
+#define _FP_NANSIGN_Q		0
+
+#define _FP_KEEPNANFRACP 1
+#define _FP_QNANNEGATEDP 0
+
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP)			\
+do {								\
+	if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs)	\
+	  && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) { \
+		R##_s = Y##_s;					\
+		_FP_FRAC_COPY_##wc(R, Y);			\
+	} else {						\
+		R##_s = X##_s;					\
+		_FP_FRAC_COPY_##wc(R, X);			\
+	}							\
+	R##_c = FP_CLS_NAN;					\
+} while (0)
+
+#define __FPU_FPCSR	(current->thread.fpu.fpcsr)
+
+/* Obtain the current rounding mode. */
+#define FP_ROUNDMODE                    \
+({                                      \
+	__FPU_FPCSR & FPCSR_mskRM;      \
+})
+
+#define FP_RND_NEAREST		0
+#define FP_RND_PINF		1
+#define FP_RND_MINF		2
+#define FP_RND_ZERO		3
+
+#define FP_EX_INVALID		FPCSR_mskIVO
+#define FP_EX_DIVZERO		FPCSR_mskDBZ
+#define FP_EX_OVERFLOW		FPCSR_mskOVF
+#define FP_EX_UNDERFLOW		FPCSR_mskUDF
+#define FP_EX_INEXACT		FPCSR_mskIEX
+
+#define SF_CEQ	2
+#define SF_CLT	1
+#define SF_CGT	3
+#define SF_CUN	4
+
+#include <asm/byteorder.h>
+
+#ifdef __BIG_ENDIAN__
+#define __BYTE_ORDER __BIG_ENDIAN
+#define __LITTLE_ENDIAN 0
+#else
+#define __BYTE_ORDER __LITTLE_ENDIAN
+#define __BIG_ENDIAN 0
+#endif
+
+#define abort() do { } while (0)
+#define umul_ppmm(w1, w0, u, v)						\
+do {									\
+	UWtype __x0, __x1, __x2, __x3;                                  \
+	UHWtype __ul, __vl, __uh, __vh;                                 \
+									\
+	__ul = __ll_lowpart(u);						\
+	__uh = __ll_highpart(u);					\
+	__vl = __ll_lowpart(v);						\
+	__vh = __ll_highpart(v);					\
+									\
+	__x0 = (UWtype) __ul * __vl;                                    \
+	__x1 = (UWtype) __ul * __vh;                                    \
+	__x2 = (UWtype) __uh * __vl;                                    \
+	__x3 = (UWtype) __uh * __vh;                                    \
+									\
+	__x1 += __ll_highpart(__x0);					\
+	__x1 += __x2;							\
+	if (__x1 < __x2)						\
+		__x3 += __ll_B;						\
+									\
+	(w1) = __x3 + __ll_highpart(__x1);				\
+	(w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0);	\
+} while (0)
+
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+do { \
+	UWtype __x; \
+	__x = (al) + (bl); \
+	(sh) = (ah) + (bh) + (__x < (al)); \
+	(sl) = __x; \
+} while (0)
+
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+do { \
+	UWtype __x; \
+	__x = (al) - (bl); \
+	(sh) = (ah) - (bh) - (__x > (al)); \
+	(sl) = __x; \
+} while (0)
+
+#define udiv_qrnnd(q, r, n1, n0, d)				\
+do {								\
+	UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m;		\
+	__d1 = __ll_highpart(d);				\
+	__d0 = __ll_lowpart(d);					\
+								\
+	__r1 = (n1) % __d1;					\
+	__q1 = (n1) / __d1;					\
+	__m = (UWtype) __q1 * __d0;				\
+	__r1 = __r1 * __ll_B | __ll_highpart(n0);		\
+	if (__r1 < __m)	{					\
+		__q1--, __r1 += (d);				\
+		if (__r1 >= (d))				\
+			if (__r1 < __m)				\
+				__q1--, __r1 += (d);		\
+	}							\
+	__r1 -= __m;						\
+	__r0 = __r1 % __d1;					\
+	__q0 = __r1 / __d1;					\
+	__m = (UWtype) __q0 * __d0;				\
+	__r0 = __r0 * __ll_B | __ll_lowpart(n0);		\
+	if (__r0 < __m)	{					\
+		__q0--, __r0 += (d);				\
+		if (__r0 >= (d))				\
+			if (__r0 < __m)				\
+				__q0--, __r0 += (d);		\
+	}							\
+	__r0 -= __m;						\
+	(q) = (UWtype) __q1 * __ll_B | __q0;			\
+	(r) = __r0;						\
+} while (0)
diff --git a/arch/nds32/include/asm/shmparam.h b/arch/nds32/include/asm/shmparam.h
index fd1cff6..3aeee94 100644
--- a/arch/nds32/include/asm/shmparam.h
+++ b/arch/nds32/include/asm/shmparam.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef _ASMNDS32_SHMPARAM_H
diff --git a/arch/nds32/include/asm/stacktrace.h b/arch/nds32/include/asm/stacktrace.h
new file mode 100644
index 0000000..6bf7c77
--- /dev/null
+++ b/arch/nds32/include/asm/stacktrace.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2008-2018 Andes Technology Corporation */
+
+#ifndef __ASM_STACKTRACE_H
+#define __ASM_STACKTRACE_H
+
+/* Kernel callchain */
+struct stackframe {
+	unsigned long fp;
+	unsigned long sp;
+	unsigned long lp;
+};
+
+/*
+ * struct frame_tail: User callchain
+ * IMPORTANT:
+ * This struct is used for call-stack walking,
+ * the order and types matters.
+ * Do not use array, it only stores sizeof(pointer)
+ *
+ * The details can refer to arch/arm/kernel/perf_event.c
+ */
+struct frame_tail {
+	unsigned long stack_fp;
+	unsigned long stack_lp;
+};
+
+/* For User callchain with optimize for size */
+struct frame_tail_opt_size {
+	unsigned long stack_r6;
+	unsigned long stack_fp;
+	unsigned long stack_gp;
+	unsigned long stack_lp;
+};
+
+extern void
+get_real_ret_addr(unsigned long *addr, struct task_struct *tsk, int *graph);
+
+#endif /* __ASM_STACKTRACE_H */
diff --git a/arch/nds32/include/asm/string.h b/arch/nds32/include/asm/string.h
index 179272c..cae8fe1 100644
--- a/arch/nds32/include/asm/string.h
+++ b/arch/nds32/include/asm/string.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __ASM_NDS32_STRING_H
diff --git a/arch/nds32/include/asm/suspend.h b/arch/nds32/include/asm/suspend.h
new file mode 100644
index 0000000..6ed2418
--- /dev/null
+++ b/arch/nds32/include/asm/suspend.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (C) 2008-2017 Andes Technology Corporation
+
+#ifndef __ASM_NDS32_SUSPEND_H
+#define __ASM_NDS32_SUSPEND_H
+
+extern void suspend2ram(void);
+extern void cpu_resume(void);
+extern unsigned long wake_mask;
+
+#endif
diff --git a/arch/nds32/include/asm/swab.h b/arch/nds32/include/asm/swab.h
index e01a755..362a466 100644
--- a/arch/nds32/include/asm/swab.h
+++ b/arch/nds32/include/asm/swab.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __NDS32_SWAB_H__
diff --git a/arch/nds32/include/asm/syscall.h b/arch/nds32/include/asm/syscall.h
index f7e5e86..7b5180d 100644
--- a/arch/nds32/include/asm/syscall.h
+++ b/arch/nds32/include/asm/syscall.h
@@ -1,10 +1,11 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2008-2009 Red Hat, Inc.  All rights reserved.
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef _ASM_NDS32_SYSCALL_H
 #define _ASM_NDS32_SYSCALL_H	1
 
+#include <uapi/linux/audit.h>
 #include <linux/err.h>
 struct task_struct;
 struct pt_regs;
@@ -25,7 +26,8 @@
  *
  * It's only valid to call this when @task is known to be blocked.
  */
-int syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
+static inline int
+syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
 {
 	return regs->syscallno;
 }
@@ -46,7 +48,8 @@
  * system call instruction.  This may not be the same as what the
  * register state looked like at system call entry tracing.
  */
-void syscall_rollback(struct task_struct *task, struct pt_regs *regs)
+static inline void
+syscall_rollback(struct task_struct *task, struct pt_regs *regs)
 {
 	regs->uregs[0] = regs->orig_r0;
 }
@@ -61,7 +64,8 @@
  * It's only valid to call this when @task is stopped for tracing on exit
  * from a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
  */
-long syscall_get_error(struct task_struct *task, struct pt_regs *regs)
+static inline long
+syscall_get_error(struct task_struct *task, struct pt_regs *regs)
 {
 	unsigned long error = regs->uregs[0];
 	return IS_ERR_VALUE(error) ? error : 0;
@@ -78,7 +82,8 @@
  * It's only valid to call this when @task is stopped for tracing on exit
  * from a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
  */
-long syscall_get_return_value(struct task_struct *task, struct pt_regs *regs)
+static inline long
+syscall_get_return_value(struct task_struct *task, struct pt_regs *regs)
 {
 	return regs->uregs[0];
 }
@@ -98,8 +103,9 @@
  * It's only valid to call this when @task is stopped for tracing on exit
  * from a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
  */
-void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
-			      int error, long val)
+static inline void
+syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
+			 int error, long val)
 {
 	regs->uregs[0] = (long)error ? error : val;
 }
@@ -108,81 +114,51 @@
  * syscall_get_arguments - extract system call parameter values
  * @task:	task of interest, must be blocked
  * @regs:	task_pt_regs() of @task
- * @i:		argument index [0,5]
- * @n:		number of arguments; n+i must be [1,6].
  * @args:	array filled with argument values
  *
- * Fetches @n arguments to the system call starting with the @i'th argument
- * (from 0 through 5).  Argument @i is stored in @args[0], and so on.
- * An arch inline version is probably optimal when @i and @n are constants.
+ * Fetches 6 arguments to the system call (from 0 through 5). The first
+ * argument is stored in @args[0], and so on.
  *
  * It's only valid to call this when @task is stopped for tracing on
  * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- * It's invalid to call this with @i + @n > 6; we only support system calls
- * taking up to 6 arguments.
  */
 #define SYSCALL_MAX_ARGS 6
-void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
-			   unsigned int i, unsigned int n, unsigned long *args)
+static inline void
+syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
+		      unsigned long *args)
 {
-	if (n == 0)
-		return;
-	if (i + n > SYSCALL_MAX_ARGS) {
-		unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
-		unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
-		pr_warning("%s called with max args %d, handling only %d\n",
-			   __func__, i + n, SYSCALL_MAX_ARGS);
-		memset(args_bad, 0, n_bad * sizeof(args[0]));
-		memset(args_bad, 0, n_bad * sizeof(args[0]));
-	}
-
-	if (i == 0) {
-		args[0] = regs->orig_r0;
-		args++;
-		i++;
-		n--;
-	}
-
-	memcpy(args, &regs->uregs[0] + i, n * sizeof(args[0]));
+	args[0] = regs->orig_r0;
+	args++;
+	memcpy(args, &regs->uregs[0] + 1, 5 * sizeof(args[0]));
 }
 
 /**
  * syscall_set_arguments - change system call parameter value
  * @task:	task of interest, must be in system call entry tracing
  * @regs:	task_pt_regs() of @task
- * @i:		argument index [0,5]
- * @n:		number of arguments; n+i must be [1,6].
  * @args:	array of argument values to store
  *
- * Changes @n arguments to the system call starting with the @i'th argument.
- * Argument @i gets value @args[0], and so on.
- * An arch inline version is probably optimal when @i and @n are constants.
+ * Changes 6 arguments to the system call. The first argument gets value
+ * @args[0], and so on.
  *
  * It's only valid to call this when @task is stopped for tracing on
  * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- * It's invalid to call this with @i + @n > 6; we only support system calls
- * taking up to 6 arguments.
  */
-void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
-			   unsigned int i, unsigned int n,
-			   const unsigned long *args)
+static inline void
+syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
+		      const unsigned long *args)
 {
-	if (n == 0)
-		return;
+	regs->orig_r0 = args[0];
+	args++;
 
-	if (i + n > SYSCALL_MAX_ARGS) {
-		pr_warn("%s called with max args %d, handling only %d\n",
-			__func__, i + n, SYSCALL_MAX_ARGS);
-		n = SYSCALL_MAX_ARGS - i;
-	}
-
-	if (i == 0) {
-		regs->orig_r0 = args[0];
-		args++;
-		i++;
-		n--;
-	}
-
-	memcpy(&regs->uregs[0] + i, args, n * sizeof(args[0]));
+	memcpy(&regs->uregs[0] + 1, args, 5 * sizeof(args[0]));
 }
+
+static inline int
+syscall_get_arch(struct task_struct *task)
+{
+	return IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)
+		? AUDIT_ARCH_NDS32BE : AUDIT_ARCH_NDS32;
+}
+
 #endif /* _ASM_NDS32_SYSCALL_H */
diff --git a/arch/nds32/include/asm/syscalls.h b/arch/nds32/include/asm/syscalls.h
index 78778ec..4e72160 100644
--- a/arch/nds32/include/asm/syscalls.h
+++ b/arch/nds32/include/asm/syscalls.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __ASM_NDS32_SYSCALLS_H
@@ -7,6 +7,7 @@
 asmlinkage long sys_cacheflush(unsigned long addr, unsigned long len, unsigned int op);
 asmlinkage long sys_fadvise64_64_wrapper(int fd, int advice, loff_t offset, loff_t len);
 asmlinkage long sys_rt_sigreturn_wrapper(void);
+asmlinkage long sys_fp_udfiex_crtl(int cmd, int act);
 
 #include <asm-generic/syscalls.h>
 
diff --git a/arch/nds32/include/asm/thread_info.h b/arch/nds32/include/asm/thread_info.h
index bff741f..c135111 100644
--- a/arch/nds32/include/asm/thread_info.h
+++ b/arch/nds32/include/asm/thread_info.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __ASM_NDS32_THREAD_INFO_H
@@ -42,7 +42,6 @@
  *  TIF_SIGPENDING	- signal pending
  *  TIF_NEED_RESCHED	- rescheduling necessary
  *  TIF_NOTIFY_RESUME	- callback before returning to user
- *  TIF_USEDFPU		- FPU was used by this task this quantum (SMP)
  *  TIF_POLLING_NRFLAG	- true if poll_idle() is polling TIF_NEED_RESCHED
  */
 #define TIF_SIGPENDING		1
@@ -50,7 +49,6 @@
 #define TIF_SINGLESTEP		3
 #define TIF_NOTIFY_RESUME	4	/* callback before returning to user */
 #define TIF_SYSCALL_TRACE	8
-#define TIF_USEDFPU             16
 #define TIF_POLLING_NRFLAG	17
 #define TIF_MEMDIE		18
 #define TIF_FREEZE		19
diff --git a/arch/nds32/include/asm/tlb.h b/arch/nds32/include/asm/tlb.h
index b35ae5e..a8aff1c 100644
--- a/arch/nds32/include/asm/tlb.h
+++ b/arch/nds32/include/asm/tlb.h
@@ -1,25 +1,9 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __ASMNDS32_TLB_H
 #define __ASMNDS32_TLB_H
 
-#define tlb_start_vma(tlb,vma)						\
-	do {								\
-		if (!tlb->fullmm)					\
-			flush_cache_range(vma, vma->vm_start, vma->vm_end); \
-	} while (0)
-
-#define tlb_end_vma(tlb,vma)				\
-	do { 						\
-		if(!tlb->fullmm)			\
-			flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
-	} while (0)
-
-#define __tlb_remove_tlb_entry(tlb, pte, addr) do { } while (0)
-
-#define tlb_flush(tlb)	flush_tlb_mm((tlb)->mm)
-
 #include <asm-generic/tlb.h>
 
 #define __pte_free_tlb(tlb, pte, addr)	pte_free((tlb)->mm, pte)
diff --git a/arch/nds32/include/asm/tlbflush.h b/arch/nds32/include/asm/tlbflush.h
index 9b411f4..9715536 100644
--- a/arch/nds32/include/asm/tlbflush.h
+++ b/arch/nds32/include/asm/tlbflush.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef _ASMNDS32_TLBFLUSH_H
@@ -42,6 +42,5 @@
 
 void update_mmu_cache(struct vm_area_struct *vma,
 		      unsigned long address, pte_t * pte);
-void tlb_migrate_finish(struct mm_struct *mm);
 
 #endif
diff --git a/arch/nds32/include/asm/uaccess.h b/arch/nds32/include/asm/uaccess.h
index 362a32d..8916ad9 100644
--- a/arch/nds32/include/asm/uaccess.h
+++ b/arch/nds32/include/asm/uaccess.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef _ASMANDES_UACCESS_H
@@ -13,9 +13,6 @@
 #include <asm/types.h>
 #include <linux/mm.h>
 
-#define VERIFY_READ	0
-#define VERIFY_WRITE	1
-
 #define __asmeq(x, y)  ".ifnc " x "," y " ; .err ; .endif\n\t"
 
 /*
@@ -40,7 +37,6 @@
 #define KERNEL_DS 	((mm_segment_t) { ~0UL })
 #define USER_DS		((mm_segment_t) {TASK_SIZE - 1})
 
-#define get_ds()	(KERNEL_DS)
 #define get_fs()	(current_thread_info()->addr_limit)
 #define user_addr_max	get_fs
 
@@ -53,7 +49,7 @@
 
 #define __range_ok(addr, size) (size <= get_fs() && addr <= (get_fs() -size))
 
-#define access_ok(type, addr, size)	\
+#define access_ok(addr, size)	\
 	__range_ok((unsigned long)addr, (unsigned long)size)
 /*
  * Single-value transfer routines.  They automatically use the right
@@ -94,7 +90,7 @@
 ({									\
 	const __typeof__(*(ptr)) __user *__p = (ptr);			\
 	might_fault();							\
-	if (access_ok(VERIFY_READ, __p, sizeof(*__p))) {		\
+	if (access_ok(__p, sizeof(*__p))) {		\
 		__get_user_err((x), __p, (err));			\
 	} else {							\
 		(x) = 0; (err) = -EFAULT;				\
@@ -189,7 +185,7 @@
 ({									\
 	__typeof__(*(ptr)) __user *__p = (ptr);				\
 	might_fault();							\
-	if (access_ok(VERIFY_WRITE, __p, sizeof(*__p))) {		\
+	if (access_ok(__p, sizeof(*__p))) {		\
 		__put_user_err((x), __p, (err));			\
 	} else	{							\
 		(err) = -EFAULT;					\
@@ -279,7 +275,7 @@
 #define INLINE_COPY_TO_USER
 static inline unsigned long clear_user(void __user * to, unsigned long n)
 {
-	if (access_ok(VERIFY_WRITE, to, n))
+	if (access_ok(to, n))
 		n = __arch_clear_user(to, n);
 	return n;
 }
diff --git a/arch/nds32/include/asm/unistd.h b/arch/nds32/include/asm/unistd.h
index b586a28..bf5e2d4 100644
--- a/arch/nds32/include/asm/unistd.h
+++ b/arch/nds32/include/asm/unistd.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/nds32/include/asm/vdso.h b/arch/nds32/include/asm/vdso.h
index af2c6af..89b113f 100644
--- a/arch/nds32/include/asm/vdso.h
+++ b/arch/nds32/include/asm/vdso.h
@@ -1,5 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
- * SPDX-License-Identifier: GPL-2.0
  * Copyright (C) 2005-2017 Andes Technology Corporation
  */
 
diff --git a/arch/nds32/include/asm/vdso_datapage.h b/arch/nds32/include/asm/vdso_datapage.h
index 79db5a1..74c6880 100644
--- a/arch/nds32/include/asm/vdso_datapage.h
+++ b/arch/nds32/include/asm/vdso_datapage.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2012 ARM Limited
 // Copyright (C) 2005-2017 Andes Technology Corporation
 #ifndef __ASM_VDSO_DATAPAGE_H
@@ -20,6 +20,7 @@
 	u32 xtime_clock_sec;	/* CLOCK_REALTIME - seconds */
 	u32 cs_mult;		/* clocksource multiplier */
 	u32 cs_shift;		/* Cycle to nanosecond divisor (power of two) */
+	u32 hrtimer_res;	/* hrtimer resolution */
 
 	u64 cs_cycle_last;	/* last cycle value */
 	u64 cs_mask;		/* clocksource mask */
diff --git a/arch/nds32/include/asm/vdso_timer_info.h b/arch/nds32/include/asm/vdso_timer_info.h
index 50ba117..328439c 100644
--- a/arch/nds32/include/asm/vdso_timer_info.h
+++ b/arch/nds32/include/asm/vdso_timer_info.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 extern struct timer_info_t timer_info;
diff --git a/arch/nds32/include/uapi/asm/Kbuild b/arch/nds32/include/uapi/asm/Kbuild
index 40be972..e784701 100644
--- a/arch/nds32/include/uapi/asm/Kbuild
+++ b/arch/nds32/include/uapi/asm/Kbuild
@@ -1,29 +1,2 @@
-# UAPI Header export list
-include include/uapi/asm-generic/Kbuild.asm
-
-generic-y += bpf_perf_event.h
-generic-y += errno.h
-generic-y += ioctl.h
-generic-y += ioctls.h
-generic-y += ipcbuf.h
-generic-y += shmbuf.h
-generic-y += bitsperlong.h
-generic-y += fcntl.h
-generic-y += stat.h
-generic-y += mman.h
-generic-y += msgbuf.h
-generic-y += poll.h
-generic-y += posix_types.h
-generic-y += resource.h
-generic-y += sembuf.h
-generic-y += setup.h
-generic-y += siginfo.h
-generic-y += signal.h
-generic-y += socket.h
-generic-y += sockios.h
-generic-y += swab.h
-generic-y += statfs.h
-generic-y += termbits.h
-generic-y += termios.h
-generic-y += types.h
+# SPDX-License-Identifier: GPL-2.0
 generic-y += ucontext.h
diff --git a/arch/nds32/include/uapi/asm/auxvec.h b/arch/nds32/include/uapi/asm/auxvec.h
index 56043ce..bc0b92a 100644
--- a/arch/nds32/include/uapi/asm/auxvec.h
+++ b/arch/nds32/include/uapi/asm/auxvec.h
@@ -1,9 +1,16 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __ASM_AUXVEC_H
 #define __ASM_AUXVEC_H
 
+/*
+ * This entry gives some information about the FPU initialization
+ * performed by the kernel.
+ */
+#define AT_FPUCW	18	/* Used FPU control word.  */
+
+
 /* VDSO location */
 #define AT_SYSINFO_EHDR	33
 
diff --git a/arch/nds32/include/uapi/asm/byteorder.h b/arch/nds32/include/uapi/asm/byteorder.h
index a23f6f3..c264ef1 100644
--- a/arch/nds32/include/uapi/asm/byteorder.h
+++ b/arch/nds32/include/uapi/asm/byteorder.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __NDS32_BYTEORDER_H__
diff --git a/arch/nds32/include/uapi/asm/cachectl.h b/arch/nds32/include/uapi/asm/cachectl.h
index 4cdca9b..31b9b43 100644
--- a/arch/nds32/include/uapi/asm/cachectl.h
+++ b/arch/nds32/include/uapi/asm/cachectl.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 // Copyright (C) 1994, 1995, 1996 by Ralf Baechle
 // Copyright (C) 2005-2017 Andes Technology Corporation
 #ifndef	_ASM_CACHECTL
diff --git a/arch/nds32/include/uapi/asm/fp_udfiex_crtl.h b/arch/nds32/include/uapi/asm/fp_udfiex_crtl.h
new file mode 100644
index 0000000..f17396d
--- /dev/null
+++ b/arch/nds32/include/uapi/asm/fp_udfiex_crtl.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (C) 2005-2019 Andes Technology Corporation */
+#ifndef	_FP_UDF_IEX_CRTL_H
+#define	_FP_UDF_IEX_CRTL_H
+
+/*
+ * The cmd list of sys_fp_udfiex_crtl()
+ */
+/* Disable UDF or IEX trap based on the content of parameter act */
+#define DISABLE_UDF_IEX_TRAP	0
+/* Enable UDF or IEX trap based on the content of parameter act */
+#define ENABLE_UDF_IEX_TRAP	1
+/* Get current status of UDF and IEX trap */
+#define GET_UDF_IEX_TRAP	2
+
+#endif /* _FP_UDF_IEX_CRTL_H */
diff --git a/arch/nds32/include/uapi/asm/param.h b/arch/nds32/include/uapi/asm/param.h
index e3fb723..48d0032 100644
--- a/arch/nds32/include/uapi/asm/param.h
+++ b/arch/nds32/include/uapi/asm/param.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __ASM_NDS32_PARAM_H
diff --git a/arch/nds32/include/uapi/asm/ptrace.h b/arch/nds32/include/uapi/asm/ptrace.h
index 358c99e..d76217c 100644
--- a/arch/nds32/include/uapi/asm/ptrace.h
+++ b/arch/nds32/include/uapi/asm/ptrace.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef __UAPI_ASM_NDS32_PTRACE_H
diff --git a/arch/nds32/include/uapi/asm/sigcontext.h b/arch/nds32/include/uapi/asm/sigcontext.h
index 00567b2..6c1e664 100644
--- a/arch/nds32/include/uapi/asm/sigcontext.h
+++ b/arch/nds32/include/uapi/asm/sigcontext.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #ifndef _ASMNDS32_SIGCONTEXT_H
@@ -9,6 +9,29 @@
  * before the signal handler was invoked.  Note: only add new entries
  * to the end of the structure.
  */
+struct fpu_struct {
+	unsigned long long fd_regs[32];
+	unsigned long fpcsr;
+	/*
+	 * When CONFIG_SUPPORT_DENORMAL_ARITHMETIC is defined, kernel prevents
+	 * hardware from treating the denormalized output as an underflow case
+	 * and rounding it to a normal number. Hence kernel enables the UDF and
+	 * IEX trap in the fpcsr register to step in the calculation.
+	 * However, the UDF and IEX trap enable bit in $fpcsr also lose
+	 * their use.
+	 *
+	 * UDF_IEX_trap replaces the feature of UDF and IEX trap enable bit in
+	 * $fpcsr to control the trap of underflow and inexact. The bit filed
+	 * of UDF_IEX_trap is the same as $fpcsr, 10th bit is used to enable UDF
+	 * exception trapping and 11th bit is used to enable IEX exception
+	 * trapping.
+	 *
+	 * UDF_IEX_trap is only modified through fp_udfiex_crtl syscall.
+	 * Therefore, UDF_IEX_trap needn't be saved and restored in each
+	 * context switch.
+	 */
+	unsigned long UDF_IEX_trap;
+};
 
 struct zol_struct {
 	unsigned long nds32_lc;	/* $LC */
@@ -54,6 +77,7 @@
 	unsigned long fault_address;
 	unsigned long used_math_flag;
 	/* FPU Registers */
+	struct fpu_struct fpu;
 	struct zol_struct zol;
 };
 
diff --git a/arch/nds32/include/uapi/asm/unistd.h b/arch/nds32/include/uapi/asm/unistd.h
index 6e95901..410795e 100644
--- a/arch/nds32/include/uapi/asm/unistd.h
+++ b/arch/nds32/include/uapi/asm/unistd.h
@@ -1,11 +1,16 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
+#define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYNC_FILE_RANGE2
+#define __ARCH_WANT_SET_GET_RLIMIT
+#define __ARCH_WANT_TIME32_SYSCALLS
 
 /* Use the standard ABI for syscalls */
 #include <asm-generic/unistd.h>
 
 /* Additional NDS32 specific syscalls. */
 #define __NR_cacheflush		(__NR_arch_specific_syscall)
+#define __NR_fp_udfiex_crtl	(__NR_arch_specific_syscall + 1)
 __SYSCALL(__NR_cacheflush, sys_cacheflush)
+__SYSCALL(__NR_fp_udfiex_crtl, sys_fp_udfiex_crtl)
diff --git a/arch/nds32/kernel/.gitignore b/arch/nds32/kernel/.gitignore
new file mode 100644
index 0000000..c5f676c
--- /dev/null
+++ b/arch/nds32/kernel/.gitignore
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/nds32/kernel/Makefile b/arch/nds32/kernel/Makefile
index 27cded3..394df3f 100644
--- a/arch/nds32/kernel/Makefile
+++ b/arch/nds32/kernel/Makefile
@@ -1,10 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Makefile for the linux kernel.
 #
 
 CPPFLAGS_vmlinux.lds	:= -DTEXTADDR=$(TEXTADDR)
 AFLAGS_head.o		:= -DTEXTADDR=$(TEXTADDR)
-
 # Object file lists.
 
 obj-y			:= ex-entry.o ex-exit.o ex-scall.o irq.o \
@@ -14,11 +14,15 @@
 
 obj-$(CONFIG_MODULES)		+= nds32_ksyms.o module.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
+obj-$(CONFIG_FPU)		+= fpu.o
 obj-$(CONFIG_OF)		+= devtree.o
 obj-$(CONFIG_CACHE_L2)		+= atl2c.o
-
+obj-$(CONFIG_PERF_EVENTS) += perf_event_cpu.o
+obj-$(CONFIG_PM)		+= pm.o sleep.o
 extra-y := head.o vmlinux.lds
 
+CFLAGS_fpu.o += -mext-fpu-sp -mext-fpu-dp
+
 
 obj-y				+= vdso/
 
diff --git a/arch/nds32/kernel/cacheinfo.c b/arch/nds32/kernel/cacheinfo.c
index 0a7bc69..aab98e4 100644
--- a/arch/nds32/kernel/cacheinfo.c
+++ b/arch/nds32/kernel/cacheinfo.c
@@ -13,7 +13,7 @@
 	this_leaf->level = level;
 	this_leaf->type = type;
 	this_leaf->coherency_line_size = CACHE_LINE_SIZE(cache_type);
-	this_leaf->number_of_sets = CACHE_SET(cache_type);;
+	this_leaf->number_of_sets = CACHE_SET(cache_type);
 	this_leaf->ways_of_associativity = CACHE_WAY(cache_type);
 	this_leaf->size = this_leaf->number_of_sets *
 	    this_leaf->coherency_line_size * this_leaf->ways_of_associativity;
diff --git a/arch/nds32/kernel/dma.c b/arch/nds32/kernel/dma.c
index d0dbd4f..4206d4b 100644
--- a/arch/nds32/kernel/dma.c
+++ b/arch/nds32/kernel/dma.c
@@ -3,327 +3,13 @@
 
 #include <linux/types.h>
 #include <linux/mm.h>
-#include <linux/string.h>
 #include <linux/dma-noncoherent.h>
-#include <linux/io.h>
 #include <linux/cache.h>
 #include <linux/highmem.h>
-#include <linux/slab.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/proc-fns.h>
 
-/*
- * This is the page table (2MB) covering uncached, DMA consistent allocations
- */
-static pte_t *consistent_pte;
-static DEFINE_RAW_SPINLOCK(consistent_lock);
-
-/*
- * VM region handling support.
- *
- * This should become something generic, handling VM region allocations for
- * vmalloc and similar (ioremap, module space, etc).
- *
- * I envisage vmalloc()'s supporting vm_struct becoming:
- *
- *  struct vm_struct {
- *    struct vm_region	region;
- *    unsigned long	flags;
- *    struct page	**pages;
- *    unsigned int	nr_pages;
- *    unsigned long	phys_addr;
- *  };
- *
- * get_vm_area() would then call vm_region_alloc with an appropriate
- * struct vm_region head (eg):
- *
- *  struct vm_region vmalloc_head = {
- *	.vm_list	= LIST_HEAD_INIT(vmalloc_head.vm_list),
- *	.vm_start	= VMALLOC_START,
- *	.vm_end		= VMALLOC_END,
- *  };
- *
- * However, vmalloc_head.vm_start is variable (typically, it is dependent on
- * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
- * would have to initialise this each time prior to calling vm_region_alloc().
- */
-struct arch_vm_region {
-	struct list_head vm_list;
-	unsigned long vm_start;
-	unsigned long vm_end;
-	struct page *vm_pages;
-};
-
-static struct arch_vm_region consistent_head = {
-	.vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
-	.vm_start = CONSISTENT_BASE,
-	.vm_end = CONSISTENT_END,
-};
-
-static struct arch_vm_region *vm_region_alloc(struct arch_vm_region *head,
-					      size_t size, int gfp)
-{
-	unsigned long addr = head->vm_start, end = head->vm_end - size;
-	unsigned long flags;
-	struct arch_vm_region *c, *new;
-
-	new = kmalloc(sizeof(struct arch_vm_region), gfp);
-	if (!new)
-		goto out;
-
-	raw_spin_lock_irqsave(&consistent_lock, flags);
-
-	list_for_each_entry(c, &head->vm_list, vm_list) {
-		if ((addr + size) < addr)
-			goto nospc;
-		if ((addr + size) <= c->vm_start)
-			goto found;
-		addr = c->vm_end;
-		if (addr > end)
-			goto nospc;
-	}
-
-found:
-	/*
-	 * Insert this entry _before_ the one we found.
-	 */
-	list_add_tail(&new->vm_list, &c->vm_list);
-	new->vm_start = addr;
-	new->vm_end = addr + size;
-
-	raw_spin_unlock_irqrestore(&consistent_lock, flags);
-	return new;
-
-nospc:
-	raw_spin_unlock_irqrestore(&consistent_lock, flags);
-	kfree(new);
-out:
-	return NULL;
-}
-
-static struct arch_vm_region *vm_region_find(struct arch_vm_region *head,
-					     unsigned long addr)
-{
-	struct arch_vm_region *c;
-
-	list_for_each_entry(c, &head->vm_list, vm_list) {
-		if (c->vm_start == addr)
-			goto out;
-	}
-	c = NULL;
-out:
-	return c;
-}
-
-void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
-		gfp_t gfp, unsigned long attrs)
-{
-	struct page *page;
-	struct arch_vm_region *c;
-	unsigned long order;
-	u64 mask = ~0ULL, limit;
-	pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
-
-	if (!consistent_pte) {
-		pr_err("%s: not initialized\n", __func__);
-		dump_stack();
-		return NULL;
-	}
-
-	if (dev) {
-		mask = dev->coherent_dma_mask;
-
-		/*
-		 * Sanity check the DMA mask - it must be non-zero, and
-		 * must be able to be satisfied by a DMA allocation.
-		 */
-		if (mask == 0) {
-			dev_warn(dev, "coherent DMA mask is unset\n");
-			goto no_page;
-		}
-
-	}
-
-	/*
-	 * Sanity check the allocation size.
-	 */
-	size = PAGE_ALIGN(size);
-	limit = (mask + 1) & ~mask;
-	if ((limit && size >= limit) ||
-	    size >= (CONSISTENT_END - CONSISTENT_BASE)) {
-		pr_warn("coherent allocation too big "
-			"(requested %#x mask %#llx)\n", size, mask);
-		goto no_page;
-	}
-
-	order = get_order(size);
-
-	if (mask != 0xffffffff)
-		gfp |= GFP_DMA;
-
-	page = alloc_pages(gfp, order);
-	if (!page)
-		goto no_page;
-
-	/*
-	 * Invalidate any data that might be lurking in the
-	 * kernel direct-mapped region for device DMA.
-	 */
-	{
-		unsigned long kaddr = (unsigned long)page_address(page);
-		memset(page_address(page), 0, size);
-		cpu_dma_wbinval_range(kaddr, kaddr + size);
-	}
-
-	/*
-	 * Allocate a virtual address in the consistent mapping region.
-	 */
-	c = vm_region_alloc(&consistent_head, size,
-			    gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
-	if (c) {
-		pte_t *pte = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
-		struct page *end = page + (1 << order);
-
-		c->vm_pages = page;
-
-		/*
-		 * Set the "dma handle"
-		 */
-		*handle = page_to_phys(page);
-
-		do {
-			BUG_ON(!pte_none(*pte));
-
-			/*
-			 * x86 does not mark the pages reserved...
-			 */
-			SetPageReserved(page);
-			set_pte(pte, mk_pte(page, prot));
-			page++;
-			pte++;
-		} while (size -= PAGE_SIZE);
-
-		/*
-		 * Free the otherwise unused pages.
-		 */
-		while (page < end) {
-			__free_page(page);
-			page++;
-		}
-
-		return (void *)c->vm_start;
-	}
-
-	if (page)
-		__free_pages(page, order);
-no_page:
-	*handle = ~0;
-	return NULL;
-}
-
-void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
-		dma_addr_t handle, unsigned long attrs)
-{
-	struct arch_vm_region *c;
-	unsigned long flags, addr;
-	pte_t *ptep;
-
-	size = PAGE_ALIGN(size);
-
-	raw_spin_lock_irqsave(&consistent_lock, flags);
-
-	c = vm_region_find(&consistent_head, (unsigned long)cpu_addr);
-	if (!c)
-		goto no_area;
-
-	if ((c->vm_end - c->vm_start) != size) {
-		pr_err("%s: freeing wrong coherent size (%ld != %d)\n",
-		       __func__, c->vm_end - c->vm_start, size);
-		dump_stack();
-		size = c->vm_end - c->vm_start;
-	}
-
-	ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
-	addr = c->vm_start;
-	do {
-		pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
-		unsigned long pfn;
-
-		ptep++;
-		addr += PAGE_SIZE;
-
-		if (!pte_none(pte) && pte_present(pte)) {
-			pfn = pte_pfn(pte);
-
-			if (pfn_valid(pfn)) {
-				struct page *page = pfn_to_page(pfn);
-
-				/*
-				 * x86 does not mark the pages reserved...
-				 */
-				ClearPageReserved(page);
-
-				__free_page(page);
-				continue;
-			}
-		}
-
-		pr_crit("%s: bad page in kernel page table\n", __func__);
-	} while (size -= PAGE_SIZE);
-
-	flush_tlb_kernel_range(c->vm_start, c->vm_end);
-
-	list_del(&c->vm_list);
-
-	raw_spin_unlock_irqrestore(&consistent_lock, flags);
-
-	kfree(c);
-	return;
-
-no_area:
-	raw_spin_unlock_irqrestore(&consistent_lock, flags);
-	pr_err("%s: trying to free invalid coherent area: %p\n",
-	       __func__, cpu_addr);
-	dump_stack();
-}
-
-/*
- * Initialise the consistent memory allocation.
- */
-static int __init consistent_init(void)
-{
-	pgd_t *pgd;
-	pmd_t *pmd;
-	pte_t *pte;
-	int ret = 0;
-
-	do {
-		pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
-		pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE);
-		if (!pmd) {
-			pr_err("%s: no pmd tables\n", __func__);
-			ret = -ENOMEM;
-			break;
-		}
-		/* The first level mapping may be created in somewhere.
-		 * It's not necessary to warn here. */
-		/* WARN_ON(!pmd_none(*pmd)); */
-
-		pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
-		if (!pte) {
-			ret = -ENOMEM;
-			break;
-		}
-
-		consistent_pte = pte;
-	} while (0);
-
-	return ret;
-}
-
-core_initcall(consistent_init);
-
 static inline void cache_op(phys_addr_t paddr, size_t size,
 		void (*fn)(unsigned long start, unsigned long end))
 {
@@ -389,3 +75,8 @@
 		BUG();
 	}
 }
+
+void arch_dma_prep_coherent(struct page *page, size_t size)
+{
+	cache_op(page_to_phys(page), size, cpu_dma_wbinval_range);
+}
diff --git a/arch/nds32/kernel/ex-entry.S b/arch/nds32/kernel/ex-entry.S
index 21a1440..107d98a 100644
--- a/arch/nds32/kernel/ex-entry.S
+++ b/arch/nds32/kernel/ex-entry.S
@@ -7,6 +7,7 @@
 #include <asm/errno.h>
 #include <asm/asm-offsets.h>
 #include <asm/page.h>
+#include <asm/fpu.h>
 
 #ifdef CONFIG_HWZOL
 	.macro push_zol
@@ -15,12 +16,31 @@
 	mfusr	$r16, $LC
 	.endm
 #endif
+	.macro  skip_save_fucop_ctl
+#if defined(CONFIG_FPU)
+skip_fucop_ctl:
+	smw.adm $p0, [$sp], $p0, #0x1
+	j fucop_ctl_done
+#endif
+	.endm
 
 	.macro	save_user_regs
-
+#if defined(CONFIG_FPU)
+	sethi   $p0, hi20(has_fpu)
+	lbsi 	$p0, [$p0+lo12(has_fpu)]
+	beqz	$p0, skip_fucop_ctl
+	mfsr    $p0, $FUCOP_CTL
+	smw.adm $p0, [$sp], $p0, #0x1
+	bclr    $p0, $p0, #FUCOP_CTL_offCP0EN
+	mtsr    $p0, $FUCOP_CTL
+fucop_ctl_done:
+	/* move $SP to the bottom of pt_regs */
+	addi    $sp, $sp, -FUCOP_CTL_OFFSET
+#else
 	smw.adm $sp, [$sp], $sp, #0x1
 	/* move $SP to the bottom of pt_regs */
 	addi    $sp, $sp, -OSP_OFFSET
+#endif
 
 	/* push $r0 ~ $r25 */
 	smw.bim $r0, [$sp], $r25
@@ -79,6 +99,7 @@
 	.long	eh_syscall		!Syscall
 	.long	asm_do_IRQ		!IRQ
 
+	skip_save_fucop_ctl
 common_exception_handler:
 	save_user_regs
 	mfsr	$p0, $ITYPE
@@ -103,7 +124,6 @@
 	mtsr	$r21, $PSW
 	dsb
 	jr	$p1
-
 	/* syscall */
 1:
 	addi	$p1, $p0, #-NDS32_VECTOR_offEXCEPTION
diff --git a/arch/nds32/kernel/ex-exit.S b/arch/nds32/kernel/ex-exit.S
index f00af92..1df02a7 100644
--- a/arch/nds32/kernel/ex-exit.S
+++ b/arch/nds32/kernel/ex-exit.S
@@ -8,6 +8,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/current.h>
+#include <asm/fpu.h>
 
 
 
@@ -22,10 +23,18 @@
 	.macro	restore_user_regs_first
 	setgie.d
 	isb
-
+#if defined(CONFIG_FPU)
+	addi    $sp, $sp, OSP_OFFSET
+	lmw.adm $r12, [$sp], $r25, #0x0
+	sethi   $p0, hi20(has_fpu)
+	lbsi 	$p0, [$p0+lo12(has_fpu)]
+	beqz	$p0, 2f
+	mtsr    $r25, $FUCOP_CTL
+2:
+#else
 	addi	$sp, $sp, FUCOP_CTL_OFFSET
-
 	lmw.adm $r12, [$sp], $r24, #0x0
+#endif
 	mtsr	$r12, $SP_USR
 	mtsr	$r13, $IPC
 #ifdef CONFIG_HWZOL
@@ -154,7 +163,7 @@
 	gie_disable
 	lwi	$t0, [tsk+#TSK_TI_PREEMPT]
 	bnez	$t0, no_work_pending
-need_resched:
+
 	lwi	$t0, [tsk+#TSK_TI_FLAGS]
 	andi	$p1, $t0, #_TIF_NEED_RESCHED
 	beqz	$p1, no_work_pending
@@ -164,7 +173,7 @@
 	beqz	$t0, no_work_pending
 
 	jal	preempt_schedule_irq
-	b	need_resched
+	b	no_work_pending
 #endif
 
 /*
diff --git a/arch/nds32/kernel/ex-scall.S b/arch/nds32/kernel/ex-scall.S
index 36aa87e..270050f 100644
--- a/arch/nds32/kernel/ex-scall.S
+++ b/arch/nds32/kernel/ex-scall.S
@@ -19,11 +19,13 @@
 
 	la	$p0, __entry_task
 	sw	$r1, [$p0]
-	move	$p1, $r0
-	addi	$p1, $p1, #THREAD_CPU_CONTEXT
+	addi	$p1, $r0, #THREAD_CPU_CONTEXT
 	smw.bi 	$r6, [$p1], $r14, #0xb		! push r6~r14, fp, lp, sp
 	move	$r25, $r1
-	addi	$r1, $r1, #THREAD_CPU_CONTEXT
+#if defined(CONFIG_FPU)
+	call	_switch_fpu
+#endif
+	addi	$r1, $r25, #THREAD_CPU_CONTEXT
 	lmw.bi 	$r6, [$r1], $r14, #0xb		! pop r6~r14, fp, lp, sp
 	ret
 
diff --git a/arch/nds32/kernel/fpu.c b/arch/nds32/kernel/fpu.c
new file mode 100644
index 0000000..62bdafb
--- /dev/null
+++ b/arch/nds32/kernel/fpu.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/sched/signal.h>
+#include <asm/processor.h>
+#include <asm/user.h>
+#include <asm/io.h>
+#include <asm/bitfield.h>
+#include <asm/fpu.h>
+
+const struct fpu_struct init_fpuregs = {
+	.fd_regs = {[0 ... 31] = sNAN64},
+	.fpcsr = FPCSR_INIT,
+#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
+	.UDF_IEX_trap = 0
+#endif
+};
+
+void save_fpu(struct task_struct *tsk)
+{
+	unsigned int fpcfg, fpcsr;
+
+	enable_fpu();
+	fpcfg = ((__nds32__fmfcfg() & FPCFG_mskFREG) >> FPCFG_offFREG);
+	switch (fpcfg) {
+	case SP32_DP32_reg:
+		asm volatile ("fsdi $fd31, [%0+0xf8]\n\t"
+			      "fsdi $fd30, [%0+0xf0]\n\t"
+			      "fsdi $fd29, [%0+0xe8]\n\t"
+			      "fsdi $fd28, [%0+0xe0]\n\t"
+			      "fsdi $fd27, [%0+0xd8]\n\t"
+			      "fsdi $fd26, [%0+0xd0]\n\t"
+			      "fsdi $fd25, [%0+0xc8]\n\t"
+			      "fsdi $fd24, [%0+0xc0]\n\t"
+			      "fsdi $fd23, [%0+0xb8]\n\t"
+			      "fsdi $fd22, [%0+0xb0]\n\t"
+			      "fsdi $fd21, [%0+0xa8]\n\t"
+			      "fsdi $fd20, [%0+0xa0]\n\t"
+			      "fsdi $fd19, [%0+0x98]\n\t"
+			      "fsdi $fd18, [%0+0x90]\n\t"
+			      "fsdi $fd17, [%0+0x88]\n\t"
+			      "fsdi $fd16, [%0+0x80]\n\t"
+			      :	/* no output */
+			      : "r" (&tsk->thread.fpu)
+			      : "memory");
+		/* fall through */
+	case SP32_DP16_reg:
+		asm volatile ("fsdi $fd15, [%0+0x78]\n\t"
+			      "fsdi $fd14, [%0+0x70]\n\t"
+			      "fsdi $fd13, [%0+0x68]\n\t"
+			      "fsdi $fd12, [%0+0x60]\n\t"
+			      "fsdi $fd11, [%0+0x58]\n\t"
+			      "fsdi $fd10, [%0+0x50]\n\t"
+			      "fsdi $fd9,  [%0+0x48]\n\t"
+			      "fsdi $fd8,  [%0+0x40]\n\t"
+			      :	/* no output */
+			      : "r" (&tsk->thread.fpu)
+			      : "memory");
+		/* fall through */
+	case SP16_DP8_reg:
+		asm volatile ("fsdi $fd7,  [%0+0x38]\n\t"
+			      "fsdi $fd6,  [%0+0x30]\n\t"
+			      "fsdi $fd5,  [%0+0x28]\n\t"
+			      "fsdi $fd4,  [%0+0x20]\n\t"
+			      :	/* no output */
+			      : "r" (&tsk->thread.fpu)
+			      : "memory");
+		/* fall through */
+	case SP8_DP4_reg:
+		asm volatile ("fsdi $fd3,  [%1+0x18]\n\t"
+			      "fsdi $fd2,  [%1+0x10]\n\t"
+			      "fsdi $fd1,  [%1+0x8]\n\t"
+			      "fsdi $fd0,  [%1+0x0]\n\t"
+			      "fmfcsr	%0\n\t"
+			      "swi  %0, [%1+0x100]\n\t"
+			      : "=&r" (fpcsr)
+			      : "r"(&tsk->thread.fpu)
+			      : "memory");
+	}
+	disable_fpu();
+}
+
+void load_fpu(const struct fpu_struct *fpregs)
+{
+	unsigned int fpcfg, fpcsr;
+
+	enable_fpu();
+	fpcfg = ((__nds32__fmfcfg() & FPCFG_mskFREG) >> FPCFG_offFREG);
+	switch (fpcfg) {
+	case SP32_DP32_reg:
+		asm volatile ("fldi $fd31, [%0+0xf8]\n\t"
+			      "fldi $fd30, [%0+0xf0]\n\t"
+			      "fldi $fd29, [%0+0xe8]\n\t"
+			      "fldi $fd28, [%0+0xe0]\n\t"
+			      "fldi $fd27, [%0+0xd8]\n\t"
+			      "fldi $fd26, [%0+0xd0]\n\t"
+			      "fldi $fd25, [%0+0xc8]\n\t"
+			      "fldi $fd24, [%0+0xc0]\n\t"
+			      "fldi $fd23, [%0+0xb8]\n\t"
+			      "fldi $fd22, [%0+0xb0]\n\t"
+			      "fldi $fd21, [%0+0xa8]\n\t"
+			      "fldi $fd20, [%0+0xa0]\n\t"
+			      "fldi $fd19, [%0+0x98]\n\t"
+			      "fldi $fd18, [%0+0x90]\n\t"
+			      "fldi $fd17, [%0+0x88]\n\t"
+			      "fldi $fd16, [%0+0x80]\n\t"
+			      :	/* no output */
+			      : "r" (fpregs));
+		/* fall through */
+	case SP32_DP16_reg:
+		asm volatile ("fldi $fd15, [%0+0x78]\n\t"
+			      "fldi $fd14, [%0+0x70]\n\t"
+			      "fldi $fd13, [%0+0x68]\n\t"
+			      "fldi $fd12, [%0+0x60]\n\t"
+			      "fldi $fd11, [%0+0x58]\n\t"
+			      "fldi $fd10, [%0+0x50]\n\t"
+			      "fldi $fd9,  [%0+0x48]\n\t"
+			      "fldi $fd8,  [%0+0x40]\n\t"
+			      :	/* no output */
+			      : "r" (fpregs));
+		/* fall through */
+	case SP16_DP8_reg:
+		asm volatile ("fldi $fd7,  [%0+0x38]\n\t"
+			      "fldi $fd6,  [%0+0x30]\n\t"
+			      "fldi $fd5,  [%0+0x28]\n\t"
+			      "fldi $fd4,  [%0+0x20]\n\t"
+			      :	/* no output */
+			      : "r" (fpregs));
+		/* fall through */
+	case SP8_DP4_reg:
+		asm volatile ("fldi $fd3,  [%1+0x18]\n\t"
+			      "fldi $fd2,  [%1+0x10]\n\t"
+			      "fldi $fd1,  [%1+0x8]\n\t"
+			      "fldi $fd0,  [%1+0x0]\n\t"
+			      "lwi  %0, [%1+0x100]\n\t"
+			      "fmtcsr	%0\n\t":"=&r" (fpcsr)
+			      : "r"(fpregs));
+	}
+	disable_fpu();
+}
+void store_fpu_for_suspend(void)
+{
+#ifdef CONFIG_LAZY_FPU
+	if (last_task_used_math != NULL)
+		save_fpu(last_task_used_math);
+	last_task_used_math = NULL;
+#else
+	if (!used_math())
+		return;
+	unlazy_fpu(current);
+#endif
+	clear_fpu(task_pt_regs(current));
+}
+inline void do_fpu_context_switch(struct pt_regs *regs)
+{
+	/* Enable to use FPU. */
+
+	if (!user_mode(regs)) {
+		pr_err("BUG: FPU is used in kernel mode.\n");
+		BUG();
+		return;
+	}
+
+	enable_ptreg_fpu(regs);
+#ifdef CONFIG_LAZY_FPU	//Lazy FPU is used
+	if (last_task_used_math == current)
+		return;
+	if (last_task_used_math != NULL)
+		/* Other processes fpu state, save away */
+		save_fpu(last_task_used_math);
+	last_task_used_math = current;
+#endif
+	if (used_math()) {
+		load_fpu(&current->thread.fpu);
+	} else {
+		/* First time FPU user.  */
+		load_fpu(&init_fpuregs);
+#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
+		current->thread.fpu.UDF_IEX_trap = init_fpuregs.UDF_IEX_trap;
+#endif
+		set_used_math();
+	}
+
+}
+
+inline void fill_sigfpe_signo(unsigned int fpcsr, int *signo)
+{
+	if (fpcsr & FPCSR_mskOVFT)
+		*signo = FPE_FLTOVF;
+#ifndef CONFIG_SUPPORT_DENORMAL_ARITHMETIC
+	else if (fpcsr & FPCSR_mskUDFT)
+		*signo = FPE_FLTUND;
+#endif
+	else if (fpcsr & FPCSR_mskIVOT)
+		*signo = FPE_FLTINV;
+	else if (fpcsr & FPCSR_mskDBZT)
+		*signo = FPE_FLTDIV;
+	else if (fpcsr & FPCSR_mskIEXT)
+		*signo = FPE_FLTRES;
+}
+
+inline void handle_fpu_exception(struct pt_regs *regs)
+{
+	unsigned int fpcsr;
+	int si_code = 0, si_signo = SIGFPE;
+#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
+	unsigned long redo_except = FPCSR_mskDNIT|FPCSR_mskUDFT|FPCSR_mskIEXT;
+#else
+	unsigned long redo_except = FPCSR_mskDNIT;
+#endif
+
+	lose_fpu();
+	fpcsr = current->thread.fpu.fpcsr;
+
+	if (fpcsr & redo_except) {
+		si_signo = do_fpuemu(regs, &current->thread.fpu);
+		fpcsr = current->thread.fpu.fpcsr;
+		if (!si_signo) {
+			current->thread.fpu.fpcsr &= ~(redo_except);
+			goto done;
+		}
+	} else if (fpcsr & FPCSR_mskRIT) {
+		if (!user_mode(regs))
+			do_exit(SIGILL);
+		si_signo = SIGILL;
+	}
+
+	switch (si_signo) {
+	case SIGFPE:
+		fill_sigfpe_signo(fpcsr, &si_code);
+		break;
+	case SIGILL:
+		show_regs(regs);
+		si_code = ILL_COPROC;
+		break;
+	case SIGBUS:
+		si_code = BUS_ADRERR;
+		break;
+	default:
+		break;
+	}
+
+	force_sig_fault(si_signo, si_code,
+			(void __user *)instruction_pointer(regs));
+done:
+	own_fpu();
+}
+
+bool do_fpu_exception(unsigned int subtype, struct pt_regs *regs)
+{
+	int done = true;
+	/* Coprocessor disabled exception */
+	if (subtype == FPU_DISABLE_EXCEPTION) {
+		preempt_disable();
+		do_fpu_context_switch(regs);
+		preempt_enable();
+	}
+	/* Coprocessor exception such as underflow and overflow */
+	else if (subtype == FPU_EXCEPTION)
+		handle_fpu_exception(regs);
+	else
+		done = false;
+	return done;
+}
diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c
index 8a41372..fd2a54b 100644
--- a/arch/nds32/kernel/ftrace.c
+++ b/arch/nds32/kernel/ftrace.c
@@ -7,7 +7,6 @@
 #ifndef CONFIG_DYNAMIC_FTRACE
 extern void (*ftrace_trace_function)(unsigned long, unsigned long,
 				     struct ftrace_ops*, struct pt_regs*);
-extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace);
 extern void ftrace_graph_caller(void);
 
 noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip,
diff --git a/arch/nds32/kernel/head.S b/arch/nds32/kernel/head.S
index c5fdae1..fcefb62 100644
--- a/arch/nds32/kernel/head.S
+++ b/arch/nds32/kernel/head.S
@@ -7,7 +7,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
-#include <asm/sizes.h>
+#include <linux/sizes.h>
 #include <asm/thread_info.h>
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
@@ -123,21 +123,12 @@
         andi    $r0, $r0, MMU_CFG_mskTBS
         srli    $r6, $r6, MMU_CFG_offTBW
         srli    $r0, $r0, MMU_CFG_offTBS
-        /*
-         * we just map the kernel to the maximum way - 1 of tlb
-         * reserver one way for UART VA mapping
-         * it will cause page fault if UART mapping cover the kernel mapping
-         *
-         * direct mapping is not supported now.
-         */
-        li      $r2, 't'
-        beqz    $r6, __error                 ! MMU_CFG.TBW = 0 is direct mappin
+	addi    $r6, $r6, #0x1               ! MMU_CFG.TBW value -> meaning
         addi    $r0, $r0, #0x2               ! MMU_CFG.TBS value -> meaning
         sll     $r0, $r6, $r0                ! entries = k-way * n-set
         mul     $r6, $r0, $r5                ! max size = entries * page size
         /* check kernel image size */
         la      $r3, (_end - PAGE_OFFSET)
-        li      $r2, 's'
         bgt     $r3, $r6, __error
 
 	li      $r2, #(PHYS_OFFSET + TLB_DATA_kernel_text_attr)
@@ -160,7 +151,7 @@
 #endif
 	mtsr    $r3, $TLB_MISC
 
-	mfsr    $r0, $MISC_CTL      ! Enable BTB and RTP and shadow sp
+	mfsr    $r0, $MISC_CTL      ! Enable BTB, RTP, shadow sp, and HW_PRE
 	ori     $r0, $r0, #MISC_init
 	mtsr    $r0, $MISC_CTL
 
diff --git a/arch/nds32/kernel/nds32_ksyms.c b/arch/nds32/kernel/nds32_ksyms.c
index 5ecebd0..20719e4 100644
--- a/arch/nds32/kernel/nds32_ksyms.c
+++ b/arch/nds32/kernel/nds32_ksyms.c
@@ -23,9 +23,3 @@
 EXPORT_SYMBOL(__arch_copy_from_user);
 EXPORT_SYMBOL(__arch_copy_to_user);
 EXPORT_SYMBOL(__arch_clear_user);
-
-/* cache handling */
-EXPORT_SYMBOL(cpu_icache_inval_all);
-EXPORT_SYMBOL(cpu_dcache_wbinval_all);
-EXPORT_SYMBOL(cpu_dma_inval_range);
-EXPORT_SYMBOL(cpu_dma_wb_range);
diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c
new file mode 100644
index 0000000..334c2a6
--- /dev/null
+++ b/arch/nds32/kernel/perf_event_cpu.c
@@ -0,0 +1,1521 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2008-2017 Andes Technology Corporation
+ *
+ * Reference ARMv7: Jean Pihet <jpihet@mvista.com>
+ * 2010 (c) MontaVista Software, LLC.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/bitmap.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/pm_runtime.h>
+#include <linux/ftrace.h>
+#include <linux/uaccess.h>
+#include <linux/sched/clock.h>
+#include <linux/percpu-defs.h>
+
+#include <asm/pmu.h>
+#include <asm/irq_regs.h>
+#include <asm/nds32.h>
+#include <asm/stacktrace.h>
+#include <asm/perf_event.h>
+#include <nds32_intrinsic.h>
+
+/* Set at runtime when we know what CPU type we are. */
+static struct nds32_pmu *cpu_pmu;
+
+static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
+static void nds32_pmu_start(struct nds32_pmu *cpu_pmu);
+static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu);
+static struct platform_device_id cpu_pmu_plat_device_ids[] = {
+	{.name = "nds32-pfm"},
+	{},
+};
+
+static int nds32_pmu_map_cache_event(const unsigned int (*cache_map)
+				  [PERF_COUNT_HW_CACHE_MAX]
+				  [PERF_COUNT_HW_CACHE_OP_MAX]
+				  [PERF_COUNT_HW_CACHE_RESULT_MAX], u64 config)
+{
+	unsigned int cache_type, cache_op, cache_result, ret;
+
+	cache_type = (config >> 0) & 0xff;
+	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+		return -EINVAL;
+
+	cache_op = (config >> 8) & 0xff;
+	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+		return -EINVAL;
+
+	cache_result = (config >> 16) & 0xff;
+	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+		return -EINVAL;
+
+	ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
+
+	if (ret == CACHE_OP_UNSUPPORTED)
+		return -ENOENT;
+
+	return ret;
+}
+
+static int
+nds32_pmu_map_hw_event(const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
+		       u64 config)
+{
+	int mapping;
+
+	if (config >= PERF_COUNT_HW_MAX)
+		return -ENOENT;
+
+	mapping = (*event_map)[config];
+	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
+}
+
+static int nds32_pmu_map_raw_event(u32 raw_event_mask, u64 config)
+{
+	int ev_type = (int)(config & raw_event_mask);
+	int idx = config >> 8;
+
+	switch (idx) {
+	case 0:
+		ev_type = PFM_OFFSET_MAGIC_0 + ev_type;
+		if (ev_type >= SPAV3_0_SEL_LAST || ev_type <= SPAV3_0_SEL_BASE)
+			return -ENOENT;
+		break;
+	case 1:
+		ev_type = PFM_OFFSET_MAGIC_1 + ev_type;
+		if (ev_type >= SPAV3_1_SEL_LAST || ev_type <= SPAV3_1_SEL_BASE)
+			return -ENOENT;
+		break;
+	case 2:
+		ev_type = PFM_OFFSET_MAGIC_2 + ev_type;
+		if (ev_type >= SPAV3_2_SEL_LAST || ev_type <= SPAV3_2_SEL_BASE)
+			return -ENOENT;
+		break;
+	default:
+		return -ENOENT;
+	}
+
+	return ev_type;
+}
+
+int
+nds32_pmu_map_event(struct perf_event *event,
+		    const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
+		    const unsigned int (*cache_map)
+		    [PERF_COUNT_HW_CACHE_MAX]
+		    [PERF_COUNT_HW_CACHE_OP_MAX]
+		    [PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask)
+{
+	u64 config = event->attr.config;
+
+	switch (event->attr.type) {
+	case PERF_TYPE_HARDWARE:
+		return nds32_pmu_map_hw_event(event_map, config);
+	case PERF_TYPE_HW_CACHE:
+		return nds32_pmu_map_cache_event(cache_map, config);
+	case PERF_TYPE_RAW:
+		return nds32_pmu_map_raw_event(raw_event_mask, config);
+	}
+
+	return -ENOENT;
+}
+
+static int nds32_spav3_map_event(struct perf_event *event)
+{
+	return nds32_pmu_map_event(event, &nds32_pfm_perf_map,
+				&nds32_pfm_perf_cache_map, SOFTWARE_EVENT_MASK);
+}
+
+static inline u32 nds32_pfm_getreset_flags(void)
+{
+	/* Read overflow status */
+	u32 val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+	u32 old_val = val;
+
+	/* Write overflow bit to clear status, and others keep it 0 */
+	u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
+
+	__nds32__mtsr(val | ov_flag, NDS32_SR_PFM_CTL);
+
+	return old_val;
+}
+
+static inline int nds32_pfm_has_overflowed(u32 pfm)
+{
+	u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
+
+	return pfm & ov_flag;
+}
+
+static inline int nds32_pfm_counter_has_overflowed(u32 pfm, int idx)
+{
+	u32 mask = 0;
+
+	switch (idx) {
+	case 0:
+		mask = PFM_CTL_OVF[0];
+		break;
+	case 1:
+		mask = PFM_CTL_OVF[1];
+		break;
+	case 2:
+		mask = PFM_CTL_OVF[2];
+		break;
+	default:
+		pr_err("%s index wrong\n", __func__);
+		break;
+	}
+	return pfm & mask;
+}
+
+/*
+ * Set the next IRQ period, based on the hwc->period_left value.
+ * To be called with the event disabled in hw:
+ */
+int nds32_pmu_event_set_period(struct perf_event *event)
+{
+	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	s64 left = local64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int ret = 0;
+
+	/* The period may have been changed by PERF_EVENT_IOC_PERIOD */
+	if (unlikely(period != hwc->last_period))
+		left = period - (hwc->last_period - left);
+
+	if (unlikely(left <= -period)) {
+		left = period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	if (unlikely(left <= 0)) {
+		left += period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		ret = 1;
+	}
+
+	if (left > (s64)nds32_pmu->max_period)
+		left = nds32_pmu->max_period;
+
+	/*
+	 * The hw event starts counting from this event offset,
+	 * mark it to be able to extract future "deltas":
+	 */
+	local64_set(&hwc->prev_count, (u64)(-left));
+
+	nds32_pmu->write_counter(event, (u64)(-left) & nds32_pmu->max_period);
+
+	perf_event_update_userpage(event);
+
+	return ret;
+}
+
+static irqreturn_t nds32_pmu_handle_irq(int irq_num, void *dev)
+{
+	u32 pfm;
+	struct perf_sample_data data;
+	struct nds32_pmu *cpu_pmu = (struct nds32_pmu *)dev;
+	struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
+	struct pt_regs *regs;
+	int idx;
+	/*
+	 * Get and reset the IRQ flags
+	 */
+	pfm = nds32_pfm_getreset_flags();
+
+	/*
+	 * Did an overflow occur?
+	 */
+	if (!nds32_pfm_has_overflowed(pfm))
+		return IRQ_NONE;
+
+	/*
+	 * Handle the counter(s) overflow(s)
+	 */
+	regs = get_irq_regs();
+
+	nds32_pmu_stop(cpu_pmu);
+	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
+		struct perf_event *event = cpuc->events[idx];
+		struct hw_perf_event *hwc;
+
+		/* Ignore if we don't have an event. */
+		if (!event)
+			continue;
+
+		/*
+		 * We have a single interrupt for all counters. Check that
+		 * each counter has overflowed before we process it.
+		 */
+		if (!nds32_pfm_counter_has_overflowed(pfm, idx))
+			continue;
+
+		hwc = &event->hw;
+		nds32_pmu_event_update(event);
+		perf_sample_data_init(&data, 0, hwc->last_period);
+		if (!nds32_pmu_event_set_period(event))
+			continue;
+
+		if (perf_event_overflow(event, &data, regs))
+			cpu_pmu->disable(event);
+	}
+	nds32_pmu_start(cpu_pmu);
+	/*
+	 * Handle the pending perf events.
+	 *
+	 * Note: this call *must* be run with interrupts disabled. For
+	 * platforms that can have the PMU interrupts raised as an NMI, this
+	 * will not work.
+	 */
+	irq_work_run();
+
+	return IRQ_HANDLED;
+}
+
+static inline int nds32_pfm_counter_valid(struct nds32_pmu *cpu_pmu, int idx)
+{
+	return ((idx >= 0) && (idx < cpu_pmu->num_events));
+}
+
+static inline int nds32_pfm_disable_counter(int idx)
+{
+	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+	u32 mask = 0;
+
+	mask = PFM_CTL_EN[idx];
+	val &= ~mask;
+	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+	return idx;
+}
+
+/*
+ * Add an event filter to a given event.
+ */
+static int nds32_pmu_set_event_filter(struct hw_perf_event *event,
+				      struct perf_event_attr *attr)
+{
+	unsigned long config_base = 0;
+	int idx = event->idx;
+	unsigned long no_kernel_tracing = 0;
+	unsigned long no_user_tracing = 0;
+	/* If index is -1, do not do anything */
+	if (idx == -1)
+		return 0;
+
+	no_kernel_tracing = PFM_CTL_KS[idx];
+	no_user_tracing = PFM_CTL_KU[idx];
+	/*
+	 * Default: enable both kernel and user mode tracing.
+	 */
+	if (attr->exclude_user)
+		config_base |= no_user_tracing;
+
+	if (attr->exclude_kernel)
+		config_base |= no_kernel_tracing;
+
+	/*
+	 * Install the filter into config_base as this is used to
+	 * construct the event type.
+	 */
+	event->config_base |= config_base;
+	return 0;
+}
+
+static inline void nds32_pfm_write_evtsel(int idx, u32 evnum)
+{
+	u32 offset = 0;
+	u32 ori_val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+	u32 ev_mask = 0;
+	u32 no_kernel_mask = 0;
+	u32 no_user_mask = 0;
+	u32 val;
+
+	offset = PFM_CTL_OFFSEL[idx];
+	/* Clear previous mode selection, and write new one */
+	no_kernel_mask = PFM_CTL_KS[idx];
+	no_user_mask = PFM_CTL_KU[idx];
+	ori_val &= ~no_kernel_mask;
+	ori_val &= ~no_user_mask;
+	if (evnum & no_kernel_mask)
+		ori_val |= no_kernel_mask;
+
+	if (evnum & no_user_mask)
+		ori_val |= no_user_mask;
+
+	/* Clear previous event selection */
+	ev_mask = PFM_CTL_SEL[idx];
+	ori_val &= ~ev_mask;
+	evnum &= SOFTWARE_EVENT_MASK;
+
+	/* undo the linear mapping */
+	evnum = get_converted_evet_hw_num(evnum);
+	val = ori_val | (evnum << offset);
+	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+}
+
+static inline int nds32_pfm_enable_counter(int idx)
+{
+	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+	u32 mask = 0;
+
+	mask = PFM_CTL_EN[idx];
+	val |= mask;
+	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+	return idx;
+}
+
+static inline int nds32_pfm_enable_intens(int idx)
+{
+	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+	u32 mask = 0;
+
+	mask = PFM_CTL_IE[idx];
+	val |= mask;
+	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+	return idx;
+}
+
+static inline int nds32_pfm_disable_intens(int idx)
+{
+	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+	u32 mask = 0;
+
+	mask = PFM_CTL_IE[idx];
+	val &= ~mask;
+	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+	return idx;
+}
+
+static int event_requires_mode_exclusion(struct perf_event_attr *attr)
+{
+	/* Other modes NDS32 does not support */
+	return attr->exclude_user || attr->exclude_kernel;
+}
+
+static void nds32_pmu_enable_event(struct perf_event *event)
+{
+	unsigned long flags;
+	unsigned int evnum = 0;
+	struct hw_perf_event *hwc = &event->hw;
+	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	int idx = hwc->idx;
+
+	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
+		pr_err("CPU enabling wrong pfm counter IRQ enable\n");
+		return;
+	}
+
+	/*
+	 * Enable counter and interrupt, and set the counter to count
+	 * the event that we're interested in.
+	 */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/*
+	 * Disable counter
+	 */
+	nds32_pfm_disable_counter(idx);
+
+	/*
+	 * Check whether we need to exclude the counter from certain modes.
+	 */
+	if ((!cpu_pmu->set_event_filter ||
+	     cpu_pmu->set_event_filter(hwc, &event->attr)) &&
+	     event_requires_mode_exclusion(&event->attr)) {
+		pr_notice
+		("NDS32 performance counters do not support mode exclusion\n");
+		hwc->config_base = 0;
+	}
+	/* Write event */
+	evnum = hwc->config_base;
+	nds32_pfm_write_evtsel(idx, evnum);
+
+	/*
+	 * Enable interrupt for this counter
+	 */
+	nds32_pfm_enable_intens(idx);
+
+	/*
+	 * Enable counter
+	 */
+	nds32_pfm_enable_counter(idx);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void nds32_pmu_disable_event(struct perf_event *event)
+{
+	unsigned long flags;
+	struct hw_perf_event *hwc = &event->hw;
+	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+	int idx = hwc->idx;
+
+	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
+		pr_err("CPU disabling wrong pfm counter IRQ enable %d\n", idx);
+		return;
+	}
+
+	/*
+	 * Disable counter and interrupt
+	 */
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/*
+	 * Disable counter
+	 */
+	nds32_pfm_disable_counter(idx);
+
+	/*
+	 * Disable interrupt for this counter
+	 */
+	nds32_pfm_disable_intens(idx);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static inline u32 nds32_pmu_read_counter(struct perf_event *event)
+{
+	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+	u32 count = 0;
+
+	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
+		pr_err("CPU reading wrong counter %d\n", idx);
+	} else {
+		switch (idx) {
+		case PFMC0:
+			count = __nds32__mfsr(NDS32_SR_PFMC0);
+			break;
+		case PFMC1:
+			count = __nds32__mfsr(NDS32_SR_PFMC1);
+			break;
+		case PFMC2:
+			count = __nds32__mfsr(NDS32_SR_PFMC2);
+			break;
+		default:
+			pr_err
+			    ("%s: CPU has no performance counters %d\n",
+			     __func__, idx);
+		}
+	}
+	return count;
+}
+
+static inline void nds32_pmu_write_counter(struct perf_event *event, u32 value)
+{
+	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
+		pr_err("CPU writing wrong counter %d\n", idx);
+	} else {
+		switch (idx) {
+		case PFMC0:
+			__nds32__mtsr_isb(value, NDS32_SR_PFMC0);
+			break;
+		case PFMC1:
+			__nds32__mtsr_isb(value, NDS32_SR_PFMC1);
+			break;
+		case PFMC2:
+			__nds32__mtsr_isb(value, NDS32_SR_PFMC2);
+			break;
+		default:
+			pr_err
+			    ("%s: CPU has no performance counters %d\n",
+			     __func__, idx);
+		}
+	}
+}
+
+static int nds32_pmu_get_event_idx(struct pmu_hw_events *cpuc,
+				   struct perf_event *event)
+{
+	int idx;
+	struct hw_perf_event *hwc = &event->hw;
+	/*
+	 * Current implementation maps cycles, instruction count and cache-miss
+	 * to specific counter.
+	 * However, multiple of the 3 counters are able to count these events.
+	 *
+	 *
+	 * SOFTWARE_EVENT_MASK mask for getting event num ,
+	 * This is defined by Jia-Rung, you can change the polocies.
+	 * However, do not exceed 8 bits. This is hardware specific.
+	 * The last number is SPAv3_2_SEL_LAST.
+	 */
+	unsigned long evtype = hwc->config_base & SOFTWARE_EVENT_MASK;
+
+	idx = get_converted_event_idx(evtype);
+	/*
+	 * Try to get the counter for correpsonding event
+	 */
+	if (evtype == SPAV3_0_SEL_TOTAL_CYCLES) {
+		if (!test_and_set_bit(idx, cpuc->used_mask))
+			return idx;
+		if (!test_and_set_bit(NDS32_IDX_COUNTER0, cpuc->used_mask))
+			return NDS32_IDX_COUNTER0;
+		if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
+			return NDS32_IDX_COUNTER1;
+	} else if (evtype == SPAV3_1_SEL_COMPLETED_INSTRUCTION) {
+		if (!test_and_set_bit(idx, cpuc->used_mask))
+			return idx;
+		else if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
+			return NDS32_IDX_COUNTER1;
+		else if (!test_and_set_bit
+			 (NDS32_IDX_CYCLE_COUNTER, cpuc->used_mask))
+			return NDS32_IDX_CYCLE_COUNTER;
+	} else {
+		if (!test_and_set_bit(idx, cpuc->used_mask))
+			return idx;
+	}
+	return -EAGAIN;
+}
+
+static void nds32_pmu_start(struct nds32_pmu *cpu_pmu)
+{
+	unsigned long flags;
+	unsigned int val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/* Enable all counters , NDS PFM has 3 counters */
+	val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+	val |= (PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]);
+	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu)
+{
+	unsigned long flags;
+	unsigned int val;
+	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+
+	raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+	/* Disable all counters , NDS PFM has 3 counters */
+	val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+	val &= ~(PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]);
+	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+
+	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void nds32_pmu_reset(void *info)
+{
+	u32 val = 0;
+
+	val |= (PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+	__nds32__mtsr(val, NDS32_SR_PFM_CTL);
+	__nds32__mtsr(0, NDS32_SR_PFM_CTL);
+	__nds32__mtsr(0, NDS32_SR_PFMC0);
+	__nds32__mtsr(0, NDS32_SR_PFMC1);
+	__nds32__mtsr(0, NDS32_SR_PFMC2);
+}
+
+static void nds32_pmu_init(struct nds32_pmu *cpu_pmu)
+{
+	cpu_pmu->handle_irq = nds32_pmu_handle_irq;
+	cpu_pmu->enable = nds32_pmu_enable_event;
+	cpu_pmu->disable = nds32_pmu_disable_event;
+	cpu_pmu->read_counter = nds32_pmu_read_counter;
+	cpu_pmu->write_counter = nds32_pmu_write_counter;
+	cpu_pmu->get_event_idx = nds32_pmu_get_event_idx;
+	cpu_pmu->start = nds32_pmu_start;
+	cpu_pmu->stop = nds32_pmu_stop;
+	cpu_pmu->reset = nds32_pmu_reset;
+	cpu_pmu->max_period = 0xFFFFFFFF;	/* Maximum counts */
+};
+
+static u32 nds32_read_num_pfm_events(void)
+{
+	/* NDS32 SPAv3 PMU support 3 counter */
+	return 3;
+}
+
+static int device_pmu_init(struct nds32_pmu *cpu_pmu)
+{
+	nds32_pmu_init(cpu_pmu);
+	/*
+	 * This name should be devive-specific name, whatever you like :)
+	 * I think "PMU" will be a good generic name.
+	 */
+	cpu_pmu->name = "nds32v3-pmu";
+	cpu_pmu->map_event = nds32_spav3_map_event;
+	cpu_pmu->num_events = nds32_read_num_pfm_events();
+	cpu_pmu->set_event_filter = nds32_pmu_set_event_filter;
+	return 0;
+}
+
+/*
+ * CPU PMU identification and probing.
+ */
+static int probe_current_pmu(struct nds32_pmu *pmu)
+{
+	int ret;
+
+	get_cpu();
+	ret = -ENODEV;
+	/*
+	 * If ther are various CPU types with its own PMU, initialize with
+	 *
+	 * the corresponding one
+	 */
+	device_pmu_init(pmu);
+	put_cpu();
+	return ret;
+}
+
+static void nds32_pmu_enable(struct pmu *pmu)
+{
+	struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu);
+	struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
+	int enabled = bitmap_weight(hw_events->used_mask,
+				    nds32_pmu->num_events);
+
+	if (enabled)
+		nds32_pmu->start(nds32_pmu);
+}
+
+static void nds32_pmu_disable(struct pmu *pmu)
+{
+	struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu);
+
+	nds32_pmu->stop(nds32_pmu);
+}
+
+static void nds32_pmu_release_hardware(struct nds32_pmu *nds32_pmu)
+{
+	nds32_pmu->free_irq(nds32_pmu);
+	pm_runtime_put_sync(&nds32_pmu->plat_device->dev);
+}
+
+static irqreturn_t nds32_pmu_dispatch_irq(int irq, void *dev)
+{
+	struct nds32_pmu *nds32_pmu = (struct nds32_pmu *)dev;
+	int ret;
+	u64 start_clock, finish_clock;
+
+	start_clock = local_clock();
+	ret = nds32_pmu->handle_irq(irq, dev);
+	finish_clock = local_clock();
+
+	perf_sample_event_took(finish_clock - start_clock);
+	return ret;
+}
+
+static int nds32_pmu_reserve_hardware(struct nds32_pmu *nds32_pmu)
+{
+	int err;
+	struct platform_device *pmu_device = nds32_pmu->plat_device;
+
+	if (!pmu_device)
+		return -ENODEV;
+
+	pm_runtime_get_sync(&pmu_device->dev);
+	err = nds32_pmu->request_irq(nds32_pmu, nds32_pmu_dispatch_irq);
+	if (err) {
+		nds32_pmu_release_hardware(nds32_pmu);
+		return err;
+	}
+
+	return 0;
+}
+
+static int
+validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
+	       struct perf_event *event)
+{
+	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+
+	if (is_software_event(event))
+		return 1;
+
+	if (event->pmu != pmu)
+		return 0;
+
+	if (event->state < PERF_EVENT_STATE_OFF)
+		return 1;
+
+	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
+		return 1;
+
+	return nds32_pmu->get_event_idx(hw_events, event) >= 0;
+}
+
+static int validate_group(struct perf_event *event)
+{
+	struct perf_event *sibling, *leader = event->group_leader;
+	struct pmu_hw_events fake_pmu;
+	DECLARE_BITMAP(fake_used_mask, MAX_COUNTERS);
+	/*
+	 * Initialize the fake PMU. We only need to populate the
+	 * used_mask for the purposes of validation.
+	 */
+	memset(fake_used_mask, 0, sizeof(fake_used_mask));
+
+	if (!validate_event(event->pmu, &fake_pmu, leader))
+		return -EINVAL;
+
+	for_each_sibling_event(sibling, leader) {
+		if (!validate_event(event->pmu, &fake_pmu, sibling))
+			return -EINVAL;
+	}
+
+	if (!validate_event(event->pmu, &fake_pmu, event))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	int mapping;
+
+	mapping = nds32_pmu->map_event(event);
+
+	if (mapping < 0) {
+		pr_debug("event %x:%llx not supported\n", event->attr.type,
+			 event->attr.config);
+		return mapping;
+	}
+
+	/*
+	 * We don't assign an index until we actually place the event onto
+	 * hardware. Use -1 to signify that we haven't decided where to put it
+	 * yet. For SMP systems, each core has it's own PMU so we can't do any
+	 * clever allocation or constraints checking at this point.
+	 */
+	hwc->idx = -1;
+	hwc->config_base = 0;
+	hwc->config = 0;
+	hwc->event_base = 0;
+
+	/*
+	 * Check whether we need to exclude the counter from certain modes.
+	 */
+	if ((!nds32_pmu->set_event_filter ||
+	     nds32_pmu->set_event_filter(hwc, &event->attr)) &&
+	    event_requires_mode_exclusion(&event->attr)) {
+		pr_debug
+			("NDS performance counters do not support mode exclusion\n");
+		return -EOPNOTSUPP;
+	}
+
+	/*
+	 * Store the event encoding into the config_base field.
+	 */
+	hwc->config_base |= (unsigned long)mapping;
+
+	if (!hwc->sample_period) {
+		/*
+		 * For non-sampling runs, limit the sample_period to half
+		 * of the counter width. That way, the new counter value
+		 * is far less likely to overtake the previous one unless
+		 * you have some serious IRQ latency issues.
+		 */
+		hwc->sample_period = nds32_pmu->max_period >> 1;
+		hwc->last_period = hwc->sample_period;
+		local64_set(&hwc->period_left, hwc->sample_period);
+	}
+
+	if (event->group_leader != event) {
+		if (validate_group(event) != 0)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int nds32_pmu_event_init(struct perf_event *event)
+{
+	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+	int err = 0;
+	atomic_t *active_events = &nds32_pmu->active_events;
+
+	/* does not support taken branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	if (nds32_pmu->map_event(event) == -ENOENT)
+		return -ENOENT;
+
+	if (!atomic_inc_not_zero(active_events)) {
+		if (atomic_read(active_events) == 0) {
+			/* Register irq handler */
+			err = nds32_pmu_reserve_hardware(nds32_pmu);
+		}
+
+		if (!err)
+			atomic_inc(active_events);
+	}
+
+	if (err)
+		return err;
+
+	err = __hw_perf_event_init(event);
+
+	return err;
+}
+
+static void nds32_start(struct perf_event *event, int flags)
+{
+	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	/*
+	 * NDS pmu always has to reprogram the period, so ignore
+	 * PERF_EF_RELOAD, see the comment below.
+	 */
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+	hwc->state = 0;
+	/* Set the period for the event. */
+	nds32_pmu_event_set_period(event);
+
+	nds32_pmu->enable(event);
+}
+
+static int nds32_pmu_add(struct perf_event *event, int flags)
+{
+	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
+	struct hw_perf_event *hwc = &event->hw;
+	int idx;
+	int err = 0;
+
+	perf_pmu_disable(event->pmu);
+
+	/* If we don't have a space for the counter then finish early. */
+	idx = nds32_pmu->get_event_idx(hw_events, event);
+	if (idx < 0) {
+		err = idx;
+		goto out;
+	}
+
+	/*
+	 * If there is an event in the counter we are going to use then make
+	 * sure it is disabled.
+	 */
+	event->hw.idx = idx;
+	nds32_pmu->disable(event);
+	hw_events->events[idx] = event;
+
+	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	if (flags & PERF_EF_START)
+		nds32_start(event, PERF_EF_RELOAD);
+
+	/* Propagate our changes to the userspace mapping. */
+	perf_event_update_userpage(event);
+
+out:
+	perf_pmu_enable(event->pmu);
+	return err;
+}
+
+u64 nds32_pmu_event_update(struct perf_event *event)
+{
+	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 delta, prev_raw_count, new_raw_count;
+
+again:
+	prev_raw_count = local64_read(&hwc->prev_count);
+	new_raw_count = nds32_pmu->read_counter(event);
+
+	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+			    new_raw_count) != prev_raw_count) {
+		goto again;
+	}
+	/*
+	 * Whether overflow or not, "unsigned substraction"
+	 * will always get their delta
+	 */
+	delta = (new_raw_count - prev_raw_count) & nds32_pmu->max_period;
+
+	local64_add(delta, &event->count);
+	local64_sub(delta, &hwc->period_left);
+
+	return new_raw_count;
+}
+
+static void nds32_stop(struct perf_event *event, int flags)
+{
+	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	/*
+	 * NDS pmu always has to update the counter, so ignore
+	 * PERF_EF_UPDATE, see comments in nds32_start().
+	 */
+	if (!(hwc->state & PERF_HES_STOPPED)) {
+		nds32_pmu->disable(event);
+		nds32_pmu_event_update(event);
+		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+	}
+}
+
+static void nds32_pmu_del(struct perf_event *event, int flags)
+{
+	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+	struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
+	struct hw_perf_event *hwc = &event->hw;
+	int idx = hwc->idx;
+
+	nds32_stop(event, PERF_EF_UPDATE);
+	hw_events->events[idx] = NULL;
+	clear_bit(idx, hw_events->used_mask);
+
+	perf_event_update_userpage(event);
+}
+
+static void nds32_pmu_read(struct perf_event *event)
+{
+	nds32_pmu_event_update(event);
+}
+
+/* Please refer to SPAv3 for more hardware specific details */
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *nds32_arch_formats_attr[] = {
+	&format_attr_event.attr,
+	NULL,
+};
+
+static struct attribute_group nds32_pmu_format_group = {
+	.name = "format",
+	.attrs = nds32_arch_formats_attr,
+};
+
+static ssize_t nds32_pmu_cpumask_show(struct device *dev,
+				      struct device_attribute *attr,
+				      char *buf)
+{
+	return 0;
+}
+
+static DEVICE_ATTR(cpus, 0444, nds32_pmu_cpumask_show, NULL);
+
+static struct attribute *nds32_pmu_common_attrs[] = {
+	&dev_attr_cpus.attr,
+	NULL,
+};
+
+static struct attribute_group nds32_pmu_common_group = {
+	.attrs = nds32_pmu_common_attrs,
+};
+
+static const struct attribute_group *nds32_pmu_attr_groups[] = {
+	&nds32_pmu_format_group,
+	&nds32_pmu_common_group,
+	NULL,
+};
+
+static void nds32_init(struct nds32_pmu *nds32_pmu)
+{
+	atomic_set(&nds32_pmu->active_events, 0);
+
+	nds32_pmu->pmu = (struct pmu) {
+		.pmu_enable = nds32_pmu_enable,
+		.pmu_disable = nds32_pmu_disable,
+		.attr_groups = nds32_pmu_attr_groups,
+		.event_init = nds32_pmu_event_init,
+		.add = nds32_pmu_add,
+		.del = nds32_pmu_del,
+		.start = nds32_start,
+		.stop = nds32_stop,
+		.read = nds32_pmu_read,
+	};
+}
+
+int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type)
+{
+	nds32_init(nds32_pmu);
+	pm_runtime_enable(&nds32_pmu->plat_device->dev);
+	pr_info("enabled with %s PMU driver, %d counters available\n",
+		nds32_pmu->name, nds32_pmu->num_events);
+	return perf_pmu_register(&nds32_pmu->pmu, nds32_pmu->name, type);
+}
+
+static struct pmu_hw_events *cpu_pmu_get_cpu_events(void)
+{
+	return this_cpu_ptr(&cpu_hw_events);
+}
+
+static int cpu_pmu_request_irq(struct nds32_pmu *cpu_pmu, irq_handler_t handler)
+{
+	int err, irq, irqs;
+	struct platform_device *pmu_device = cpu_pmu->plat_device;
+
+	if (!pmu_device)
+		return -ENODEV;
+
+	irqs = min(pmu_device->num_resources, num_possible_cpus());
+	if (irqs < 1) {
+		pr_err("no irqs for PMUs defined\n");
+		return -ENODEV;
+	}
+
+	irq = platform_get_irq(pmu_device, 0);
+	err = request_irq(irq, handler, IRQF_NOBALANCING, "nds32-pfm",
+			  cpu_pmu);
+	if (err) {
+		pr_err("unable to request IRQ%d for NDS PMU counters\n",
+		       irq);
+		return err;
+	}
+	return 0;
+}
+
+static void cpu_pmu_free_irq(struct nds32_pmu *cpu_pmu)
+{
+	int irq;
+	struct platform_device *pmu_device = cpu_pmu->plat_device;
+
+	irq = platform_get_irq(pmu_device, 0);
+	if (irq >= 0)
+		free_irq(irq, cpu_pmu);
+}
+
+static void cpu_pmu_init(struct nds32_pmu *cpu_pmu)
+{
+	int cpu;
+	struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
+
+	raw_spin_lock_init(&events->pmu_lock);
+
+	cpu_pmu->get_hw_events = cpu_pmu_get_cpu_events;
+	cpu_pmu->request_irq = cpu_pmu_request_irq;
+	cpu_pmu->free_irq = cpu_pmu_free_irq;
+
+	/* Ensure the PMU has sane values out of reset. */
+	if (cpu_pmu->reset)
+		on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
+}
+
+const static struct of_device_id cpu_pmu_of_device_ids[] = {
+	{.compatible = "andestech,nds32v3-pmu",
+	 .data = device_pmu_init},
+	{},
+};
+
+static int cpu_pmu_device_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *of_id;
+	int (*init_fn)(struct nds32_pmu *nds32_pmu);
+	struct device_node *node = pdev->dev.of_node;
+	struct nds32_pmu *pmu;
+	int ret = -ENODEV;
+
+	if (cpu_pmu) {
+		pr_notice("[perf] attempt to register multiple PMU devices!\n");
+		return -ENOSPC;
+	}
+
+	pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
+	if (!pmu)
+		return -ENOMEM;
+
+	of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node);
+	if (node && of_id) {
+		init_fn = of_id->data;
+		ret = init_fn(pmu);
+	} else {
+		ret = probe_current_pmu(pmu);
+	}
+
+	if (ret) {
+		pr_notice("[perf] failed to probe PMU!\n");
+		goto out_free;
+	}
+
+	cpu_pmu = pmu;
+	cpu_pmu->plat_device = pdev;
+	cpu_pmu_init(cpu_pmu);
+	ret = nds32_pmu_register(cpu_pmu, PERF_TYPE_RAW);
+
+	if (!ret)
+		return 0;
+
+out_free:
+	pr_notice("[perf] failed to register PMU devices!\n");
+	kfree(pmu);
+	return ret;
+}
+
+static struct platform_driver cpu_pmu_driver = {
+	.driver = {
+		   .name = "nds32-pfm",
+		   .of_match_table = cpu_pmu_of_device_ids,
+		   },
+	.probe = cpu_pmu_device_probe,
+	.id_table = cpu_pmu_plat_device_ids,
+};
+
+static int __init register_pmu_driver(void)
+{
+	int err = 0;
+
+	err = platform_driver_register(&cpu_pmu_driver);
+	if (err)
+		pr_notice("[perf] PMU initialization failed\n");
+	else
+		pr_notice("[perf] PMU initialization done\n");
+
+	return err;
+}
+
+device_initcall(register_pmu_driver);
+
+/*
+ * References: arch/nds32/kernel/traps.c:__dump()
+ * You will need to know the NDS ABI first.
+ */
+static int unwind_frame_kernel(struct stackframe *frame)
+{
+	int graph = 0;
+#ifdef CONFIG_FRAME_POINTER
+	/* 0x3 means misalignment */
+	if (!kstack_end((void *)frame->fp) &&
+	    !((unsigned long)frame->fp & 0x3) &&
+	    ((unsigned long)frame->fp >= TASK_SIZE)) {
+		/*
+		 *	The array index is based on the ABI, the below graph
+		 *	illustrate the reasons.
+		 *	Function call procedure: "smw" and "lmw" will always
+		 *	update SP and FP for you automatically.
+		 *
+		 *	Stack                                 Relative Address
+		 *	|  |                                          0
+		 *	----
+		 *	|LP| <-- SP(before smw)  <-- FP(after smw)   -1
+		 *	----
+		 *	|FP|                                         -2
+		 *	----
+		 *	|  | <-- SP(after smw)                       -3
+		 */
+		frame->lp = ((unsigned long *)frame->fp)[-1];
+		frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET];
+		/* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */
+		if (__kernel_text_address(frame->lp))
+			frame->lp = ftrace_graph_ret_addr
+						(NULL, &graph, frame->lp, NULL);
+
+		return 0;
+	} else {
+		return -EPERM;
+	}
+#else
+	/*
+	 * You can refer to arch/nds32/kernel/traps.c:__dump()
+	 * Treat "sp" as "fp", but the "sp" is one frame ahead of "fp".
+	 * And, the "sp" is not always correct.
+	 *
+	 *   Stack                                 Relative Address
+	 *   |  |                                          0
+	 *   ----
+	 *   |LP| <-- SP(before smw)                      -1
+	 *   ----
+	 *   |  | <-- SP(after smw)                       -2
+	 *   ----
+	 */
+	if (!kstack_end((void *)frame->sp)) {
+		frame->lp = ((unsigned long *)frame->sp)[1];
+		/* TODO: How to deal with the value in first
+		 * "sp" is not correct?
+		 */
+		if (__kernel_text_address(frame->lp))
+			frame->lp = ftrace_graph_ret_addr
+						(tsk, &graph, frame->lp, NULL);
+
+		frame->sp = ((unsigned long *)frame->sp) + 1;
+
+		return 0;
+	} else {
+		return -EPERM;
+	}
+#endif
+}
+
+static void notrace
+walk_stackframe(struct stackframe *frame,
+		int (*fn_record)(struct stackframe *, void *),
+		void *data)
+{
+	while (1) {
+		int ret;
+
+		if (fn_record(frame, data))
+			break;
+
+		ret = unwind_frame_kernel(frame);
+		if (ret < 0)
+			break;
+	}
+}
+
+/*
+ * Gets called by walk_stackframe() for every stackframe. This will be called
+ * whist unwinding the stackframe and is like a subroutine return so we use
+ * the PC.
+ */
+static int callchain_trace(struct stackframe *fr, void *data)
+{
+	struct perf_callchain_entry_ctx *entry = data;
+
+	perf_callchain_store(entry, fr->lp);
+	return 0;
+}
+
+/*
+ * Get the return address for a single stackframe and return a pointer to the
+ * next frame tail.
+ */
+static unsigned long
+user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp)
+{
+	struct frame_tail buftail;
+	unsigned long lp = 0;
+	unsigned long *user_frame_tail =
+		(unsigned long *)(fp - (unsigned long)sizeof(buftail));
+
+	/* Check accessibility of one struct frame_tail beyond */
+	if (!access_ok(user_frame_tail, sizeof(buftail)))
+		return 0;
+	if (__copy_from_user_inatomic
+		(&buftail, user_frame_tail, sizeof(buftail)))
+		return 0;
+
+	/*
+	 * Refer to unwind_frame_kernel() for more illurstration
+	 */
+	lp = buftail.stack_lp;  /* ((unsigned long *)fp)[-1] */
+	fp = buftail.stack_fp;  /* ((unsigned long *)fp)[FP_OFFSET] */
+	perf_callchain_store(entry, lp);
+	return fp;
+}
+
+static unsigned long
+user_backtrace_opt_size(struct perf_callchain_entry_ctx *entry,
+			unsigned long fp)
+{
+	struct frame_tail_opt_size buftail;
+	unsigned long lp = 0;
+
+	unsigned long *user_frame_tail =
+		(unsigned long *)(fp - (unsigned long)sizeof(buftail));
+
+	/* Check accessibility of one struct frame_tail beyond */
+	if (!access_ok(user_frame_tail, sizeof(buftail)))
+		return 0;
+	if (__copy_from_user_inatomic
+		(&buftail, user_frame_tail, sizeof(buftail)))
+		return 0;
+
+	/*
+	 * Refer to unwind_frame_kernel() for more illurstration
+	 */
+	lp = buftail.stack_lp;  /* ((unsigned long *)fp)[-1] */
+	fp = buftail.stack_fp;  /* ((unsigned long *)fp)[FP_OFFSET] */
+
+	perf_callchain_store(entry, lp);
+	return fp;
+}
+
+/*
+ * This will be called when the target is in user mode
+ * This function will only be called when we use
+ * "PERF_SAMPLE_CALLCHAIN" in
+ * kernel/events/core.c:perf_prepare_sample()
+ *
+ * How to trigger perf_callchain_[user/kernel] :
+ * $ perf record -e cpu-clock --call-graph fp ./program
+ * $ perf report --call-graph
+ */
+unsigned long leaf_fp;
+void
+perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+		    struct pt_regs *regs)
+{
+	unsigned long fp = 0;
+	unsigned long gp = 0;
+	unsigned long lp = 0;
+	unsigned long sp = 0;
+	unsigned long *user_frame_tail;
+
+	leaf_fp = 0;
+
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		/* We don't support guest os callchain now */
+		return;
+	}
+
+	perf_callchain_store(entry, regs->ipc);
+	fp = regs->fp;
+	gp = regs->gp;
+	lp = regs->lp;
+	sp = regs->sp;
+	if (entry->nr < PERF_MAX_STACK_DEPTH &&
+	    (unsigned long)fp && !((unsigned long)fp & 0x7) && fp > sp) {
+		user_frame_tail =
+			(unsigned long *)(fp - (unsigned long)sizeof(fp));
+
+		if (!access_ok(user_frame_tail, sizeof(fp)))
+			return;
+
+		if (__copy_from_user_inatomic
+			(&leaf_fp, user_frame_tail, sizeof(fp)))
+			return;
+
+		if (leaf_fp == lp) {
+			/*
+			 * Maybe this is non leaf function
+			 * with optimize for size,
+			 * or maybe this is the function
+			 * with optimize for size
+			 */
+			struct frame_tail buftail;
+
+			user_frame_tail =
+				(unsigned long *)(fp -
+					(unsigned long)sizeof(buftail));
+
+			if (!access_ok(user_frame_tail, sizeof(buftail)))
+				return;
+
+			if (__copy_from_user_inatomic
+				(&buftail, user_frame_tail, sizeof(buftail)))
+				return;
+
+			if (buftail.stack_fp == gp) {
+				/* non leaf function with optimize
+				 * for size condition
+				 */
+				struct frame_tail_opt_size buftail_opt_size;
+
+				user_frame_tail =
+					(unsigned long *)(fp - (unsigned long)
+						sizeof(buftail_opt_size));
+
+				if (!access_ok(user_frame_tail,
+					       sizeof(buftail_opt_size)))
+					return;
+
+				if (__copy_from_user_inatomic
+				   (&buftail_opt_size, user_frame_tail,
+				   sizeof(buftail_opt_size)))
+					return;
+
+				perf_callchain_store(entry, lp);
+				fp = buftail_opt_size.stack_fp;
+
+				while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+				       (unsigned long)fp &&
+						!((unsigned long)fp & 0x7) &&
+						fp > sp) {
+					sp = fp;
+					fp = user_backtrace_opt_size(entry, fp);
+				}
+
+			} else {
+				/* this is the function
+				 * without optimize for size
+				 */
+				fp = buftail.stack_fp;
+				perf_callchain_store(entry, lp);
+				while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+				       (unsigned long)fp &&
+						!((unsigned long)fp & 0x7) &&
+						fp > sp) {
+					sp = fp;
+					fp = user_backtrace(entry, fp);
+				}
+			}
+		} else {
+			/* this is leaf function */
+			fp = leaf_fp;
+			perf_callchain_store(entry, lp);
+
+			/* previous function callcahin  */
+			while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+			       (unsigned long)fp &&
+				   !((unsigned long)fp & 0x7) && fp > sp) {
+				sp = fp;
+				fp = user_backtrace(entry, fp);
+			}
+		}
+		return;
+	}
+}
+
+/* This will be called when the target is in kernel mode */
+void
+perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+		      struct pt_regs *regs)
+{
+	struct stackframe fr;
+
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		/* We don't support guest os callchain now */
+		return;
+	}
+	fr.fp = regs->fp;
+	fr.lp = regs->lp;
+	fr.sp = regs->sp;
+	walk_stackframe(&fr, callchain_trace, entry);
+}
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+	/* However, NDS32 does not support virtualization */
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+		return perf_guest_cbs->get_guest_ip();
+
+	return instruction_pointer(regs);
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+	int misc = 0;
+
+	/* However, NDS32 does not support virtualization */
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		if (perf_guest_cbs->is_user_mode())
+			misc |= PERF_RECORD_MISC_GUEST_USER;
+		else
+			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+	} else {
+		if (user_mode(regs))
+			misc |= PERF_RECORD_MISC_USER;
+		else
+			misc |= PERF_RECORD_MISC_KERNEL;
+	}
+
+	return misc;
+}
diff --git a/arch/nds32/kernel/pm.c b/arch/nds32/kernel/pm.c
new file mode 100644
index 0000000..ffa8040
--- /dev/null
+++ b/arch/nds32/kernel/pm.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2008-2017 Andes Technology Corporation
+
+#include <linux/init.h>
+#include <linux/suspend.h>
+#include <linux/device.h>
+#include <linux/printk.h>
+#include <asm/suspend.h>
+#include <nds32_intrinsic.h>
+
+unsigned int resume_addr;
+unsigned int *phy_addr_sp_tmp;
+
+static void nds32_suspend2ram(void)
+{
+	pgd_t *pgdv;
+	pud_t *pudv;
+	pmd_t *pmdv;
+	pte_t *ptev;
+
+	pgdv = (pgd_t *)__va((__nds32__mfsr(NDS32_SR_L1_PPTB) &
+		L1_PPTB_mskBASE)) + pgd_index((unsigned int)cpu_resume);
+
+	pudv = pud_offset(pgdv, (unsigned int)cpu_resume);
+	pmdv = pmd_offset(pudv, (unsigned int)cpu_resume);
+	ptev = pte_offset_map(pmdv, (unsigned int)cpu_resume);
+
+	resume_addr = ((*ptev) & TLB_DATA_mskPPN)
+			| ((unsigned int)cpu_resume & 0x00000fff);
+
+	suspend2ram();
+}
+
+static void nds32_suspend_cpu(void)
+{
+	while (!(__nds32__mfsr(NDS32_SR_INT_PEND) & wake_mask))
+		__asm__ volatile ("standby no_wake_grant\n\t");
+}
+
+static int nds32_pm_valid(suspend_state_t state)
+{
+	switch (state) {
+	case PM_SUSPEND_ON:
+	case PM_SUSPEND_STANDBY:
+	case PM_SUSPEND_MEM:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+static int nds32_pm_enter(suspend_state_t state)
+{
+	pr_debug("%s:state:%d\n", __func__, state);
+	switch (state) {
+	case PM_SUSPEND_STANDBY:
+		nds32_suspend_cpu();
+		return 0;
+	case PM_SUSPEND_MEM:
+		nds32_suspend2ram();
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+static const struct platform_suspend_ops nds32_pm_ops = {
+	.valid = nds32_pm_valid,
+	.enter = nds32_pm_enter,
+};
+
+static int __init nds32_pm_init(void)
+{
+	pr_debug("Enter %s\n", __func__);
+	suspend_set_ops(&nds32_pm_ops);
+	return 0;
+}
+late_initcall(nds32_pm_init);
diff --git a/arch/nds32/kernel/process.c b/arch/nds32/kernel/process.c
index 65fda98..9712fd4 100644
--- a/arch/nds32/kernel/process.c
+++ b/arch/nds32/kernel/process.c
@@ -9,15 +9,16 @@
 #include <linux/uaccess.h>
 #include <asm/elf.h>
 #include <asm/proc-fns.h>
+#include <asm/fpu.h>
 #include <linux/ptrace.h>
 #include <linux/reboot.h>
 
-extern void setup_mm_for_reboot(char mode);
-#ifdef CONFIG_PROC_FS
-struct proc_dir_entry *proc_dir_cpu;
-EXPORT_SYMBOL(proc_dir_cpu);
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+struct task_struct *last_task_used_math;
 #endif
 
+extern void setup_mm_for_reboot(char mode);
+
 extern inline void arch_reset(char mode)
 {
 	if (mode == 's') {
@@ -120,20 +121,36 @@
 		regs->uregs[3], regs->uregs[2], regs->uregs[1], regs->uregs[0]);
 	pr_info("  IRQs o%s  Segment %s\n",
 		interrupts_enabled(regs) ? "n" : "ff",
-		segment_eq(get_fs(), get_ds())? "kernel" : "user");
+		segment_eq(get_fs(), KERNEL_DS)? "kernel" : "user");
 }
 
 EXPORT_SYMBOL(show_regs);
 
+void exit_thread(struct task_struct *tsk)
+{
+#if defined(CONFIG_FPU) && defined(CONFIG_LAZY_FPU)
+	if (last_task_used_math == tsk)
+		last_task_used_math = NULL;
+#endif
+}
+
 void flush_thread(void)
 {
+#if defined(CONFIG_FPU)
+	clear_fpu(task_pt_regs(current));
+	clear_used_math();
+# ifdef CONFIG_LAZY_FPU
+	if (last_task_used_math == current)
+		last_task_used_math = NULL;
+# endif
+#endif
 }
 
 DEFINE_PER_CPU(struct task_struct *, __entry_task);
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 int copy_thread(unsigned long clone_flags, unsigned long stack_start,
-	    unsigned long stk_sz, struct task_struct *p)
+		unsigned long stk_sz, struct task_struct *p)
 {
 	struct pt_regs *childregs = task_pt_regs(p);
 
@@ -159,6 +176,22 @@
 	p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
 	p->thread.cpu_context.sp = (unsigned long)childregs;
 
+#if IS_ENABLED(CONFIG_FPU)
+	if (used_math()) {
+# if !IS_ENABLED(CONFIG_LAZY_FPU)
+		unlazy_fpu(current);
+# else
+		preempt_disable();
+		if (last_task_used_math == current)
+			save_fpu(current);
+		preempt_enable();
+# endif
+		p->thread.fpu = current->thread.fpu;
+		clear_fpu(task_pt_regs(p));
+		set_stopped_child_used_math(p);
+	}
+#endif
+
 #ifdef CONFIG_HWZOL
 	childregs->lb = 0;
 	childregs->le = 0;
@@ -168,12 +201,33 @@
 	return 0;
 }
 
+#if IS_ENABLED(CONFIG_FPU)
+struct task_struct *_switch_fpu(struct task_struct *prev, struct task_struct *next)
+{
+#if !IS_ENABLED(CONFIG_LAZY_FPU)
+	unlazy_fpu(prev);
+#endif
+	if (!(next->flags & PF_KTHREAD))
+		clear_fpu(task_pt_regs(next));
+	return prev;
+}
+#endif
+
 /*
  * fill in the fpe structure for a core dump...
  */
 int dump_fpu(struct pt_regs *regs, elf_fpregset_t * fpu)
 {
 	int fpvalid = 0;
+#if IS_ENABLED(CONFIG_FPU)
+	struct task_struct *tsk = current;
+
+	fpvalid = tsk_used_math(tsk);
+	if (fpvalid) {
+		lose_fpu();
+		memcpy(fpu, &tsk->thread.fpu, sizeof(*fpu));
+	}
+#endif
 	return fpvalid;
 }
 
diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c
index 63a1a5e..31d29d9 100644
--- a/arch/nds32/kernel/setup.c
+++ b/arch/nds32/kernel/setup.c
@@ -2,9 +2,8 @@
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #include <linux/cpu.h>
-#include <linux/bootmem.h>
-#include <linux/seq_file.h>
 #include <linux/memblock.h>
+#include <linux/seq_file.h>
 #include <linux/console.h>
 #include <linux/screen_info.h>
 #include <linux/delay.h>
@@ -16,6 +15,7 @@
 #include <asm/proc-fns.h>
 #include <asm/cache_info.h>
 #include <asm/elf.h>
+#include <asm/fpu.h>
 #include <nds32_intrinsic.h>
 
 #define HWCAP_MFUSR_PC		0x000001
@@ -39,8 +39,10 @@
 #define HWCAP_FPU_DP		0x040000
 #define HWCAP_V2		0x080000
 #define HWCAP_DX_REGS		0x100000
+#define HWCAP_HWPRE		0x200000
 
 unsigned long cpu_id, cpu_rev, cpu_cfgid;
+bool has_fpu = false;
 char cpu_series;
 char *endianness = NULL;
 
@@ -71,8 +73,10 @@
 	"div",
 	"mac",
 	"l2c",
-	"dx_regs",
+	"fpu_dp",
 	"v2",
+	"dx_regs",
+	"hw_pre",
 	NULL,
 };
 
@@ -137,6 +141,11 @@
 		    (aliasing_num - 1) << PAGE_SHIFT;
 	}
 #endif
+#ifdef CONFIG_FPU
+	/* Disable fpu and enable when it is used. */
+	if (has_fpu)
+		disable_fpu();
+#endif
 }
 
 static void __init setup_cpuinfo(void)
@@ -181,9 +190,10 @@
 	if (cpu_cfgid & 0x0004)
 		elf_hwcap |= HWCAP_EXT2;
 
-	if (cpu_cfgid & 0x0008)
+	if (cpu_cfgid & 0x0008) {
 		elf_hwcap |= HWCAP_FPU;
-
+		has_fpu = true;
+	}
 	if (cpu_cfgid & 0x0010)
 		elf_hwcap |= HWCAP_STRING;
 
@@ -213,6 +223,11 @@
 	if (__nds32__mfsr(NDS32_SR_MSC_CFG) & MSC_CFG_mskL2C)
 		elf_hwcap |= HWCAP_L2C;
 
+#ifdef CONFIG_HW_PRE
+	if (__nds32__mfsr(NDS32_SR_MISC_CTL) & MISC_CTL_makHWPRE_EN)
+		elf_hwcap |= HWCAP_HWPRE;
+#endif
+
 	tmp = __nds32__mfsr(NDS32_SR_CACHE_CTL);
 	if (!IS_ENABLED(CONFIG_CPU_DCACHE_DISABLE))
 		tmp |= CACHE_CTL_mskDC_EN;
diff --git a/arch/nds32/kernel/signal.c b/arch/nds32/kernel/signal.c
index 5d01f6e..330b19f 100644
--- a/arch/nds32/kernel/signal.c
+++ b/arch/nds32/kernel/signal.c
@@ -12,6 +12,7 @@
 #include <asm/cacheflush.h>
 #include <asm/ucontext.h>
 #include <asm/unistd.h>
+#include <asm/fpu.h>
 
 #include <asm/ptrace.h>
 #include <asm/vdso.h>
@@ -20,6 +21,60 @@
 	struct siginfo info;
 	struct ucontext uc;
 };
+#if IS_ENABLED(CONFIG_FPU)
+static inline int restore_sigcontext_fpu(struct pt_regs *regs,
+					 struct sigcontext __user *sc)
+{
+	struct task_struct *tsk = current;
+	unsigned long used_math_flag;
+	int ret = 0;
+
+	clear_used_math();
+	__get_user_error(used_math_flag, &sc->used_math_flag, ret);
+
+	if (!used_math_flag)
+		return 0;
+	set_used_math();
+
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+	preempt_disable();
+	if (current == last_task_used_math) {
+		last_task_used_math = NULL;
+		disable_ptreg_fpu(regs);
+	}
+	preempt_enable();
+#else
+	clear_fpu(regs);
+#endif
+
+	return __copy_from_user(&tsk->thread.fpu, &sc->fpu,
+				sizeof(struct fpu_struct));
+}
+
+static inline int setup_sigcontext_fpu(struct pt_regs *regs,
+				       struct sigcontext __user *sc)
+{
+	struct task_struct *tsk = current;
+	int ret = 0;
+
+	__put_user_error(used_math(), &sc->used_math_flag, ret);
+
+	if (!used_math())
+		return ret;
+
+	preempt_disable();
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+	if (last_task_used_math == tsk)
+		save_fpu(last_task_used_math);
+#else
+	unlazy_fpu(tsk);
+#endif
+	ret = __copy_to_user(&sc->fpu, &tsk->thread.fpu,
+			     sizeof(struct fpu_struct));
+	preempt_enable();
+	return ret;
+}
+#endif
 
 static int restore_sigframe(struct pt_regs *regs,
 			    struct rt_sigframe __user * sf)
@@ -69,7 +124,9 @@
 	__get_user_error(regs->le, &sf->uc.uc_mcontext.zol.nds32_le, err);
 	__get_user_error(regs->lb, &sf->uc.uc_mcontext.zol.nds32_lb, err);
 #endif
-
+#if IS_ENABLED(CONFIG_FPU)
+	err |= restore_sigcontext_fpu(regs, &sf->uc.uc_mcontext);
+#endif
 	/*
 	 * Avoid sys_rt_sigreturn() restarting.
 	 */
@@ -94,7 +151,7 @@
 
 	frame = (struct rt_sigframe __user *)regs->sp;
 
-	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+	if (!access_ok(frame, sizeof(*frame)))
 		goto badframe;
 
 	if (restore_sigframe(regs, frame))
@@ -106,7 +163,7 @@
 	return regs->uregs[0];
 
 badframe:
-	force_sig(SIGSEGV, current);
+	force_sig(SIGSEGV);
 	return 0;
 }
 
@@ -153,6 +210,9 @@
 	__put_user_error(regs->le, &sf->uc.uc_mcontext.zol.nds32_le, err);
 	__put_user_error(regs->lb, &sf->uc.uc_mcontext.zol.nds32_lb, err);
 #endif
+#if IS_ENABLED(CONFIG_FPU)
+	err |= setup_sigcontext_fpu(regs, &sf->uc.uc_mcontext);
+#endif
 
 	__put_user_error(current->thread.trap_no, &sf->uc.uc_mcontext.trap_no,
 			 err);
@@ -215,7 +275,7 @@
 	    get_sigframe(ksig, regs, sizeof(*frame));
 	int err = 0;
 
-	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+	if (!access_ok(frame, sizeof(*frame)))
 		return -EFAULT;
 
 	__put_user_error(0, &frame->uc.uc_flags, err);
@@ -256,6 +316,7 @@
 				regs->uregs[0] = -EINTR;
 				break;
 			}
+			/* Else, fall through */
 		case -ERESTARTNOINTR:
 			regs->uregs[0] = regs->orig_r0;
 			regs->ipc -= 4;
@@ -300,6 +361,7 @@
 		switch (regs->uregs[0]) {
 		case -ERESTART_RESTARTBLOCK:
 			regs->uregs[15] = __NR_restart_syscall;
+			/* Fall through */
 		case -ERESTARTNOHAND:
 		case -ERESTARTSYS:
 		case -ERESTARTNOINTR:
diff --git a/arch/nds32/kernel/sleep.S b/arch/nds32/kernel/sleep.S
new file mode 100644
index 0000000..ca4e61f
--- /dev/null
+++ b/arch/nds32/kernel/sleep.S
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2017 Andes Technology Corporation */
+
+#include <asm/memory.h>
+
+.data
+.global sp_tmp
+sp_tmp:
+.long
+
+.text
+.globl suspend2ram
+.globl cpu_resume
+
+suspend2ram:
+	pushm   $r0, $r31
+#if defined(CONFIG_HWZOL)
+	mfusr   $r0, $lc
+	mfusr   $r1, $le
+	mfusr   $r2, $lb
+#endif
+	mfsr	$r3, $mr0
+	mfsr    $r4, $mr1
+	mfsr    $r5, $mr4
+	mfsr    $r6, $mr6
+	mfsr    $r7, $mr7
+	mfsr    $r8, $mr8
+	mfsr    $r9, $ir0
+	mfsr    $r10, $ir1
+	mfsr    $r11, $ir2
+	mfsr    $r12, $ir3
+	mfsr    $r13, $ir9
+	mfsr    $r14, $ir10
+	mfsr    $r15, $ir12
+	mfsr    $r16, $ir13
+	mfsr    $r17, $ir14
+	mfsr    $r18, $ir15
+	pushm   $r0, $r19
+#if defined(CONFIG_FPU)
+	jal	store_fpu_for_suspend
+#endif
+	tlbop	FlushAll
+	isb
+
+	// transfer $sp from va to pa
+	sethi	$r0, hi20(PAGE_OFFSET)
+	ori	$r0, $r0, lo12(PAGE_OFFSET)
+	movi	$r2, PHYS_OFFSET
+	sub	$r1, $sp, $r0
+	add	$r2, $r1, $r2
+
+	// store pa($sp) to sp_tmp
+	sethi 	$r1, hi20(sp_tmp)
+	swi	$r2, [$r1 + lo12(sp_tmp)]
+
+	pushm	$r16, $r25
+	pushm	$r29, $r30
+#ifdef	CONFIG_CACHE_L2
+	jal	dcache_wb_all_level
+#else
+	jal	cpu_dcache_wb_all
+#endif
+	popm	$r29, $r30
+	popm	$r16, $r25
+
+	// get wake_mask and loop in standby
+	la	$r1, wake_mask
+	lwi	$r1, [$r1]
+self_loop:
+	standby wake_grant
+	mfsr	$r2, $ir15
+	and	$r2, $r1, $r2
+	beqz	$r2, self_loop
+
+	// set ipc to resume address
+	la	$r1, resume_addr
+	lwi	$r1, [$r1]
+	mtsr	$r1, $ipc
+	isb
+
+	// reset psw, turn off the address translation
+	li      $r2, 0x7000a
+	mtsr    $r2, $ipsw
+	isb
+
+	iret
+cpu_resume:
+	// translate the address of sp_tmp variable to pa
+	la	$r1, sp_tmp
+	sethi   $r0, hi20(PAGE_OFFSET)
+	ori     $r0, $r0, lo12(PAGE_OFFSET)
+	movi    $r2, PHYS_OFFSET
+	sub     $r1, $r1, $r0
+	add     $r1, $r1, $r2
+
+	// access the sp_tmp to get stack pointer
+	lwi	$sp, [$r1]
+
+	popm	$r0, $r19
+#if defined(CONFIG_HWZOL)
+	mtusr   $r0, $lb
+	mtusr   $r1, $lc
+	mtusr   $r2, $le
+#endif
+	mtsr	$r3, $mr0
+	mtsr    $r4, $mr1
+	mtsr    $r5, $mr4
+	mtsr    $r6, $mr6
+	mtsr    $r7, $mr7
+	mtsr    $r8, $mr8
+	// set original psw to ipsw
+	mtsr    $r9, $ir1
+
+	mtsr    $r11, $ir2
+	mtsr    $r12, $ir3
+
+	// set ipc to RR
+	la	$r13, RR
+	mtsr	$r13, $ir9
+
+	mtsr    $r14, $ir10
+	mtsr    $r15, $ir12
+	mtsr    $r16, $ir13
+	mtsr    $r17, $ir14
+	mtsr    $r18, $ir15
+	popm    $r0, $r31
+
+	isb
+	iret
+RR:
+	ret
diff --git a/arch/nds32/kernel/sys_nds32.c b/arch/nds32/kernel/sys_nds32.c
index 9de93ab..cb2d1e2 100644
--- a/arch/nds32/kernel/sys_nds32.c
+++ b/arch/nds32/kernel/sys_nds32.c
@@ -6,6 +6,8 @@
 
 #include <asm/cachectl.h>
 #include <asm/proc-fns.h>
+#include <asm/fpu.h>
+#include <asm/fp_udfiex_crtl.h>
 
 SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
 	       unsigned long, prot, unsigned long, flags,
@@ -48,3 +50,35 @@
 
 	return 0;
 }
+
+SYSCALL_DEFINE2(fp_udfiex_crtl, unsigned int, cmd, unsigned int, act)
+{
+#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
+	int old_udf_iex;
+
+	if (!used_math()) {
+		load_fpu(&init_fpuregs);
+		current->thread.fpu.UDF_IEX_trap = init_fpuregs.UDF_IEX_trap;
+		set_used_math();
+	}
+
+	old_udf_iex = current->thread.fpu.UDF_IEX_trap;
+	act &= (FPCSR_mskUDFE | FPCSR_mskIEXE);
+
+	switch (cmd) {
+	case DISABLE_UDF_IEX_TRAP:
+		current->thread.fpu.UDF_IEX_trap &= ~act;
+		break;
+	case ENABLE_UDF_IEX_TRAP:
+		current->thread.fpu.UDF_IEX_trap |= act;
+		break;
+	case GET_UDF_IEX_TRAP:
+		break;
+	default:
+		return -EINVAL;
+	}
+	return old_udf_iex;
+#else
+	return -ENOTSUPP;
+#endif
+}
diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c
index 1496aab..f4d386b 100644
--- a/arch/nds32/kernel/traps.c
+++ b/arch/nds32/kernel/traps.c
@@ -12,6 +12,7 @@
 
 #include <asm/proc-fns.h>
 #include <asm/unistd.h>
+#include <asm/fpu.h>
 
 #include <linux/ptrace.h>
 #include <nds32_intrinsic.h>
@@ -204,7 +205,7 @@
 	}
 
 	force_sig_fault(SIGILL, ILL_ILLTRP,
-			(void __user *)instruction_pointer(regs) - 4, current);
+			(void __user *)instruction_pointer(regs) - 4);
 	die_if_kernel("Oops - bad syscall", regs, n);
 	return regs->uregs[0];
 }
@@ -254,14 +255,15 @@
 	cpu_cache_wbinval_page(base, true);
 }
 
-void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
-		  int error_code, int si_code)
+static void send_sigtrap(struct pt_regs *regs, int error_code, int si_code)
 {
+	struct task_struct *tsk = current;
+
 	tsk->thread.trap_no = ENTRY_DEBUG_RELATED;
 	tsk->thread.error_code = error_code;
 
 	force_sig_fault(SIGTRAP, si_code,
-			(void __user *)instruction_pointer(regs), tsk);
+			(void __user *)instruction_pointer(regs));
 }
 
 void do_debug_trap(unsigned long entry, unsigned long addr,
@@ -273,7 +275,7 @@
 
 	if (user_mode(regs)) {
 		/* trap_signal */
-		send_sigtrap(current, regs, 0, TRAP_BRKPT);
+		send_sigtrap(regs, 0, TRAP_BRKPT);
 	} else {
 		/* kernel_trap */
 		if (!fixup_exception(regs))
@@ -287,7 +289,7 @@
 	show_regs(regs);
 	if (!user_mode(regs))
 		do_exit(SIGKILL);
-	force_sig(SIGKILL, current);
+	force_sig(SIGKILL);
 }
 
 void unhandled_exceptions(unsigned long entry, unsigned long addr,
@@ -298,7 +300,7 @@
 	show_regs(regs);
 	if (!user_mode(regs))
 		do_exit(SIGKILL);
-	force_sig(SIGKILL, current);
+	force_sig(SIGKILL);
 }
 
 extern int do_page_fault(unsigned long entry, unsigned long addr,
@@ -325,7 +327,7 @@
 	show_regs(regs);
 	if (!user_mode(regs))
 		do_exit(SIGILL);
-	force_sig(SIGILL, current);
+	force_sig(SIGILL);
 }
 
 #ifdef CONFIG_ALIGNMENT_TRAP
@@ -357,6 +359,21 @@
 	} else if (type == ETYPE_RESERVED_INSTRUCTION) {
 		/* Reserved instruction */
 		do_revinsn(regs);
+	} else if (type == ETYPE_COPROCESSOR) {
+		/* Coprocessor */
+#if IS_ENABLED(CONFIG_FPU)
+		unsigned int fucop_exist = __nds32__mfsr(NDS32_SR_FUCOP_EXIST);
+		unsigned int cpid = ((itype & ITYPE_mskCPID) >> ITYPE_offCPID);
+
+		if ((cpid == FPU_CPID) &&
+		    (fucop_exist & FUCOP_EXIST_mskCP0ISFPU)) {
+			unsigned int subtype = (itype & ITYPE_mskSTYPE);
+
+			if (true == do_fpu_exception(subtype, regs))
+				return;
+		}
+#endif
+		unhandled_exceptions(entry, addr, type, regs);
 	} else if (type == ETYPE_TRAP && swid == SWID_RAISE_INTERRUPT_LEVEL) {
 		/* trap, used on v3 EDM target debugging workaround */
 		/*
diff --git a/arch/nds32/kernel/vdso.c b/arch/nds32/kernel/vdso.c
index 016f158..90bcae6 100644
--- a/arch/nds32/kernel/vdso.c
+++ b/arch/nds32/kernel/vdso.c
@@ -220,6 +220,7 @@
 	vdso_data->xtime_coarse_sec = tk->xtime_sec;
 	vdso_data->xtime_coarse_nsec = tk->tkr_mono.xtime_nsec >>
 	    tk->tkr_mono.shift;
+	vdso_data->hrtimer_res = hrtimer_resolution;
 	vdso_write_end(vdso_data);
 }
 
diff --git a/arch/nds32/kernel/vdso/.gitignore b/arch/nds32/kernel/vdso/.gitignore
new file mode 100644
index 0000000..f8b69d8
--- /dev/null
+++ b/arch/nds32/kernel/vdso/.gitignore
@@ -0,0 +1 @@
+vdso.lds
diff --git a/arch/nds32/kernel/vdso/Makefile b/arch/nds32/kernel/vdso/Makefile
index e6c50a7..7c3c1cc 100644
--- a/arch/nds32/kernel/vdso/Makefile
+++ b/arch/nds32/kernel/vdso/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 #
 # Building a vDSO image for AArch64.
 #
@@ -11,10 +12,8 @@
 targets := $(obj-vdso) vdso.so vdso.so.dbg
 obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
 
-ccflags-y := -shared -fno-common -fno-builtin
-ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \
-		$(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
-ccflags-y += -fPIC -Wl,-shared -g
+ccflags-y := -shared -fno-common -fno-builtin -nostdlib -fPIC -Wl,-shared -g \
+	-Wl,-soname=linux-vdso.so.1 -Wl,--hash-style=sysv
 
 # Disable gcov profiling for VDSO code
 GCOV_PROFILE := n
@@ -28,7 +27,7 @@
 $(obj)/vdso.o : $(obj)/vdso.so
 
 # Link rule for the .so file, .lds has to be first
-$(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso)
+$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
 	$(call if_changed,vdsold)
 
 
@@ -40,9 +39,7 @@
 # Generate VDSO offsets using helper script
 gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
 quiet_cmd_vdsosym = VDSOSYM $@
-define cmd_vdsosym
-	$(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
-endef
+      cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
 
 include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
 	$(call if_changed,vdsosym)
@@ -65,7 +62,7 @@
 
 # Actual build commands
 quiet_cmd_vdsold = VDSOL   $@
-      cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@
+      cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $(real-prereqs) -o $@
 quiet_cmd_vdsoas = VDSOA   $@
       cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $<
 quiet_cmd_vdsocc = VDSOA   $@
diff --git a/arch/nds32/kernel/vdso/gettimeofday.c b/arch/nds32/kernel/vdso/gettimeofday.c
index 038721a..b025818 100644
--- a/arch/nds32/kernel/vdso/gettimeofday.c
+++ b/arch/nds32/kernel/vdso/gettimeofday.c
@@ -208,6 +208,8 @@
 
 notrace int __vdso_clock_getres(clockid_t clk_id, struct timespec *res)
 {
+	struct vdso_data *vdata = __get_datapage();
+
 	if (res == NULL)
 		return 0;
 	switch (clk_id) {
@@ -215,7 +217,7 @@
 	case CLOCK_MONOTONIC:
 	case CLOCK_MONOTONIC_RAW:
 		res->tv_sec = 0;
-		res->tv_nsec = CLOCK_REALTIME_RES;
+		res->tv_nsec = vdata->hrtimer_res;
 		break;
 	case CLOCK_REALTIME_COARSE:
 	case CLOCK_MONOTONIC_COARSE:
diff --git a/arch/nds32/lib/Makefile b/arch/nds32/lib/Makefile
index 0f98401..dddbc15 100644
--- a/arch/nds32/lib/Makefile
+++ b/arch/nds32/lib/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
 lib-y		:= copy_page.o memcpy.o memmove.o   \
 		   memset.o memzero.o \
 		   copy_from_user.o copy_to_user.o clear_user.o
diff --git a/arch/nds32/math-emu/Makefile b/arch/nds32/math-emu/Makefile
new file mode 100644
index 0000000..3bed7e5
--- /dev/null
+++ b/arch/nds32/math-emu/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for the Linux/nds32 kernel FPU emulation.
+#
+
+obj-y	:= fpuemu.o \
+	   fdivd.o fmuld.o fsubd.o faddd.o fs2d.o fsqrtd.o fcmpd.o fnegs.o \
+	   fd2si.o fd2ui.o fd2siz.o fd2uiz.o fsi2d.o fui2d.o \
+	   fdivs.o fmuls.o fsubs.o fadds.o fd2s.o fsqrts.o fcmps.o fnegd.o \
+	   fs2si.o fs2ui.o fs2siz.o fs2uiz.o fsi2s.o fui2s.o
diff --git a/arch/nds32/math-emu/faddd.c b/arch/nds32/math-emu/faddd.c
new file mode 100644
index 0000000..f7fd4e3
--- /dev/null
+++ b/arch/nds32/math-emu/faddd.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+void faddd(void *ft, void *fa, void *fb)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	FP_DECL_EX;
+
+	FP_UNPACK_DP(A, fa);
+	FP_UNPACK_DP(B, fb);
+
+	FP_ADD_D(R, A, B);
+
+	FP_PACK_DP(ft, R);
+
+	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+
+}
diff --git a/arch/nds32/math-emu/fadds.c b/arch/nds32/math-emu/fadds.c
new file mode 100644
index 0000000..f5af6ca
--- /dev/null
+++ b/arch/nds32/math-emu/fadds.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+void fadds(void *ft, void *fa, void *fb)
+{
+	FP_DECL_S(A);
+	FP_DECL_S(B);
+	FP_DECL_S(R);
+	FP_DECL_EX;
+
+	FP_UNPACK_SP(A, fa);
+	FP_UNPACK_SP(B, fb);
+
+	FP_ADD_S(R, A, B);
+
+	FP_PACK_SP(ft, R);
+
+	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+
+}
diff --git a/arch/nds32/math-emu/fcmpd.c b/arch/nds32/math-emu/fcmpd.c
new file mode 100644
index 0000000..0ea225a
--- /dev/null
+++ b/arch/nds32/math-emu/fcmpd.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+int fcmpd(void *ft, void *fa, void *fb, int cmpop)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_EX;
+	long cmp;
+
+	FP_UNPACK_DP(A, fa);
+	FP_UNPACK_DP(B, fb);
+
+	FP_CMP_D(cmp, A, B, SF_CUN);
+	cmp += 2;
+	if (cmp == SF_CGT)
+		*(long *)ft = 0;
+	else
+		*(long *)ft = (cmp & cmpop) ? 1 : 0;
+
+	return 0;
+}
diff --git a/arch/nds32/math-emu/fcmps.c b/arch/nds32/math-emu/fcmps.c
new file mode 100644
index 0000000..6814807
--- /dev/null
+++ b/arch/nds32/math-emu/fcmps.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+int fcmps(void *ft, void *fa, void *fb, int cmpop)
+{
+	FP_DECL_S(A);
+	FP_DECL_S(B);
+	FP_DECL_EX;
+	long cmp;
+
+	FP_UNPACK_SP(A, fa);
+	FP_UNPACK_SP(B, fb);
+
+	FP_CMP_S(cmp, A, B, SF_CUN);
+	cmp += 2;
+	if (cmp == SF_CGT)
+		*(int *)ft = 0x0;
+	else
+		*(int *)ft = (cmp & cmpop) ? 0x1 : 0x0;
+
+	return 0;
+}
diff --git a/arch/nds32/math-emu/fd2s.c b/arch/nds32/math-emu/fd2s.c
new file mode 100644
index 0000000..1328371
--- /dev/null
+++ b/arch/nds32/math-emu/fd2s.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/double.h>
+#include <math-emu/single.h>
+#include <math-emu/soft-fp.h>
+void fd2s(void *ft, void *fa)
+{
+	FP_DECL_D(A);
+	FP_DECL_S(R);
+	FP_DECL_EX;
+
+	FP_UNPACK_DP(A, fa);
+
+	FP_CONV(S, D, 1, 2, R, A);
+
+	FP_PACK_SP(ft, R);
+
+	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fd2si.c b/arch/nds32/math-emu/fd2si.c
new file mode 100644
index 0000000..fae3e16
--- /dev/null
+++ b/arch/nds32/math-emu/fd2si.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2019 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+
+void fd2si(void *ft, void *fa)
+{
+	int r;
+
+	FP_DECL_D(A);
+	FP_DECL_EX;
+
+	FP_UNPACK_DP(A, fa);
+
+	if (A_c == FP_CLS_INF) {
+		*(int *)ft = (A_s == 0) ? 0x7fffffff : 0x80000000;
+		__FPU_FPCSR |= FP_EX_INVALID;
+	} else if (A_c == FP_CLS_NAN) {
+		*(int *)ft = 0xffffffff;
+		__FPU_FPCSR |= FP_EX_INVALID;
+	} else {
+		FP_TO_INT_ROUND_D(r, A, 32, 1);
+		__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+		*(int *)ft = r;
+	}
+
+}
diff --git a/arch/nds32/math-emu/fd2siz.c b/arch/nds32/math-emu/fd2siz.c
new file mode 100644
index 0000000..92fe677
--- /dev/null
+++ b/arch/nds32/math-emu/fd2siz.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2019 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+
+void fd2si_z(void *ft, void *fa)
+{
+	int r;
+
+	FP_DECL_D(A);
+	FP_DECL_EX;
+
+	FP_UNPACK_DP(A, fa);
+
+	if (A_c == FP_CLS_INF) {
+		*(int *)ft = (A_s == 0) ? 0x7fffffff : 0x80000000;
+		__FPU_FPCSR |= FP_EX_INVALID;
+	} else if (A_c == FP_CLS_NAN) {
+		*(int *)ft = 0xffffffff;
+		__FPU_FPCSR |= FP_EX_INVALID;
+	} else {
+		FP_TO_INT_D(r, A, 32, 1);
+		__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+		*(int *)ft = r;
+	}
+
+}
diff --git a/arch/nds32/math-emu/fd2ui.c b/arch/nds32/math-emu/fd2ui.c
new file mode 100644
index 0000000..a0423b6
--- /dev/null
+++ b/arch/nds32/math-emu/fd2ui.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2019 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+
+void fd2ui(void *ft, void *fa)
+{
+	unsigned int r;
+
+	FP_DECL_D(A);
+	FP_DECL_EX;
+
+	FP_UNPACK_DP(A, fa);
+
+	if (A_c == FP_CLS_INF) {
+		*(unsigned int *)ft = (A_s == 0) ? 0xffffffff : 0x00000000;
+		__FPU_FPCSR |= FP_EX_INVALID;
+	} else if (A_c == FP_CLS_NAN) {
+		*(unsigned int *)ft = 0xffffffff;
+		__FPU_FPCSR |= FP_EX_INVALID;
+	} else {
+		FP_TO_INT_ROUND_D(r, A, 32, 0);
+		__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+		*(unsigned int *)ft = r;
+	}
+
+}
diff --git a/arch/nds32/math-emu/fd2uiz.c b/arch/nds32/math-emu/fd2uiz.c
new file mode 100644
index 0000000..8ae17cf
--- /dev/null
+++ b/arch/nds32/math-emu/fd2uiz.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2019 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+
+void fd2ui_z(void *ft, void *fa)
+{
+	unsigned int r;
+
+	FP_DECL_D(A);
+	FP_DECL_EX;
+
+	FP_UNPACK_DP(A, fa);
+
+	if (A_c == FP_CLS_INF) {
+		*(unsigned int *)ft = (A_s == 0) ? 0xffffffff : 0x00000000;
+		__FPU_FPCSR |= FP_EX_INVALID;
+	} else if (A_c == FP_CLS_NAN) {
+		*(unsigned int *)ft = 0xffffffff;
+		__FPU_FPCSR |= FP_EX_INVALID;
+	} else {
+		FP_TO_INT_D(r, A, 32, 0);
+		__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+		*(unsigned int *)ft = r;
+	}
+
+}
diff --git a/arch/nds32/math-emu/fdivd.c b/arch/nds32/math-emu/fdivd.c
new file mode 100644
index 0000000..458e7e9
--- /dev/null
+++ b/arch/nds32/math-emu/fdivd.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+
+#include <linux/uaccess.h>
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+
+void fdivd(void *ft, void *fa, void *fb)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	FP_DECL_EX;
+
+	FP_UNPACK_DP(A, fa);
+	FP_UNPACK_DP(B, fb);
+
+	if (B_c == FP_CLS_ZERO && A_c != FP_CLS_ZERO)
+		FP_SET_EXCEPTION(FP_EX_DIVZERO);
+
+	FP_DIV_D(R, A, B);
+
+	FP_PACK_DP(ft, R);
+
+	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fdivs.c b/arch/nds32/math-emu/fdivs.c
new file mode 100644
index 0000000..c7d2021
--- /dev/null
+++ b/arch/nds32/math-emu/fdivs.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+void fdivs(void *ft, void *fa, void *fb)
+{
+	FP_DECL_S(A);
+	FP_DECL_S(B);
+	FP_DECL_S(R);
+	FP_DECL_EX;
+
+	FP_UNPACK_SP(A, fa);
+	FP_UNPACK_SP(B, fb);
+
+	if (B_c == FP_CLS_ZERO && A_c != FP_CLS_ZERO)
+		FP_SET_EXCEPTION(FP_EX_DIVZERO);
+
+	FP_DIV_S(R, A, B);
+
+	FP_PACK_SP(ft, R);
+
+	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fmuld.c b/arch/nds32/math-emu/fmuld.c
new file mode 100644
index 0000000..f3c77a4
--- /dev/null
+++ b/arch/nds32/math-emu/fmuld.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+void fmuld(void *ft, void *fa, void *fb)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	FP_DECL_EX;
+
+	FP_UNPACK_DP(A, fa);
+	FP_UNPACK_DP(B, fb);
+
+	FP_MUL_D(R, A, B);
+
+	FP_PACK_DP(ft, R);
+
+	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fmuls.c b/arch/nds32/math-emu/fmuls.c
new file mode 100644
index 0000000..cf150df
--- /dev/null
+++ b/arch/nds32/math-emu/fmuls.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+void fmuls(void *ft, void *fa, void *fb)
+{
+	FP_DECL_S(A);
+	FP_DECL_S(B);
+	FP_DECL_S(R);
+	FP_DECL_EX;
+
+	FP_UNPACK_SP(A, fa);
+	FP_UNPACK_SP(B, fb);
+
+	FP_MUL_S(R, A, B);
+
+	FP_PACK_SP(ft, R);
+
+	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fnegd.c b/arch/nds32/math-emu/fnegd.c
new file mode 100644
index 0000000..de7ea6a
--- /dev/null
+++ b/arch/nds32/math-emu/fnegd.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+void fnegd(void *ft, void *fa)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(R);
+	FP_DECL_EX;
+
+	FP_UNPACK_DP(A, fa);
+
+	FP_NEG_D(R, A);
+
+	FP_PACK_DP(ft, R);
+
+	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fnegs.c b/arch/nds32/math-emu/fnegs.c
new file mode 100644
index 0000000..07270b3
--- /dev/null
+++ b/arch/nds32/math-emu/fnegs.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+void fnegs(void *ft, void *fa)
+{
+	FP_DECL_S(A);
+	FP_DECL_S(R);
+	FP_DECL_EX;
+
+	FP_UNPACK_SP(A, fa);
+
+	FP_NEG_S(R, A);
+
+	FP_PACK_SP(ft, R);
+
+	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fpuemu.c b/arch/nds32/math-emu/fpuemu.c
new file mode 100644
index 0000000..46558a1
--- /dev/null
+++ b/arch/nds32/math-emu/fpuemu.c
@@ -0,0 +1,406 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+
+#include <asm/bitfield.h>
+#include <asm/uaccess.h>
+#include <asm/sfp-machine.h>
+#include <asm/fpuemu.h>
+#include <asm/nds32_fpu_inst.h>
+
+#define DPFROMREG(dp, x) (dp = (void *)((unsigned long *)fpu_reg + 2*x))
+#ifdef __NDS32_EL__
+#define SPFROMREG(sp, x)\
+	((sp) = (void *)((unsigned long *)fpu_reg + (x^1)))
+#else
+#define SPFROMREG(sp, x) ((sp) = (void *)((unsigned long *)fpu_reg + x))
+#endif
+
+#define DEF3OP(name, p, f1, f2) \
+void fpemu_##name##p(void *ft, void *fa, void *fb) \
+{ \
+	f1(fa, fa, fb); \
+	f2(ft, ft, fa); \
+}
+
+#define DEF3OPNEG(name, p, f1, f2, f3) \
+void fpemu_##name##p(void *ft, void *fa, void *fb) \
+{ \
+	f1(fa, fa, fb); \
+	f2(ft, ft, fa); \
+	f3(ft, ft); \
+}
+DEF3OP(fmadd, s, fmuls, fadds);
+DEF3OP(fmsub, s, fmuls, fsubs);
+DEF3OP(fmadd, d, fmuld, faddd);
+DEF3OP(fmsub, d, fmuld, fsubd);
+DEF3OPNEG(fnmadd, s, fmuls, fadds, fnegs);
+DEF3OPNEG(fnmsub, s, fmuls, fsubs, fnegs);
+DEF3OPNEG(fnmadd, d, fmuld, faddd, fnegd);
+DEF3OPNEG(fnmsub, d, fmuld, fsubd, fnegd);
+
+static const unsigned char cmptab[8] = {
+	SF_CEQ,
+	SF_CEQ,
+	SF_CLT,
+	SF_CLT,
+	SF_CLT | SF_CEQ,
+	SF_CLT | SF_CEQ,
+	SF_CUN,
+	SF_CUN
+};
+
+enum ARGTYPE {
+	S1S = 1,
+	S2S,
+	S1D,
+	CS,
+	D1D,
+	D2D,
+	D1S,
+	CD
+};
+union func_t {
+	void (*t)(void *ft, void *fa, void *fb);
+	void (*b)(void *ft, void *fa);
+};
+/*
+ * Emulate a single FPU arithmetic instruction.
+ */
+static int fpu_emu(struct fpu_struct *fpu_reg, unsigned long insn)
+{
+	int rfmt;		/* resulting format */
+	union func_t func;
+	int ftype = 0;
+
+	switch (rfmt = NDS32Insn_OPCODE_COP0(insn)) {
+	case fs1_op:{
+			switch (NDS32Insn_OPCODE_BIT69(insn)) {
+			case fadds_op:
+				func.t = fadds;
+				ftype = S2S;
+				break;
+			case fsubs_op:
+				func.t = fsubs;
+				ftype = S2S;
+				break;
+			case fmadds_op:
+				func.t = fpemu_fmadds;
+				ftype = S2S;
+				break;
+			case fmsubs_op:
+				func.t = fpemu_fmsubs;
+				ftype = S2S;
+				break;
+			case fnmadds_op:
+				func.t = fpemu_fnmadds;
+				ftype = S2S;
+				break;
+			case fnmsubs_op:
+				func.t = fpemu_fnmsubs;
+				ftype = S2S;
+				break;
+			case fmuls_op:
+				func.t = fmuls;
+				ftype = S2S;
+				break;
+			case fdivs_op:
+				func.t = fdivs;
+				ftype = S2S;
+				break;
+			case fs1_f2op_op:
+				switch (NDS32Insn_OPCODE_BIT1014(insn)) {
+				case fs2d_op:
+					func.b = fs2d;
+					ftype = S1D;
+					break;
+				case fs2si_op:
+					func.b = fs2si;
+					ftype = S1S;
+					break;
+				case fs2si_z_op:
+					func.b = fs2si_z;
+					ftype = S1S;
+					break;
+				case fs2ui_op:
+					func.b = fs2ui;
+					ftype = S1S;
+					break;
+				case fs2ui_z_op:
+					func.b = fs2ui_z;
+					ftype = S1S;
+					break;
+				case fsi2s_op:
+					func.b = fsi2s;
+					ftype = S1S;
+					break;
+				case fui2s_op:
+					func.b = fui2s;
+					ftype = S1S;
+					break;
+				case fsqrts_op:
+					func.b = fsqrts;
+					ftype = S1S;
+					break;
+				default:
+					return SIGILL;
+				}
+				break;
+			default:
+				return SIGILL;
+			}
+			break;
+		}
+	case fs2_op:
+		switch (NDS32Insn_OPCODE_BIT69(insn)) {
+		case fcmpeqs_op:
+		case fcmpeqs_e_op:
+		case fcmplts_op:
+		case fcmplts_e_op:
+		case fcmples_op:
+		case fcmples_e_op:
+		case fcmpuns_op:
+		case fcmpuns_e_op:
+			ftype = CS;
+			break;
+		default:
+			return SIGILL;
+		}
+		break;
+	case fd1_op:{
+			switch (NDS32Insn_OPCODE_BIT69(insn)) {
+			case faddd_op:
+				func.t = faddd;
+				ftype = D2D;
+				break;
+			case fsubd_op:
+				func.t = fsubd;
+				ftype = D2D;
+				break;
+			case fmaddd_op:
+				func.t = fpemu_fmaddd;
+				ftype = D2D;
+				break;
+			case fmsubd_op:
+				func.t = fpemu_fmsubd;
+				ftype = D2D;
+				break;
+			case fnmaddd_op:
+				func.t = fpemu_fnmaddd;
+				ftype = D2D;
+				break;
+			case fnmsubd_op:
+				func.t = fpemu_fnmsubd;
+				ftype = D2D;
+				break;
+			case fmuld_op:
+				func.t = fmuld;
+				ftype = D2D;
+				break;
+			case fdivd_op:
+				func.t = fdivd;
+				ftype = D2D;
+				break;
+			case fd1_f2op_op:
+				switch (NDS32Insn_OPCODE_BIT1014(insn)) {
+				case fd2s_op:
+					func.b = fd2s;
+					ftype = D1S;
+					break;
+				case fd2si_op:
+					func.b = fd2si;
+					ftype = D1S;
+					break;
+				case fd2si_z_op:
+					func.b = fd2si_z;
+					ftype = D1S;
+					break;
+				case fd2ui_op:
+					func.b = fd2ui;
+					ftype = D1S;
+					break;
+				case fd2ui_z_op:
+					func.b = fd2ui_z;
+					ftype = D1S;
+					break;
+				case fsi2d_op:
+					func.b = fsi2d;
+					ftype = D1S;
+					break;
+				case fui2d_op:
+					func.b = fui2d;
+					ftype = D1S;
+					break;
+				case fsqrtd_op:
+					func.b = fsqrtd;
+					ftype = D1D;
+					break;
+				default:
+					return SIGILL;
+				}
+				break;
+			default:
+				return SIGILL;
+
+			}
+			break;
+		}
+
+	case fd2_op:
+		switch (NDS32Insn_OPCODE_BIT69(insn)) {
+		case fcmpeqd_op:
+		case fcmpeqd_e_op:
+		case fcmpltd_op:
+		case fcmpltd_e_op:
+		case fcmpled_op:
+		case fcmpled_e_op:
+		case fcmpund_op:
+		case fcmpund_e_op:
+			ftype = CD;
+			break;
+		default:
+			return SIGILL;
+		}
+		break;
+
+	default:
+		return SIGILL;
+	}
+
+	switch (ftype) {
+	case S1S:{
+			void *ft, *fa;
+
+			SPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
+			SPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
+			func.b(ft, fa);
+			break;
+		}
+	case S2S:{
+			void *ft, *fa, *fb;
+
+			SPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
+			SPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
+			SPFROMREG(fb, NDS32Insn_OPCODE_Rb(insn));
+			func.t(ft, fa, fb);
+			break;
+		}
+	case S1D:{
+			void *ft, *fa;
+
+			DPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
+			SPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
+			func.b(ft, fa);
+			break;
+		}
+	case CS:{
+			unsigned int cmpop = NDS32Insn_OPCODE_BIT69(insn);
+			void *ft, *fa, *fb;
+
+			SPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
+			SPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
+			SPFROMREG(fb, NDS32Insn_OPCODE_Rb(insn));
+			if (cmpop < 0x8) {
+				cmpop = cmptab[cmpop];
+				fcmps(ft, fa, fb, cmpop);
+			} else
+				return SIGILL;
+			break;
+		}
+	case D1D:{
+			void *ft, *fa;
+
+			DPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
+			DPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
+			func.b(ft, fa);
+			break;
+		}
+	case D2D:{
+			void *ft, *fa, *fb;
+
+			DPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
+			DPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
+			DPFROMREG(fb, NDS32Insn_OPCODE_Rb(insn));
+			func.t(ft, fa, fb);
+			break;
+		}
+	case D1S:{
+			void *ft, *fa;
+
+			SPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
+			DPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
+			func.b(ft, fa);
+			break;
+		}
+	case CD:{
+			unsigned int cmpop = NDS32Insn_OPCODE_BIT69(insn);
+			void *ft, *fa, *fb;
+
+			SPFROMREG(ft, NDS32Insn_OPCODE_Rt(insn));
+			DPFROMREG(fa, NDS32Insn_OPCODE_Ra(insn));
+			DPFROMREG(fb, NDS32Insn_OPCODE_Rb(insn));
+			if (cmpop < 0x8) {
+				cmpop = cmptab[cmpop];
+				fcmpd(ft, fa, fb, cmpop);
+			} else
+				return SIGILL;
+			break;
+		}
+	default:
+		return SIGILL;
+	}
+
+	/*
+	 * If an exception is required, generate a tidy SIGFPE exception.
+	 */
+#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
+	if (((fpu_reg->fpcsr << 5) & fpu_reg->fpcsr & FPCSR_mskALLE_NO_UDF_IEXE)
+	    || ((fpu_reg->fpcsr << 5) & (fpu_reg->UDF_IEX_trap))) {
+#else
+	if ((fpu_reg->fpcsr << 5) & fpu_reg->fpcsr & FPCSR_mskALLE) {
+#endif
+		return SIGFPE;
+	}
+	return 0;
+}
+
+int do_fpuemu(struct pt_regs *regs, struct fpu_struct *fpu)
+{
+	unsigned long insn = 0, addr = regs->ipc;
+	unsigned long emulpc, contpc;
+	unsigned char *pc = (void *)&insn;
+	char c;
+	int i = 0, ret;
+
+	for (i = 0; i < 4; i++) {
+		if (__get_user(c, (unsigned char *)addr++))
+			return SIGBUS;
+		*pc++ = c;
+	}
+
+	insn = be32_to_cpu(insn);
+
+	emulpc = regs->ipc;
+	contpc = regs->ipc + 4;
+
+	if (NDS32Insn_OPCODE(insn) != cop0_op)
+		return SIGILL;
+
+	switch (NDS32Insn_OPCODE_COP0(insn)) {
+	case fs1_op:
+	case fs2_op:
+	case fd1_op:
+	case fd2_op:
+		{
+			/* a real fpu computation instruction */
+			ret = fpu_emu(fpu, insn);
+			if (!ret)
+				regs->ipc = contpc;
+		}
+		break;
+
+	default:
+		return SIGILL;
+	}
+
+	return ret;
+}
diff --git a/arch/nds32/math-emu/fs2d.c b/arch/nds32/math-emu/fs2d.c
new file mode 100644
index 0000000..0e8db90
--- /dev/null
+++ b/arch/nds32/math-emu/fs2d.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+
+#include <linux/uaccess.h>
+#include <asm/sfp-machine.h>
+#include <math-emu/double.h>
+#include <math-emu/single.h>
+#include <math-emu/soft-fp.h>
+
+void fs2d(void *ft, void *fa)
+{
+	FP_DECL_S(A);
+	FP_DECL_D(R);
+	FP_DECL_EX;
+
+	FP_UNPACK_SP(A, fa);
+
+	FP_CONV(D, S, 2, 1, R, A);
+
+	FP_PACK_DP(ft, R);
+
+	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fs2si.c b/arch/nds32/math-emu/fs2si.c
new file mode 100644
index 0000000..b4931d6
--- /dev/null
+++ b/arch/nds32/math-emu/fs2si.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2019 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+
+void fs2si(void *ft, void *fa)
+{
+	int r;
+
+	FP_DECL_S(A);
+	FP_DECL_EX;
+
+	FP_UNPACK_SP(A, fa);
+
+	if (A_c == FP_CLS_INF) {
+		*(int *)ft = (A_s == 0) ? 0x7fffffff : 0x80000000;
+		__FPU_FPCSR |= FP_EX_INVALID;
+	} else if (A_c == FP_CLS_NAN) {
+		*(int *)ft = 0xffffffff;
+		__FPU_FPCSR |= FP_EX_INVALID;
+	} else {
+		FP_TO_INT_ROUND_S(r, A, 32, 1);
+		__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+		*(int *)ft = r;
+	}
+}
diff --git a/arch/nds32/math-emu/fs2siz.c b/arch/nds32/math-emu/fs2siz.c
new file mode 100644
index 0000000..1c2b99c
--- /dev/null
+++ b/arch/nds32/math-emu/fs2siz.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2019 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+
+void fs2si_z(void *ft, void *fa)
+{
+	int r;
+
+	FP_DECL_S(A);
+	FP_DECL_EX;
+
+	FP_UNPACK_SP(A, fa);
+
+	if (A_c == FP_CLS_INF) {
+		*(int *)ft = (A_s == 0) ? 0x7fffffff : 0x80000000;
+		__FPU_FPCSR |= FP_EX_INVALID;
+	} else if (A_c == FP_CLS_NAN) {
+		*(int *)ft = 0xffffffff;
+		__FPU_FPCSR |= FP_EX_INVALID;
+	} else {
+		FP_TO_INT_S(r, A, 32, 1);
+		__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+		*(int *)ft = r;
+	}
+}
diff --git a/arch/nds32/math-emu/fs2ui.c b/arch/nds32/math-emu/fs2ui.c
new file mode 100644
index 0000000..c337f03
--- /dev/null
+++ b/arch/nds32/math-emu/fs2ui.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2019 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+
+void fs2ui(void *ft, void *fa)
+{
+	unsigned int r;
+
+	FP_DECL_S(A);
+	FP_DECL_EX;
+
+	FP_UNPACK_SP(A, fa);
+
+	if (A_c == FP_CLS_INF) {
+		*(unsigned int *)ft = (A_s == 0) ? 0xffffffff : 0x00000000;
+		__FPU_FPCSR |= FP_EX_INVALID;
+	} else if (A_c == FP_CLS_NAN) {
+		*(unsigned int *)ft = 0xffffffff;
+		__FPU_FPCSR |= FP_EX_INVALID;
+	} else {
+		FP_TO_INT_ROUND_S(r, A, 32, 0);
+		__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+		*(unsigned int *)ft = r;
+	}
+}
diff --git a/arch/nds32/math-emu/fs2uiz.c b/arch/nds32/math-emu/fs2uiz.c
new file mode 100644
index 0000000..22c5e47
--- /dev/null
+++ b/arch/nds32/math-emu/fs2uiz.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2019 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+
+void fs2ui_z(void *ft, void *fa)
+{
+	unsigned int r;
+
+	FP_DECL_S(A);
+	FP_DECL_EX;
+
+	FP_UNPACK_SP(A, fa);
+
+	if (A_c == FP_CLS_INF) {
+		*(unsigned int *)ft = (A_s == 0) ? 0xffffffff : 0x00000000;
+		__FPU_FPCSR |= FP_EX_INVALID;
+	} else if (A_c == FP_CLS_NAN) {
+		*(unsigned int *)ft = 0xffffffff;
+		__FPU_FPCSR |= FP_EX_INVALID;
+	} else {
+		FP_TO_INT_S(r, A, 32, 0);
+		__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+		*(unsigned int *)ft = r;
+	}
+
+}
diff --git a/arch/nds32/math-emu/fsi2d.c b/arch/nds32/math-emu/fsi2d.c
new file mode 100644
index 0000000..6b04cec
--- /dev/null
+++ b/arch/nds32/math-emu/fsi2d.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2019 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+
+void fsi2d(void *ft, void *fa)
+{
+	int a = *(int *)fa;
+
+	FP_DECL_D(R);
+	FP_DECL_EX;
+
+	FP_FROM_INT_D(R, a, 32, int);
+
+	FP_PACK_DP(ft, R);
+
+	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+
+}
diff --git a/arch/nds32/math-emu/fsi2s.c b/arch/nds32/math-emu/fsi2s.c
new file mode 100644
index 0000000..689864a
--- /dev/null
+++ b/arch/nds32/math-emu/fsi2s.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2019 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+
+void fsi2s(void *ft, void *fa)
+{
+	int a = *(int *)fa;
+
+	FP_DECL_S(R);
+	FP_DECL_EX;
+
+	FP_FROM_INT_S(R, a, 32, int);
+
+	FP_PACK_SP(ft, R);
+
+	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+
+}
diff --git a/arch/nds32/math-emu/fsqrtd.c b/arch/nds32/math-emu/fsqrtd.c
new file mode 100644
index 0000000..c3a8dbd
--- /dev/null
+++ b/arch/nds32/math-emu/fsqrtd.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+
+#include <linux/uaccess.h>
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+void fsqrtd(void *ft, void *fa)
+{
+	FP_DECL_D(A);
+	FP_DECL_D(R);
+	FP_DECL_EX;
+
+	FP_UNPACK_DP(A, fa);
+
+	FP_SQRT_D(R, A);
+
+	FP_PACK_DP(ft, R);
+
+	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fsqrts.c b/arch/nds32/math-emu/fsqrts.c
new file mode 100644
index 0000000..4c6f94b
--- /dev/null
+++ b/arch/nds32/math-emu/fsqrts.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+
+#include <linux/uaccess.h>
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+void fsqrts(void *ft, void *fa)
+{
+	FP_DECL_S(A);
+	FP_DECL_S(R);
+	FP_DECL_EX;
+
+	FP_UNPACK_SP(A, fa);
+
+	FP_SQRT_S(R, A);
+
+	FP_PACK_SP(ft, R);
+
+	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fsubd.c b/arch/nds32/math-emu/fsubd.c
new file mode 100644
index 0000000..81b6a0d
--- /dev/null
+++ b/arch/nds32/math-emu/fsubd.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+void fsubd(void *ft, void *fa, void *fb)
+{
+
+	FP_DECL_D(A);
+	FP_DECL_D(B);
+	FP_DECL_D(R);
+	FP_DECL_EX;
+
+	FP_UNPACK_DP(A, fa);
+	FP_UNPACK_DP(B, fb);
+
+	if (B_c != FP_CLS_NAN)
+		B_s ^= 1;
+
+	FP_ADD_D(R, A, B);
+
+	FP_PACK_DP(ft, R);
+
+	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fsubs.c b/arch/nds32/math-emu/fsubs.c
new file mode 100644
index 0000000..61ddd97
--- /dev/null
+++ b/arch/nds32/math-emu/fsubs.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+void fsubs(void *ft, void *fa, void *fb)
+{
+
+	FP_DECL_S(A);
+	FP_DECL_S(B);
+	FP_DECL_S(R);
+	FP_DECL_EX;
+
+	FP_UNPACK_SP(A, fa);
+	FP_UNPACK_SP(B, fb);
+
+	if (B_c != FP_CLS_NAN)
+		B_s ^= 1;
+
+	FP_ADD_S(R, A, B);
+
+	FP_PACK_SP(ft, R);
+
+	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+}
diff --git a/arch/nds32/math-emu/fui2d.c b/arch/nds32/math-emu/fui2d.c
new file mode 100644
index 0000000..9689d33
--- /dev/null
+++ b/arch/nds32/math-emu/fui2d.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2019 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/double.h>
+
+void fui2d(void *ft, void *fa)
+{
+	unsigned int a = *(unsigned int *)fa;
+
+	FP_DECL_D(R);
+	FP_DECL_EX;
+
+	FP_FROM_INT_D(R, a, 32, int);
+
+	FP_PACK_DP(ft, R);
+
+	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+
+}
diff --git a/arch/nds32/math-emu/fui2s.c b/arch/nds32/math-emu/fui2s.c
new file mode 100644
index 0000000..f70f076
--- /dev/null
+++ b/arch/nds32/math-emu/fui2s.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2019 Andes Technology Corporation
+#include <linux/uaccess.h>
+
+#include <asm/sfp-machine.h>
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+
+void fui2s(void *ft, void *fa)
+{
+	unsigned int a = *(unsigned int *)fa;
+
+	FP_DECL_S(R);
+	FP_DECL_EX;
+
+	FP_FROM_INT_S(R, a, 32, int);
+
+	FP_PACK_SP(ft, R);
+
+	__FPU_FPCSR |= FP_CUR_EXCEPTIONS;
+
+}
diff --git a/arch/nds32/mm/Makefile b/arch/nds32/mm/Makefile
index 6b68558..bd360e4 100644
--- a/arch/nds32/mm/Makefile
+++ b/arch/nds32/mm/Makefile
@@ -1,7 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0-only
 obj-y				:= extable.o tlb.o \
 				   fault.o init.o ioremap.o mmap.o \
                                    mm-nds32.o cacheflush.o proc.o
 
 obj-$(CONFIG_ALIGNMENT_TRAP)	+= alignment.o
 obj-$(CONFIG_HIGHMEM)           += highmem.o
-CFLAGS_proc-n13.o		+= -fomit-frame-pointer
+
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_proc.o     = $(CC_FLAGS_FTRACE)
+endif
+CFLAGS_proc.o              += -fomit-frame-pointer
diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c
index e1aed9d..c8b9061 100644
--- a/arch/nds32/mm/alignment.c
+++ b/arch/nds32/mm/alignment.c
@@ -289,13 +289,13 @@
 		unaligned_addr += shift;
 
 	if (load) {
-		if (!access_ok(VERIFY_READ, (void *)unaligned_addr, len))
+		if (!access_ok((void *)unaligned_addr, len))
 			return -EACCES;
 
 		get_data(unaligned_addr, &target_val, len);
 		*idx_to_addr(regs, target_idx) = target_val;
 	} else {
-		if (!access_ok(VERIFY_WRITE, (void *)unaligned_addr, len))
+		if (!access_ok((void *)unaligned_addr, len))
 			return -EACCES;
 		target_val = *idx_to_addr(regs, target_idx);
 		set_data((void *)unaligned_addr, target_val, len);
@@ -479,7 +479,7 @@
 
 	if (load) {
 
-		if (!access_ok(VERIFY_READ, (void *)unaligned_addr, len))
+		if (!access_ok((void *)unaligned_addr, len))
 			return -EACCES;
 
 		get_data(unaligned_addr, &target_val, len);
@@ -491,7 +491,7 @@
 			*idx_to_addr(regs, RT(inst)) = target_val;
 	} else {
 
-		if (!access_ok(VERIFY_WRITE, (void *)unaligned_addr, len))
+		if (!access_ok((void *)unaligned_addr, len))
 			return -EACCES;
 
 		target_val = *idx_to_addr(regs, RT(inst));
diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c
index b740534..064ae5d 100644
--- a/arch/nds32/mm/fault.c
+++ b/arch/nds32/mm/fault.c
@@ -9,6 +9,7 @@
 #include <linux/init.h>
 #include <linux/hardirq.h>
 #include <linux/uaccess.h>
+#include <linux/perf_event.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -169,8 +170,6 @@
 			mask = VM_EXEC;
 		else {
 			mask = VM_READ | VM_WRITE;
-			if (vma->vm_flags & VM_WRITE)
-				flags |= FAULT_FLAG_WRITE;
 		}
 	} else if (entry == ENTRY_TLB_MISC) {
 		switch (error_code & ITYPE_mskETYPE) {
@@ -231,11 +230,17 @@
 	 * attempt. If we go through a retry, it is extremely likely that the
 	 * page will be found in page cache at that point.
 	 */
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
-		if (fault & VM_FAULT_MAJOR)
+		if (fault & VM_FAULT_MAJOR) {
 			tsk->maj_flt++;
-		else
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
+				      1, regs, addr);
+		} else {
 			tsk->min_flt++;
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
+				      1, regs, addr);
+		}
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
 			flags |= FAULT_FLAG_TRIED;
@@ -266,7 +271,7 @@
 		tsk->thread.address = addr;
 		tsk->thread.error_code = error_code;
 		tsk->thread.trap_no = entry;
-		force_sig_fault(SIGSEGV, si_code, (void __user *)addr, tsk);
+		force_sig_fault(SIGSEGV, si_code, (void __user *)addr);
 		return;
 	}
 
@@ -335,7 +340,7 @@
 	tsk->thread.address = addr;
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_no = entry;
-	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr, tsk);
+	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr);
 
 	return;
 
diff --git a/arch/nds32/mm/highmem.c b/arch/nds32/mm/highmem.c
index e17cb8a..022779a 100644
--- a/arch/nds32/mm/highmem.c
+++ b/arch/nds32/mm/highmem.c
@@ -6,7 +6,7 @@
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/interrupt.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <asm/fixmap.h>
 #include <asm/tlbflush.h>
 
diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c
index c713d2a..55703b0 100644
--- a/arch/nds32/mm/init.c
+++ b/arch/nds32/mm/init.c
@@ -7,12 +7,11 @@
 #include <linux/errno.h>
 #include <linux/swap.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/mman.h>
 #include <linux/nodemask.h>
 #include <linux/initrd.h>
 #include <linux/highmem.h>
-#include <linux/memblock.h>
 
 #include <asm/sections.h>
 #include <asm/setup.h>
@@ -22,8 +21,6 @@
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 DEFINE_SPINLOCK(anon_alias_lock);
 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
-extern unsigned long phys_initrd_start;
-extern unsigned long phys_initrd_size;
 
 /*
  * empty_zero_page is a special page that is used for
@@ -81,8 +78,10 @@
 		}
 
 		/* Alloc one page for holding PTE's... */
-		pte = (pte_t *) __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
-		memset(pte, 0, PAGE_SIZE);
+		pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+		if (!pte)
+			panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+			      __func__, PAGE_SIZE, PAGE_SIZE);
 		set_pmd(pme, __pmd(__pa(pte) + _PAGE_KERNEL_TABLE));
 
 		/* Fill the newly allocated page with PTE'S */
@@ -114,8 +113,10 @@
 	pgd = swapper_pg_dir + pgd_index(vaddr);
 	pud = pud_offset(pgd, vaddr);
 	pmd = pmd_offset(pud, vaddr);
-	fixmap_pmd_p = (pmd_t *) __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
-	memset(fixmap_pmd_p, 0, PAGE_SIZE);
+	fixmap_pmd_p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+	if (!fixmap_pmd_p)
+		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+		      __func__, PAGE_SIZE, PAGE_SIZE);
 	set_pmd(pmd, __pmd(__pa(fixmap_pmd_p) + _PAGE_KERNEL_TABLE));
 
 #ifdef CONFIG_HIGHMEM
@@ -127,8 +128,10 @@
 	pgd = swapper_pg_dir + pgd_index(vaddr);
 	pud = pud_offset(pgd, vaddr);
 	pmd = pmd_offset(pud, vaddr);
-	pte = (pte_t *) __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
-	memset(pte, 0, PAGE_SIZE);
+	pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+	if (!pte)
+		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+		      __func__, PAGE_SIZE, PAGE_SIZE);
 	set_pmd(pmd, __pmd(__pa(pte) + _PAGE_KERNEL_TABLE));
 	pkmap_page_table = pte;
 #endif /* CONFIG_HIGHMEM */
@@ -153,8 +156,10 @@
 	fixedrange_init();
 
 	/* allocate space for empty_zero_page */
-	zero_page = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
-	memset(zero_page, 0, PAGE_SIZE);
+	zero_page = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+	if (!zero_page)
+		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+		      __func__, PAGE_SIZE, PAGE_SIZE);
 	zone_sizes_init();
 
 	empty_zero_page = virt_to_page(zero_page);
@@ -192,7 +197,7 @@
 	free_highmem();
 
 	/* this will put all low memory onto the freelists */
-	free_all_bootmem();
+	memblock_free_all();
 	mem_init_print_info(NULL);
 
 	pr_info("virtual kernel memory layout:\n"
@@ -247,18 +252,6 @@
 	return;
 }
 
-void free_initmem(void)
-{
-	free_initmem_default(-1);
-}
-
-#ifdef CONFIG_BLK_DEV_INITRD
-void free_initrd_mem(unsigned long start, unsigned long end)
-{
-	free_reserved_area((void *)start, (void *)end, -1, "initrd");
-}
-#endif
-
 void __set_fixmap(enum fixed_addresses idx,
 			       phys_addr_t phys, pgprot_t flags)
 {
@@ -267,7 +260,7 @@
 
 	BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
 
-	pte = (pte_t *)&fixmap_pmd_p[pte_index(addr)];;
+	pte = (pte_t *)&fixmap_pmd_p[pte_index(addr)];
 
 	if (pgprot_val(flags)) {
 		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));