v4.19.13 snapshot.
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
new file mode 100644
index 0000000..b169e58
--- /dev/null
+++ b/arch/arm/mm/Kconfig
@@ -0,0 +1,1119 @@
+# SPDX-License-Identifier: GPL-2.0
+comment "Processor Type"
+
+# Select CPU types depending on the architecture selected.  This selects
+# which CPUs we support in the kernel image, and the compiler instruction
+# optimiser behaviour.
+
+# ARM7TDMI
+config CPU_ARM7TDMI
+	bool
+	depends on !MMU
+	select CPU_32v4T
+	select CPU_ABRT_LV4T
+	select CPU_CACHE_V4
+	select CPU_PABRT_LEGACY
+	help
+	  A 32-bit RISC microprocessor based on the ARM7 processor core
+	  which has no memory control unit or cache.
+
+	  Say Y if you want support for the ARM7TDMI processor.
+	  Otherwise, say N.
+
+# ARM720T
+config CPU_ARM720T
+	bool
+	select CPU_32v4T
+	select CPU_ABRT_LV4T
+	select CPU_CACHE_V4
+	select CPU_CACHE_VIVT
+	select CPU_COPY_V4WT if MMU
+	select CPU_CP15_MMU
+	select CPU_PABRT_LEGACY
+	select CPU_THUMB_CAPABLE
+	select CPU_TLB_V4WT if MMU
+	help
+	  A 32-bit RISC processor with 8kByte Cache, Write Buffer and
+	  MMU built around an ARM7TDMI core.
+
+	  Say Y if you want support for the ARM720T processor.
+	  Otherwise, say N.
+
+# ARM740T
+config CPU_ARM740T
+	bool
+	depends on !MMU
+	select CPU_32v4T
+	select CPU_ABRT_LV4T
+	select CPU_CACHE_V4
+	select CPU_CP15_MPU
+	select CPU_PABRT_LEGACY
+	select CPU_THUMB_CAPABLE
+	help
+	  A 32-bit RISC processor with 8KB cache or 4KB variants,
+	  write buffer and MPU (Protection Unit) built around
+	  an ARM7TDMI core.
+
+	  Say Y if you want support for the ARM740T processor.
+	  Otherwise, say N.
+
+# ARM9TDMI
+config CPU_ARM9TDMI
+	bool
+	depends on !MMU
+	select CPU_32v4T
+	select CPU_ABRT_NOMMU
+	select CPU_CACHE_V4
+	select CPU_PABRT_LEGACY
+	help
+	  A 32-bit RISC microprocessor based on the ARM9 processor core
+	  which has no memory control unit or cache.
+
+	  Say Y if you want support for the ARM9TDMI processor.
+	  Otherwise, say N.
+
+# ARM920T
+config CPU_ARM920T
+	bool
+	select CPU_32v4T
+	select CPU_ABRT_EV4T
+	select CPU_CACHE_V4WT
+	select CPU_CACHE_VIVT
+	select CPU_COPY_V4WB if MMU
+	select CPU_CP15_MMU
+	select CPU_PABRT_LEGACY
+	select CPU_THUMB_CAPABLE
+	select CPU_TLB_V4WBI if MMU
+	help
+	  The ARM920T is licensed to be produced by numerous vendors,
+	  and is used in the Cirrus EP93xx and the Samsung S3C2410.
+
+	  Say Y if you want support for the ARM920T processor.
+	  Otherwise, say N.
+
+# ARM922T
+config CPU_ARM922T
+	bool
+	select CPU_32v4T
+	select CPU_ABRT_EV4T
+	select CPU_CACHE_V4WT
+	select CPU_CACHE_VIVT
+	select CPU_COPY_V4WB if MMU
+	select CPU_CP15_MMU
+	select CPU_PABRT_LEGACY
+	select CPU_THUMB_CAPABLE
+	select CPU_TLB_V4WBI if MMU
+	help
+	  The ARM922T is a version of the ARM920T, but with smaller
+	  instruction and data caches. It is used in Altera's
+	  Excalibur XA device family and Micrel's KS8695 Centaur.
+
+	  Say Y if you want support for the ARM922T processor.
+	  Otherwise, say N.
+
+# ARM925T
+config CPU_ARM925T
+	bool
+	select CPU_32v4T
+	select CPU_ABRT_EV4T
+	select CPU_CACHE_V4WT
+	select CPU_CACHE_VIVT
+	select CPU_COPY_V4WB if MMU
+	select CPU_CP15_MMU
+	select CPU_PABRT_LEGACY
+	select CPU_THUMB_CAPABLE
+	select CPU_TLB_V4WBI if MMU
+	help
+	  The ARM925T is a mix between the ARM920T and ARM926T, but with
+	  different instruction and data caches. It is used in TI's OMAP
+	  device family.
+
+	  Say Y if you want support for the ARM925T processor.
+	  Otherwise, say N.
+
+# ARM926T
+config CPU_ARM926T
+	bool
+	select CPU_32v5
+	select CPU_ABRT_EV5TJ
+	select CPU_CACHE_VIVT
+	select CPU_COPY_V4WB if MMU
+	select CPU_CP15_MMU
+	select CPU_PABRT_LEGACY
+	select CPU_THUMB_CAPABLE
+	select CPU_TLB_V4WBI if MMU
+	help
+	  This is a variant of the ARM920.  It has slightly different
+	  instruction sequences for cache and TLB operations.  Curiously,
+	  there is no documentation on it at the ARM corporate website.
+
+	  Say Y if you want support for the ARM926T processor.
+	  Otherwise, say N.
+
+# FA526
+config CPU_FA526
+	bool
+	select CPU_32v4
+	select CPU_ABRT_EV4
+	select CPU_CACHE_FA
+	select CPU_CACHE_VIVT
+	select CPU_COPY_FA if MMU
+	select CPU_CP15_MMU
+	select CPU_PABRT_LEGACY
+	select CPU_TLB_FA if MMU
+	help
+	  The FA526 is an ARMv4-compatible processor with a Branch Target
+	  Buffer, a unified TLB and a 16-byte cache line.
+
+	  Say Y if you want support for the FA526 processor.
+	  Otherwise, say N.
+
+# ARM940T
+config CPU_ARM940T
+	bool
+	depends on !MMU
+	select CPU_32v4T
+	select CPU_ABRT_NOMMU
+	select CPU_CACHE_VIVT
+	select CPU_CP15_MPU
+	select CPU_PABRT_LEGACY
+	select CPU_THUMB_CAPABLE
+	help
+	  ARM940T is a member of the ARM9TDMI family of general-
+	  purpose microprocessors with MPU and separate 4KB
+	  instruction and 4KB data caches, each with a 4-word line
+	  length.
+
+	  Say Y if you want support for the ARM940T processor.
+	  Otherwise, say N.
+
+# ARM946E-S
+config CPU_ARM946E
+	bool
+	depends on !MMU
+	select CPU_32v5
+	select CPU_ABRT_NOMMU
+	select CPU_CACHE_VIVT
+	select CPU_CP15_MPU
+	select CPU_PABRT_LEGACY
+	select CPU_THUMB_CAPABLE
+	help
+	  ARM946E-S is a member of the ARM9E-S family of high-
+	  performance, 32-bit system-on-chip processor solutions.
+	  TCM and the ARMv5TE 32-bit instruction set are supported.
+
+	  Say Y if you want support for the ARM946E-S processor.
+	  Otherwise, say N.
+
+# ARM1020 - needs validating
+config CPU_ARM1020
+	bool
+	select CPU_32v5
+	select CPU_ABRT_EV4T
+	select CPU_CACHE_V4WT
+	select CPU_CACHE_VIVT
+	select CPU_COPY_V4WB if MMU
+	select CPU_CP15_MMU
+	select CPU_PABRT_LEGACY
+	select CPU_THUMB_CAPABLE
+	select CPU_TLB_V4WBI if MMU
+	help
+	  The ARM1020 is the 32K cached version of the ARM10 processor,
+	  with the addition of a floating-point unit.
+
+	  Say Y if you want support for the ARM1020 processor.
+	  Otherwise, say N.
+
+# ARM1020E - needs validating
+config CPU_ARM1020E
+	bool
+	depends on n
+	select CPU_32v5
+	select CPU_ABRT_EV4T
+	select CPU_CACHE_V4WT
+	select CPU_CACHE_VIVT
+	select CPU_COPY_V4WB if MMU
+	select CPU_CP15_MMU
+	select CPU_PABRT_LEGACY
+	select CPU_THUMB_CAPABLE
+	select CPU_TLB_V4WBI if MMU
+
+# ARM1022E
+config CPU_ARM1022
+	bool
+	select CPU_32v5
+	select CPU_ABRT_EV4T
+	select CPU_CACHE_VIVT
+	select CPU_COPY_V4WB if MMU # can probably do better
+	select CPU_CP15_MMU
+	select CPU_PABRT_LEGACY
+	select CPU_THUMB_CAPABLE
+	select CPU_TLB_V4WBI if MMU
+	help
+	  The ARM1022E is an implementation of the ARMv5TE architecture
+	  based upon the ARM10 integer core with a 16KiB L1 Harvard cache,
+	  embedded trace macrocell, and a floating-point unit.
+
+	  Say Y if you want support for the ARM1022E processor.
+	  Otherwise, say N.
+
+# ARM1026EJ-S
+config CPU_ARM1026
+	bool
+	select CPU_32v5
+	select CPU_ABRT_EV5T # But need Jazelle, but EV5TJ ignores bit 10
+	select CPU_CACHE_VIVT
+	select CPU_COPY_V4WB if MMU # can probably do better
+	select CPU_CP15_MMU
+	select CPU_PABRT_LEGACY
+	select CPU_THUMB_CAPABLE
+	select CPU_TLB_V4WBI if MMU
+	help
+	  The ARM1026EJ-S is an implementation of the ARMv5TEJ architecture
+	  based upon the ARM10 integer core.
+
+	  Say Y if you want support for the ARM1026EJ-S processor.
+	  Otherwise, say N.
+
+# SA110
+config CPU_SA110
+	bool
+	select CPU_32v3 if ARCH_RPC
+	select CPU_32v4 if !ARCH_RPC
+	select CPU_ABRT_EV4
+	select CPU_CACHE_V4WB
+	select CPU_CACHE_VIVT
+	select CPU_COPY_V4WB if MMU
+	select CPU_CP15_MMU
+	select CPU_PABRT_LEGACY
+	select CPU_TLB_V4WB if MMU
+	help
+	  The Intel StrongARM(R) SA-110 is a 32-bit microprocessor and
+	  is available at five speeds ranging from 100 MHz to 233 MHz.
+	  More information is available at
+	  <http://developer.intel.com/design/strong/sa110.htm>.
+
+	  Say Y if you want support for the SA-110 processor.
+	  Otherwise, say N.
+
+# SA1100
+config CPU_SA1100
+	bool
+	select CPU_32v4
+	select CPU_ABRT_EV4
+	select CPU_CACHE_V4WB
+	select CPU_CACHE_VIVT
+	select CPU_CP15_MMU
+	select CPU_PABRT_LEGACY
+	select CPU_TLB_V4WB if MMU
+
+# XScale
+config CPU_XSCALE
+	bool
+	select CPU_32v5
+	select CPU_ABRT_EV5T
+	select CPU_CACHE_VIVT
+	select CPU_CP15_MMU
+	select CPU_PABRT_LEGACY
+	select CPU_THUMB_CAPABLE
+	select CPU_TLB_V4WBI if MMU
+
+# XScale Core Version 3
+config CPU_XSC3
+	bool
+	select CPU_32v5
+	select CPU_ABRT_EV5T
+	select CPU_CACHE_VIVT
+	select CPU_CP15_MMU
+	select CPU_PABRT_LEGACY
+	select CPU_THUMB_CAPABLE
+	select CPU_TLB_V4WBI if MMU
+	select IO_36
+
+# Marvell PJ1 (Mohawk)
+config CPU_MOHAWK
+	bool
+	select CPU_32v5
+	select CPU_ABRT_EV5T
+	select CPU_CACHE_VIVT
+	select CPU_COPY_V4WB if MMU
+	select CPU_CP15_MMU
+	select CPU_PABRT_LEGACY
+	select CPU_THUMB_CAPABLE
+	select CPU_TLB_V4WBI if MMU
+
+# Feroceon
+config CPU_FEROCEON
+	bool
+	select CPU_32v5
+	select CPU_ABRT_EV5T
+	select CPU_CACHE_VIVT
+	select CPU_COPY_FEROCEON if MMU
+	select CPU_CP15_MMU
+	select CPU_PABRT_LEGACY
+	select CPU_THUMB_CAPABLE
+	select CPU_TLB_FEROCEON if MMU
+
+config CPU_FEROCEON_OLD_ID
+	bool "Accept early Feroceon cores with an ARM926 ID"
+	depends on CPU_FEROCEON && !CPU_ARM926T
+	default y
+	help
+	  This enables the usage of some old Feroceon cores
+	  for which the CPU ID is equal to the ARM926 ID.
+	  Relevant for Feroceon-1850 and early Feroceon-2850.
+
+# Marvell PJ4
+config CPU_PJ4
+	bool
+	select ARM_THUMBEE
+	select CPU_V7
+
+config CPU_PJ4B
+	bool
+	select CPU_V7
+
+# ARMv6
+config CPU_V6
+	bool
+	select CPU_32v6
+	select CPU_ABRT_EV6
+	select CPU_CACHE_V6
+	select CPU_CACHE_VIPT
+	select CPU_COPY_V6 if MMU
+	select CPU_CP15_MMU
+	select CPU_HAS_ASID if MMU
+	select CPU_PABRT_V6
+	select CPU_THUMB_CAPABLE
+	select CPU_TLB_V6 if MMU
+
+# ARMv6k
+config CPU_V6K
+	bool
+	select CPU_32v6
+	select CPU_32v6K
+	select CPU_ABRT_EV6
+	select CPU_CACHE_V6
+	select CPU_CACHE_VIPT
+	select CPU_COPY_V6 if MMU
+	select CPU_CP15_MMU
+	select CPU_HAS_ASID if MMU
+	select CPU_PABRT_V6
+	select CPU_THUMB_CAPABLE
+	select CPU_TLB_V6 if MMU
+
+# ARMv7
+config CPU_V7
+	bool
+	select CPU_32v6K
+	select CPU_32v7
+	select CPU_ABRT_EV7
+	select CPU_CACHE_V7
+	select CPU_CACHE_VIPT
+	select CPU_COPY_V6 if MMU
+	select CPU_CP15_MMU if MMU
+	select CPU_CP15_MPU if !MMU
+	select CPU_HAS_ASID if MMU
+	select CPU_PABRT_V7
+	select CPU_SPECTRE if MMU
+	select CPU_THUMB_CAPABLE
+	select CPU_TLB_V7 if MMU
+
+# ARMv7M
+config CPU_V7M
+	bool
+	select CPU_32v7M
+	select CPU_ABRT_NOMMU
+	select CPU_CACHE_V7M
+	select CPU_CACHE_NOP
+	select CPU_PABRT_LEGACY
+	select CPU_THUMBONLY
+
+config CPU_THUMBONLY
+	bool
+	select CPU_THUMB_CAPABLE
+	# There are no CPUs available with MMU that don't implement an ARM ISA:
+	depends on !MMU
+	help
+	  Select this if your CPU doesn't support the 32 bit ARM instructions.
+
+config CPU_THUMB_CAPABLE
+	bool
+	help
+	  Select this if your CPU can support Thumb mode.
+
+# Figure out what processor architecture version we should be using.
+# This defines the compiler instruction set which depends on the machine type.
+config CPU_32v3
+	bool
+	select CPU_USE_DOMAINS if MMU
+	select NEED_KUSER_HELPERS
+	select TLS_REG_EMUL if SMP || !MMU
+	select CPU_NO_EFFICIENT_FFS
+
+config CPU_32v4
+	bool
+	select CPU_USE_DOMAINS if MMU
+	select NEED_KUSER_HELPERS
+	select TLS_REG_EMUL if SMP || !MMU
+	select CPU_NO_EFFICIENT_FFS
+
+config CPU_32v4T
+	bool
+	select CPU_USE_DOMAINS if MMU
+	select NEED_KUSER_HELPERS
+	select TLS_REG_EMUL if SMP || !MMU
+	select CPU_NO_EFFICIENT_FFS
+
+config CPU_32v5
+	bool
+	select CPU_USE_DOMAINS if MMU
+	select NEED_KUSER_HELPERS
+	select TLS_REG_EMUL if SMP || !MMU
+
+config CPU_32v6
+	bool
+	select TLS_REG_EMUL if !CPU_32v6K && !MMU
+
+config CPU_32v6K
+	bool
+
+config CPU_32v7
+	bool
+
+config CPU_32v7M
+	bool
+
+# The abort model
+config CPU_ABRT_NOMMU
+	bool
+
+config CPU_ABRT_EV4
+	bool
+
+config CPU_ABRT_EV4T
+	bool
+
+config CPU_ABRT_LV4T
+	bool
+
+config CPU_ABRT_EV5T
+	bool
+
+config CPU_ABRT_EV5TJ
+	bool
+
+config CPU_ABRT_EV6
+	bool
+
+config CPU_ABRT_EV7
+	bool
+
+config CPU_PABRT_LEGACY
+	bool
+
+config CPU_PABRT_V6
+	bool
+
+config CPU_PABRT_V7
+	bool
+
+# The cache model
+config CPU_CACHE_V4
+	bool
+
+config CPU_CACHE_V4WT
+	bool
+
+config CPU_CACHE_V4WB
+	bool
+
+config CPU_CACHE_V6
+	bool
+
+config CPU_CACHE_V7
+	bool
+
+config CPU_CACHE_NOP
+	bool
+
+config CPU_CACHE_VIVT
+	bool
+
+config CPU_CACHE_VIPT
+	bool
+
+config CPU_CACHE_FA
+	bool
+
+config CPU_CACHE_V7M
+	bool
+
+if MMU
+# The copy-page model
+config CPU_COPY_V4WT
+	bool
+
+config CPU_COPY_V4WB
+	bool
+
+config CPU_COPY_FEROCEON
+	bool
+
+config CPU_COPY_FA
+	bool
+
+config CPU_COPY_V6
+	bool
+
+# This selects the TLB model
+config CPU_TLB_V4WT
+	bool
+	help
+	  ARM Architecture Version 4 TLB with writethrough cache.
+
+config CPU_TLB_V4WB
+	bool
+	help
+	  ARM Architecture Version 4 TLB with writeback cache.
+
+config CPU_TLB_V4WBI
+	bool
+	help
+	  ARM Architecture Version 4 TLB with writeback cache and invalidate
+	  instruction cache entry.
+
+config CPU_TLB_FEROCEON
+	bool
+	help
+	  Feroceon TLB (v4wbi with non-outer-cachable page table walks).
+
+config CPU_TLB_FA
+	bool
+	help
+	  Faraday ARM FA526 architecture, unified TLB with writeback cache
+	  and invalidate instruction cache entry. Branch target buffer is
+	  also supported.
+
+config CPU_TLB_V6
+	bool
+
+config CPU_TLB_V7
+	bool
+
+config VERIFY_PERMISSION_FAULT
+	bool
+endif
+
+config CPU_HAS_ASID
+	bool
+	help
+	  This indicates whether the CPU has the ASID register; used to
+	  tag TLB and possibly cache entries.
+
+config CPU_CP15
+	bool
+	help
+	  Processor has the CP15 register.
+
+config CPU_CP15_MMU
+	bool
+	select CPU_CP15
+	help
+	  Processor has the CP15 register, which has MMU related registers.
+
+config CPU_CP15_MPU
+	bool
+	select CPU_CP15
+	help
+	  Processor has the CP15 register, which has MPU related registers.
+
+config CPU_USE_DOMAINS
+	bool
+	help
+	  This option enables or disables the use of domain switching
+	  via the set_fs() function.
+
+config CPU_V7M_NUM_IRQ
+	int "Number of external interrupts connected to the NVIC"
+	depends on CPU_V7M
+	default 90 if ARCH_STM32
+	default 38 if ARCH_EFM32
+	default 112 if SOC_VF610
+	default 240
+	help
+	  This option indicates the number of interrupts connected to the NVIC.
+	  The value can be larger than the real number of interrupts supported
+	  by the system, but must not be lower.
+	  The default value is 240, corresponding to the maximum number of
+	  interrupts supported by the NVIC on Cortex-M family.
+
+	  If unsure, keep default value.
+
+#
+# CPU supports 36-bit I/O
+#
+config IO_36
+	bool
+
+comment "Processor Features"
+
+config ARM_LPAE
+	bool "Support for the Large Physical Address Extension"
+	depends on MMU && CPU_32v7 && !CPU_32v6 && !CPU_32v5 && \
+		!CPU_32v4 && !CPU_32v3
+	select PHYS_ADDR_T_64BIT
+	help
+	  Say Y if you have an ARMv7 processor supporting the LPAE page
+	  table format and you would like to access memory beyond the
+	  4GB limit. The resulting kernel image will not run on
+	  processors without the LPA extension.
+
+	  If unsure, say N.
+
+config ARM_PV_FIXUP
+	def_bool y
+	depends on ARM_LPAE && ARM_PATCH_PHYS_VIRT && ARCH_KEYSTONE
+
+config ARM_THUMB
+	bool "Support Thumb user binaries" if !CPU_THUMBONLY && EXPERT
+	depends on CPU_THUMB_CAPABLE
+	default y
+	help
+	  Say Y if you want to include kernel support for running user space
+	  Thumb binaries.
+
+	  The Thumb instruction set is a compressed form of the standard ARM
+	  instruction set resulting in smaller binaries at the expense of
+	  slightly less efficient code.
+
+	  If this option is disabled, and you run userspace that switches to
+	  Thumb mode, signal handling will not work correctly, resulting in
+	  segmentation faults or illegal instruction aborts.
+
+	  If you don't know what this all is, saying Y is a safe choice.
+
+config ARM_THUMBEE
+	bool "Enable ThumbEE CPU extension"
+	depends on CPU_V7
+	help
+	  Say Y here if you have a CPU with the ThumbEE extension and code to
+	  make use of it. Say N for code that can run on CPUs without ThumbEE.
+
+config ARM_VIRT_EXT
+	bool
+	default y if CPU_V7
+	help
+	  Enable the kernel to make use of the ARM Virtualization
+	  Extensions to install hypervisors without run-time firmware
+	  assistance.
+
+	  A compliant bootloader is required in order to make maximum
+	  use of this feature.  Refer to Documentation/arm/Booting for
+	  details.
+
+config SWP_EMULATE
+	bool "Emulate SWP/SWPB instructions" if !SMP
+	depends on CPU_V7
+	default y if SMP
+	select HAVE_PROC_CPU if PROC_FS
+	help
+	  ARMv6 architecture deprecates use of the SWP/SWPB instructions.
+	  ARMv7 multiprocessing extensions introduce the ability to disable
+	  these instructions, triggering an undefined instruction exception
+	  when executed. Say Y here to enable software emulation of these
+	  instructions for userspace (not kernel) using LDREX/STREX.
+	  Also creates /proc/cpu/swp_emulation for statistics.
+
+	  In some older versions of glibc [<=2.8] SWP is used during futex
+	  trylock() operations with the assumption that the code will not
+	  be preempted. This invalid assumption may be more likely to fail
+	  with SWP emulation enabled, leading to deadlock of the user
+	  application.
+
+	  NOTE: when accessing uncached shared regions, LDREX/STREX rely
+	  on an external transaction monitoring block called a global
+	  monitor to maintain update atomicity. If your system does not
+	  implement a global monitor, this option can cause programs that
+	  perform SWP operations to uncached memory to deadlock.
+
+	  If unsure, say Y.
+
+config CPU_BIG_ENDIAN
+	bool "Build big-endian kernel"
+	depends on ARCH_SUPPORTS_BIG_ENDIAN
+	help
+	  Say Y if you plan on running a kernel in big-endian mode.
+	  Note that your board must be properly built and your board
+	  port must properly enable any big-endian related features
+	  of your chipset/board/processor.
+
+config CPU_ENDIAN_BE8
+	bool
+	depends on CPU_BIG_ENDIAN
+	default CPU_V6 || CPU_V6K || CPU_V7
+	help
+	  Support for the BE-8 (big-endian) mode on ARMv6 and ARMv7 processors.
+
+config CPU_ENDIAN_BE32
+	bool
+	depends on CPU_BIG_ENDIAN
+	default !CPU_ENDIAN_BE8
+	help
+	  Support for the BE-32 (big-endian) mode on pre-ARMv6 processors.
+
+config CPU_HIGH_VECTOR
+	depends on !MMU && CPU_CP15 && !CPU_ARM740T
+	bool "Select the High exception vector"
+	help
+	  Say Y here to select the high exception vector (0xFFFF0000~).
+	  The exception vector can vary depending on the platform
+	  design in nommu mode. If your platform needs to select
+	  high exception vector, say Y.
+	  Otherwise or if you are unsure, say N, and the low exception
+	  vector (0x00000000~) will be used.
+
+config CPU_ICACHE_DISABLE
+	bool "Disable I-Cache (I-bit)"
+	depends on (CPU_CP15 && !(CPU_ARM720T || CPU_ARM740T || CPU_XSCALE || CPU_XSC3)) || CPU_V7M
+	help
+	  Say Y here to disable the processor instruction cache. Unless
+	  you have a reason not to or are unsure, say N.
+
+config CPU_DCACHE_DISABLE
+	bool "Disable D-Cache (C-bit)"
+	depends on (CPU_CP15 && !SMP) || CPU_V7M
+	help
+	  Say Y here to disable the processor data cache. Unless
+	  you have a reason not to or are unsure, say N.
+
+config CPU_DCACHE_SIZE
+	hex
+	depends on CPU_ARM740T || CPU_ARM946E
+	default 0x00001000 if CPU_ARM740T
+	default 0x00002000 # default size for ARM946E-S
+	help
+	  Some cores can be synthesised with caches of various sizes. For
+	  the ARM946E-S, the size can vary from 0KB to 1MB.
+	  To support such cache operations, it is efficient to know the size
+	  at compile time.
+	  If your SoC is configured to have a different size, define the value
+	  here with proper conditions.
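+# Example (the machine symbol below is hypothetical): a port whose ARM946E-S
+# was synthesised with a 32KB data cache would add a conditional default
+# above the generic one, e.g.
+#	default 0x00008000 if MACH_MYBOARD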
+
+config CPU_DCACHE_WRITETHROUGH
+	bool "Force write through D-cache"
+	depends on (CPU_ARM740T || CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM940T || CPU_ARM946E || CPU_ARM1020 || CPU_FA526) && !CPU_DCACHE_DISABLE
+	default y if CPU_ARM925T
+	help
+	  Say Y here to use the data cache in writethrough mode. Unless you
+	  specifically require this or are unsure, say N.
+
+config CPU_CACHE_ROUND_ROBIN
+	bool "Round robin I and D cache replacement algorithm"
+	depends on (CPU_ARM926T || CPU_ARM946E || CPU_ARM1020) && (!CPU_ICACHE_DISABLE || !CPU_DCACHE_DISABLE)
+	help
+	  Say Y here to use the predictable round-robin cache replacement
+	  policy.  Unless you specifically require this or are unsure, say N.
+
+config CPU_BPREDICT_DISABLE
+	bool "Disable branch prediction"
+	depends on CPU_ARM1020 || CPU_V6 || CPU_V6K || CPU_MOHAWK || CPU_XSC3 || CPU_V7 || CPU_FA526 || CPU_V7M
+	help
+	  Say Y here to disable branch prediction.  If unsure, say N.
+
+config CPU_SPECTRE
+	bool
+
+config HARDEN_BRANCH_PREDICTOR
+	bool "Harden the branch predictor against aliasing attacks" if EXPERT
+	depends on CPU_SPECTRE
+	default y
+	help
+	   Speculation attacks against some high-performance processors rely
+	   on being able to manipulate the branch predictor for a victim
+	   context by executing aliasing branches in the attacker context.
+	   Such attacks can be partially mitigated against by clearing
+	   internal branch predictor state and limiting the prediction
+	   logic in some situations.
+
+	   This config option will take CPU-specific actions to harden
+	   the branch predictor against aliasing attacks and may rely on
+	   specific instruction sequences or control bits being set by
+	   the system firmware.
+
+	   If unsure, say Y.
+
+config TLS_REG_EMUL
+	bool
+	select NEED_KUSER_HELPERS
+	help
+	  An SMP system using a pre-ARMv6 processor (there are apparently
+	  a few prototypes like that in existence) has no hardware TLS
+	  register, so access to it must be emulated.
+
+config NEED_KUSER_HELPERS
+	bool
+
+config KUSER_HELPERS
+	bool "Enable kuser helpers in vector page" if !NEED_KUSER_HELPERS
+	depends on MMU
+	default y
+	help
+	  Warning: disabling this option may break user programs.
+
+	  Provide kuser helpers in the vector page.  The kernel provides
+	  helper code to userspace in read only form at a fixed location
+	  in the high vector page to allow userspace to be independent of
+	  the CPU type fitted to the system.  This permits binaries to be
+	  run on ARMv4 through to ARMv7 without modification.
+
+	  See Documentation/arm/kernel_user_helpers.txt for details.
+
+	  However, the fixed address nature of these helpers can be used
+	  by ROP (return orientated programming) authors when creating
+	  exploits.
+
+	  If all of the binaries and libraries which run on your platform
+	  are built specifically for your platform, and make no use of
+	  these helpers, then you can turn this option off to hinder
+	  such exploits. However, in that case, if a binary or library
+	  relying on those helpers is run, it will receive a SIGILL signal,
+	  which will terminate the program.
+
+	  Say N here only if you are absolutely certain that you do not
+	  need these helpers; otherwise, the safe option is to say Y.
+
+config VDSO
+	bool "Enable VDSO for acceleration of some system calls"
+	depends on AEABI && MMU && CPU_V7
+	default y if ARM_ARCH_TIMER
+	select GENERIC_TIME_VSYSCALL
+	help
+	  Place in the process address space an ELF shared object
+	  providing fast implementations of gettimeofday and
+	  clock_gettime.  Systems that implement the ARM architected
+	  timer will receive maximum benefit.
+
+	  You must have glibc 2.22 or later for programs to seamlessly
+	  take advantage of this.
+
+config DMA_CACHE_RWFO
+	bool "Enable read/write for ownership DMA cache maintenance"
+	depends on CPU_V6K && SMP
+	default y
+	help
+	  The Snoop Control Unit on ARM11MPCore does not detect the
+	  cache maintenance operations and the dma_{map,unmap}_area()
+	  functions may leave stale cache entries on other CPUs. By
+	  enabling this option, Read or Write For Ownership in the ARMv6
+	  DMA cache maintenance functions is performed. These LDR/STR
+	  instructions change the cache line state to shared or modified
+	  so that the cache operation has the desired effect.
+
+	  Note that the workaround is only valid on processors that do
+	  not perform speculative loads into the D-cache. For such
+	  processors, if cache maintenance operations are not broadcast
+	  in hardware, other workarounds are needed (e.g. cache
+	  maintenance broadcasting in software via FIQ).
+
+config OUTER_CACHE
+	bool
+
+config OUTER_CACHE_SYNC
+	bool
+	select ARM_HEAVY_MB
+	help
+	  The outer cache has an outer_cache_fns.sync function pointer
+	  that can be used to drain the write buffer of the outer cache.
+
+config CACHE_B15_RAC
+	bool "Enable the Broadcom Brahma-B15 read-ahead cache controller"
+	depends on ARCH_BRCMSTB
+	default y
+	help
+	  This option enables the Broadcom Brahma-B15 read-ahead cache
+	  controller. If disabled, the read-ahead cache remains off.
+
+config CACHE_FEROCEON_L2
+	bool "Enable the Feroceon L2 cache controller"
+	depends on ARCH_MV78XX0 || ARCH_MVEBU
+	default y
+	select OUTER_CACHE
+	help
+	  This option enables the Feroceon L2 cache controller.
+
+config CACHE_FEROCEON_L2_WRITETHROUGH
+	bool "Force Feroceon L2 cache write through"
+	depends on CACHE_FEROCEON_L2
+	help
+	  Say Y here to use the Feroceon L2 cache in writethrough mode.
+	  Unless you specifically require this, say N for writeback mode.
+
+config MIGHT_HAVE_CACHE_L2X0
+	bool
+	help
+	  This option should be selected by machines which have an L2x0
+	  or PL310 cache controller, but where its use is optional.
+
+	  The only effect of this option is to make CACHE_L2X0 and
+	  related options available to the user for configuration.
+
+	  Boards or SoCs which always require the cache controller
+	  support to be present should select CACHE_L2X0 directly
+	  instead of this option, thus preventing the user from
+	  inadvertently configuring a broken kernel.
+
+config CACHE_L2X0
+	bool "Enable the L2x0 outer cache controller" if MIGHT_HAVE_CACHE_L2X0
+	default MIGHT_HAVE_CACHE_L2X0
+	select OUTER_CACHE
+	select OUTER_CACHE_SYNC
+	help
+	  This option enables the L2x0 PrimeCell.
+
+config CACHE_L2X0_PMU
+	bool "L2x0 performance monitor support" if CACHE_L2X0
+	depends on PERF_EVENTS
+	help
+	  This option enables support for the performance monitoring features
+	  of the L220 and PL310 outer cache controllers.
+
+if CACHE_L2X0
+
+config PL310_ERRATA_588369
+	bool "PL310 errata: Clean & Invalidate maintenance operations do not invalidate clean lines"
+	help
+	   The PL310 L2 cache controller implements three types of Clean &
+	   Invalidate maintenance operations: by Physical Address
+	   (offset 0x7F0), by Index/Way (0x7F8) and by Way (0x7FC).
+	   They are architecturally defined to behave as the execution of a
+	   clean operation followed immediately by an invalidate operation,
+	   both performed on the same memory location. This functionality
+	   is not correctly implemented in PL310 prior to r2p0 (fixed in r2p0)
+	   as clean lines are not invalidated as a result of these operations.
+
+config PL310_ERRATA_727915
+	bool "PL310 errata: Background Clean & Invalidate by Way operation can cause data corruption"
+	help
+	  PL310 implements the Clean & Invalidate by Way L2 cache maintenance
+	  operation (offset 0x7FC). This operation runs in background so that
+	  PL310 can handle normal accesses while it is in progress. Under very
+	  rare circumstances, due to this erratum, write data can be lost when
+	  PL310 treats a cacheable write transaction during a Clean &
+	  Invalidate by Way operation.  Revisions prior to r3p1 are affected by
+	  this erratum (fixed in r3p1).
+
+config PL310_ERRATA_753970
+	bool "PL310 errata: cache sync operation may be faulty"
+	help
+	  This option enables the workaround for the 753970 PL310 (r3p0) erratum.
+
+	  Under some circumstances the effect of the cache sync operation on
+	  the store buffer still remains when the operation completes.
+	  This means that the store buffer is always asked to drain and
+	  this prevents it from merging any further writes. The workaround
+	  is to replace the normal offset of cache sync operation (0x730)
+	  by another offset targeting an unmapped PL310 register 0x740.
+	  This has the same effect as the cache sync operation: store buffer
+	  drain and waiting for all buffers empty.
+
+config PL310_ERRATA_769419
+	bool "PL310 errata: no automatic Store Buffer drain"
+	help
+	  On revisions of the PL310 prior to r3p2, the Store Buffer does
+	  not automatically drain. This can cause normal, non-cacheable
+	  writes to be retained when the memory system is idle, leading
+	  to suboptimal I/O performance for drivers using coherent DMA.
+	  This option adds a write barrier to the cpu_idle loop so that,
+	  on systems with an outer cache, the store buffer is drained
+	  explicitly.
+
+endif
+
+config CACHE_TAUROS2
+	bool "Enable the Tauros2 L2 cache controller"
+	depends on (ARCH_DOVE || ARCH_MMP || CPU_PJ4)
+	default y
+	select OUTER_CACHE
+	help
+	  This option enables the Tauros2 L2 cache controller (as
+	  found on PJ1/PJ4).
+
+config CACHE_UNIPHIER
+	bool "Enable the UniPhier outer cache controller"
+	depends on ARCH_UNIPHIER
+	select ARM_L1_CACHE_SHIFT_7
+	select OUTER_CACHE
+	select OUTER_CACHE_SYNC
+	help
+	  This option enables the UniPhier outer cache (system cache)
+	  controller.
+
+config CACHE_XSC3L2
+	bool "Enable the L2 cache on XScale3"
+	depends on CPU_XSC3
+	default y
+	select OUTER_CACHE
+	help
+	  This option enables the L2 cache on XScale3.
+
+config ARM_L1_CACHE_SHIFT_6
+	bool
+	default y if CPU_V7
+	help
+	  Setting ARM L1 cache line size to 64 Bytes.
+
+config ARM_L1_CACHE_SHIFT_7
+	bool
+	help
+	  Setting ARM L1 cache line size to 128 Bytes.
+
+config ARM_L1_CACHE_SHIFT
+	int
+	default 7 if ARM_L1_CACHE_SHIFT_7
+	default 6 if ARM_L1_CACHE_SHIFT_6
+	default 5
+
+config ARM_DMA_MEM_BUFFERABLE
+	bool "Use non-cacheable memory for DMA" if (CPU_V6 || CPU_V6K || CPU_V7M) && !CPU_V7
+	default y if CPU_V6 || CPU_V6K || CPU_V7 || CPU_V7M
+	help
+	  Historically, the kernel has used strongly ordered mappings to
+	  provide DMA coherent memory.  With the advent of ARMv7, mapping
+	  memory with differing types results in unpredictable behaviour,
+	  so on these CPUs, this option is forced on.
+
+	  Multiple mappings with differing attributes are also unpredictable
+	  on ARMv6 CPUs, but since they do not have aggressive speculative
+	  prefetch, no harm appears to occur.
+
+	  However, drivers may be missing the necessary barriers for ARMv6,
+	  and therefore turning this on may result in unpredictable driver
+	  behaviour.  Therefore, we offer this as an option.
+
+	  On some of the beefier ARMv7-M machines (with DMA and write
+	  buffers) you likely want this enabled, while those that
+	  didn't need it until now also won't need it in the future.
+
+	  You are recommended to say 'Y' here and debug any affected drivers.
+
+config ARM_HEAVY_MB
+	bool
+
+config ARCH_SUPPORTS_BIG_ENDIAN
+	bool
+	help
+	  This option specifies that the architecture can support big-endian
+	  operation.
+
+config DEBUG_ALIGN_RODATA
+	bool "Make rodata strictly non-executable"
+	depends on STRICT_KERNEL_RWX
+	default y
+	help
+	  If this is set, rodata will be made explicitly non-executable. This
+	  provides protection on the rare chance that attackers might find and
+	  use ROP gadgets that exist in the rodata section. This adds an
+	  additional section-aligned split of rodata from kernel text so it
+	  can be made explicitly non-executable. This padding may waste memory
+	  space to gain the additional protection.
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
new file mode 100644
index 0000000..7cb1699
--- /dev/null
+++ b/arch/arm/mm/Makefile
@@ -0,0 +1,113 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux arm-specific parts of the memory manager.
+#
+
+obj-y				:= extable.o fault.o init.o iomap.o
+obj-y				+= dma-mapping$(MMUEXT).o
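+# MMUEXT is set to "-nommu" by arch/arm/Makefile when CONFIG_MMU is disabled,
+# so the line above picks either dma-mapping.o or dma-mapping-nommu.o.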
+obj-$(CONFIG_MMU)		+= fault-armv.o flush.o idmap.o ioremap.o \
+				   mmap.o pgd.o mmu.o pageattr.o
+
+ifneq ($(CONFIG_MMU),y)
+obj-y				+= nommu.o
+obj-$(CONFIG_ARM_MPU)		+= pmsa-v7.o pmsa-v8.o
+endif
+
+obj-$(CONFIG_ARM_PTDUMP_CORE)	+= dump.o
+obj-$(CONFIG_ARM_PTDUMP_DEBUGFS)	+= ptdump_debugfs.o
+obj-$(CONFIG_MODULES)		+= proc-syms.o
+obj-$(CONFIG_DEBUG_VIRTUAL)	+= physaddr.o
+
+obj-$(CONFIG_ALIGNMENT_TRAP)	+= alignment.o
+obj-$(CONFIG_HIGHMEM)		+= highmem.o
+obj-$(CONFIG_HUGETLB_PAGE)	+= hugetlbpage.o
+obj-$(CONFIG_ARM_PV_FIXUP)	+= pv-fixup-asm.o
+
+obj-$(CONFIG_CPU_ABRT_NOMMU)	+= abort-nommu.o
+obj-$(CONFIG_CPU_ABRT_EV4)	+= abort-ev4.o
+obj-$(CONFIG_CPU_ABRT_EV4T)	+= abort-ev4t.o
+obj-$(CONFIG_CPU_ABRT_LV4T)	+= abort-lv4t.o
+obj-$(CONFIG_CPU_ABRT_EV5T)	+= abort-ev5t.o
+obj-$(CONFIG_CPU_ABRT_EV5TJ)	+= abort-ev5tj.o
+obj-$(CONFIG_CPU_ABRT_EV6)	+= abort-ev6.o
+obj-$(CONFIG_CPU_ABRT_EV7)	+= abort-ev7.o
+
+AFLAGS_abort-ev6.o	:=-Wa,-march=armv6k
+AFLAGS_abort-ev7.o	:=-Wa,-march=armv7-a
+
+obj-$(CONFIG_CPU_PABRT_LEGACY)	+= pabort-legacy.o
+obj-$(CONFIG_CPU_PABRT_V6)	+= pabort-v6.o
+obj-$(CONFIG_CPU_PABRT_V7)	+= pabort-v7.o
+
+obj-$(CONFIG_CPU_CACHE_V4)	+= cache-v4.o
+obj-$(CONFIG_CPU_CACHE_V4WT)	+= cache-v4wt.o
+obj-$(CONFIG_CPU_CACHE_V4WB)	+= cache-v4wb.o
+obj-$(CONFIG_CPU_CACHE_V6)	+= cache-v6.o
+obj-$(CONFIG_CPU_CACHE_V7)	+= cache-v7.o
+obj-$(CONFIG_CPU_CACHE_FA)	+= cache-fa.o
+obj-$(CONFIG_CPU_CACHE_NOP)	+= cache-nop.o
+obj-$(CONFIG_CPU_CACHE_V7M)	+= cache-v7m.o
+
+AFLAGS_cache-v6.o	:=-Wa,-march=armv6
+AFLAGS_cache-v7.o	:=-Wa,-march=armv7-a
+AFLAGS_cache-v7m.o	:=-Wa,-march=armv7-m
+
+obj-$(CONFIG_CPU_COPY_V4WT)	+= copypage-v4wt.o
+obj-$(CONFIG_CPU_COPY_V4WB)	+= copypage-v4wb.o
+obj-$(CONFIG_CPU_COPY_FEROCEON)	+= copypage-feroceon.o
+obj-$(CONFIG_CPU_COPY_V6)	+= copypage-v6.o context.o
+obj-$(CONFIG_CPU_SA1100)	+= copypage-v4mc.o
+obj-$(CONFIG_CPU_XSCALE)	+= copypage-xscale.o
+obj-$(CONFIG_CPU_XSC3)		+= copypage-xsc3.o
+obj-$(CONFIG_CPU_COPY_FA)	+= copypage-fa.o
+
+CFLAGS_copypage-feroceon.o := -march=armv5te
+
+obj-$(CONFIG_CPU_TLB_V4WT)	+= tlb-v4.o
+obj-$(CONFIG_CPU_TLB_V4WB)	+= tlb-v4wb.o
+obj-$(CONFIG_CPU_TLB_V4WBI)	+= tlb-v4wbi.o
+obj-$(CONFIG_CPU_TLB_FEROCEON)	+= tlb-v4wbi.o	# reuse v4wbi TLB functions
+obj-$(CONFIG_CPU_TLB_V6)	+= tlb-v6.o
+obj-$(CONFIG_CPU_TLB_V7)	+= tlb-v7.o
+obj-$(CONFIG_CPU_TLB_FA)	+= tlb-fa.o
+
+AFLAGS_tlb-v6.o		:=-Wa,-march=armv6
+AFLAGS_tlb-v7.o		:=-Wa,-march=armv7-a
+
+obj-$(CONFIG_CPU_ARM7TDMI)	+= proc-arm7tdmi.o
+obj-$(CONFIG_CPU_ARM720T)	+= proc-arm720.o
+obj-$(CONFIG_CPU_ARM740T)	+= proc-arm740.o
+obj-$(CONFIG_CPU_ARM9TDMI)	+= proc-arm9tdmi.o
+obj-$(CONFIG_CPU_ARM920T)	+= proc-arm920.o
+obj-$(CONFIG_CPU_ARM922T)	+= proc-arm922.o
+obj-$(CONFIG_CPU_ARM925T)	+= proc-arm925.o
+obj-$(CONFIG_CPU_ARM926T)	+= proc-arm926.o
+obj-$(CONFIG_CPU_ARM940T)	+= proc-arm940.o
+obj-$(CONFIG_CPU_ARM946E)	+= proc-arm946.o
+obj-$(CONFIG_CPU_FA526)		+= proc-fa526.o
+obj-$(CONFIG_CPU_ARM1020)	+= proc-arm1020.o
+obj-$(CONFIG_CPU_ARM1020E)	+= proc-arm1020e.o
+obj-$(CONFIG_CPU_ARM1022)	+= proc-arm1022.o
+obj-$(CONFIG_CPU_ARM1026)	+= proc-arm1026.o
+obj-$(CONFIG_CPU_SA110)		+= proc-sa110.o
+obj-$(CONFIG_CPU_SA1100)	+= proc-sa1100.o
+obj-$(CONFIG_CPU_XSCALE)	+= proc-xscale.o
+obj-$(CONFIG_CPU_XSC3)		+= proc-xsc3.o
+obj-$(CONFIG_CPU_MOHAWK)	+= proc-mohawk.o
+obj-$(CONFIG_CPU_FEROCEON)	+= proc-feroceon.o
+obj-$(CONFIG_CPU_V6)		+= proc-v6.o
+obj-$(CONFIG_CPU_V6K)		+= proc-v6.o
+obj-$(CONFIG_CPU_V7)		+= proc-v7.o proc-v7-bugs.o
+obj-$(CONFIG_CPU_V7M)		+= proc-v7m.o
+
+AFLAGS_proc-v6.o	:=-Wa,-march=armv6
+AFLAGS_proc-v7.o	:=-Wa,-march=armv7-a
+
+obj-$(CONFIG_OUTER_CACHE)	+= l2c-common.o
+obj-$(CONFIG_CACHE_B15_RAC)	+= cache-b15-rac.o
+obj-$(CONFIG_CACHE_FEROCEON_L2)	+= cache-feroceon-l2.o
+obj-$(CONFIG_CACHE_L2X0)	+= cache-l2x0.o l2c-l2x0-resume.o
+obj-$(CONFIG_CACHE_L2X0_PMU)	+= cache-l2x0-pmu.o
+obj-$(CONFIG_CACHE_XSC3L2)	+= cache-xsc3l2.o
+obj-$(CONFIG_CACHE_TAUROS2)	+= cache-tauros2.o
+obj-$(CONFIG_CACHE_UNIPHIER)	+= cache-uniphier.o
diff --git a/arch/arm/mm/abort-ev4.S b/arch/arm/mm/abort-ev4.S
new file mode 100644
index 0000000..a10bcb8
--- /dev/null
+++ b/arch/arm/mm/abort-ev4.S
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+/*
+ * Function: v4_early_abort
+ *
+ * Params  : r2 = pt_regs
+ *	   : r4 = aborted context pc
+ *	   : r5 = aborted context psr
+ *
+ * Returns : r4 - r11, r13 preserved
+ *
+ * Purpose : obtain information about current aborted instruction.
+ * Note: we read user space.  This means we might cause a data
+ * abort here if the I-TLB and D-TLB aren't seeing the same
+ * picture.  Unfortunately, this does happen.  We live with it.
+ */
+	.align	5
+ENTRY(v4_early_abort)
+	mrc	p15, 0, r1, c5, c0, 0		@ get FSR
+	mrc	p15, 0, r0, c6, c0, 0		@ get FAR
+	ldr	r3, [r4]			@ read aborted ARM instruction
+	uaccess_disable ip			@ disable userspace access
+	bic	r1, r1, #1 << 11 | 1 << 10	@ clear bits 11 and 10 of FSR
+	tst	r3, #1 << 20			@ L = 0 -> write
+	orreq	r1, r1, #1 << 11		@ yes.
+	b	do_DataAbort
diff --git a/arch/arm/mm/abort-ev4t.S b/arch/arm/mm/abort-ev4t.S
new file mode 100644
index 0000000..14743a2
--- /dev/null
+++ b/arch/arm/mm/abort-ev4t.S
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include "abort-macro.S"
+/*
+ * Function: v4t_early_abort
+ *
+ * Params  : r2 = pt_regs
+ *	   : r4 = aborted context pc
+ *	   : r5 = aborted context psr
+ *
+ * Returns : r4 - r11, r13 preserved
+ *
+ * Purpose : obtain information about current aborted instruction.
+ * Note: we read user space.  This means we might cause a data
+ * abort here if the I-TLB and D-TLB aren't seeing the same
+ * picture.  Unfortunately, this does happen.  We live with it.
+ */
+	.align	5
+ENTRY(v4t_early_abort)
+	mrc	p15, 0, r1, c5, c0, 0		@ get FSR
+	mrc	p15, 0, r0, c6, c0, 0		@ get FAR
+	do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3
+	ldreq	r3, [r4]			@ read aborted ARM instruction
+	uaccess_disable ip			@ disable userspace access
+	bic	r1, r1, #1 << 11 | 1 << 10	@ clear bits 11 and 10 of FSR
+	tst	r3, #1 << 20			@ check write
+	orreq	r1, r1, #1 << 11
+	b	do_DataAbort
diff --git a/arch/arm/mm/abort-ev5t.S b/arch/arm/mm/abort-ev5t.S
new file mode 100644
index 0000000..98c5231
--- /dev/null
+++ b/arch/arm/mm/abort-ev5t.S
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include "abort-macro.S"
+/*
+ * Function: v5t_early_abort
+ *
+ * Params  : r2 = pt_regs
+ *	   : r4 = aborted context pc
+ *	   : r5 = aborted context psr
+ *
+ * Returns : r4 - r11, r13 preserved
+ *
+ * Purpose : obtain information about current aborted instruction.
+ * Note: we read user space.  This means we might cause a data
+ * abort here if the I-TLB and D-TLB aren't seeing the same
+ * picture.  Unfortunately, this does happen.  We live with it.
+ */
+	.align	5
+ENTRY(v5t_early_abort)
+	mrc	p15, 0, r1, c5, c0, 0		@ get FSR
+	mrc	p15, 0, r0, c6, c0, 0		@ get FAR
+	do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3
+	ldreq	r3, [r4]			@ read aborted ARM instruction
+	uaccess_disable ip			@ disable user access
+	bic	r1, r1, #1 << 11		@ clear bit 11 of FSR
+	teq_ldrd tmp=ip, insn=r3		@ insn was LDRD?
+	beq	do_DataAbort			@ yes
+	tst	r3, #1 << 20			@ check write
+	orreq	r1, r1, #1 << 11
+	b	do_DataAbort
diff --git a/arch/arm/mm/abort-ev5tj.S b/arch/arm/mm/abort-ev5tj.S
new file mode 100644
index 0000000..fec72f4
--- /dev/null
+++ b/arch/arm/mm/abort-ev5tj.S
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include "abort-macro.S"
+/*
+ * Function: v5tj_early_abort
+ *
+ * Params  : r2 = pt_regs
+ *	   : r4 = aborted context pc
+ *	   : r5 = aborted context psr
+ *
+ * Returns : r4 - r11, r13 preserved
+ *
+ * Purpose : obtain information about current aborted instruction.
+ * Note: we read user space.  This means we might cause a data
+ * abort here if the I-TLB and D-TLB aren't seeing the same
+ * picture.  Unfortunately, this does happen.  We live with it.
+ */
+	.align	5
+ENTRY(v5tj_early_abort)
+	mrc	p15, 0, r1, c5, c0, 0		@ get FSR
+	mrc	p15, 0, r0, c6, c0, 0		@ get FAR
+	bic	r1, r1, #1 << 11 | 1 << 10	@ clear bits 11 and 10 of FSR
+	tst	r5, #PSR_J_BIT			@ Java?
+	bne	do_DataAbort
+	do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3
+	ldreq	r3, [r4]			@ read aborted ARM instruction
+	uaccess_disable ip			@ disable userspace access
+	teq_ldrd tmp=ip, insn=r3		@ insn was LDRD?
+	beq	do_DataAbort			@ yes
+	tst	r3, #1 << 20			@ L = 0 -> write
+	orreq	r1, r1, #1 << 11		@ yes.
+	b	do_DataAbort
diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S
new file mode 100644
index 0000000..c58bf8b
--- /dev/null
+++ b/arch/arm/mm/abort-ev6.S
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include "abort-macro.S"
+/*
+ * Function: v6_early_abort
+ *
+ * Params  : r2 = pt_regs
+ *	   : r4 = aborted context pc
+ *	   : r5 = aborted context psr
+ *
+ * Returns : r4 - r11, r13 preserved
+ *
+ * Purpose : obtain information about current aborted instruction.
+ * Note: we read user space.  This means we might cause a data
+ * abort here if the I-TLB and D-TLB aren't seeing the same
+ * picture.  Unfortunately, this does happen.  We live with it.
+ */
+	.align	5
+ENTRY(v6_early_abort)
+	mrc	p15, 0, r1, c5, c0, 0		@ get FSR
+	mrc	p15, 0, r0, c6, c0, 0		@ get FAR
+/*
+ * Faulty SWP instruction on 1136 doesn't set bit 11 in DFSR.
+ */
+#ifdef CONFIG_ARM_ERRATA_326103
+	ldr	ip, =0x4107b36
+	mrc	p15, 0, r3, c0, c0, 0		@ get processor id
+	teq	ip, r3, lsr #4			@ ARM1136?
+	bne	1f
+	tst	r5, #PSR_J_BIT			@ Java?
+	tsteq	r5, #PSR_T_BIT			@ Thumb?
+	bne	1f
+	bic	r1, r1, #1 << 11		@ clear bit 11 of FSR
+	ldr	r3, [r4]			@ read aborted ARM instruction
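+	@ On BE8 kernels data accesses are big-endian but instructions remain
+	@ little-endian, so byte-swap the opcode just fetched before decoding.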
+ ARM_BE8(rev	r3, r3)
+
+	teq_ldrd tmp=ip, insn=r3		@ insn was LDRD?
+	beq	1f				@ yes
+	tst	r3, #1 << 20			@ L = 0 -> write
+	orreq	r1, r1, #1 << 11		@ yes.
+#endif
+1:	uaccess_disable ip			@ disable userspace access
+	b	do_DataAbort
diff --git a/arch/arm/mm/abort-ev7.S b/arch/arm/mm/abort-ev7.S
new file mode 100644
index 0000000..f7cc5d6
--- /dev/null
+++ b/arch/arm/mm/abort-ev7.S
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+/*
+ * Function: v7_early_abort
+ *
+ * Params  : r2 = pt_regs
+ *	   : r4 = aborted context pc
+ *	   : r5 = aborted context psr
+ *
+ * Returns : r4 - r11, r13 preserved
+ *
+ * Purpose : obtain information about current aborted instruction.
+ */
+	.align	5
+ENTRY(v7_early_abort)
+	mrc	p15, 0, r1, c5, c0, 0		@ get FSR
+	mrc	p15, 0, r0, c6, c0, 0		@ get FAR
+	uaccess_disable ip			@ disable userspace access
+
+	/*
+	 * V6 code adjusts the returned DFSR.
+	 * New designs should not need to patch up faults.
+	 */
+
+#if defined(CONFIG_VERIFY_PERMISSION_FAULT)
+	/*
+	 * Detect erroneous permission failures and fix
+	 */
+	ldr	r3, =0x40d			@ On permission fault
+	and	r3, r1, r3
+	cmp	r3, #0x0d
+	bne	do_DataAbort
+
+	mcr	p15, 0, r0, c7, c8, 0   	@ Retranslate FAR
+	isb
+	mrc	p15, 0, ip, c7, c4, 0   	@ Read the PAR
+	and	r3, ip, #0x7b   		@ On translation fault
+	cmp	r3, #0x0b
+	bne	do_DataAbort
+	bic	r1, r1, #0xf			@ Fix up FSR FS[5:0]
+	and	ip, ip, #0x7e
+	orr	r1, r1, ip, LSR #1
+#endif
+
+	b	do_DataAbort
+ENDPROC(v7_early_abort)
diff --git a/arch/arm/mm/abort-lv4t.S b/arch/arm/mm/abort-lv4t.S
new file mode 100644
index 0000000..fbd60a1
--- /dev/null
+++ b/arch/arm/mm/abort-lv4t.S
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+/*
+ * Function: v4t_late_abort
+ *
+ * Params  : r2 = pt_regs
+ *	   : r4 = aborted context pc
+ *	   : r5 = aborted context psr
+ *
+ * Returns : r4-r5, r9-r11, r13 preserved
+ *
+ * Purpose : obtain information about current aborted instruction.
+ * Note: we read user space.  This means we might cause a data
+ * abort here if the I-TLB and D-TLB aren't seeing the same
+ * picture.  Unfortunately, this does happen.  We live with it.
+ */
+ENTRY(v4t_late_abort)
+	tst	r5, #PSR_T_BIT			@ check for thumb mode
+#ifdef CONFIG_CPU_CP15_MMU
+	mrc	p15, 0, r1, c5, c0, 0		@ get FSR
+	mrc	p15, 0, r0, c6, c0, 0		@ get FAR
+	bic	r1, r1, #1 << 11 | 1 << 10	@ clear bits 11 and 10 of FSR
+#else
+	mov	r0, #0				@ clear r0, r1 (no FSR/FAR)
+	mov	r1, #0
+#endif
+	bne	.data_thumb_abort
+	ldr	r8, [r4]			@ read arm instruction
+	uaccess_disable ip			@ disable userspace access
+	tst	r8, #1 << 20			@ L = 0 -> write
+	orreq	r1, r1, #1 << 11		@ yes.
+	and	r7, r8, #15 << 24
+	add	pc, pc, r7, lsr #22		@ Now branch to the relevant processing routine
+	nop
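+	@ r7, lsr #22 = insn[27:24] * 4; pc reads as '.' + 8 here, which is the
+	@ first slot of the 16-entry branch table below (one 4-byte slot each).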
+
+/* 0 */	b	.data_arm_lateldrhpost		@ ldrh	rd, [rn], #m/rm
+/* 1 */	b	.data_arm_lateldrhpre		@ ldrh	rd, [rn, #m/rm]
+/* 2 */	b	.data_unknown
+/* 3 */	b	.data_unknown
+/* 4 */	b	.data_arm_lateldrpostconst	@ ldr	rd, [rn], #m
+/* 5 */	b	.data_arm_lateldrpreconst	@ ldr	rd, [rn, #m] 
+/* 6 */	b	.data_arm_lateldrpostreg	@ ldr	rd, [rn], rm
+/* 7 */	b	.data_arm_lateldrprereg		@ ldr	rd, [rn, rm]
+/* 8 */	b	.data_arm_ldmstm		@ ldm*a	rn, <rlist>
+/* 9 */	b	.data_arm_ldmstm		@ ldm*b	rn, <rlist>
+/* a */	b	.data_unknown
+/* b */	b	.data_unknown
+/* c */	b	do_DataAbort			@ ldc	rd, [rn], #m	@ Same as ldr	rd, [rn], #m
+/* d */	b	do_DataAbort			@ ldc	rd, [rn, #m]
+/* e */	b	.data_unknown
+/* f */	b	.data_unknown
+
+.data_unknown_r9:
+	ldr	r9, [sp], #4
+.data_unknown:	@ Part of jumptable
+	mov	r0, r4
+	mov	r1, r8
+	b	baddataabort
+
+.data_arm_ldmstm:
+	tst	r8, #1 << 21			@ check writeback bit
+	beq	do_DataAbort			@ no writeback -> no fixup
+	str	r9, [sp, #-4]!
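+	@ Compute the number of registers in the transfer list (hweight16 of
+	@ insn[15:0]): mask 0x1111 and its shifts give per-nibble bit counts,
+	@ which the two trailing adds fold into r6.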
+	mov	r7, #0x11
+	orr	r7, r7, #0x1100
+	and	r6, r8, r7
+	and	r9, r8, r7, lsl #1
+	add	r6, r6, r9, lsr #1
+	and	r9, r8, r7, lsl #2
+	add	r6, r6, r9, lsr #2
+	and	r9, r8, r7, lsl #3
+	add	r6, r6, r9, lsr #3
+	add	r6, r6, r6, lsr #8
+	add	r6, r6, r6, lsr #4
+	and	r6, r6, #15			@ r6 = no. of registers to transfer.
+	and	r9, r8, #15 << 16		@ Extract 'n' from instruction
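+	@ r9 holds Rn << 16, so r9, lsr #14 is Rn * 4: the byte offset of the
+	@ saved register within the pt_regs block that r2 points to.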
+	ldr	r7, [r2, r9, lsr #14]		@ Get register 'Rn'
+	tst	r8, #1 << 23			@ Check U bit
+	subne	r7, r7, r6, lsl #2		@ Undo increment
+	addeq	r7, r7, r6, lsl #2		@ Undo decrement
+	str	r7, [r2, r9, lsr #14]		@ Put register 'Rn'
+	ldr	r9, [sp], #4
+	b	do_DataAbort
+
+.data_arm_lateldrhpre:
+	tst	r8, #1 << 21			@ Check writeback bit
+	beq	do_DataAbort			@ No writeback -> no fixup
+.data_arm_lateldrhpost:
+	str	r9, [sp, #-4]!
+	and	r9, r8, #0x00f			@ get Rm / low nibble of immediate value
+	tst	r8, #1 << 22			@ if (immediate offset)
+	andne	r6, r8, #0xf00			@ { immediate high nibble
+	orrne	r6, r9, r6, lsr #4		@   combine nibbles } else
+	ldreq	r6, [r2, r9, lsl #2]		@ { load Rm value }
+.data_arm_apply_r6_and_rn:
+	and	r9, r8, #15 << 16		@ Extract 'n' from instruction
+	ldr	r7, [r2, r9, lsr #14]		@ Get register 'Rn'
+	tst	r8, #1 << 23			@ Check U bit
+	subne	r7, r7, r6			@ Undo increment
+	addeq	r7, r7, r6			@ Undo decrement
+	str	r7, [r2, r9, lsr #14]		@ Put register 'Rn'
+	ldr	r9, [sp], #4
+	b	do_DataAbort
+
+.data_arm_lateldrpreconst:
+	tst	r8, #1 << 21			@ check writeback bit
+	beq	do_DataAbort			@ no writeback -> no fixup
+.data_arm_lateldrpostconst:
+	movs	r6, r8, lsl #20			@ Get offset
+	beq	do_DataAbort			@ zero -> no fixup
+	str	r9, [sp, #-4]!
+	and	r9, r8, #15 << 16		@ Extract 'n' from instruction
+	ldr	r7, [r2, r9, lsr #14]		@ Get register 'Rn'
+	tst	r8, #1 << 23			@ Check U bit
+	subne	r7, r7, r6, lsr #20		@ Undo increment
+	addeq	r7, r7, r6, lsr #20		@ Undo decrement
+	str	r7, [r2, r9, lsr #14]		@ Put register 'Rn'
+	ldr	r9, [sp], #4
+	b	do_DataAbort
+
+.data_arm_lateldrprereg:
+	tst	r8, #1 << 21			@ check writeback bit
+	beq	do_DataAbort			@ no writeback -> no fixup
+.data_arm_lateldrpostreg:
+	and	r7, r8, #15			@ Extract 'm' from instruction
+	ldr	r6, [r2, r7, lsl #2]		@ Get register 'Rm'
+	str	r9, [sp, #-4]!
+	mov	r9, r8, lsr #7			@ get shift count
+	ands	r9, r9, #31
+	and	r7, r8, #0x70			@ get shift type
+	orreq	r7, r7, #8			@ shift count = 0
+	add	pc, pc, r7
+	nop
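+	@ Dispatch on insn[6:4]: each two-instruction (8-byte) slot below
+	@ handles one shift case, the 'orreq r7, r7, #8' above selects the
+	@ '#0' variant, and slots reached with bit 4 set (register-specified
+	@ shifts) bail out through .data_unknown_r9.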
+
+	mov	r6, r6, lsl r9			@ 0: LSL #!0
+	b	.data_arm_apply_r6_and_rn
+	b	.data_arm_apply_r6_and_rn	@ 1: LSL #0
+	nop
+	b	.data_unknown_r9		@ 2: MUL?
+	nop
+	b	.data_unknown_r9		@ 3: MUL?
+	nop
+	mov	r6, r6, lsr r9			@ 4: LSR #!0
+	b	.data_arm_apply_r6_and_rn
+	mov	r6, r6, lsr #32			@ 5: LSR #32
+	b	.data_arm_apply_r6_and_rn
+	b	.data_unknown_r9		@ 6: MUL?
+	nop
+	b	.data_unknown_r9		@ 7: MUL?
+	nop
+	mov	r6, r6, asr r9			@ 8: ASR #!0
+	b	.data_arm_apply_r6_and_rn
+	mov	r6, r6, asr #32			@ 9: ASR #32
+	b	.data_arm_apply_r6_and_rn
+	b	.data_unknown_r9		@ A: MUL?
+	nop
+	b	.data_unknown_r9		@ B: MUL?
+	nop
+	mov	r6, r6, ror r9			@ C: ROR #!0
+	b	.data_arm_apply_r6_and_rn
+	mov	r6, r6, rrx			@ D: RRX
+	b	.data_arm_apply_r6_and_rn
+	b	.data_unknown_r9		@ E: MUL?
+	nop
+	b	.data_unknown_r9		@ F: MUL?
+
+.data_thumb_abort:
+	ldrh	r8, [r4]			@ read instruction
+	uaccess_disable ip			@ disable userspace access
+	tst	r8, #1 << 11			@ L = 0 -> write
+	orreq	r1, r1, #1 << 8			@ yes
+	and	r7, r8, #15 << 12
+	add	pc, pc, r7, lsr #10		@ lookup in table
+	nop
+
+/* 0 */	b	.data_unknown
+/* 1 */	b	.data_unknown
+/* 2 */	b	.data_unknown
+/* 3 */	b	.data_unknown
+/* 4 */	b	.data_unknown
+/* 5 */	b	.data_thumb_reg
+/* 6 */	b	do_DataAbort
+/* 7 */	b	do_DataAbort
+/* 8 */	b	do_DataAbort
+/* 9 */	b	do_DataAbort
+/* A */	b	.data_unknown
+/* B */	b	.data_thumb_pushpop
+/* C */	b	.data_thumb_ldmstm
+/* D */	b	.data_unknown
+/* E */	b	.data_unknown
+/* F */	b	.data_unknown
+
+.data_thumb_reg:
+	tst	r8, #1 << 9
+	beq	do_DataAbort
+	tst	r8, #1 << 10			@ If 'S' (signed) bit is set
+	movne	r1, #0				@ it must be a load instr
+	b	do_DataAbort
+
+.data_thumb_pushpop:
+	tst	r8, #1 << 10
+	beq	.data_unknown
+	str	r9, [sp, #-4]!
+	and	r6, r8, #0x55			@ hweight8(r8) + R bit
+	and	r9, r8, #0xaa
+	add	r6, r6, r9, lsr #1
+	and	r9, r6, #0xcc
+	and	r6, r6, #0x33
+	add	r6, r6, r9, lsr #2
+	movs	r7, r8, lsr #9			@ C = r8 bit 8 (R bit)
+	adc	r6, r6, r6, lsr #4		@ high + low nibble + R bit
+	and	r6, r6, #15			@ number of regs to transfer
+	ldr	r7, [r2, #13 << 2]		@ get saved SP (r13) from pt_regs
+	tst	r8, #1 << 11
+	addeq	r7, r7, r6, lsl #2		@ increment SP if PUSH
+	subne	r7, r7, r6, lsl #2		@ decrement SP if POP
+	str	r7, [r2, #13 << 2]		@ put back the adjusted SP
+	ldr	r9, [sp], #4
+	b	do_DataAbort
+
+.data_thumb_ldmstm:
+	str	r9, [sp, #-4]!
+	and	r6, r8, #0x55			@ hweight8(r8)
+	and	r9, r8, #0xaa
+	add	r6, r6, r9, lsr #1
+	and	r9, r6, #0xcc
+	and	r6, r6, #0x33
+	add	r6, r6, r9, lsr #2
+	add	r6, r6, r6, lsr #4
+	and	r9, r8, #7 << 8
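+	@ Rn lives in bits [10:8]; (Rn << 8) >> 6 equals Rn * 4, its offset
+	@ into the saved registers in pt_regs.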
+	ldr	r7, [r2, r9, lsr #6]
+	and	r6, r6, #15			@ number of regs to transfer
+	sub	r7, r7, r6, lsl #2		@ always decrement
+	str	r7, [r2, r9, lsr #6]
+	ldr	r9, [sp], #4
+	b	do_DataAbort
diff --git a/arch/arm/mm/abort-macro.S b/arch/arm/mm/abort-macro.S
new file mode 100644
index 0000000..bacf53f
--- /dev/null
+++ b/arch/arm/mm/abort-macro.S
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * The ARM LDRD and Thumb LDRSB instructions use bit 20/11 (ARM/Thumb)
+ * differently than every other instruction, so it is set to 0 (write)
+ * even though the instructions are read instructions. This means that
+ * during an abort the instructions will be treated as a write and the
+ * handler will raise a signal from unwriteable locations if they
+ * fault. We have to specifically check for these instructions
+ * from the abort handlers to treat them properly.
+ *
+ */
+
+	.macro	do_thumb_abort, fsr, pc, psr, tmp
+	tst	\psr, #PSR_T_BIT
+	beq	not_thumb
+	ldrh	\tmp, [\pc]			@ Read aborted Thumb instruction
+	uaccess_disable ip			@ disable userspace access
+	and	\tmp, \tmp, # 0xfe00		@ Mask opcode field
+	cmp	\tmp, # 0x5600			@ Is it ldrsb?
+	orreq	\tmp, \tmp, #1 << 11		@ Set L-bit if yes
+	tst	\tmp, #1 << 11			@ L = 0 -> write
+	orreq	\fsr, \fsr, #1 << 11		@ yes.
+	b	do_DataAbort
+not_thumb:
+	.endm
+
+/*
+ * We check for the following instruction encoding for LDRD.
+ *
+ * [27:25] == 000
+ *   [7:4] == 1101
+ *    [20] == 0
+ */
+	.macro	teq_ldrd, tmp, insn
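+	@ \tmp becomes the 0x0e1000f0 mask; the final teq leaves Z set when the
+	@ masked instruction equals 0x000000d0, i.e. when it is an LDRD.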
+	mov	\tmp, #0x0e100000
+	orr	\tmp, #0x000000f0
+	and	\tmp, \insn, \tmp
+	teq	\tmp, #0x000000d0
+	.endm
diff --git a/arch/arm/mm/abort-nommu.S b/arch/arm/mm/abort-nommu.S
new file mode 100644
index 0000000..6e2366a
--- /dev/null
+++ b/arch/arm/mm/abort-nommu.S
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+/*
+ * Function: nommu_early_abort
+ *
+ * Params  : r2 = pt_regs
+ *	   : r4 = aborted context pc
+ *	   : r5 = aborted context psr
+ *
+ * Returns : r4 - r11, r13 preserved
+ *
+ * Note: There is no FSR/FAR on !CPU_CP15_MMU cores.
+ *       Just fill zero into the registers.
+ */
+	.align	5
+ENTRY(nommu_early_abort)
+	mov	r0, #0				@ clear r0, r1 (no FSR/FAR)
+	mov	r1, #0
+	b	do_DataAbort
+ENDPROC(nommu_early_abort)
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
new file mode 100644
index 0000000..bd2c739
--- /dev/null
+++ b/arch/arm/mm/alignment.c
@@ -0,0 +1,1030 @@
+/*
+ *  linux/arch/arm/mm/alignment.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *  Modifications for ARM processor (c) 1995-2001 Russell King
+ *  Thumb alignment fault fixups (c) 2004 MontaVista Software, Inc.
+ *  - Adapted from gdb/sim/arm/thumbemu.c -- Thumb instruction emulation.
+ *    Copyright (C) 1996, Cygnus Software Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/moduleparam.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/sched/debug.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/init.h>
+#include <linux/sched/signal.h>
+#include <linux/uaccess.h>
+
+#include <asm/cp15.h>
+#include <asm/system_info.h>
+#include <asm/unaligned.h>
+#include <asm/opcodes.h>
+
+#include "fault.h"
+#include "mm.h"
+
+/*
+ * 32-bit misaligned trap handler (c) 1998 San Mehat (CCC) -July 1998
+ * /proc/sys/debug/alignment, modified and integrated into
+ * Linux 2.1 by Russell King
+ *
+ * Speed optimisations and better fault handling by Russell King.
+ *
+ * *** NOTE ***
+ * This code is not portable to processors with late data abort handling.
+ */
+#define CODING_BITS(i)	(i & 0x0e000000)
+#define COND_BITS(i)	(i & 0xf0000000)
+
+#define LDST_I_BIT(i)	(i & (1 << 26))		/* Immediate constant	*/
+#define LDST_P_BIT(i)	(i & (1 << 24))		/* Preindex		*/
+#define LDST_U_BIT(i)	(i & (1 << 23))		/* Add offset		*/
+#define LDST_W_BIT(i)	(i & (1 << 21))		/* Writeback		*/
+#define LDST_L_BIT(i)	(i & (1 << 20))		/* Load			*/
+
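+/* True when the P (pre-index, bit 24) and U (add, bit 23) bits are equal. */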
+#define LDST_P_EQ_U(i)	((((i) ^ ((i) >> 1)) & (1 << 23)) == 0)
+
+#define LDSTHD_I_BIT(i)	(i & (1 << 22))		/* double/half-word immed */
+#define LDM_S_BIT(i)	(i & (1 << 22))		/* write CPSR from SPSR	*/
+
+#define RN_BITS(i)	((i >> 16) & 15)	/* Rn			*/
+#define RD_BITS(i)	((i >> 12) & 15)	/* Rd			*/
+#define RM_BITS(i)	(i & 15)		/* Rm			*/
+
+#define REGMASK_BITS(i)	(i & 0xffff)
+#define OFFSET_BITS(i)	(i & 0x0fff)
+
+#define IS_SHIFT(i)	(i & 0x0ff0)
+#define SHIFT_BITS(i)	((i >> 7) & 0x1f)
+#define SHIFT_TYPE(i)	(i & 0x60)
+#define SHIFT_LSL	0x00
+#define SHIFT_LSR	0x20
+#define SHIFT_ASR	0x40
+#define SHIFT_RORRRX	0x60
+
+#define BAD_INSTR 	0xdeadc0de
+
+/* Thumb-2 32 bit format per ARMv7 DDI0406A A6.3: first halfword is e800h, f000h or f800h */
+#define IS_T32(hi16) \
+	(((hi16) & 0xe000) == 0xe000 && ((hi16) & 0x1800))
+
+static unsigned long ai_user;
+static unsigned long ai_sys;
+static void *ai_sys_last_pc;
+static unsigned long ai_skipped;
+static unsigned long ai_half;
+static unsigned long ai_word;
+static unsigned long ai_dword;
+static unsigned long ai_multi;
+static int ai_usermode;
+static unsigned long cr_no_alignment;
+
+core_param(alignment, ai_usermode, int, 0600);
+
+#define UM_WARN		(1 << 0)
+#define UM_FIXUP	(1 << 1)
+#define UM_SIGNAL	(1 << 2)
+
+/* Return true if and only if the ARMv6 unaligned access model is in use. */
+static bool cpu_is_v6_unaligned(void)
+{
+	return cpu_architecture() >= CPU_ARCH_ARMv6 && get_cr() & CR_U;
+}
+
+static int safe_usermode(int new_usermode, bool warn)
+{
+	/*
+	 * ARMv6 and later CPUs can perform unaligned accesses for
+	 * most single load and store instructions up to word size.
+	 * LDM, STM, LDRD and STRD still need to be handled.
+	 *
+	 * Ignoring the alignment fault is not an option on these
+	 * CPUs since we spin re-faulting the instruction without
+	 * making any progress.
+	 */
+	if (cpu_is_v6_unaligned() && !(new_usermode & (UM_FIXUP | UM_SIGNAL))) {
+		new_usermode |= UM_FIXUP;
+
+		if (warn)
+			pr_warn("alignment: ignoring faults is unsafe on this CPU.  Defaulting to fixup mode.\n");
+	}
+
+	return new_usermode;
+}
+
+#ifdef CONFIG_PROC_FS
+static const char *usermode_action[] = {
+	"ignored",
+	"warn",
+	"fixup",
+	"fixup+warn",
+	"signal",
+	"signal+warn"
+};
+
+static int alignment_proc_show(struct seq_file *m, void *v)
+{
+	seq_printf(m, "User:\t\t%lu\n", ai_user);
+	seq_printf(m, "System:\t\t%lu (%pF)\n", ai_sys, ai_sys_last_pc);
+	seq_printf(m, "Skipped:\t%lu\n", ai_skipped);
+	seq_printf(m, "Half:\t\t%lu\n", ai_half);
+	seq_printf(m, "Word:\t\t%lu\n", ai_word);
+	if (cpu_architecture() >= CPU_ARCH_ARMv5TE)
+		seq_printf(m, "DWord:\t\t%lu\n", ai_dword);
+	seq_printf(m, "Multi:\t\t%lu\n", ai_multi);
+	seq_printf(m, "User faults:\t%i (%s)\n", ai_usermode,
+			usermode_action[ai_usermode]);
+
+	return 0;
+}
+
+static int alignment_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, alignment_proc_show, NULL);
+}
+
+static ssize_t alignment_proc_write(struct file *file, const char __user *buffer,
+				    size_t count, loff_t *pos)
+{
+	char mode;
+
+	if (count > 0) {
+		if (get_user(mode, buffer))
+			return -EFAULT;
+		if (mode >= '0' && mode <= '5')
+			ai_usermode = safe_usermode(mode - '0', true);
+	}
+	return count;
+}
+
+static const struct file_operations alignment_proc_fops = {
+	.open		= alignment_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= alignment_proc_write,
+};
+#endif /* CONFIG_PROC_FS */
+
+union offset_union {
+	unsigned long un;
+	  signed long sn;
+};
+
+#define TYPE_ERROR	0
+#define TYPE_FAULT	1
+#define TYPE_LDST	2
+#define TYPE_DONE	3
+
+#ifdef __ARMEB__
+#define BE		1
+#define FIRST_BYTE_16	"mov	%1, %1, ror #8\n"
+#define FIRST_BYTE_32	"mov	%1, %1, ror #24\n"
+#define NEXT_BYTE	"ror #24"
+#else
+#define BE		0
+#define FIRST_BYTE_16
+#define FIRST_BYTE_32
+#define NEXT_BYTE	"lsr #8"
+#endif
+
+#define __get8_unaligned_check(ins,val,addr,err)	\
+	__asm__(					\
+ ARM(	"1:	"ins"	%1, [%2], #1\n"	)		\
+ THUMB(	"1:	"ins"	%1, [%2]\n"	)		\
+ THUMB(	"	add	%2, %2, #1\n"	)		\
+	"2:\n"						\
+	"	.pushsection .text.fixup,\"ax\"\n"	\
+	"	.align	2\n"				\
+	"3:	mov	%0, #1\n"			\
+	"	b	2b\n"				\
+	"	.popsection\n"				\
+	"	.pushsection __ex_table,\"a\"\n"	\
+	"	.align	3\n"				\
+	"	.long	1b, 3b\n"			\
+	"	.popsection\n"				\
+	: "=r" (err), "=&r" (val), "=r" (addr)		\
+	: "0" (err), "2" (addr))
+
+#define __get16_unaligned_check(ins,val,addr)			\
+	do {							\
+		unsigned int err = 0, v, a = addr;		\
+		__get8_unaligned_check(ins,v,a,err);		\
+		val =  v << ((BE) ? 8 : 0);			\
+		__get8_unaligned_check(ins,v,a,err);		\
+		val |= v << ((BE) ? 0 : 8);			\
+		if (err)					\
+			goto fault;				\
+	} while (0)
+
+#define get16_unaligned_check(val,addr) \
+	__get16_unaligned_check("ldrb",val,addr)
+
+#define get16t_unaligned_check(val,addr) \
+	__get16_unaligned_check("ldrbt",val,addr)
+
+#define __get32_unaligned_check(ins,val,addr)			\
+	do {							\
+		unsigned int err = 0, v, a = addr;		\
+		__get8_unaligned_check(ins,v,a,err);		\
+		val =  v << ((BE) ? 24 :  0);			\
+		__get8_unaligned_check(ins,v,a,err);		\
+		val |= v << ((BE) ? 16 :  8);			\
+		__get8_unaligned_check(ins,v,a,err);		\
+		val |= v << ((BE) ?  8 : 16);			\
+		__get8_unaligned_check(ins,v,a,err);		\
+		val |= v << ((BE) ?  0 : 24);			\
+		if (err)					\
+			goto fault;				\
+	} while (0)
+
+#define get32_unaligned_check(val,addr) \
+	__get32_unaligned_check("ldrb",val,addr)
+
+#define get32t_unaligned_check(val,addr) \
+	__get32_unaligned_check("ldrbt",val,addr)
+
+#define __put16_unaligned_check(ins,val,addr)			\
+	do {							\
+		unsigned int err = 0, v = val, a = addr;	\
+		__asm__( FIRST_BYTE_16				\
+	 ARM(	"1:	"ins"	%1, [%2], #1\n"	)		\
+	 THUMB(	"1:	"ins"	%1, [%2]\n"	)		\
+	 THUMB(	"	add	%2, %2, #1\n"	)		\
+		"	mov	%1, %1, "NEXT_BYTE"\n"		\
+		"2:	"ins"	%1, [%2]\n"			\
+		"3:\n"						\
+		"	.pushsection .text.fixup,\"ax\"\n"	\
+		"	.align	2\n"				\
+		"4:	mov	%0, #1\n"			\
+		"	b	3b\n"				\
+		"	.popsection\n"				\
+		"	.pushsection __ex_table,\"a\"\n"	\
+		"	.align	3\n"				\
+		"	.long	1b, 4b\n"			\
+		"	.long	2b, 4b\n"			\
+		"	.popsection\n"				\
+		: "=r" (err), "=&r" (v), "=&r" (a)		\
+		: "0" (err), "1" (v), "2" (a));			\
+		if (err)					\
+			goto fault;				\
+	} while (0)
+
+#define put16_unaligned_check(val,addr)  \
+	__put16_unaligned_check("strb",val,addr)
+
+#define put16t_unaligned_check(val,addr) \
+	__put16_unaligned_check("strbt",val,addr)
+
+#define __put32_unaligned_check(ins,val,addr)			\
+	do {							\
+		unsigned int err = 0, v = val, a = addr;	\
+		__asm__( FIRST_BYTE_32				\
+	 ARM(	"1:	"ins"	%1, [%2], #1\n"	)		\
+	 THUMB(	"1:	"ins"	%1, [%2]\n"	)		\
+	 THUMB(	"	add	%2, %2, #1\n"	)		\
+		"	mov	%1, %1, "NEXT_BYTE"\n"		\
+	 ARM(	"2:	"ins"	%1, [%2], #1\n"	)		\
+	 THUMB(	"2:	"ins"	%1, [%2]\n"	)		\
+	 THUMB(	"	add	%2, %2, #1\n"	)		\
+		"	mov	%1, %1, "NEXT_BYTE"\n"		\
+	 ARM(	"3:	"ins"	%1, [%2], #1\n"	)		\
+	 THUMB(	"3:	"ins"	%1, [%2]\n"	)		\
+	 THUMB(	"	add	%2, %2, #1\n"	)		\
+		"	mov	%1, %1, "NEXT_BYTE"\n"		\
+		"4:	"ins"	%1, [%2]\n"			\
+		"5:\n"						\
+		"	.pushsection .text.fixup,\"ax\"\n"	\
+		"	.align	2\n"				\
+		"6:	mov	%0, #1\n"			\
+		"	b	5b\n"				\
+		"	.popsection\n"				\
+		"	.pushsection __ex_table,\"a\"\n"	\
+		"	.align	3\n"				\
+		"	.long	1b, 6b\n"			\
+		"	.long	2b, 6b\n"			\
+		"	.long	3b, 6b\n"			\
+		"	.long	4b, 6b\n"			\
+		"	.popsection\n"				\
+		: "=r" (err), "=&r" (v), "=&r" (a)		\
+		: "0" (err), "1" (v), "2" (a));			\
+		if (err)					\
+			goto fault;				\
+	} while (0)
+
+#define put32_unaligned_check(val,addr) \
+	__put32_unaligned_check("strb", val, addr)
+
+#define put32t_unaligned_check(val,addr) \
+	__put32_unaligned_check("strbt", val, addr)
+
+static void
+do_alignment_finish_ldst(unsigned long addr, unsigned long instr, struct pt_regs *regs, union offset_union offset)
+{
+	if (!LDST_U_BIT(instr))
+		offset.un = -offset.un;
+
+	if (!LDST_P_BIT(instr))
+		addr += offset.un;
+
+	if (!LDST_P_BIT(instr) || LDST_W_BIT(instr))
+		regs->uregs[RN_BITS(instr)] = addr;
+}
+
+static int
+do_alignment_ldrhstrh(unsigned long addr, unsigned long instr, struct pt_regs *regs)
+{
+	unsigned int rd = RD_BITS(instr);
+
+	ai_half += 1;
+
+	if (user_mode(regs))
+		goto user;
+
+	if (LDST_L_BIT(instr)) {
+		unsigned long val;
+		get16_unaligned_check(val, addr);
+
+		/* signed half-word? */
+		if (instr & 0x40)
+			val = (signed long)((signed short) val);
+
+		regs->uregs[rd] = val;
+	} else
+		put16_unaligned_check(regs->uregs[rd], addr);
+
+	return TYPE_LDST;
+
+ user:
+	if (LDST_L_BIT(instr)) {
+		unsigned long val;
+		unsigned int __ua_flags = uaccess_save_and_enable();
+
+		get16t_unaligned_check(val, addr);
+		uaccess_restore(__ua_flags);
+
+		/* signed half-word? */
+		if (instr & 0x40)
+			val = (signed long)((signed short) val);
+
+		regs->uregs[rd] = val;
+	} else {
+		unsigned int __ua_flags = uaccess_save_and_enable();
+		put16t_unaligned_check(regs->uregs[rd], addr);
+		uaccess_restore(__ua_flags);
+	}
+
+	return TYPE_LDST;
+
+ fault:
+	return TYPE_FAULT;
+}
+
+static int
+do_alignment_ldrdstrd(unsigned long addr, unsigned long instr,
+		      struct pt_regs *regs)
+{
+	unsigned int rd = RD_BITS(instr);
+	unsigned int rd2;
+	int load;
+
+	if ((instr & 0xfe000000) == 0xe8000000) {
+		/* ARMv7 Thumb-2 32-bit LDRD/STRD */
+		rd2 = (instr >> 8) & 0xf;
+		load = !!(LDST_L_BIT(instr));
+	} else if (((rd & 1) == 1) || (rd == 14))
+		goto bad;
+	else {
+		load = ((instr & 0xf0) == 0xd0);
+		rd2 = rd + 1;
+	}
+
+	ai_dword += 1;
+
+	if (user_mode(regs))
+		goto user;
+
+	if (load) {
+		unsigned long val;
+		get32_unaligned_check(val, addr);
+		regs->uregs[rd] = val;
+		get32_unaligned_check(val, addr + 4);
+		regs->uregs[rd2] = val;
+	} else {
+		put32_unaligned_check(regs->uregs[rd], addr);
+		put32_unaligned_check(regs->uregs[rd2], addr + 4);
+	}
+
+	return TYPE_LDST;
+
+ user:
+	if (load) {
+		unsigned long val, val2;
+		unsigned int __ua_flags = uaccess_save_and_enable();
+
+		get32t_unaligned_check(val, addr);
+		get32t_unaligned_check(val2, addr + 4);
+
+		uaccess_restore(__ua_flags);
+
+		regs->uregs[rd] = val;
+		regs->uregs[rd2] = val2;
+	} else {
+		unsigned int __ua_flags = uaccess_save_and_enable();
+		put32t_unaligned_check(regs->uregs[rd], addr);
+		put32t_unaligned_check(regs->uregs[rd2], addr + 4);
+		uaccess_restore(__ua_flags);
+	}
+
+	return TYPE_LDST;
+ bad:
+	return TYPE_ERROR;
+ fault:
+	return TYPE_FAULT;
+}
+
+static int
+do_alignment_ldrstr(unsigned long addr, unsigned long instr, struct pt_regs *regs)
+{
+	unsigned int rd = RD_BITS(instr);
+
+	ai_word += 1;
+
+	if ((!LDST_P_BIT(instr) && LDST_W_BIT(instr)) || user_mode(regs))
+		goto trans;
+
+	if (LDST_L_BIT(instr)) {
+		unsigned int val;
+		get32_unaligned_check(val, addr);
+		regs->uregs[rd] = val;
+	} else
+		put32_unaligned_check(regs->uregs[rd], addr);
+	return TYPE_LDST;
+
+ trans:
+	if (LDST_L_BIT(instr)) {
+		unsigned int val;
+		unsigned int __ua_flags = uaccess_save_and_enable();
+		get32t_unaligned_check(val, addr);
+		uaccess_restore(__ua_flags);
+		regs->uregs[rd] = val;
+	} else {
+		unsigned int __ua_flags = uaccess_save_and_enable();
+		put32t_unaligned_check(regs->uregs[rd], addr);
+		uaccess_restore(__ua_flags);
+	}
+	return TYPE_LDST;
+
+ fault:
+	return TYPE_FAULT;
+}
+
+/*
+ * LDM/STM alignment handler.
+ *
+ * There are 4 variants of this instruction:
+ *
+ * B = rn pointer before instruction, A = rn pointer after instruction
+ *              ------ increasing address ----->
+ *	        |    | r0 | r1 | ... | rx |    |
+ * PU = 01             B                    A
+ * PU = 11        B                    A
+ * PU = 00        A                    B
+ * PU = 10             A                    B
+ */
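+
+/*
+ * Worked example (PU = 01, i.e. LDMIA/STMIA): with rn = 0x1002 and four
+ * registers in the mask, newaddr = 0x1012 while eaddr stays at 0x1002, so
+ * words are transferred at 0x1002, 0x1006, 0x100a and 0x100e, with newaddr
+ * written back to rn only if the W bit is set.
+ */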
+static int
+do_alignment_ldmstm(unsigned long addr, unsigned long instr, struct pt_regs *regs)
+{
+	unsigned int rd, rn, correction, nr_regs, regbits;
+	unsigned long eaddr, newaddr;
+
+	if (LDM_S_BIT(instr))
+		goto bad;
+
+	correction = 4; /* processor implementation defined */
+	regs->ARM_pc += correction;
+
+	ai_multi += 1;
+
+	/* count the number of registers in the mask to be transferred */
+	nr_regs = hweight16(REGMASK_BITS(instr)) * 4;
+
+	rn = RN_BITS(instr);
+	newaddr = eaddr = regs->uregs[rn];
+
+	if (!LDST_U_BIT(instr))
+		nr_regs = -nr_regs;
+	newaddr += nr_regs;
+	if (!LDST_U_BIT(instr))
+		eaddr = newaddr;
+
+	if (LDST_P_EQ_U(instr))	/* U = P */
+		eaddr += 4;
+
+	/*
+	 * For alignment faults on the ARM922T/ARM920T the MMU makes
+	 * the FSR (and hence addr) equal to the updated base address
+	 * of the multiple access rather than the restored value.
+	 * Switch this message off if we've got an ARM92[02], otherwise
+	 * [ls]dm alignment faults are noisy!
+	 */
+#if !(defined CONFIG_CPU_ARM922T)  && !(defined CONFIG_CPU_ARM920T)
+	/*
+	 * This is a "hint" - we already have eaddr worked out by the
+	 * processor for us.
+	 */
+	if (addr != eaddr) {
+		pr_err("LDMSTM: PC = %08lx, instr = %08lx, "
+			"addr = %08lx, eaddr = %08lx\n",
+			 instruction_pointer(regs), instr, addr, eaddr);
+		show_regs(regs);
+	}
+#endif
+
+	if (user_mode(regs)) {
+		unsigned int __ua_flags = uaccess_save_and_enable();
+		for (regbits = REGMASK_BITS(instr), rd = 0; regbits;
+		     regbits >>= 1, rd += 1)
+			if (regbits & 1) {
+				if (LDST_L_BIT(instr)) {
+					unsigned int val;
+					get32t_unaligned_check(val, eaddr);
+					regs->uregs[rd] = val;
+				} else
+					put32t_unaligned_check(regs->uregs[rd], eaddr);
+				eaddr += 4;
+			}
+		uaccess_restore(__ua_flags);
+	} else {
+		for (regbits = REGMASK_BITS(instr), rd = 0; regbits;
+		     regbits >>= 1, rd += 1)
+			if (regbits & 1) {
+				if (LDST_L_BIT(instr)) {
+					unsigned int val;
+					get32_unaligned_check(val, eaddr);
+					regs->uregs[rd] = val;
+				} else
+					put32_unaligned_check(regs->uregs[rd], eaddr);
+				eaddr += 4;
+			}
+	}
+
+	if (LDST_W_BIT(instr))
+		regs->uregs[rn] = newaddr;
+	if (!LDST_L_BIT(instr) || !(REGMASK_BITS(instr) & (1 << 15)))
+		regs->ARM_pc -= correction;
+	return TYPE_DONE;
+
+fault:
+	regs->ARM_pc -= correction;
+	return TYPE_FAULT;
+
+bad:
+	pr_err("Alignment trap: not handling ldm with s-bit set\n");
+	return TYPE_ERROR;
+}
+
+/*
+ * Convert Thumb ld/st instruction forms to equivalent ARM instructions so
+ * we can reuse ARM userland alignment fault fixups for Thumb.
+ *
+ * This implementation was initially based on the algorithm found in
+ * gdb/sim/arm/thumbemu.c. It is basically just a reduction of that code,
+ * converting only the Thumb ld/st instruction forms to equivalent ARM forms.
+ *
+ * NOTES:
+ * 1. Comments below refer to ARM ARM DDI0100E Thumb Instruction sections.
+ * 2. If for some reason we're passed a non-ld/st Thumb instruction to
+ *    decode, we return 0xdeadc0de. This should never happen under normal
+ *    circumstances but if it does, we've got other problems to deal with
+ *    elsewhere and we obviously can't fix those problems here.
+ */
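+
+/*
+ * Illustrative worked example (Format 1): the Thumb encoding 0x6051, i.e.
+ * STR r1, [r2, #4], converts to 0xe5821004, the equivalent ARM
+ * STR r1, [r2, #4], which the ARM fixup code above already handles.
+ */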
+
+static unsigned long
+thumb2arm(u16 tinstr)
+{
+	u32 L = (tinstr & (1<<11)) >> 11;
+
+	switch ((tinstr & 0xf800) >> 11) {
+	/* 6.5.1 Format 1: */
+	case 0x6000 >> 11:				/* 7.1.52 STR(1) */
+	case 0x6800 >> 11:				/* 7.1.26 LDR(1) */
+	case 0x7000 >> 11:				/* 7.1.55 STRB(1) */
+	case 0x7800 >> 11:				/* 7.1.30 LDRB(1) */
+		return 0xe5800000 |
+			((tinstr & (1<<12)) << (22-12)) |	/* fixup */
+			(L<<20) |				/* L==1? */
+			((tinstr & (7<<0)) << (12-0)) |		/* Rd */
+			((tinstr & (7<<3)) << (16-3)) |		/* Rn */
+			((tinstr & (31<<6)) >>			/* immed_5 */
+				(6 - ((tinstr & (1<<12)) ? 0 : 2)));
+	case 0x8000 >> 11:				/* 7.1.57 STRH(1) */
+	case 0x8800 >> 11:				/* 7.1.32 LDRH(1) */
+		return 0xe1c000b0 |
+			(L<<20) |				/* L==1? */
+			((tinstr & (7<<0)) << (12-0)) |		/* Rd */
+			((tinstr & (7<<3)) << (16-3)) |		/* Rn */
+			((tinstr & (7<<6)) >> (6-1)) |	 /* immed_5[2:0] */
+			((tinstr & (3<<9)) >> (9-8));	 /* immed_5[4:3] */
+
+	/* 6.5.1 Format 2: */
+	case 0x5000 >> 11:
+	case 0x5800 >> 11:
+		{
+			static const u32 subset[8] = {
+				0xe7800000,		/* 7.1.53 STR(2) */
+				0xe18000b0,		/* 7.1.58 STRH(2) */
+				0xe7c00000,		/* 7.1.56 STRB(2) */
+				0xe19000d0,		/* 7.1.34 LDRSB */
+				0xe7900000,		/* 7.1.27 LDR(2) */
+				0xe19000b0,		/* 7.1.33 LDRH(2) */
+				0xe7d00000,		/* 7.1.31 LDRB(2) */
+				0xe19000f0		/* 7.1.35 LDRSH */
+			};
+			return subset[(tinstr & (7<<9)) >> 9] |
+			    ((tinstr & (7<<0)) << (12-0)) |	/* Rd */
+			    ((tinstr & (7<<3)) << (16-3)) |	/* Rn */
+			    ((tinstr & (7<<6)) >> (6-0));	/* Rm */
+		}
+
+	/* 6.5.1 Format 3: */
+	case 0x4800 >> 11:				/* 7.1.28 LDR(3) */
+		/* NOTE: This case is not technically possible. We're
+		 *	 loading 32-bit memory data via PC relative
+		 *	 addressing mode. So we can and should eliminate
+		 *	 this case. But I'll leave it here for now.
+		 */
+		return 0xe59f0000 |
+		    ((tinstr & (7<<8)) << (12-8)) |		/* Rd */
+		    ((tinstr & 255) << (2-0));			/* immed_8 */
+
+	/* 6.5.1 Format 4: */
+	case 0x9000 >> 11:				/* 7.1.54 STR(3) */
+	case 0x9800 >> 11:				/* 7.1.29 LDR(4) */
+		return 0xe58d0000 |
+			(L<<20) |				/* L==1? */
+			((tinstr & (7<<8)) << (12-8)) |		/* Rd */
+			((tinstr & 255) << 2);			/* immed_8 */
+
+	/* 6.6.1 Format 1: */
+	case 0xc000 >> 11:				/* 7.1.51 STMIA */
+	case 0xc800 >> 11:				/* 7.1.25 LDMIA */
+		{
+			u32 Rn = (tinstr & (7<<8)) >> 8;
+			u32 W = ((L<<Rn) & (tinstr&255)) ? 0 : 1<<21;
+
+			return 0xe8800000 | W | (L<<20) | (Rn<<16) |
+				(tinstr&255);
+		}
+
+	/* 6.6.1 Format 2: */
+	case 0xb000 >> 11:				/* 7.1.48 PUSH */
+	case 0xb800 >> 11:				/* 7.1.47 POP */
+		if ((tinstr & (3 << 9)) == 0x0400) {
+			static const u32 subset[4] = {
+				0xe92d0000,	/* STMDB sp!,{registers} */
+				0xe92d4000,	/* STMDB sp!,{registers,lr} */
+				0xe8bd0000,	/* LDMIA sp!,{registers} */
+				0xe8bd8000	/* LDMIA sp!,{registers,pc} */
+			};
+			return subset[(L<<1) | ((tinstr & (1<<8)) >> 8)] |
+			    (tinstr & 255);		/* register_list */
+		}
+		/* Else fall through for illegal instruction case */
+
+	default:
+		return BAD_INSTR;
+	}
+}
+
+/*
+ * Convert a Thumb-2 32 bit LDM, STM, LDRD or STRD to an equivalent instruction
+ * that the ARM alignment handler can process, and find the corresponding handler,
+ * so that we can reuse ARM userland alignment fault fixups for Thumb.
+ *
+ * @pinstr: original Thumb-2 instruction; returns the new instruction to handle
+ * @regs: register context.
+ * @poffset: return offset from faulted addr for later writeback
+ *
+ * NOTES:
+ * 1. Comments below refer to ARMv7 DDI0406A Thumb Instruction sections.
+ * 2. Register name Rt from ARMv7 is the same as Rd from ARMv6 (Rd is Rt)
+ */
+static void *
+do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs,
+			    union offset_union *poffset)
+{
+	unsigned long instr = *pinstr;
+	u16 tinst1 = (instr >> 16) & 0xffff;
+	u16 tinst2 = instr & 0xffff;
+
+	switch (tinst1 & 0xffe0) {
+	/* A6.3.5 Load/Store multiple */
+	case 0xe880:		/* STM/STMIA/STMEA,LDM/LDMIA, PUSH/POP T2 */
+	case 0xe8a0:		/* ...above writeback version */
+	case 0xe900:		/* STMDB/STMFD, LDMDB/LDMEA */
+	case 0xe920:		/* ...above writeback version */
+		/* no offset decision needed since the handler calculates it */
+		return do_alignment_ldmstm;
+
+	case 0xf840:		/* POP/PUSH T3 (single register) */
+		if (RN_BITS(instr) == 13 && (tinst2 & 0x09ff) == 0x0904) {
+			u32 L = !!(LDST_L_BIT(instr));
+			const u32 subset[2] = {
+				0xe92d0000,	/* STMDB sp!,{registers} */
+				0xe8bd0000,	/* LDMIA sp!,{registers} */
+			};
+			*pinstr = subset[L] | (1<<RD_BITS(instr));
+			return do_alignment_ldmstm;
+		}
+		/* Else fall through for illegal instruction case */
+		break;
+
+	/* A6.3.6 Load/store double, STRD/LDRD(immed, lit, reg) */
+	case 0xe860:
+	case 0xe960:
+	case 0xe8e0:
+	case 0xe9e0:
+		poffset->un = (tinst2 & 0xff) << 2;
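+		/* fall through */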
+	case 0xe940:
+	case 0xe9c0:
+		return do_alignment_ldrdstrd;
+
+	/*
+	 * No need to handle load/store instructions up to word size
+	 * since ARMv6 and later CPUs can perform unaligned accesses.
+	 */
+	default:
+		break;
+	}
+	return NULL;
+}
+
+static int
+do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+{
+	union offset_union uninitialized_var(offset);
+	unsigned long instr = 0, instrptr;
+	int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs);
+	unsigned int type;
+	unsigned int fault;
+	u16 tinstr = 0;
+	int isize = 4;
+	int thumb2_32b = 0;
+
+	if (interrupts_enabled(regs))
+		local_irq_enable();
+
+	instrptr = instruction_pointer(regs);
+
+	if (thumb_mode(regs)) {
+		u16 *ptr = (u16 *)(instrptr & ~1);
+		fault = probe_kernel_address(ptr, tinstr);
+		tinstr = __mem_to_opcode_thumb16(tinstr);
+		if (!fault) {
+			if (cpu_architecture() >= CPU_ARCH_ARMv7 &&
+			    IS_T32(tinstr)) {
+				/* Thumb-2 32-bit */
+				u16 tinst2 = 0;
+				fault = probe_kernel_address(ptr + 1, tinst2);
+				tinst2 = __mem_to_opcode_thumb16(tinst2);
+				instr = __opcode_thumb32_compose(tinstr, tinst2);
+				thumb2_32b = 1;
+			} else {
+				isize = 2;
+				instr = thumb2arm(tinstr);
+			}
+		}
+	} else {
+		fault = probe_kernel_address((void *)instrptr, instr);
+		instr = __mem_to_opcode_arm(instr);
+	}
+
+	if (fault) {
+		type = TYPE_FAULT;
+		goto bad_or_fault;
+	}
+
+	if (user_mode(regs))
+		goto user;
+
+	ai_sys += 1;
+	ai_sys_last_pc = (void *)instruction_pointer(regs);
+
+ fixup:
+
+	regs->ARM_pc += isize;
+
+	switch (CODING_BITS(instr)) {
+	case 0x00000000:	/* 3.13.4 load/store instruction extensions */
+		if (LDSTHD_I_BIT(instr))
+			offset.un = (instr & 0xf00) >> 4 | (instr & 15);
+		else
+			offset.un = regs->uregs[RM_BITS(instr)];
+
+		if ((instr & 0x000000f0) == 0x000000b0 || /* LDRH, STRH */
+		    (instr & 0x001000f0) == 0x001000f0)   /* LDRSH */
+			handler = do_alignment_ldrhstrh;
+		else if ((instr & 0x001000f0) == 0x000000d0 || /* LDRD */
+			 (instr & 0x001000f0) == 0x000000f0)   /* STRD */
+			handler = do_alignment_ldrdstrd;
+		else if ((instr & 0x01f00ff0) == 0x01000090) /* SWP */
+			goto swp;
+		else
+			goto bad;
+		break;
+
+	case 0x04000000:	/* ldr or str immediate */
+		if (COND_BITS(instr) == 0xf0000000) /* NEON VLDn, VSTn */
+			goto bad;
+		offset.un = OFFSET_BITS(instr);
+		handler = do_alignment_ldrstr;
+		break;
+
+	case 0x06000000:	/* ldr or str register */
+		offset.un = regs->uregs[RM_BITS(instr)];
+
+		if (IS_SHIFT(instr)) {
+			unsigned int shiftval = SHIFT_BITS(instr);
+
+			switch(SHIFT_TYPE(instr)) {
+			case SHIFT_LSL:
+				offset.un <<= shiftval;
+				break;
+
+			case SHIFT_LSR:
+				offset.un >>= shiftval;
+				break;
+
+			case SHIFT_ASR:
+				offset.sn >>= shiftval;
+				break;
+
+			case SHIFT_RORRRX:
+				if (shiftval == 0) {
+					offset.un >>= 1;
+					if (regs->ARM_cpsr & PSR_C_BIT)
+						offset.un |= 1 << 31;
+				} else
+					offset.un = offset.un >> shiftval |
+							  offset.un << (32 - shiftval);
+				break;
+			}
+		}
+		handler = do_alignment_ldrstr;
+		break;
+
+	case 0x08000000:	/* ldm or stm, or thumb-2 32bit instruction */
+		if (thumb2_32b) {
+			offset.un = 0;
+			handler = do_alignment_t32_to_handler(&instr, regs, &offset);
+		} else {
+			offset.un = 0;
+			handler = do_alignment_ldmstm;
+		}
+		break;
+
+	default:
+		goto bad;
+	}
+
+	if (!handler)
+		goto bad;
+	type = handler(addr, instr, regs);
+
+	if (type == TYPE_ERROR || type == TYPE_FAULT) {
+		regs->ARM_pc -= isize;
+		goto bad_or_fault;
+	}
+
+	if (type == TYPE_LDST)
+		do_alignment_finish_ldst(addr, instr, regs, offset);
+
+	return 0;
+
+ bad_or_fault:
+	if (type == TYPE_ERROR)
+		goto bad;
+	/*
+	 * We got a fault - fix it up, or die.
+	 */
+	do_bad_area(addr, fsr, regs);
+	return 0;
+
+ swp:
+	pr_err("Alignment trap: not handling swp instruction\n");
+
+ bad:
+	/*
+	 * Oops, we didn't handle the instruction.
+	 */
+	pr_err("Alignment trap: not handling instruction "
+		"%0*lx at [<%08lx>]\n",
+		isize << 1,
+		isize == 2 ? tinstr : instr, instrptr);
+	ai_skipped += 1;
+	return 1;
+
+ user:
+	ai_user += 1;
+
+	if (ai_usermode & UM_WARN)
+		printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*lx "
+		       "Address=0x%08lx FSR 0x%03x\n", current->comm,
+			task_pid_nr(current), instrptr,
+			isize << 1,
+			isize == 2 ? tinstr : instr,
+		        addr, fsr);
+
+	if (ai_usermode & UM_FIXUP)
+		goto fixup;
+
+	if (ai_usermode & UM_SIGNAL) {
+		siginfo_t si;
+
+		clear_siginfo(&si);
+		si.si_signo = SIGBUS;
+		si.si_errno = 0;
+		si.si_code = BUS_ADRALN;
+		si.si_addr = (void __user *)addr;
+
+		force_sig_info(si.si_signo, &si, current);
+	} else {
+		/*
+		 * We're about to disable the alignment trap and return to
+		 * user space.  But if an interrupt occurs before actually
+		 * reaching user space, then the IRQ vector entry code will
+		 * notice that we were still in kernel space and therefore
+		 * the alignment trap won't be re-enabled in that case as it
+		 * is presumed to be always on from kernel space.
+		 * Let's prevent that race by disabling interrupts here (they
+		 * are disabled on the way back to user space anyway in
+		 * entry-common.S) and disable the alignment trap only if
+		 * there is no work pending for this thread.
+		 */
+		raw_local_irq_disable();
+		if (!(current_thread_info()->flags & _TIF_WORK_MASK))
+			set_cr(cr_no_alignment);
+	}
+
+	return 0;
+}
+
+static int __init noalign_setup(char *__unused)
+{
+	set_cr(__clear_cr(CR_A));
+	return 1;
+}
+__setup("noalign", noalign_setup);
+
+/*
+ * This needs to be done after sysctl_init, otherwise sys/ will be
+ * overwritten.  Actually, this shouldn't be in sys/ at all since
+ * it isn't a sysctl, and it doesn't contain sysctl information.
+ * We now locate it in /proc/cpu/alignment instead.
+ */
+static int __init alignment_init(void)
+{
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry *res;
+
+	res = proc_create("cpu/alignment", S_IWUSR | S_IRUGO, NULL,
+			  &alignment_proc_fops);
+	if (!res)
+		return -ENOMEM;
+#endif
+
+	if (cpu_is_v6_unaligned()) {
+		set_cr(__clear_cr(CR_A));
+		ai_usermode = safe_usermode(ai_usermode, false);
+	}
+
+	cr_no_alignment = get_cr() & ~CR_A;
+
+	hook_fault_code(FAULT_CODE_ALIGNMENT, do_alignment, SIGBUS, BUS_ADRALN,
+			"alignment exception");
+
+	/*
+	 * ARMv6K and ARMv7 use fault status 3 (0b00011) as Access Flag section
+	 * fault, not as alignment error.
+	 *
+	 * TODO: handle ARMv6K properly. Runtime check for 'K' extension is
+	 * needed.
+	 */
+	if (cpu_architecture() <= CPU_ARCH_ARMv6) {
+		hook_fault_code(3, do_alignment, SIGBUS, BUS_ADRALN,
+				"alignment exception");
+	}
+
+	return 0;
+}
+
+fs_initcall(alignment_init);
diff --git a/arch/arm/mm/cache-aurora-l2.h b/arch/arm/mm/cache-aurora-l2.h
new file mode 100644
index 0000000..c861247
--- /dev/null
+++ b/arch/arm/mm/cache-aurora-l2.h
@@ -0,0 +1,55 @@
+/*
+ * AURORA shared L2 cache controller support
+ *
+ * Copyright (C) 2012 Marvell
+ *
+ * Yehuda Yitschak <yehuday@marvell.com>
+ * Gregory CLEMENT <gregory.clement@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef __ASM_ARM_HARDWARE_AURORA_L2_H
+#define __ASM_ARM_HARDWARE_AURORA_L2_H
+
+#define AURORA_SYNC_REG		    0x700
+#define AURORA_RANGE_BASE_ADDR_REG  0x720
+#define AURORA_FLUSH_PHY_ADDR_REG   0x7f0
+#define AURORA_INVAL_RANGE_REG	    0x774
+#define AURORA_CLEAN_RANGE_REG	    0x7b4
+#define AURORA_FLUSH_RANGE_REG	    0x7f4
+
+#define AURORA_ACR_REPLACEMENT_OFFSET	    27
+#define AURORA_ACR_REPLACEMENT_MASK	     \
+	(0x3 << AURORA_ACR_REPLACEMENT_OFFSET)
+#define AURORA_ACR_REPLACEMENT_TYPE_WAYRR    \
+	(0 << AURORA_ACR_REPLACEMENT_OFFSET)
+#define AURORA_ACR_REPLACEMENT_TYPE_LFSR     \
+	(1 << AURORA_ACR_REPLACEMENT_OFFSET)
+#define AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU \
+	(3 << AURORA_ACR_REPLACEMENT_OFFSET)
+
+#define AURORA_ACR_FORCE_WRITE_POLICY_OFFSET	0
+#define AURORA_ACR_FORCE_WRITE_POLICY_MASK	\
+	(0x3 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET)
+#define AURORA_ACR_FORCE_WRITE_POLICY_DIS	\
+	(0 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET)
+#define AURORA_ACR_FORCE_WRITE_BACK_POLICY	\
+	(1 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET)
+#define AURORA_ACR_FORCE_WRITE_THRO_POLICY	\
+	(2 << AURORA_ACR_FORCE_WRITE_POLICY_OFFSET)
+
+#define MAX_RANGE_SIZE		1024
+
+#define AURORA_WAY_SIZE_SHIFT	2
+
+#define AURORA_CTRL_FW		0x100
+
+/* Choose a number outside L2X0_CACHE_ID_PART_MASK to be sure we can
+ * distinguish a number coming from hardware from a number coming
+ * from the device tree */
+#define AURORA_CACHE_ID	       0x100
+
+#endif /* __ASM_ARM_HARDWARE_AURORA_L2_H */
diff --git a/arch/arm/mm/cache-b15-rac.c b/arch/arm/mm/cache-b15-rac.c
new file mode 100644
index 0000000..c6ed148
--- /dev/null
+++ b/arch/arm/mm/cache-b15-rac.c
@@ -0,0 +1,378 @@
+/*
+ * Broadcom Brahma-B15 CPU read-ahead cache management functions
+ *
+ * Copyright (C) 2015-2016 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/io.h>
+#include <linux/bitops.h>
+#include <linux/of_address.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/syscore_ops.h>
+#include <linux/reboot.h>
+
+#include <asm/cacheflush.h>
+#include <asm/hardware/cache-b15-rac.h>
+
+extern void v7_flush_kern_cache_all(void);
+
+/* RAC register offsets, relative to the HIF_CPU_BIUCTRL register base */
+#define RAC_CONFIG0_REG			(0x78)
+#define  RACENPREF_MASK			(0x3)
+#define  RACPREFINST_SHIFT		(0)
+#define  RACENINST_SHIFT		(2)
+#define  RACPREFDATA_SHIFT		(4)
+#define  RACENDATA_SHIFT		(6)
+#define  RAC_CPU_SHIFT			(8)
+#define  RACCFG_MASK			(0xff)
+#define RAC_CONFIG1_REG			(0x7c)
+/* Brahma-B15 is a quad-core only design */
+#define B15_RAC_FLUSH_REG		(0x80)
+/* Brahma-B53 is an octo-core design */
+#define B53_RAC_FLUSH_REG		(0x84)
+#define  FLUSH_RAC			(1 << 0)
+
+/* Bitmask to enable instruction and data prefetching with a 256-byte stride */
+#define RAC_DATA_INST_EN_MASK		(1 << RACPREFINST_SHIFT | \
+					 RACENPREF_MASK << RACENINST_SHIFT | \
+					 1 << RACPREFDATA_SHIFT | \
+					 RACENPREF_MASK << RACENDATA_SHIFT)
+
+#define RAC_ENABLED			0
+/* Special state where we want to bypass the spinlock and call directly
+ * into the v7 cache maintenance operations during suspend/resume
+ */
+#define RAC_SUSPENDED			1
+
+static void __iomem *b15_rac_base;
+static DEFINE_SPINLOCK(rac_lock);
+
+static u32 rac_config0_reg;
+static u32 rac_flush_offset;
+
+/* Initialization flag to avoid checking for b15_rac_base, and to prevent
+ * multi-platform kernels from crashing here as well.
+ */
+static unsigned long b15_rac_flags;
+
+static inline u32 __b15_rac_disable(void)
+{
+	u32 val = __raw_readl(b15_rac_base + RAC_CONFIG0_REG);
+	__raw_writel(0, b15_rac_base + RAC_CONFIG0_REG);
+	dmb();
+	return val;
+}
+
+static inline void __b15_rac_flush(void)
+{
+	u32 reg;
+
+	__raw_writel(FLUSH_RAC, b15_rac_base + rac_flush_offset);
+	do {
+		/* This dmb() is required to force the Bus Interface Unit
+		 * to clean outstanding writes, and forces an idle cycle
+		 * to be inserted.
+		 */
+		dmb();
+		reg = __raw_readl(b15_rac_base + rac_flush_offset);
+	} while (reg & FLUSH_RAC);
+}
+
+static inline u32 b15_rac_disable_and_flush(void)
+{
+	u32 reg;
+
+	reg = __b15_rac_disable();
+	__b15_rac_flush();
+	return reg;
+}
+
+static inline void __b15_rac_enable(u32 val)
+{
+	__raw_writel(val, b15_rac_base + RAC_CONFIG0_REG);
+	/* dsb() is required here to be consistent with __flush_icache_all() */
+	dsb();
+}
+
+#define BUILD_RAC_CACHE_OP(name, bar)				\
+void b15_flush_##name(void)					\
+{								\
+	unsigned int do_flush;					\
+	u32 val = 0;						\
+								\
+	if (test_bit(RAC_SUSPENDED, &b15_rac_flags)) {		\
+		v7_flush_##name();				\
+		bar;						\
+		return;						\
+	}							\
+								\
+	spin_lock(&rac_lock);					\
+	do_flush = test_bit(RAC_ENABLED, &b15_rac_flags);	\
+	if (do_flush)						\
+		val = b15_rac_disable_and_flush();		\
+	v7_flush_##name();					\
+	if (!do_flush)						\
+		bar;						\
+	else							\
+		__b15_rac_enable(val);				\
+	spin_unlock(&rac_lock);					\
+}
+
+#define nobarrier
+
+/* The readahead cache present in the Brahma-B15 CPU is a special piece of
+ * hardware after the integrated L2 cache of the B15 CPU complex whose purpose
+ * is to prefetch instruction and/or data with a line size of either 64 bytes
+ * or 256 bytes. The rationale is that the data-bus of the CPU interface is
+ * optimized for 256-byte transactions, and enabling the readahead cache
+ * provides a significant performance boost, so we want it enabled (typically
+ * twice the performance for a memcpy benchmark application).
+ *
+ * The readahead cache is transparent for Modified Virtual Addresses
+ * cache maintenance operations: ICIMVAU, DCIMVAC, DCCMVAC, DCCMVAU and
+ * DCCIMVAC.
+ *
+ * It is however not transparent for the following cache maintenance
+ * operations: DCISW, DCCSW, DCCISW, ICIALLUIS and ICIALLU, which is precisely
+ * what we are patching here with BUILD_RAC_CACHE_OP.
+ */
+BUILD_RAC_CACHE_OP(kern_cache_all, nobarrier);
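+
+/*
+ * The single expansion above provides b15_flush_kern_cache_all(): unless the
+ * RAC is suspended, it temporarily disables and flushes the RAC (when
+ * enabled), calls v7_flush_kern_cache_all(), then restores the saved RAC
+ * configuration.
+ */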
+
+static void b15_rac_enable(void)
+{
+	unsigned int cpu;
+	u32 enable = 0;
+
+	for_each_possible_cpu(cpu)
+		enable |= (RAC_DATA_INST_EN_MASK << (cpu * RAC_CPU_SHIFT));
+
+	b15_rac_disable_and_flush();
+	__b15_rac_enable(enable);
+}
+
+static int b15_rac_reboot_notifier(struct notifier_block *nb,
+				   unsigned long action,
+				   void *data)
+{
+	/* During kexec, we are not yet migrated to the boot CPU, so we need to
+	 * make sure we are SMP safe here. Once the RAC is disabled, flag it as
+	 * suspended such that the hotplug notifier returns early.
+	 */
+	if (action == SYS_RESTART) {
+		spin_lock(&rac_lock);
+		b15_rac_disable_and_flush();
+		clear_bit(RAC_ENABLED, &b15_rac_flags);
+		set_bit(RAC_SUSPENDED, &b15_rac_flags);
+		spin_unlock(&rac_lock);
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block b15_rac_reboot_nb = {
+	.notifier_call	= b15_rac_reboot_notifier,
+};
+
+/* The CPU hotplug case is the most interesting one, we basically need to make
+ * sure that the RAC is disabled for the entire system prior to having a CPU
+ * die, in particular prior to this dying CPU having exited the coherency
+ * domain.
+ *
+ * Once this CPU is marked dead, we can safely re-enable the RAC for the
+ * remaining CPUs in the system which are still online.
+ *
+ * Offlining a CPU is the problematic case; onlining a CPU is not much of an
+ * issue since the CPU and its cache-level hierarchy will start filling with
+ * the RAC disabled, so L1 and L2 only.
+ *
+ * In this function, we should NOT have to verify any unsafe setting/condition:
+ *
+ * b15_rac_base:
+ *   It is protected by the RAC_ENABLED flag, which is clear by default and
+ *   only set once the initial procedure is done; b15_rac_base has been set
+ *   by that time.
+ *
+ * RAC_ENABLED:
+ *   There is a small timing window, in b15_rac_init(), between
+ *      cpuhp_setup_state_*()
+ *      ...
+ *      set RAC_ENABLED
+ *   However, there is no hotplug activity during the Linux boot procedure.
+ *
+ * Since we have to disable the RAC for all cores, we keep the RAC on as long
+ * as possible (disable it as late as possible) to gain the cache benefit.
+ *
+ * Thus, the dying/dead states are chosen here.
+ *
+ * We are choosing not to disable the RAC on a per-CPU basis here; if we did,
+ * we would want to consider disabling it as early as possible to benefit the
+ * other active CPUs.
+ */
+
+/* Running on the dying CPU */
+static int b15_rac_dying_cpu(unsigned int cpu)
+{
+	/* During kexec/reboot, the RAC is disabled via the reboot notifier;
+	 * return early here.
+	 */
+	if (test_bit(RAC_SUSPENDED, &b15_rac_flags))
+		return 0;
+
+	spin_lock(&rac_lock);
+
+	/* Indicate that we are starting a hotplug procedure */
+	__clear_bit(RAC_ENABLED, &b15_rac_flags);
+
+	/* Disable the readahead cache and save its value to a global */
+	rac_config0_reg = b15_rac_disable_and_flush();
+
+	spin_unlock(&rac_lock);
+
+	return 0;
+}
+
+/* Running on a non-dying CPU */
+static int b15_rac_dead_cpu(unsigned int cpu)
+{
+	/* During kexec/reboot, the RAC is disabled via the reboot notifier;
+	 * return early here.
+	 */
+	if (test_bit(RAC_SUSPENDED, &b15_rac_flags))
+		return 0;
+
+	spin_lock(&rac_lock);
+
+	/* And enable it */
+	__b15_rac_enable(rac_config0_reg);
+	__set_bit(RAC_ENABLED, &b15_rac_flags);
+
+	spin_unlock(&rac_lock);
+
+	return 0;
+}
+
+static int b15_rac_suspend(void)
+{
+	/* Suspend the read-ahead cache operations, forcing our cache
+	 * implementation to fall back to the regular ARMv7 calls.
+	 *
+	 * We are guaranteed to be running on the boot CPU at this point and
+	 * with every other CPU quiesced, so setting RAC_SUSPENDED is not racy
+	 * here.
+	 */
+	rac_config0_reg = b15_rac_disable_and_flush();
+	set_bit(RAC_SUSPENDED, &b15_rac_flags);
+
+	return 0;
+}
+
+static void b15_rac_resume(void)
+{
+	/* Coming out of an S3 suspend/resume cycle, the read-ahead cache
+	 * register RAC_CONFIG0_REG will have been restored to its default value,
+	 * so make sure we re-enable it and set the enable flag. We are also
+	 * guaranteed to run on the boot CPU, so this is not racy either.
+	 */
+	__b15_rac_enable(rac_config0_reg);
+	clear_bit(RAC_SUSPENDED, &b15_rac_flags);
+}
+
+static struct syscore_ops b15_rac_syscore_ops = {
+	.suspend	= b15_rac_suspend,
+	.resume		= b15_rac_resume,
+};
+
+static int __init b15_rac_init(void)
+{
+	struct device_node *dn, *cpu_dn;
+	int ret = 0, cpu;
+	u32 reg, en_mask = 0;
+
+	dn = of_find_compatible_node(NULL, NULL, "brcm,brcmstb-cpu-biu-ctrl");
+	if (!dn)
+		return -ENODEV;
+
+	if (WARN(num_possible_cpus() > 4, "RAC only supports 4 CPUs\n"))
+		goto out;
+
+	b15_rac_base = of_iomap(dn, 0);
+	if (!b15_rac_base) {
+		pr_err("failed to remap BIU control base\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	cpu_dn = of_get_cpu_node(0, NULL);
+	if (!cpu_dn) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	if (of_device_is_compatible(cpu_dn, "brcm,brahma-b15"))
+		rac_flush_offset = B15_RAC_FLUSH_REG;
+	else if (of_device_is_compatible(cpu_dn, "brcm,brahma-b53"))
+		rac_flush_offset = B53_RAC_FLUSH_REG;
+	else {
+		pr_err("Unsupported CPU\n");
+		of_node_put(cpu_dn);
+		ret = -EINVAL;
+		goto out;
+	}
+	of_node_put(cpu_dn);
+
+	ret = register_reboot_notifier(&b15_rac_reboot_nb);
+	if (ret) {
+		pr_err("failed to register reboot notifier\n");
+		iounmap(b15_rac_base);
+		goto out;
+	}
+
+	if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
+		ret = cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DEAD,
+					"arm/cache-b15-rac:dead",
+					NULL, b15_rac_dead_cpu);
+		if (ret)
+			goto out_unmap;
+
+		ret = cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DYING,
+					"arm/cache-b15-rac:dying",
+					NULL, b15_rac_dying_cpu);
+		if (ret)
+			goto out_cpu_dead;
+	}
+
+	if (IS_ENABLED(CONFIG_PM_SLEEP))
+		register_syscore_ops(&b15_rac_syscore_ops);
+
+	spin_lock(&rac_lock);
+	reg = __raw_readl(b15_rac_base + RAC_CONFIG0_REG);
+	for_each_possible_cpu(cpu)
+		en_mask |= ((1 << RACPREFDATA_SHIFT) << (cpu * RAC_CPU_SHIFT));
+	WARN(reg & en_mask, "Read-ahead cache not previously disabled\n");
+
+	b15_rac_enable();
+	set_bit(RAC_ENABLED, &b15_rac_flags);
+	spin_unlock(&rac_lock);
+
+	pr_info("Broadcom Brahma-B15 readahead cache at: 0x%p\n",
+		b15_rac_base + RAC_CONFIG0_REG);
+
+	goto out;
+
+out_cpu_dead:
+	cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CACHE_B15_RAC_DYING);
+out_unmap:
+	unregister_reboot_notifier(&b15_rac_reboot_nb);
+	iounmap(b15_rac_base);
+out:
+	of_node_put(dn);
+	return ret;
+}
+arch_initcall(b15_rac_init);
diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S
new file mode 100644
index 0000000..2f0c588
--- /dev/null
+++ b/arch/arm/mm/cache-fa.S
@@ -0,0 +1,250 @@
+/*
+ *  linux/arch/arm/mm/cache-fa.S
+ *
+ *  Copyright (C) 2005 Faraday Corp.
+ *  Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on cache-v4wb.S:
+ *  Copyright (C) 1997-2002 Russell king
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  Processors: FA520 FA526 FA626	
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+
+#include "proc-macros.S"
+
+/*
+ * The size of one data cache line.
+ */
+#define CACHE_DLINESIZE	16
+
+/*
+ * The total size of the data cache.
+ */
+#ifdef CONFIG_ARCH_GEMINI
+#define CACHE_DSIZE	8192
+#else
+#define CACHE_DSIZE	16384 
+#endif 
+
+/* FIXME: put optimal value here. Current one is just an estimate */
+#define CACHE_DLIMIT	(CACHE_DSIZE * 2)
+
+/*
+ *	flush_icache_all()
+ *
+ *	Unconditionally clean and invalidate the entire icache.
+ */
+ENTRY(fa_flush_icache_all)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	ret	lr
+ENDPROC(fa_flush_icache_all)
+
+/*
+ *	flush_user_cache_all()
+ *
+ *	Clean and invalidate all cache entries in a particular address
+ *	space.
+ */
+ENTRY(fa_flush_user_cache_all)
+	/* FALLTHROUGH */
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(fa_flush_kern_cache_all)
+	mov	ip, #0
+	mov	r2, #VM_EXEC
+__flush_whole_cache:
+	mcr	p15, 0, ip, c7, c14, 0		@ clean/invalidate D cache
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcrne	p15, 0, ip, c7, c5, 6		@ invalidate BTB
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain write buffer
+	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
+	ret	lr
+
+/*
+ *	flush_user_cache_range(start, end, flags)
+ *
+ *	Invalidate a range of cache entries in the specified
+ *	address space.
+ *
+ *	- start - start address (inclusive, page aligned)
+ *	- end	- end address (exclusive, page aligned)
+ *	- flags	- vm_area_struct flags describing address space
+ */
+ENTRY(fa_flush_user_cache_range)
+	mov	ip, #0
+	sub	r3, r1, r0			@ calculate total size
+	cmp	r3, #CACHE_DLIMIT		@ total size >= limit?
+	bhs	__flush_whole_cache		@ flush whole D cache
+
+1:	tst	r2, #VM_EXEC
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I line
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c5, 6		@ invalidate BTB
+	mcrne	p15, 0, ip, c7, c10, 4		@ data write barrier
+	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
+	ret	lr
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start and end.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(fa_coherent_kern_range)
+	/* fall through */
+
+/*
+ *	coherent_user_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start and end.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(fa_coherent_user_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D entry
+	mcr	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 6		@ invalidate BTB
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
+	ret	lr
+
+/*
+ *	flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure that the data held in the page kaddr is written back
+ *	to the page in question.
+ *
+ *	- addr	- kernel address
+ *	- size	- size of region
+ */
+ENTRY(fa_flush_kern_dcache_area)
+	add	r1, r0, r1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D line
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	ret	lr
+
+/*
+ *	dma_inv_range(start, end)
+ *
+ *	Invalidate (discard) the specified virtual address range.
+ *	May not write back any entries.  If 'start' or 'end'
+ *	are not cache line aligned, those lines must be written
+ *	back.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+fa_dma_inv_range:
+	tst	r0, #CACHE_DLINESIZE - 1
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r0, c7, c14, 1		@ clean & invalidate D entry
+	tst	r1, #CACHE_DLINESIZE - 1
+	bic	r1, r1, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r1, c7, c14, 1		@ clean & invalidate D entry
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	ret	lr
+
+/*
+ *	dma_clean_range(start, end)
+ *
+ *	Clean (write back) the specified virtual address range.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+fa_dma_clean_range:
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0	
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	ret	lr
+
+/*
+ *	dma_flush_range(start,end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+ENTRY(fa_dma_flush_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0	
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	ret	lr
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
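+@ Direction decode, for reference: DMA_TO_DEVICE cleans only, DMA_FROM_DEVICE
+@ invalidates only, and DMA_BIDIRECTIONAL falls back to a full clean+invalidate
+@ via fa_dma_flush_range.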
+ENTRY(fa_dma_map_area)
+	add	r1, r1, r0
+	cmp	r2, #DMA_TO_DEVICE
+	beq	fa_dma_clean_range
+	bcs	fa_dma_inv_range
+	b	fa_dma_flush_range
+ENDPROC(fa_dma_map_area)
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(fa_dma_unmap_area)
+	ret	lr
+ENDPROC(fa_dma_unmap_area)
+
+	.globl	fa_flush_kern_cache_louis
+	.equ	fa_flush_kern_cache_louis, fa_flush_kern_cache_all
+
+	__INITDATA
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions fa
diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c
new file mode 100644
index 0000000..5c1b7a7
--- /dev/null
+++ b/arch/arm/mm/cache-feroceon-l2.c
@@ -0,0 +1,392 @@
+/*
+ * arch/arm/mm/cache-feroceon-l2.c - Feroceon L2 cache controller support
+ *
+ * Copyright (C) 2008 Marvell Semiconductor
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ *
+ * References:
+ * - Unified Layer 2 Cache for Feroceon CPU Cores,
+ *   Document ID MV-S104858-00, Rev. A, October 23 2007.
+ */
+
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <asm/cacheflush.h>
+#include <asm/cp15.h>
+#include <asm/hardware/cache-feroceon-l2.h>
+
+#define L2_WRITETHROUGH_KIRKWOOD	BIT(4)
+
+/*
+ * Low-level cache maintenance operations.
+ *
+ * As well as the regular 'clean/invalidate/flush L2 cache line by
+ * MVA' instructions, the Feroceon L2 cache controller also features
+ * 'clean/invalidate L2 range by MVA' operations.
+ *
+ * Cache range operations are initiated by writing the start and
+ * end addresses to successive cp15 registers, and process every
+ * cache line whose first byte address lies in the inclusive range
+ * [start:end].
+ *
+ * The cache range operations stall the CPU pipeline until completion.
+ *
+ * The range operations require two successive cp15 writes, in
+ * between which we don't want to be preempted.
+ */
+
+static inline unsigned long l2_get_va(unsigned long paddr)
+{
+#ifdef CONFIG_HIGHMEM
+	/*
+	 * Because range ops can't be done on physical addresses,
+	 * we simply install a virtual mapping for it only for the
+	 * TLB lookup to occur, hence no need to flush the untouched
+	 * memory mapping afterwards (note: a cache flush may happen
+	 * in some circumstances depending on the path taken in kunmap_atomic).
+	 */
+	void *vaddr = kmap_atomic_pfn(paddr >> PAGE_SHIFT);
+	return (unsigned long)vaddr + (paddr & ~PAGE_MASK);
+#else
+	return __phys_to_virt(paddr);
+#endif
+}
+
+static inline void l2_put_va(unsigned long vaddr)
+{
+#ifdef CONFIG_HIGHMEM
+	kunmap_atomic((void *)vaddr);
+#endif
+}
+
+static inline void l2_clean_pa(unsigned long addr)
+{
+	__asm__("mcr p15, 1, %0, c15, c9, 3" : : "r" (addr));
+}
+
+static inline void l2_clean_pa_range(unsigned long start, unsigned long end)
+{
+	unsigned long va_start, va_end, flags;
+
+	/*
+	 * Make sure 'start' and 'end' reference the same page, as
+	 * L2 is PIPT and range operations only do a TLB lookup on
+	 * the start address.
+	 */
+	BUG_ON((start ^ end) >> PAGE_SHIFT);
+
+	va_start = l2_get_va(start);
+	va_end = va_start + (end - start);
+	raw_local_irq_save(flags);
+	__asm__("mcr p15, 1, %0, c15, c9, 4\n\t"
+		"mcr p15, 1, %1, c15, c9, 5"
+		: : "r" (va_start), "r" (va_end));
+	raw_local_irq_restore(flags);
+	l2_put_va(va_start);
+}
+
+static inline void l2_clean_inv_pa(unsigned long addr)
+{
+	__asm__("mcr p15, 1, %0, c15, c10, 3" : : "r" (addr));
+}
+
+static inline void l2_inv_pa(unsigned long addr)
+{
+	__asm__("mcr p15, 1, %0, c15, c11, 3" : : "r" (addr));
+}
+
+static inline void l2_inv_pa_range(unsigned long start, unsigned long end)
+{
+	unsigned long va_start, va_end, flags;
+
+	/*
+	 * Make sure 'start' and 'end' reference the same page, as
+	 * L2 is PIPT and range operations only do a TLB lookup on
+	 * the start address.
+	 */
+	BUG_ON((start ^ end) >> PAGE_SHIFT);
+
+	va_start = l2_get_va(start);
+	va_end = va_start + (end - start);
+	raw_local_irq_save(flags);
+	__asm__("mcr p15, 1, %0, c15, c11, 4\n\t"
+		"mcr p15, 1, %1, c15, c11, 5"
+		: : "r" (va_start), "r" (va_end));
+	raw_local_irq_restore(flags);
+	l2_put_va(va_start);
+}
+
+static inline void l2_inv_all(void)
+{
+	__asm__("mcr p15, 1, %0, c15, c11, 0" : : "r" (0));
+}
+
+/*
+ * Linux primitives.
+ *
+ * Note that the end addresses passed to Linux primitives are
+ * noninclusive, while the hardware cache range operations use
+ * inclusive start and end addresses.
+ */
+#define CACHE_LINE_SIZE		32
+#define MAX_RANGE_SIZE		1024
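+
+/*
+ * E.g. the range helpers below pass (start, range_end - CACHE_LINE_SIZE) to
+ * the hardware range ops, converting Linux's exclusive end address into the
+ * inclusive last-line address the controller expects.
+ */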
+
+static int l2_wt_override;
+
+static unsigned long calc_range_end(unsigned long start, unsigned long end)
+{
+	unsigned long range_end;
+
+	BUG_ON(start & (CACHE_LINE_SIZE - 1));
+	BUG_ON(end & (CACHE_LINE_SIZE - 1));
+
+	/*
+	 * Try to process all cache lines between 'start' and 'end'.
+	 */
+	range_end = end;
+
+	/*
+	 * Limit the number of cache lines processed at once,
+	 * since cache range operations stall the CPU pipeline
+	 * until completion.
+	 */
+	if (range_end > start + MAX_RANGE_SIZE)
+		range_end = start + MAX_RANGE_SIZE;
+
+	/*
+	 * Cache range operations can't straddle a page boundary.
+	 */
+	if (range_end > (start | (PAGE_SIZE - 1)) + 1)
+		range_end = (start | (PAGE_SIZE - 1)) + 1;
+
+	return range_end;
+}
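+
+/*
+ * Worked example: with 4 KiB pages, calc_range_end(0x1000, 0x3000) returns
+ * 0x1400 (start + MAX_RANGE_SIZE), so a large flush proceeds in 1 KiB,
+ * page-bounded chunks.
+ */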
+
+static void feroceon_l2_inv_range(unsigned long start, unsigned long end)
+{
+	/*
+	 * Clean and invalidate partial first cache line.
+	 */
+	if (start & (CACHE_LINE_SIZE - 1)) {
+		l2_clean_inv_pa(start & ~(CACHE_LINE_SIZE - 1));
+		start = (start | (CACHE_LINE_SIZE - 1)) + 1;
+	}
+
+	/*
+	 * Clean and invalidate partial last cache line.
+	 */
+	if (start < end && end & (CACHE_LINE_SIZE - 1)) {
+		l2_clean_inv_pa(end & ~(CACHE_LINE_SIZE - 1));
+		end &= ~(CACHE_LINE_SIZE - 1);
+	}
+
+	/*
+	 * Invalidate all full cache lines between 'start' and 'end'.
+	 */
+	while (start < end) {
+		unsigned long range_end = calc_range_end(start, end);
+		l2_inv_pa_range(start, range_end - CACHE_LINE_SIZE);
+		start = range_end;
+	}
+
+	dsb();
+}
+
+static void feroceon_l2_clean_range(unsigned long start, unsigned long end)
+{
+	/*
+	 * If L2 is forced to WT, the L2 will always be clean and we
+	 * don't need to do anything here.
+	 */
+	if (!l2_wt_override) {
+		start &= ~(CACHE_LINE_SIZE - 1);
+		end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
+		while (start != end) {
+			unsigned long range_end = calc_range_end(start, end);
+			l2_clean_pa_range(start, range_end - CACHE_LINE_SIZE);
+			start = range_end;
+		}
+	}
+
+	dsb();
+}
+
+static void feroceon_l2_flush_range(unsigned long start, unsigned long end)
+{
+	start &= ~(CACHE_LINE_SIZE - 1);
+	end = (end + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1);
+	while (start != end) {
+		unsigned long range_end = calc_range_end(start, end);
+		if (!l2_wt_override)
+			l2_clean_pa_range(start, range_end - CACHE_LINE_SIZE);
+		l2_inv_pa_range(start, range_end - CACHE_LINE_SIZE);
+		start = range_end;
+	}
+
+	dsb();
+}
+
+
+/*
+ * Routines to disable and re-enable the D-cache and I-cache at run
+ * time.  These are necessary because the L2 cache can only be enabled
+ * or disabled while the L1 Dcache and Icache are both disabled.
+ */
+static int __init flush_and_disable_dcache(void)
+{
+	u32 cr;
+
+	cr = get_cr();
+	if (cr & CR_C) {
+		unsigned long flags;
+
+		raw_local_irq_save(flags);
+		flush_cache_all();
+		set_cr(cr & ~CR_C);
+		raw_local_irq_restore(flags);
+		return 1;
+	}
+	return 0;
+}
+
+static void __init enable_dcache(void)
+{
+	u32 cr;
+
+	cr = get_cr();
+	set_cr(cr | CR_C);
+}
+
+static void __init __invalidate_icache(void)
+{
+	__asm__("mcr p15, 0, %0, c7, c5, 0" : : "r" (0));
+}
+
+static int __init invalidate_and_disable_icache(void)
+{
+	u32 cr;
+
+	cr = get_cr();
+	if (cr & CR_I) {
+		set_cr(cr & ~CR_I);
+		__invalidate_icache();
+		return 1;
+	}
+	return 0;
+}
+
+static void __init enable_icache(void)
+{
+	u32 cr;
+
+	cr = get_cr();
+	set_cr(cr | CR_I);
+}
+
+static inline u32 read_extra_features(void)
+{
+	u32 u;
+
+	__asm__("mrc p15, 1, %0, c15, c1, 0" : "=r" (u));
+
+	return u;
+}
+
+static inline void write_extra_features(u32 u)
+{
+	__asm__("mcr p15, 1, %0, c15, c1, 0" : : "r" (u));
+}
+
+static void __init disable_l2_prefetch(void)
+{
+	u32 u;
+
+	/*
+	 * Read the CPU Extra Features register and verify that the
+	 * Disable L2 Prefetch bit is set.
+	 */
+	u = read_extra_features();
+	if (!(u & 0x01000000)) {
+		pr_info("Feroceon L2: Disabling L2 prefetch.\n");
+		write_extra_features(u | 0x01000000);
+	}
+}
+
+static void __init enable_l2(void)
+{
+	u32 u;
+
+	u = read_extra_features();
+	if (!(u & 0x00400000)) {
+		int i, d;
+
+		pr_info("Feroceon L2: Enabling L2\n");
+
+		d = flush_and_disable_dcache();
+		i = invalidate_and_disable_icache();
+		l2_inv_all();
+		write_extra_features(u | 0x00400000);
+		if (i)
+			enable_icache();
+		if (d)
+			enable_dcache();
+	} else
+		pr_err(FW_BUG
+		       "Feroceon L2: bootloader left the L2 cache on!\n");
+}
+
+void __init feroceon_l2_init(int __l2_wt_override)
+{
+	l2_wt_override = __l2_wt_override;
+
+	disable_l2_prefetch();
+
+	outer_cache.inv_range = feroceon_l2_inv_range;
+	outer_cache.clean_range = feroceon_l2_clean_range;
+	outer_cache.flush_range = feroceon_l2_flush_range;
+
+	enable_l2();
+
+	pr_info("Feroceon L2: Cache support initialised%s.\n",
+			 l2_wt_override ? ", in WT override mode" : "");
+}
+#ifdef CONFIG_OF
+static const struct of_device_id feroceon_ids[] __initconst = {
+	{ .compatible = "marvell,kirkwood-cache"},
+	{ .compatible = "marvell,feroceon-cache"},
+	{}
+};
+
+int __init feroceon_of_init(void)
+{
+	struct device_node *node;
+	void __iomem *base;
+	bool l2_wt_override = false;
+
+#if defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH)
+	l2_wt_override = true;
+#endif
+
+	node = of_find_matching_node(NULL, feroceon_ids);
+	if (node && of_device_is_compatible(node, "marvell,kirkwood-cache")) {
+		base = of_iomap(node, 0);
+		if (!base)
+			return -ENOMEM;
+
+		if (l2_wt_override)
+			writel(readl(base) | L2_WRITETHROUGH_KIRKWOOD, base);
+		else
+			writel(readl(base) & ~L2_WRITETHROUGH_KIRKWOOD, base);
+	}
+
+	feroceon_l2_init(l2_wt_override);
+
+	return 0;
+}
+#endif
diff --git a/arch/arm/mm/cache-l2x0-pmu.c b/arch/arm/mm/cache-l2x0-pmu.c
new file mode 100644
index 0000000..afe5b4c
--- /dev/null
+++ b/arch/arm/mm/cache-l2x0-pmu.c
@@ -0,0 +1,584 @@
+/*
+ * L220/L310 cache controller support
+ *
+ * Copyright (C) 2016 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/errno.h>
+#include <linux/hrtimer.h>
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/perf_event.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include <asm/hardware/cache-l2x0.h>
+
+#define PMU_NR_COUNTERS 2
+
+static void __iomem *l2x0_base;
+static struct pmu *l2x0_pmu;
+static cpumask_t pmu_cpu;
+
+static const char *l2x0_name;
+
+static ktime_t l2x0_pmu_poll_period;
+static struct hrtimer l2x0_pmu_hrtimer;
+
+/*
+ * The L220/PL310 PMU has two equivalent counters, Counter1 and Counter0.
+ * Registers controlling these are laid out in pairs, in descending order, i.e.
+ * the register for Counter1 comes first, followed by the register for
+ * Counter0.
+ * We ensure that idx 0 -> Counter0, and idx 1 -> Counter1.
+ */
+static struct perf_event *events[PMU_NR_COUNTERS];
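+
+/*
+ * E.g. idx 1 therefore accesses L2X0_EVENT_CNT0_CFG - 4 and
+ * L2X0_EVENT_CNT0_VAL - 4, i.e. the Counter1 configuration and value
+ * registers, which immediately precede their Counter0 counterparts.
+ */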
+
+/* Find an unused counter */
+static int l2x0_pmu_find_idx(void)
+{
+	int i;
+
+	for (i = 0; i < PMU_NR_COUNTERS; i++) {
+		if (!events[i])
+			return i;
+	}
+
+	return -1;
+}
+
+/* How many counters are allocated? */
+static int l2x0_pmu_num_active_counters(void)
+{
+	int i, cnt = 0;
+
+	for (i = 0; i < PMU_NR_COUNTERS; i++) {
+		if (events[i])
+			cnt++;
+	}
+
+	return cnt;
+}
+
+static void l2x0_pmu_counter_config_write(int idx, u32 val)
+{
+	writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT0_CFG - 4 * idx);
+}
+
+static u32 l2x0_pmu_counter_read(int idx)
+{
+	return readl_relaxed(l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * idx);
+}
+
+static void l2x0_pmu_counter_write(int idx, u32 val)
+{
+	writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT0_VAL - 4 * idx);
+}
+
+static void __l2x0_pmu_enable(void)
+{
+	u32 val = readl_relaxed(l2x0_base + L2X0_EVENT_CNT_CTRL);
+	val |= L2X0_EVENT_CNT_CTRL_ENABLE;
+	writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT_CTRL);
+}
+
+static void __l2x0_pmu_disable(void)
+{
+	u32 val = readl_relaxed(l2x0_base + L2X0_EVENT_CNT_CTRL);
+	val &= ~L2X0_EVENT_CNT_CTRL_ENABLE;
+	writel_relaxed(val, l2x0_base + L2X0_EVENT_CNT_CTRL);
+}
+
+static void l2x0_pmu_enable(struct pmu *pmu)
+{
+	if (l2x0_pmu_num_active_counters() == 0)
+		return;
+
+	__l2x0_pmu_enable();
+}
+
+static void l2x0_pmu_disable(struct pmu *pmu)
+{
+	if (l2x0_pmu_num_active_counters() == 0)
+		return;
+
+	__l2x0_pmu_disable();
+}
+
+static void warn_if_saturated(u32 count)
+{
+	if (count != 0xffffffff)
+		return;
+
+	pr_warn_ratelimited("L2X0 counter saturated. Poll period too long\n");
+}
+
+static void l2x0_pmu_event_read(struct perf_event *event)
+{
+	struct hw_perf_event *hw = &event->hw;
+	u64 prev_count, new_count, mask;
+
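+	/*
+	 * Read the hardware counter and publish it as the new prev_count;
+	 * if another reader updated prev_count in the meantime, retry so
+	 * that the delta below is taken against the value we actually
+	 * replaced.
+	 */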
+	do {
+		 prev_count = local64_read(&hw->prev_count);
+		 new_count = l2x0_pmu_counter_read(hw->idx);
+	} while (local64_xchg(&hw->prev_count, new_count) != prev_count);
+
+	mask = GENMASK_ULL(31, 0);
+	local64_add((new_count - prev_count) & mask, &event->count);
+
+	warn_if_saturated(new_count);
+}
+
+static void l2x0_pmu_event_configure(struct perf_event *event)
+{
+	struct hw_perf_event *hw = &event->hw;
+
+	/*
+	 * The L2X0 counters saturate at 0xffffffff rather than wrapping, so we
+	 * will *always* lose some number of events when a counter saturates,
+	 * and have no way of detecting how many were lost.
+	 *
+	 * To minimize the impact of this, we try to maximize the period by
+	 * always starting counters at zero. To ensure that group ratios are
+	 * representative, we poll periodically to avoid counters saturating.
+	 * See l2x0_pmu_poll().
+	 */
+	local64_set(&hw->prev_count, 0);
+	l2x0_pmu_counter_write(hw->idx, 0);
+}
+
+static enum hrtimer_restart l2x0_pmu_poll(struct hrtimer *hrtimer)
+{
+	unsigned long flags;
+	int i;
+
+	local_irq_save(flags);
+	__l2x0_pmu_disable();
+
+	for (i = 0; i < PMU_NR_COUNTERS; i++) {
+		struct perf_event *event = events[i];
+
+		if (!event)
+			continue;
+
+		l2x0_pmu_event_read(event);
+		l2x0_pmu_event_configure(event);
+	}
+
+	__l2x0_pmu_enable();
+	local_irq_restore(flags);
+
+	hrtimer_forward_now(hrtimer, l2x0_pmu_poll_period);
+	return HRTIMER_RESTART;
+}
+
+static void __l2x0_pmu_event_enable(int idx, u32 event)
+{
+	u32 val;
+
+	val = event << L2X0_EVENT_CNT_CFG_SRC_SHIFT;
+	val |= L2X0_EVENT_CNT_CFG_INT_DISABLED;
+	l2x0_pmu_counter_config_write(idx, val);
+}
+
+static void l2x0_pmu_event_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hw = &event->hw;
+
+	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
+		return;
+
+	if (flags & PERF_EF_RELOAD) {
+		WARN_ON_ONCE(!(hw->state & PERF_HES_UPTODATE));
+		l2x0_pmu_event_configure(event);
+	}
+
+	hw->state = 0;
+
+	__l2x0_pmu_event_enable(hw->idx, hw->config_base);
+}
+
+static void __l2x0_pmu_event_disable(int idx)
+{
+	u32 val;
+
+	val = L2X0_EVENT_CNT_CFG_SRC_DISABLED << L2X0_EVENT_CNT_CFG_SRC_SHIFT;
+	val |= L2X0_EVENT_CNT_CFG_INT_DISABLED;
+	l2x0_pmu_counter_config_write(idx, val);
+}
+
+static void l2x0_pmu_event_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hw = &event->hw;
+
+	if (WARN_ON_ONCE(event->hw.state & PERF_HES_STOPPED))
+		return;
+
+	__l2x0_pmu_event_disable(hw->idx);
+
+	hw->state |= PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_UPDATE) {
+		l2x0_pmu_event_read(event);
+		hw->state |= PERF_HES_UPTODATE;
+	}
+}
+
+static int l2x0_pmu_event_add(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hw = &event->hw;
+	int idx = l2x0_pmu_find_idx();
+
+	if (idx == -1)
+		return -EAGAIN;
+
+	/*
+	 * Pin the timer, so that the overflows are handled by the chosen
+	 * event->cpu (this is the same one as presented in "cpumask"
+	 * attribute).
+	 */
+	if (l2x0_pmu_num_active_counters() == 0)
+		hrtimer_start(&l2x0_pmu_hrtimer, l2x0_pmu_poll_period,
+			      HRTIMER_MODE_REL_PINNED);
+
+	events[idx] = event;
+	hw->idx = idx;
+
+	l2x0_pmu_event_configure(event);
+
+	hw->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+	if (flags & PERF_EF_START)
+		l2x0_pmu_event_start(event, 0);
+
+	return 0;
+}
+
+static void l2x0_pmu_event_del(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hw = &event->hw;
+
+	l2x0_pmu_event_stop(event, PERF_EF_UPDATE);
+
+	events[hw->idx] = NULL;
+	hw->idx = -1;
+
+	if (l2x0_pmu_num_active_counters() == 0)
+		hrtimer_cancel(&l2x0_pmu_hrtimer);
+}
+
+static bool l2x0_pmu_group_is_valid(struct perf_event *event)
+{
+	struct pmu *pmu = event->pmu;
+	struct perf_event *leader = event->group_leader;
+	struct perf_event *sibling;
+	int num_hw = 0;
+
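+	/*
+	 * The group can only be scheduled if its hardware events fit on our
+	 * two counters simultaneously; software-only siblings never consume
+	 * a counter.
+	 */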
+	if (leader->pmu == pmu)
+		num_hw++;
+	else if (!is_software_event(leader))
+		return false;
+
+	for_each_sibling_event(sibling, leader) {
+		if (sibling->pmu == pmu)
+			num_hw++;
+		else if (!is_software_event(sibling))
+			return false;
+	}
+
+	return num_hw <= PMU_NR_COUNTERS;
+}
+
+static int l2x0_pmu_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hw = &event->hw;
+
+	if (event->attr.type != l2x0_pmu->type)
+		return -ENOENT;
+
+	if (is_sampling_event(event) ||
+	    event->attach_state & PERF_ATTACH_TASK)
+		return -EINVAL;
+
+	if (event->attr.exclude_user   ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv     ||
+	    event->attr.exclude_idle   ||
+	    event->attr.exclude_host   ||
+	    event->attr.exclude_guest)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	if (event->attr.config & ~L2X0_EVENT_CNT_CFG_SRC_MASK)
+		return -EINVAL;
+
+	hw->config_base = event->attr.config;
+
+	if (!l2x0_pmu_group_is_valid(event))
+		return -EINVAL;
+
+	event->cpu = cpumask_first(&pmu_cpu);
+
+	return 0;
+}
+
+struct l2x0_event_attribute {
+	struct device_attribute attr;
+	unsigned int config;
+	bool pl310_only;
+};
+
+#define L2X0_EVENT_ATTR(_name, _config, _pl310_only)				\
+	(&((struct l2x0_event_attribute[]) {{					\
+		.attr = __ATTR(_name, S_IRUGO, l2x0_pmu_event_show, NULL),	\
+		.config = _config,						\
+		.pl310_only = _pl310_only,					\
+	}})[0].attr.attr)
+
+#define L220_PLUS_EVENT_ATTR(_name, _config)					\
+	L2X0_EVENT_ATTR(_name, _config, false)
+
+#define PL310_EVENT_ATTR(_name, _config)					\
+	L2X0_EVENT_ATTR(_name, _config, true)
+
+static ssize_t l2x0_pmu_event_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct l2x0_event_attribute *lattr;
+
+	lattr = container_of(attr, typeof(*lattr), attr);
+	return snprintf(buf, PAGE_SIZE, "config=0x%x\n", lattr->config);
+}
+
+static umode_t l2x0_pmu_event_attr_is_visible(struct kobject *kobj,
+					      struct attribute *attr,
+					      int unused)
+{
+	struct device *dev = kobj_to_dev(kobj);
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct l2x0_event_attribute *lattr;
+
+	lattr = container_of(attr, typeof(*lattr), attr.attr);
+
+	if (!lattr->pl310_only || strcmp("l2c_310", pmu->name) == 0)
+		return attr->mode;
+
+	return 0;
+}
+
+static struct attribute *l2x0_pmu_event_attrs[] = {
+	L220_PLUS_EVENT_ATTR(co,	0x1),
+	L220_PLUS_EVENT_ATTR(drhit,	0x2),
+	L220_PLUS_EVENT_ATTR(drreq,	0x3),
+	L220_PLUS_EVENT_ATTR(dwhit,	0x4),
+	L220_PLUS_EVENT_ATTR(dwreq,	0x5),
+	L220_PLUS_EVENT_ATTR(dwtreq,	0x6),
+	L220_PLUS_EVENT_ATTR(irhit,	0x7),
+	L220_PLUS_EVENT_ATTR(irreq,	0x8),
+	L220_PLUS_EVENT_ATTR(wa,	0x9),
+	PL310_EVENT_ATTR(ipfalloc,	0xa),
+	PL310_EVENT_ATTR(epfhit,	0xb),
+	PL310_EVENT_ATTR(epfalloc,	0xc),
+	PL310_EVENT_ATTR(srrcvd,	0xd),
+	PL310_EVENT_ATTR(srconf,	0xe),
+	PL310_EVENT_ATTR(epfrcvd,	0xf),
+	NULL
+};
+
+static struct attribute_group l2x0_pmu_event_attrs_group = {
+	.name = "events",
+	.attrs = l2x0_pmu_event_attrs,
+	.is_visible = l2x0_pmu_event_attr_is_visible,
+};
+
+static ssize_t l2x0_pmu_cpumask_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	return cpumap_print_to_pagebuf(true, buf, &pmu_cpu);
+}
+
+static struct device_attribute l2x0_pmu_cpumask_attr =
+		__ATTR(cpumask, S_IRUGO, l2x0_pmu_cpumask_show, NULL);
+
+static struct attribute *l2x0_pmu_cpumask_attrs[] = {
+	&l2x0_pmu_cpumask_attr.attr,
+	NULL,
+};
+
+static struct attribute_group l2x0_pmu_cpumask_attr_group = {
+	.attrs = l2x0_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *l2x0_pmu_attr_groups[] = {
+	&l2x0_pmu_event_attrs_group,
+	&l2x0_pmu_cpumask_attr_group,
+	NULL,
+};
+
+static void l2x0_pmu_reset(void)
+{
+	int i;
+
+	__l2x0_pmu_disable();
+
+	for (i = 0; i < PMU_NR_COUNTERS; i++)
+		__l2x0_pmu_event_disable(i);
+}
+
+static int l2x0_pmu_offline_cpu(unsigned int cpu)
+{
+	unsigned int target;
+
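+	/*
+	 * If the CPU going down currently owns the PMU, migrate its events
+	 * to any other online CPU; if none is left, the PMU simply becomes
+	 * unowned.
+	 */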
+	if (!cpumask_test_and_clear_cpu(cpu, &pmu_cpu))
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	perf_pmu_migrate_context(l2x0_pmu, cpu, target);
+	cpumask_set_cpu(target, &pmu_cpu);
+
+	return 0;
+}
+
+void l2x0_pmu_suspend(void)
+{
+	int i;
+
+	if (!l2x0_pmu)
+		return;
+
+	l2x0_pmu_disable(l2x0_pmu);
+
+	for (i = 0; i < PMU_NR_COUNTERS; i++) {
+		if (events[i])
+			l2x0_pmu_event_stop(events[i], PERF_EF_UPDATE);
+	}
+}
+
+void l2x0_pmu_resume(void)
+{
+	int i;
+
+	if (!l2x0_pmu)
+		return;
+
+	l2x0_pmu_reset();
+
+	for (i = 0; i < PMU_NR_COUNTERS; i++) {
+		if (events[i])
+			l2x0_pmu_event_start(events[i], PERF_EF_RELOAD);
+	}
+
+	l2x0_pmu_enable(l2x0_pmu);
+}
+
+void __init l2x0_pmu_register(void __iomem *base, u32 part)
+{
+	/*
+	 * Determine whether we support the PMU, and choose the name for sysfs.
+	 * This is also used by l2x0_pmu_event_attr_is_visible to determine
+	 * which events to display, as the PL310 PMU supports a superset of
+	 * L220 events.
+	 *
+	 * The L210 PMU has a different programmer's interface, and is not
+	 * supported by this driver.
+	 *
+	 * We must defer registering the PMU until the perf subsystem is up and
+	 * running, so just stash the name and base, and leave that to another
+	 * initcall.
+	 */
+	switch (part & L2X0_CACHE_ID_PART_MASK) {
+	case L2X0_CACHE_ID_PART_L220:
+		l2x0_name = "l2c_220";
+		break;
+	case L2X0_CACHE_ID_PART_L310:
+		l2x0_name = "l2c_310";
+		break;
+	default:
+		return;
+	}
+
+	l2x0_base = base;
+}
+
+static __init int l2x0_pmu_init(void)
+{
+	int ret;
+
+	if (!l2x0_base)
+		return 0;
+
+	l2x0_pmu = kzalloc(sizeof(*l2x0_pmu), GFP_KERNEL);
+	if (!l2x0_pmu) {
+		pr_warn("Unable to allocate L2x0 PMU\n");
+		return -ENOMEM;
+	}
+
+	*l2x0_pmu = (struct pmu) {
+		.task_ctx_nr = perf_invalid_context,
+		.pmu_enable = l2x0_pmu_enable,
+		.pmu_disable = l2x0_pmu_disable,
+		.read = l2x0_pmu_event_read,
+		.start = l2x0_pmu_event_start,
+		.stop = l2x0_pmu_event_stop,
+		.add = l2x0_pmu_event_add,
+		.del = l2x0_pmu_event_del,
+		.event_init = l2x0_pmu_event_init,
+		.attr_groups = l2x0_pmu_attr_groups,
+	};
+
+	l2x0_pmu_reset();
+
+	/*
+	 * We always use a hrtimer rather than an interrupt.
+	 * See comments in l2x0_pmu_event_configure and l2x0_pmu_poll.
+	 *
+	 * Polling once a second allows the counters to fill up to 1/128th on a
+	 * quad-core test chip with cores clocked at 400MHz. Hopefully this
+	 * leaves sufficient headroom to avoid overflow on production silicon
+	 * at higher frequencies.
+	 */
+	l2x0_pmu_poll_period = ms_to_ktime(1000);
+	hrtimer_init(&l2x0_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	l2x0_pmu_hrtimer.function = l2x0_pmu_poll;
+
+	cpumask_set_cpu(0, &pmu_cpu);
+	ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE,
+					"perf/arm/l2x0:online", NULL,
+					l2x0_pmu_offline_cpu);
+	if (ret)
+		goto out_pmu;
+
+	ret = perf_pmu_register(l2x0_pmu, l2x0_name, -1);
+	if (ret)
+		goto out_cpuhp;
+
+	return 0;
+
+out_cpuhp:
+	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_ARM_L2X0_ONLINE);
+out_pmu:
+	kfree(l2x0_pmu);
+	l2x0_pmu = NULL;
+	return ret;
+}
+device_initcall(l2x0_pmu_init);
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
new file mode 100644
index 0000000..808efbb
--- /dev/null
+++ b/arch/arm/mm/cache-l2x0.c
@@ -0,0 +1,1817 @@
+/*
+ * arch/arm/mm/cache-l2x0.c - L210/L220/L310 cache controller support
+ *
+ * Copyright (C) 2007 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/cpu.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/log2.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cp15.h>
+#include <asm/cputype.h>
+#include <asm/hardware/cache-l2x0.h>
+#include "cache-tauros3.h"
+#include "cache-aurora-l2.h"
+
+struct l2c_init_data {
+	const char *type;
+	unsigned way_size_0;
+	unsigned num_lock;
+	void (*of_parse)(const struct device_node *, u32 *, u32 *);
+	void (*enable)(void __iomem *, unsigned);
+	void (*fixup)(void __iomem *, u32, struct outer_cache_fns *);
+	void (*save)(void __iomem *);
+	void (*configure)(void __iomem *);
+	void (*unlock)(void __iomem *, unsigned);
+	struct outer_cache_fns outer_cache;
+};
+
+#define CACHE_LINE_SIZE		32
+
+static void __iomem *l2x0_base;
+static const struct l2c_init_data *l2x0_data;
+static DEFINE_RAW_SPINLOCK(l2x0_lock);
+static u32 l2x0_way_mask;	/* Bitmask of active ways */
+static u32 l2x0_size;
+static unsigned long sync_reg_offset = L2X0_CACHE_SYNC;
+
+struct l2x0_regs l2x0_saved_regs;
+
+static bool l2x0_bresp_disable;
+static bool l2x0_flz_disable;
+
+/*
+ * Common code for all cache controllers.
+ */
+static inline void l2c_wait_mask(void __iomem *reg, unsigned long mask)
+{
+	/* wait for cache operation by line or way to complete */
+	while (readl_relaxed(reg) & mask)
+		cpu_relax();
+}
+
+/*
+ * By default, we write directly to secure registers.  Platforms must
+ * override this if they are running non-secure.
+ */
+static void l2c_write_sec(unsigned long val, void __iomem *base, unsigned reg)
+{
+	if (val == readl_relaxed(base + reg))
+		return;
+	if (outer_cache.write_sec)
+		outer_cache.write_sec(val, reg);
+	else
+		writel_relaxed(val, base + reg);
+}
+
+/*
+ * This should only be called when we have a requirement that the
+ * register be written due to a work-around, as platforms running
+ * in non-secure mode may not be able to access this register.
+ */
+static inline void l2c_set_debug(void __iomem *base, unsigned long val)
+{
+	l2c_write_sec(val, base, L2X0_DEBUG_CTRL);
+}
+
+static void __l2c_op_way(void __iomem *reg)
+{
+	writel_relaxed(l2x0_way_mask, reg);
+	l2c_wait_mask(reg, l2x0_way_mask);
+}
+
+static inline void l2c_unlock(void __iomem *base, unsigned num)
+{
+	unsigned i;
+
+	for (i = 0; i < num; i++) {
+		writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_D_BASE +
+			       i * L2X0_LOCKDOWN_STRIDE);
+		writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_I_BASE +
+			       i * L2X0_LOCKDOWN_STRIDE);
+	}
+}
+
+static void l2c_configure(void __iomem *base)
+{
+	l2c_write_sec(l2x0_saved_regs.aux_ctrl, base, L2X0_AUX_CTRL);
+}
+
+/*
+ * Enable the L2 cache controller.  This function must only be
+ * called when the cache controller is known to be disabled.
+ */
+static void l2c_enable(void __iomem *base, unsigned num_lock)
+{
+	unsigned long flags;
+
+	if (outer_cache.configure)
+		outer_cache.configure(&l2x0_saved_regs);
+	else
+		l2x0_data->configure(base);
+
+	l2x0_data->unlock(base, num_lock);
+
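+	/*
+	 * Invalidate all ways and drain a cache sync with interrupts
+	 * disabled before the controller is switched on below.
+	 */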
+	local_irq_save(flags);
+	__l2c_op_way(base + L2X0_INV_WAY);
+	writel_relaxed(0, base + sync_reg_offset);
+	l2c_wait_mask(base + sync_reg_offset, 1);
+	local_irq_restore(flags);
+
+	l2c_write_sec(L2X0_CTRL_EN, base, L2X0_CTRL);
+}
+
+static void l2c_disable(void)
+{
+	void __iomem *base = l2x0_base;
+
+	l2x0_pmu_suspend();
+
+	outer_cache.flush_all();
+	l2c_write_sec(0, base, L2X0_CTRL);
+	dsb(st);
+}
+
+static void l2c_save(void __iomem *base)
+{
+	l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
+}
+
+static void l2c_resume(void)
+{
+	void __iomem *base = l2x0_base;
+
+	/* Do not touch the controller if already enabled. */
+	if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN))
+		l2c_enable(base, l2x0_data->num_lock);
+
+	l2x0_pmu_resume();
+}
+
+/*
+ * L2C-210 specific code.
+ *
+ * The L2C-2x0 PA, set/way and sync operations are atomic, but we must
+ * ensure that no background operation is running.  The way operations
+ * are all background tasks.
+ *
+ * While a background operation is in progress, any new operation is
+ * ignored (it is unspecified whether this causes an error.)  Thankfully,
+ * this controller is not used on SMP.
+ *
+ * The sync register is always L2X0_CACHE_SYNC, but we use sync_reg_offset
+ * here so we can share some of this code with the L2C-310.
+ */
+static void __l2c210_cache_sync(void __iomem *base)
+{
+	writel_relaxed(0, base + sync_reg_offset);
+}
+
+static void __l2c210_op_pa_range(void __iomem *reg, unsigned long start,
+	unsigned long end)
+{
+	while (start < end) {
+		writel_relaxed(start, reg);
+		start += CACHE_LINE_SIZE;
+	}
+}
+
+static void l2c210_inv_range(unsigned long start, unsigned long end)
+{
+	void __iomem *base = l2x0_base;
+
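+	/*
+	 * Partial lines at either end of the range may hold dirty data
+	 * outside the requested region, so clean+invalidate those first,
+	 * then invalidate the whole lines in between.
+	 */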
+	if (start & (CACHE_LINE_SIZE - 1)) {
+		start &= ~(CACHE_LINE_SIZE - 1);
+		writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA);
+		start += CACHE_LINE_SIZE;
+	}
+
+	if (end & (CACHE_LINE_SIZE - 1)) {
+		end &= ~(CACHE_LINE_SIZE - 1);
+		writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA);
+	}
+
+	__l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end);
+	__l2c210_cache_sync(base);
+}
+
+static void l2c210_clean_range(unsigned long start, unsigned long end)
+{
+	void __iomem *base = l2x0_base;
+
+	start &= ~(CACHE_LINE_SIZE - 1);
+	__l2c210_op_pa_range(base + L2X0_CLEAN_LINE_PA, start, end);
+	__l2c210_cache_sync(base);
+}
+
+static void l2c210_flush_range(unsigned long start, unsigned long end)
+{
+	void __iomem *base = l2x0_base;
+
+	start &= ~(CACHE_LINE_SIZE - 1);
+	__l2c210_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA, start, end);
+	__l2c210_cache_sync(base);
+}
+
+static void l2c210_flush_all(void)
+{
+	void __iomem *base = l2x0_base;
+
+	BUG_ON(!irqs_disabled());
+
+	__l2c_op_way(base + L2X0_CLEAN_INV_WAY);
+	__l2c210_cache_sync(base);
+}
+
+static void l2c210_sync(void)
+{
+	__l2c210_cache_sync(l2x0_base);
+}
+
+static const struct l2c_init_data l2c210_data __initconst = {
+	.type = "L2C-210",
+	.way_size_0 = SZ_8K,
+	.num_lock = 1,
+	.enable = l2c_enable,
+	.save = l2c_save,
+	.configure = l2c_configure,
+	.unlock = l2c_unlock,
+	.outer_cache = {
+		.inv_range = l2c210_inv_range,
+		.clean_range = l2c210_clean_range,
+		.flush_range = l2c210_flush_range,
+		.flush_all = l2c210_flush_all,
+		.disable = l2c_disable,
+		.sync = l2c210_sync,
+		.resume = l2c_resume,
+	},
+};
+
+/*
+ * L2C-220 specific code.
+ *
+ * All operations are background operations: they have to be waited for.
+ * Conflicting requests generate a slave error (which will cause an
+ * imprecise abort.)  Never uses sync_reg_offset, so we hard-code the
+ * sync register here.
+ *
+ * However, we can re-use the l2c_resume call.
+ */
+static inline void __l2c220_cache_sync(void __iomem *base)
+{
+	writel_relaxed(0, base + L2X0_CACHE_SYNC);
+	l2c_wait_mask(base + L2X0_CACHE_SYNC, 1);
+}
+
+static void l2c220_op_way(void __iomem *base, unsigned reg)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
+	__l2c_op_way(base + reg);
+	__l2c220_cache_sync(base);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static unsigned long l2c220_op_pa_range(void __iomem *reg, unsigned long start,
+	unsigned long end, unsigned long flags)
+{
+	raw_spinlock_t *lock = &l2x0_lock;
+
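+	/*
+	 * Operate on at most 4K at a time, dropping and re-taking the lock
+	 * between blocks so that interrupt latency stays bounded on long
+	 * ranges.
+	 */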
+	while (start < end) {
+		unsigned long blk_end = start + min(end - start, 4096UL);
+
+		while (start < blk_end) {
+			l2c_wait_mask(reg, 1);
+			writel_relaxed(start, reg);
+			start += CACHE_LINE_SIZE;
+		}
+
+		if (blk_end < end) {
+			raw_spin_unlock_irqrestore(lock, flags);
+			raw_spin_lock_irqsave(lock, flags);
+		}
+	}
+
+	return flags;
+}
+
+static void l2c220_inv_range(unsigned long start, unsigned long end)
+{
+	void __iomem *base = l2x0_base;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
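+	/*
+	 * As on the L2C-210, partial lines at either end are cleaned and
+	 * invalidated first so that dirty data outside the requested range
+	 * is not discarded.
+	 */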
+	if ((start | end) & (CACHE_LINE_SIZE - 1)) {
+		if (start & (CACHE_LINE_SIZE - 1)) {
+			start &= ~(CACHE_LINE_SIZE - 1);
+			writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA);
+			start += CACHE_LINE_SIZE;
+		}
+
+		if (end & (CACHE_LINE_SIZE - 1)) {
+			end &= ~(CACHE_LINE_SIZE - 1);
+			l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
+			writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA);
+		}
+	}
+
+	flags = l2c220_op_pa_range(base + L2X0_INV_LINE_PA,
+				   start, end, flags);
+	l2c_wait_mask(base + L2X0_INV_LINE_PA, 1);
+	__l2c220_cache_sync(base);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void l2c220_clean_range(unsigned long start, unsigned long end)
+{
+	void __iomem *base = l2x0_base;
+	unsigned long flags;
+
+	start &= ~(CACHE_LINE_SIZE - 1);
+	if ((end - start) >= l2x0_size) {
+		l2c220_op_way(base, L2X0_CLEAN_WAY);
+		return;
+	}
+
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
+	flags = l2c220_op_pa_range(base + L2X0_CLEAN_LINE_PA,
+				   start, end, flags);
+	l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
+	__l2c220_cache_sync(base);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void l2c220_flush_range(unsigned long start, unsigned long end)
+{
+	void __iomem *base = l2x0_base;
+	unsigned long flags;
+
+	start &= ~(CACHE_LINE_SIZE - 1);
+	if ((end - start) >= l2x0_size) {
+		l2c220_op_way(base, L2X0_CLEAN_INV_WAY);
+		return;
+	}
+
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
+	flags = l2c220_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA,
+				   start, end, flags);
+	l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1);
+	__l2c220_cache_sync(base);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void l2c220_flush_all(void)
+{
+	l2c220_op_way(l2x0_base, L2X0_CLEAN_INV_WAY);
+}
+
+static void l2c220_sync(void)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
+	__l2c220_cache_sync(l2x0_base);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void l2c220_enable(void __iomem *base, unsigned num_lock)
+{
+	/*
+	 * Always enable non-secure access to the lockdown registers -
+	 * we write to them as part of the L2C enable sequence so they
+	 * need to be accessible.
+	 */
+	l2x0_saved_regs.aux_ctrl |= L220_AUX_CTRL_NS_LOCKDOWN;
+
+	l2c_enable(base, num_lock);
+}
+
+static void l2c220_unlock(void __iomem *base, unsigned num_lock)
+{
+	if (readl_relaxed(base + L2X0_AUX_CTRL) & L220_AUX_CTRL_NS_LOCKDOWN)
+		l2c_unlock(base, num_lock);
+}
+
+static const struct l2c_init_data l2c220_data = {
+	.type = "L2C-220",
+	.way_size_0 = SZ_8K,
+	.num_lock = 1,
+	.enable = l2c220_enable,
+	.save = l2c_save,
+	.configure = l2c_configure,
+	.unlock = l2c220_unlock,
+	.outer_cache = {
+		.inv_range = l2c220_inv_range,
+		.clean_range = l2c220_clean_range,
+		.flush_range = l2c220_flush_range,
+		.flush_all = l2c220_flush_all,
+		.disable = l2c_disable,
+		.sync = l2c220_sync,
+		.resume = l2c_resume,
+	},
+};
+
+/*
+ * L2C-310 specific code.
+ *
+ * Very similar to L2C-210, the PA, set/way and sync operations are atomic,
+ * and the way operations are all background tasks.  However, issuing an
+ * operation while a background operation is in progress results in a
+ * SLVERR response.  We can reuse:
+ *
+ *  __l2c210_cache_sync (using sync_reg_offset)
+ *  l2c210_sync
+ *  l2c210_inv_range (if 588369 is not applicable)
+ *  l2c210_clean_range
+ *  l2c210_flush_range (if 588369 is not applicable)
+ *  l2c210_flush_all (if 727915 is not applicable)
+ *
+ * Errata:
+ * 588369: PL310 R0P0->R1P0, fixed R2P0.
+ *	Affects: all clean+invalidate operations
+ *	clean and invalidate skips the invalidate step, so we need to issue
+ *	separate operations.  We also require the above debug workaround
+ *	enclosing this code fragment on affected parts.  On unaffected parts,
+ *	this split must not be applied without the debug register writes, as
+ *	doing so would expose a problem similar to 727915.
+ *
+ * 727915: PL310 R2P0->R3P0, fixed R3P1.
+ *	Affects: clean+invalidate by way
+ *	clean and invalidate by way runs in the background, and a store can
+ *	hit the line between the clean operation and invalidate operation,
+ *	resulting in the store being lost.
+ *
+ * 752271: PL310 R3P0->R3P1-50REL0, fixed R3P2.
+ *	Affects: 8x64-bit (double fill) line fetches
+ *	double fill line fetches can fail to cause dirty data to be evicted
+ *	from the cache before the new data overwrites the second line.
+ *
+ * 753970: PL310 R3P0, fixed R3P1.
+ *	Affects: sync
+ *	prevents merging writes after the sync operation, until another L2C
+ *	operation is performed (or a number of other conditions.)
+ *
+ * 769419: PL310 R0P0->R3P1, fixed R3P2.
+ *	Affects: store buffer
+ *	store buffer is not automatically drained.
+ */
+static void l2c310_inv_range_erratum(unsigned long start, unsigned long end)
+{
+	void __iomem *base = l2x0_base;
+
+	if ((start | end) & (CACHE_LINE_SIZE - 1)) {
+		unsigned long flags;
+
+		/* Erratum 588369 for both clean+invalidate operations */
+		raw_spin_lock_irqsave(&l2x0_lock, flags);
+		l2c_set_debug(base, 0x03);
+
+		if (start & (CACHE_LINE_SIZE - 1)) {
+			start &= ~(CACHE_LINE_SIZE - 1);
+			writel_relaxed(start, base + L2X0_CLEAN_LINE_PA);
+			writel_relaxed(start, base + L2X0_INV_LINE_PA);
+			start += CACHE_LINE_SIZE;
+		}
+
+		if (end & (CACHE_LINE_SIZE - 1)) {
+			end &= ~(CACHE_LINE_SIZE - 1);
+			writel_relaxed(end, base + L2X0_CLEAN_LINE_PA);
+			writel_relaxed(end, base + L2X0_INV_LINE_PA);
+		}
+
+		l2c_set_debug(base, 0x00);
+		raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+	}
+
+	__l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end);
+	__l2c210_cache_sync(base);
+}
+
+static void l2c310_flush_range_erratum(unsigned long start, unsigned long end)
+{
+	raw_spinlock_t *lock = &l2x0_lock;
+	unsigned long flags;
+	void __iomem *base = l2x0_base;
+
+	raw_spin_lock_irqsave(lock, flags);
+	while (start < end) {
+		unsigned long blk_end = start + min(end - start, 4096UL);
+
+		l2c_set_debug(base, 0x03);
+		while (start < blk_end) {
+			writel_relaxed(start, base + L2X0_CLEAN_LINE_PA);
+			writel_relaxed(start, base + L2X0_INV_LINE_PA);
+			start += CACHE_LINE_SIZE;
+		}
+		l2c_set_debug(base, 0x00);
+
+		if (blk_end < end) {
+			raw_spin_unlock_irqrestore(lock, flags);
+			raw_spin_lock_irqsave(lock, flags);
+		}
+	}
+	raw_spin_unlock_irqrestore(lock, flags);
+	__l2c210_cache_sync(base);
+}
+
+static void l2c310_flush_all_erratum(void)
+{
+	void __iomem *base = l2x0_base;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
+	l2c_set_debug(base, 0x03);
+	__l2c_op_way(base + L2X0_CLEAN_INV_WAY);
+	l2c_set_debug(base, 0x00);
+	__l2c210_cache_sync(base);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void __init l2c310_save(void __iomem *base)
+{
+	unsigned revision;
+
+	l2c_save(base);
+
+	l2x0_saved_regs.tag_latency = readl_relaxed(base +
+		L310_TAG_LATENCY_CTRL);
+	l2x0_saved_regs.data_latency = readl_relaxed(base +
+		L310_DATA_LATENCY_CTRL);
+	l2x0_saved_regs.filter_end = readl_relaxed(base +
+		L310_ADDR_FILTER_END);
+	l2x0_saved_regs.filter_start = readl_relaxed(base +
+		L310_ADDR_FILTER_START);
+
+	revision = readl_relaxed(base + L2X0_CACHE_ID) &
+			L2X0_CACHE_ID_RTL_MASK;
+
+	/* From r2p0, there is a prefetch offset/control register */
+	if (revision >= L310_CACHE_ID_RTL_R2P0)
+		l2x0_saved_regs.prefetch_ctrl = readl_relaxed(base +
+							L310_PREFETCH_CTRL);
+
+	/* From r3p0, there is a power control register */
+	if (revision >= L310_CACHE_ID_RTL_R3P0)
+		l2x0_saved_regs.pwr_ctrl = readl_relaxed(base +
+							L310_POWER_CTRL);
+}
+
+static void l2c310_configure(void __iomem *base)
+{
+	unsigned revision;
+
+	l2c_configure(base);
+
+	/* restore pl310 setup */
+	l2c_write_sec(l2x0_saved_regs.tag_latency, base,
+		      L310_TAG_LATENCY_CTRL);
+	l2c_write_sec(l2x0_saved_regs.data_latency, base,
+		      L310_DATA_LATENCY_CTRL);
+	l2c_write_sec(l2x0_saved_regs.filter_end, base,
+		      L310_ADDR_FILTER_END);
+	l2c_write_sec(l2x0_saved_regs.filter_start, base,
+		      L310_ADDR_FILTER_START);
+
+	revision = readl_relaxed(base + L2X0_CACHE_ID) &
+				 L2X0_CACHE_ID_RTL_MASK;
+
+	if (revision >= L310_CACHE_ID_RTL_R2P0)
+		l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base,
+			      L310_PREFETCH_CTRL);
+	if (revision >= L310_CACHE_ID_RTL_R3P0)
+		l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base,
+			      L310_POWER_CTRL);
+}
+
+static int l2c310_starting_cpu(unsigned int cpu)
+{
+	set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
+	return 0;
+}
+
+static int l2c310_dying_cpu(unsigned int cpu)
+{
+	set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1)));
+	return 0;
+}
+
+static void __init l2c310_enable(void __iomem *base, unsigned num_lock)
+{
+	unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_RTL_MASK;
+	bool cortex_a9 = read_cpuid_part() == ARM_CPU_PART_CORTEX_A9;
+	u32 aux = l2x0_saved_regs.aux_ctrl;
+
+	if (rev >= L310_CACHE_ID_RTL_R2P0) {
+		if (cortex_a9 && !l2x0_bresp_disable) {
+			aux |= L310_AUX_CTRL_EARLY_BRESP;
+			pr_info("L2C-310 enabling early BRESP for Cortex-A9\n");
+		} else if (aux & L310_AUX_CTRL_EARLY_BRESP) {
+			pr_warn("L2C-310 early BRESP only supported with Cortex-A9\n");
+			aux &= ~L310_AUX_CTRL_EARLY_BRESP;
+		}
+	}
+
+	if (cortex_a9 && !l2x0_flz_disable) {
+		u32 aux_cur = readl_relaxed(base + L2X0_AUX_CTRL);
+		u32 acr = get_auxcr();
+
+		pr_debug("Cortex-A9 ACR=0x%08x\n", acr);
+
+		if (acr & BIT(3) && !(aux_cur & L310_AUX_CTRL_FULL_LINE_ZERO))
+			pr_err("L2C-310: full line of zeros enabled in Cortex-A9 but not L2C-310 - invalid\n");
+
+		if (aux & L310_AUX_CTRL_FULL_LINE_ZERO && !(acr & BIT(3)))
+			pr_err("L2C-310: enabling full line of zeros but not enabled in Cortex-A9\n");
+
+		if (!(aux & L310_AUX_CTRL_FULL_LINE_ZERO) && !outer_cache.write_sec) {
+			aux |= L310_AUX_CTRL_FULL_LINE_ZERO;
+			pr_info("L2C-310 full line of zeros enabled for Cortex-A9\n");
+		}
+	} else if (aux & (L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP)) {
+		pr_err("L2C-310: disabling Cortex-A9 specific feature bits\n");
+		aux &= ~(L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP);
+	}
+
+	/*
+	 * Always enable non-secure access to the lockdown registers -
+	 * we write to them as part of the L2C enable sequence so they
+	 * need to be accessible.
+	 */
+	l2x0_saved_regs.aux_ctrl = aux | L310_AUX_CTRL_NS_LOCKDOWN;
+
+	l2c_enable(base, num_lock);
+
+	/* Read back resulting AUX_CTRL value as it could have been altered. */
+	aux = readl_relaxed(base + L2X0_AUX_CTRL);
+
+	if (aux & (L310_AUX_CTRL_DATA_PREFETCH | L310_AUX_CTRL_INSTR_PREFETCH)) {
+		u32 prefetch = readl_relaxed(base + L310_PREFETCH_CTRL);
+
+		pr_info("L2C-310 %s%s prefetch enabled, offset %u lines\n",
+			aux & L310_AUX_CTRL_INSTR_PREFETCH ? "I" : "",
+			aux & L310_AUX_CTRL_DATA_PREFETCH ? "D" : "",
+			1 + (prefetch & L310_PREFETCH_CTRL_OFFSET_MASK));
+	}
+
+	/* r3p0 or later has power control register */
+	if (rev >= L310_CACHE_ID_RTL_R3P0) {
+		u32 power_ctrl;
+
+		power_ctrl = readl_relaxed(base + L310_POWER_CTRL);
+		pr_info("L2C-310 dynamic clock gating %sabled, standby mode %sabled\n",
+			power_ctrl & L310_DYNAMIC_CLK_GATING_EN ? "en" : "dis",
+			power_ctrl & L310_STNDBY_MODE_EN ? "en" : "dis");
+	}
+
+	if (aux & L310_AUX_CTRL_FULL_LINE_ZERO)
+		cpuhp_setup_state(CPUHP_AP_ARM_L2X0_STARTING,
+				  "arm/l2x0:starting", l2c310_starting_cpu,
+				  l2c310_dying_cpu);
+}
+
+static void __init l2c310_fixup(void __iomem *base, u32 cache_id,
+	struct outer_cache_fns *fns)
+{
+	unsigned revision = cache_id & L2X0_CACHE_ID_RTL_MASK;
+	const char *errata[8];
+	unsigned n = 0;
+
+	if (IS_ENABLED(CONFIG_PL310_ERRATA_588369) &&
+	    revision < L310_CACHE_ID_RTL_R2P0 &&
+	    /* For bcm compatibility */
+	    fns->inv_range == l2c210_inv_range) {
+		fns->inv_range = l2c310_inv_range_erratum;
+		fns->flush_range = l2c310_flush_range_erratum;
+		errata[n++] = "588369";
+	}
+
+	if (IS_ENABLED(CONFIG_PL310_ERRATA_727915) &&
+	    revision >= L310_CACHE_ID_RTL_R2P0 &&
+	    revision < L310_CACHE_ID_RTL_R3P1) {
+		fns->flush_all = l2c310_flush_all_erratum;
+		errata[n++] = "727915";
+	}
+
+	if (revision >= L310_CACHE_ID_RTL_R3P0 &&
+	    revision < L310_CACHE_ID_RTL_R3P2) {
+		u32 val = l2x0_saved_regs.prefetch_ctrl;
+		if (val & L310_PREFETCH_CTRL_DBL_LINEFILL) {
+			val &= ~L310_PREFETCH_CTRL_DBL_LINEFILL;
+			l2x0_saved_regs.prefetch_ctrl = val;
+			errata[n++] = "752271";
+		}
+	}
+
+	if (IS_ENABLED(CONFIG_PL310_ERRATA_753970) &&
+	    revision == L310_CACHE_ID_RTL_R3P0) {
+		sync_reg_offset = L2X0_DUMMY_REG;
+		errata[n++] = "753970";
+	}
+
+	if (IS_ENABLED(CONFIG_PL310_ERRATA_769419))
+		errata[n++] = "769419";
+
+	if (n) {
+		unsigned i;
+
+		pr_info("L2C-310 errat%s", n > 1 ? "a" : "um");
+		for (i = 0; i < n; i++)
+			pr_cont(" %s", errata[i]);
+		pr_cont(" enabled\n");
+	}
+}
+
+static void l2c310_disable(void)
+{
+	/*
+	 * If full-line-of-zeros is enabled, we must first disable it in the
+	 * Cortex-A9 auxiliary control register before disabling the L2 cache.
+	 */
+	if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO)
+		set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1)));
+
+	l2c_disable();
+}
+
+static void l2c310_resume(void)
+{
+	l2c_resume();
+
+	/* Re-enable full-line-of-zeros for Cortex-A9 */
+	if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO)
+		set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1));
+}
+
+static void l2c310_unlock(void __iomem *base, unsigned num_lock)
+{
+	if (readl_relaxed(base + L2X0_AUX_CTRL) & L310_AUX_CTRL_NS_LOCKDOWN)
+		l2c_unlock(base, num_lock);
+}
+
+static const struct l2c_init_data l2c310_init_fns __initconst = {
+	.type = "L2C-310",
+	.way_size_0 = SZ_8K,
+	.num_lock = 8,
+	.enable = l2c310_enable,
+	.fixup = l2c310_fixup,
+	.save = l2c310_save,
+	.configure = l2c310_configure,
+	.unlock = l2c310_unlock,
+	.outer_cache = {
+		.inv_range = l2c210_inv_range,
+		.clean_range = l2c210_clean_range,
+		.flush_range = l2c210_flush_range,
+		.flush_all = l2c210_flush_all,
+		.disable = l2c310_disable,
+		.sync = l2c210_sync,
+		.resume = l2c310_resume,
+	},
+};
+
+static int __init __l2c_init(const struct l2c_init_data *data,
+			     u32 aux_val, u32 aux_mask, u32 cache_id, bool nosync)
+{
+	struct outer_cache_fns fns;
+	unsigned way_size_bits, ways;
+	u32 aux, old_aux;
+
+	/*
+	 * Save the pointer globally so that callbacks which do not receive
+	 * context from callers can access the structure.
+	 */
+	l2x0_data = kmemdup(data, sizeof(*data), GFP_KERNEL);
+	if (!l2x0_data)
+		return -ENOMEM;
+
+	/*
+	 * Sanity check the aux values.  aux_mask is the bits we preserve
+	 * from reading the hardware register, and aux_val is the bits we
+	 * set.
+	 */
+	if (aux_val & aux_mask)
+		pr_alert("L2C: platform provided aux values permit register corruption.\n");
+
+	old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
+	aux &= aux_mask;
+	aux |= aux_val;
+
+	if (old_aux != aux)
+		pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n",
+		        old_aux, aux);
+
+	/* Determine the number of ways */
+	switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
+	case L2X0_CACHE_ID_PART_L310:
+		if ((aux_val | ~aux_mask) & (L2C_AUX_CTRL_WAY_SIZE_MASK | L310_AUX_CTRL_ASSOCIATIVITY_16))
+			pr_warn("L2C: DT/platform tries to modify or specify cache size\n");
+		if (aux & (1 << 16))
+			ways = 16;
+		else
+			ways = 8;
+		break;
+
+	case L2X0_CACHE_ID_PART_L210:
+	case L2X0_CACHE_ID_PART_L220:
+		ways = (aux >> 13) & 0xf;
+		break;
+
+	case AURORA_CACHE_ID:
+		ways = (aux >> 13) & 0xf;
+		ways = 2 << ((ways + 1) >> 2);
+		break;
+
+	default:
+		/* Assume unknown chips have 8 ways */
+		ways = 8;
+		break;
+	}
+
+	l2x0_way_mask = (1 << ways) - 1;
+
+	/*
+	 * way_size_0 is the size that a way_size value of zero would be
+	 * given the calculation: way_size = way_size_0 << way_size_bits.
+	 * So, if way_size_bits=0 is reserved, but way_size_bits=1 is 16k,
+	 * then way_size_0 would be 8k.
+	 *
+	 * L2 cache size = number of ways * way size.
+	 */
+	way_size_bits = (aux & L2C_AUX_CTRL_WAY_SIZE_MASK) >>
+			L2C_AUX_CTRL_WAY_SIZE_SHIFT;
+	l2x0_size = ways * (data->way_size_0 << way_size_bits);
+
+	fns = data->outer_cache;
+	fns.write_sec = outer_cache.write_sec;
+	fns.configure = outer_cache.configure;
+	if (data->fixup)
+		data->fixup(l2x0_base, cache_id, &fns);
+	if (nosync) {
+		pr_info("L2C: disabling outer sync\n");
+		fns.sync = NULL;
+	}
+
+	/*
+	 * Check if l2x0 controller is already enabled.  If we are booting
+	 * in non-secure mode accessing the below registers will fault.
+	 */
+	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) {
+		l2x0_saved_regs.aux_ctrl = aux;
+
+		data->enable(l2x0_base, data->num_lock);
+	}
+
+	outer_cache = fns;
+
+	/*
+	 * It is strange to save the register state before initialisation,
+	 * but hey, this is what the DT implementations decided to do.
+	 */
+	if (data->save)
+		data->save(l2x0_base);
+
+	/* Re-read it in case some bits are reserved. */
+	aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
+
+	pr_info("%s cache controller enabled, %d ways, %d kB\n",
+		data->type, ways, l2x0_size >> 10);
+	pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n",
+		data->type, cache_id, aux);
+
+	l2x0_pmu_register(l2x0_base, cache_id);
+
+	return 0;
+}
+
+void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask)
+{
+	const struct l2c_init_data *data;
+	u32 cache_id;
+
+	l2x0_base = base;
+
+	cache_id = readl_relaxed(base + L2X0_CACHE_ID);
+
+	switch (cache_id & L2X0_CACHE_ID_PART_MASK) {
+	default:
+	case L2X0_CACHE_ID_PART_L210:
+		data = &l2c210_data;
+		break;
+
+	case L2X0_CACHE_ID_PART_L220:
+		data = &l2c220_data;
+		break;
+
+	case L2X0_CACHE_ID_PART_L310:
+		data = &l2c310_init_fns;
+		break;
+	}
+
+	/* Read back current (default) hardware configuration */
+	if (data->save)
+		data->save(l2x0_base);
+
+	__l2c_init(data, aux_val, aux_mask, cache_id, false);
+}
+
+#ifdef CONFIG_OF
+static int l2_wt_override;
+
+/* Aurora doesn't have the cache ID register available, so we have to
+ * pass it through the device tree */
+static u32 cache_id_part_number_from_dt;
+
+/**
+ * l2x0_cache_size_of_parse() - read cache size parameters from DT
+ * @np: the device tree node for the l2 cache
+ * @aux_val: pointer to machine-supplied auxiliary register value, to
+ * be augmented by the call (bits to be set to 1)
+ * @aux_mask: pointer to machine-supplied auxiliary register mask, to
+ * be augmented by the call (bits to be set to 0)
+ * @associativity: variable to return the calculated associativity in
+ * @max_way_size: the maximum size in bytes for the cache ways
+ */
+static int __init l2x0_cache_size_of_parse(const struct device_node *np,
+					    u32 *aux_val, u32 *aux_mask,
+					    u32 *associativity,
+					    u32 max_way_size)
+{
+	u32 mask = 0, val = 0;
+	u32 cache_size = 0, sets = 0;
+	u32 way_size_bits = 1;
+	u32 way_size = 0;
+	u32 block_size = 0;
+	u32 line_size = 0;
+
+	of_property_read_u32(np, "cache-size", &cache_size);
+	of_property_read_u32(np, "cache-sets", &sets);
+	of_property_read_u32(np, "cache-block-size", &block_size);
+	of_property_read_u32(np, "cache-line-size", &line_size);
+
+	if (!cache_size || !sets)
+		return -ENODEV;
+
+	/* All these L2 caches in fact have a line size equal to the block size */
+	if (!line_size) {
+		if (block_size) {
+			/* If linesize is not given, it is equal to blocksize */
+			line_size = block_size;
+		} else {
+			/* Fall back to known size */
+			pr_warn("L2C OF: no cache block/line size given: "
+				"falling back to default size %d bytes\n",
+				CACHE_LINE_SIZE);
+			line_size = CACHE_LINE_SIZE;
+		}
+	}
+
+	if (line_size != CACHE_LINE_SIZE)
+		pr_warn("L2C OF: DT supplied line size %d bytes does "
+			"not match hardware line size of %d bytes\n",
+			line_size,
+			CACHE_LINE_SIZE);
+
+	/*
+	 * Since:
+	 * set size = cache size / sets
+	 * ways = cache size / (sets * line size)
+	 * way size = cache size / (cache size / (sets * line size))
+	 * way size = sets * line size
+	 * associativity = ways = cache size / way size
+	 */
+	way_size = sets * line_size;
+	*associativity = cache_size / way_size;
+
+	if (way_size > max_way_size) {
+		pr_err("L2C OF: way size %d bytes is too large\n", way_size);
+		return -EINVAL;
+	}
+
+	pr_info("L2C OF: override cache size: %d bytes (%dKB)\n",
+		cache_size, cache_size >> 10);
+	pr_info("L2C OF: override line size: %d bytes\n", line_size);
+	pr_info("L2C OF: override way size: %d bytes (%dKB)\n",
+		way_size, way_size >> 10);
+	pr_info("L2C OF: override associativity: %d\n", *associativity);
+
+	/*
+	 * Calculates the bits 17:19 to set for way size:
+	 * 512KB -> 6, 256KB -> 5, ... 16KB -> 1
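+	 * e.g. a 256KB way: ilog2(256KB >> 10) - 3 = ilog2(256) - 3 = 8 - 3 = 5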
+	 */
+	way_size_bits = ilog2(way_size >> 10) - 3;
+	if (way_size_bits < 1 || way_size_bits > 6) {
+		pr_err("L2C OF: cache way size illegal: %dKB is not mapped\n",
+		       way_size);
+		return -EINVAL;
+	}
+
+	mask |= L2C_AUX_CTRL_WAY_SIZE_MASK;
+	val |= (way_size_bits << L2C_AUX_CTRL_WAY_SIZE_SHIFT);
+
+	*aux_val &= ~mask;
+	*aux_val |= val;
+	*aux_mask &= ~mask;
+
+	return 0;
+}
+
+static void __init l2x0_of_parse(const struct device_node *np,
+				 u32 *aux_val, u32 *aux_mask)
+{
+	u32 data[2] = { 0, 0 };
+	u32 tag = 0;
+	u32 dirty = 0;
+	u32 val = 0, mask = 0;
+	u32 assoc;
+	int ret;
+
+	of_property_read_u32(np, "arm,tag-latency", &tag);
+	if (tag) {
+		mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK;
+		val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT;
+	}
+
+	of_property_read_u32_array(np, "arm,data-latency",
+				   data, ARRAY_SIZE(data));
+	if (data[0] && data[1]) {
+		mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK |
+			L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK;
+		val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) |
+		       ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT);
+	}
+
+	of_property_read_u32(np, "arm,dirty-latency", &dirty);
+	if (dirty) {
+		mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK;
+		val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
+	}
+
+	if (of_property_read_bool(np, "arm,parity-enable")) {
+		mask &= ~L2C_AUX_CTRL_PARITY_ENABLE;
+		val |= L2C_AUX_CTRL_PARITY_ENABLE;
+	} else if (of_property_read_bool(np, "arm,parity-disable")) {
+		mask &= ~L2C_AUX_CTRL_PARITY_ENABLE;
+	}
+
+	if (of_property_read_bool(np, "arm,shared-override")) {
+		mask &= ~L2C_AUX_CTRL_SHARED_OVERRIDE;
+		val |= L2C_AUX_CTRL_SHARED_OVERRIDE;
+	}
+
+	ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_256K);
+	if (ret)
+		return;
+
+	if (assoc > 8) {
+		pr_err("l2x0 of: cache settings yield too high an associativity\n");
+		pr_err("l2x0 of: %d calculated, max 8\n", assoc);
+	} else {
+		mask |= L2X0_AUX_CTRL_ASSOC_MASK;
+		val |= (assoc << L2X0_AUX_CTRL_ASSOC_SHIFT);
+	}
+
+	*aux_val &= ~mask;
+	*aux_val |= val;
+	*aux_mask &= ~mask;
+}
+
+static const struct l2c_init_data of_l2c210_data __initconst = {
+	.type = "L2C-210",
+	.way_size_0 = SZ_8K,
+	.num_lock = 1,
+	.of_parse = l2x0_of_parse,
+	.enable = l2c_enable,
+	.save = l2c_save,
+	.configure = l2c_configure,
+	.unlock = l2c_unlock,
+	.outer_cache = {
+		.inv_range   = l2c210_inv_range,
+		.clean_range = l2c210_clean_range,
+		.flush_range = l2c210_flush_range,
+		.flush_all   = l2c210_flush_all,
+		.disable     = l2c_disable,
+		.sync        = l2c210_sync,
+		.resume      = l2c_resume,
+	},
+};
+
+static const struct l2c_init_data of_l2c220_data __initconst = {
+	.type = "L2C-220",
+	.way_size_0 = SZ_8K,
+	.num_lock = 1,
+	.of_parse = l2x0_of_parse,
+	.enable = l2c220_enable,
+	.save = l2c_save,
+	.configure = l2c_configure,
+	.unlock = l2c220_unlock,
+	.outer_cache = {
+		.inv_range   = l2c220_inv_range,
+		.clean_range = l2c220_clean_range,
+		.flush_range = l2c220_flush_range,
+		.flush_all   = l2c220_flush_all,
+		.disable     = l2c_disable,
+		.sync        = l2c220_sync,
+		.resume      = l2c_resume,
+	},
+};
+
+static void __init l2c310_of_parse(const struct device_node *np,
+	u32 *aux_val, u32 *aux_mask)
+{
+	u32 data[3] = { 0, 0, 0 };
+	u32 tag[3] = { 0, 0, 0 };
+	u32 filter[2] = { 0, 0 };
+	u32 assoc;
+	u32 prefetch;
+	u32 power;
+	u32 val;
+	int ret;
+
+	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
+	if (tag[0] && tag[1] && tag[2])
+		l2x0_saved_regs.tag_latency =
+			L310_LATENCY_CTRL_RD(tag[0] - 1) |
+			L310_LATENCY_CTRL_WR(tag[1] - 1) |
+			L310_LATENCY_CTRL_SETUP(tag[2] - 1);
+
+	of_property_read_u32_array(np, "arm,data-latency",
+				   data, ARRAY_SIZE(data));
+	if (data[0] && data[1] && data[2])
+		l2x0_saved_regs.data_latency =
+			L310_LATENCY_CTRL_RD(data[0] - 1) |
+			L310_LATENCY_CTRL_WR(data[1] - 1) |
+			L310_LATENCY_CTRL_SETUP(data[2] - 1);
+
+	of_property_read_u32_array(np, "arm,filter-ranges",
+				   filter, ARRAY_SIZE(filter));
+	if (filter[1]) {
+		l2x0_saved_regs.filter_end =
+					ALIGN(filter[0] + filter[1], SZ_1M);
+		l2x0_saved_regs.filter_start = (filter[0] & ~(SZ_1M - 1))
+					| L310_ADDR_FILTER_EN;
+	}
+
+	ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K);
+	if (!ret) {
+		switch (assoc) {
+		case 16:
+			*aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+			*aux_val |= L310_AUX_CTRL_ASSOCIATIVITY_16;
+			*aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+			break;
+		case 8:
+			*aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+			*aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+			break;
+		default:
+			pr_err("L2C-310 OF cache associativity %d invalid, only 8 or 16 permitted\n",
+			       assoc);
+			break;
+		}
+	}
+
+	if (of_property_read_bool(np, "arm,shared-override")) {
+		*aux_val |= L2C_AUX_CTRL_SHARED_OVERRIDE;
+		*aux_mask &= ~L2C_AUX_CTRL_SHARED_OVERRIDE;
+	}
+
+	if (of_property_read_bool(np, "arm,parity-enable")) {
+		*aux_val |= L2C_AUX_CTRL_PARITY_ENABLE;
+		*aux_mask &= ~L2C_AUX_CTRL_PARITY_ENABLE;
+	} else if (of_property_read_bool(np, "arm,parity-disable")) {
+		*aux_val &= ~L2C_AUX_CTRL_PARITY_ENABLE;
+		*aux_mask &= ~L2C_AUX_CTRL_PARITY_ENABLE;
+	}
+
+	if (of_property_read_bool(np, "arm,early-bresp-disable"))
+		l2x0_bresp_disable = true;
+
+	if (of_property_read_bool(np, "arm,full-line-zero-disable"))
+		l2x0_flz_disable = true;
+
+	prefetch = l2x0_saved_regs.prefetch_ctrl;
+
+	ret = of_property_read_u32(np, "arm,double-linefill", &val);
+	if (ret == 0) {
+		if (val)
+			prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,double-linefill property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "arm,double-linefill-incr", &val);
+	if (ret == 0) {
+		if (val)
+			prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL_INCR;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL_INCR;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,double-linefill-incr property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "arm,double-linefill-wrap", &val);
+	if (ret == 0) {
+		if (!val)
+			prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL_WRAP;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL_WRAP;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,double-linefill-wrap property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "arm,prefetch-drop", &val);
+	if (ret == 0) {
+		if (val)
+			prefetch |= L310_PREFETCH_CTRL_PREFETCH_DROP;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_PREFETCH_DROP;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,prefetch-drop property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "arm,prefetch-offset", &val);
+	if (ret == 0) {
+		prefetch &= ~L310_PREFETCH_CTRL_OFFSET_MASK;
+		prefetch |= val & L310_PREFETCH_CTRL_OFFSET_MASK;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF arm,prefetch-offset property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "prefetch-data", &val);
+	if (ret == 0) {
+		if (val)
+			prefetch |= L310_PREFETCH_CTRL_DATA_PREFETCH;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_DATA_PREFETCH;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF prefetch-data property value is missing\n");
+	}
+
+	ret = of_property_read_u32(np, "prefetch-instr", &val);
+	if (ret == 0) {
+		if (val)
+			prefetch |= L310_PREFETCH_CTRL_INSTR_PREFETCH;
+		else
+			prefetch &= ~L310_PREFETCH_CTRL_INSTR_PREFETCH;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF prefetch-instr property value is missing\n");
+	}
+
+	l2x0_saved_regs.prefetch_ctrl = prefetch;
+
+	power = l2x0_saved_regs.pwr_ctrl |
+		L310_DYNAMIC_CLK_GATING_EN | L310_STNDBY_MODE_EN;
+
+	ret = of_property_read_u32(np, "arm,dynamic-clock-gating", &val);
+	if (!ret) {
+		if (!val)
+			power &= ~L310_DYNAMIC_CLK_GATING_EN;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF dynamic-clock-gating property value is missing or invalid\n");
+	}
+	ret = of_property_read_u32(np, "arm,standby-mode", &val);
+	if (!ret) {
+		if (!val)
+			power &= ~L310_STNDBY_MODE_EN;
+	} else if (ret != -EINVAL) {
+		pr_err("L2C-310 OF standby-mode property value is missing or invalid\n");
+	}
+
+	l2x0_saved_regs.pwr_ctrl = power;
+}
+
+static const struct l2c_init_data of_l2c310_data __initconst = {
+	.type = "L2C-310",
+	.way_size_0 = SZ_8K,
+	.num_lock = 8,
+	.of_parse = l2c310_of_parse,
+	.enable = l2c310_enable,
+	.fixup = l2c310_fixup,
+	.save  = l2c310_save,
+	.configure = l2c310_configure,
+	.unlock = l2c310_unlock,
+	.outer_cache = {
+		.inv_range   = l2c210_inv_range,
+		.clean_range = l2c210_clean_range,
+		.flush_range = l2c210_flush_range,
+		.flush_all   = l2c210_flush_all,
+		.disable     = l2c310_disable,
+		.sync        = l2c210_sync,
+		.resume      = l2c310_resume,
+	},
+};
+
+/*
+ * This is a variant of the of_l2c310_data with .sync set to
+ * NULL. Outer sync operations are not needed when the system is I/O
+ * coherent, and potentially harmful in certain situations (PCIe/PL310
+ * deadlock on Armada 375/38x due to hardware I/O coherency). The
+ * other operations are kept because they are infrequent (therefore do
+ * not cause the deadlock in practice) and needed for secondary CPU
+ * boot and other power management activities.
+ */
+static const struct l2c_init_data of_l2c310_coherent_data __initconst = {
+	.type = "L2C-310 Coherent",
+	.way_size_0 = SZ_8K,
+	.num_lock = 8,
+	.of_parse = l2c310_of_parse,
+	.enable = l2c310_enable,
+	.fixup = l2c310_fixup,
+	.save  = l2c310_save,
+	.configure = l2c310_configure,
+	.unlock = l2c310_unlock,
+	.outer_cache = {
+		.inv_range   = l2c210_inv_range,
+		.clean_range = l2c210_clean_range,
+		.flush_range = l2c210_flush_range,
+		.flush_all   = l2c210_flush_all,
+		.disable     = l2c310_disable,
+		.resume      = l2c310_resume,
+	},
+};
+
+/*
+ * Note that the end addresses passed to Linux primitives are
+ * noninclusive, while the hardware cache range operations use
+ * inclusive start and end addresses.
+ */
+static unsigned long aurora_range_end(unsigned long start, unsigned long end)
+{
+	/*
+	 * Limit the number of cache lines processed at once,
+	 * since cache range operations stall the CPU pipeline
+	 * until completion.
+	 */
+	if (end > start + MAX_RANGE_SIZE)
+		end = start + MAX_RANGE_SIZE;
+
+	/*
+	 * Cache range operations can't straddle a page boundary.
+	 */
+	if (end > PAGE_ALIGN(start+1))
+		end = PAGE_ALIGN(start+1);
+
+	return end;
+}
+
+static void aurora_pa_range(unsigned long start, unsigned long end,
+			    unsigned long offset)
+{
+	void __iomem *base = l2x0_base;
+	unsigned long range_end;
+	unsigned long flags;
+
+	/*
+	 * round the start address down and the end address up to the cache line size
+	 */
+	start &= ~(CACHE_LINE_SIZE - 1);
+	end = ALIGN(end, CACHE_LINE_SIZE);
+
+	/*
+	 * perform operation on all full cache lines between 'start' and 'end'
+	 */
+	while (start < end) {
+		range_end = aurora_range_end(start, end);
+
+		raw_spin_lock_irqsave(&l2x0_lock, flags);
+		writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG);
+		writel_relaxed(range_end - CACHE_LINE_SIZE, base + offset);
+		raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+
+		writel_relaxed(0, base + AURORA_SYNC_REG);
+		start = range_end;
+	}
+}
+static void aurora_inv_range(unsigned long start, unsigned long end)
+{
+	aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
+}
+
+static void aurora_clean_range(unsigned long start, unsigned long end)
+{
+	/*
+	 * If L2 is forced to WT, the L2 will always be clean and we
+	 * don't need to do anything here.
+	 */
+	if (!l2_wt_override)
+		aurora_pa_range(start, end, AURORA_CLEAN_RANGE_REG);
+}
+
+static void aurora_flush_range(unsigned long start, unsigned long end)
+{
+	if (l2_wt_override)
+		aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
+	else
+		aurora_pa_range(start, end, AURORA_FLUSH_RANGE_REG);
+}
+
+static void aurora_flush_all(void)
+{
+	void __iomem *base = l2x0_base;
+	unsigned long flags;
+
+	/* clean all ways */
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
+	__l2c_op_way(base + L2X0_CLEAN_INV_WAY);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+
+	writel_relaxed(0, base + AURORA_SYNC_REG);
+}
+
+static void aurora_cache_sync(void)
+{
+	writel_relaxed(0, l2x0_base + AURORA_SYNC_REG);
+}
+
+static void aurora_disable(void)
+{
+	void __iomem *base = l2x0_base;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&l2x0_lock, flags);
+	__l2c_op_way(base + L2X0_CLEAN_INV_WAY);
+	writel_relaxed(0, base + AURORA_SYNC_REG);
+	l2c_write_sec(0, base, L2X0_CTRL);
+	dsb(st);
+	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+}
+
+static void aurora_save(void __iomem *base)
+{
+	l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL);
+	l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL);
+}
+
+/*
+ * For Aurora cache in no outer mode, enable via the CP15 coprocessor
+ * broadcasting of cache commands to L2.
+ */
+static void __init aurora_enable_no_outer(void __iomem *base,
+	unsigned num_lock)
+{
+	u32 u;
+
+	asm volatile("mrc p15, 1, %0, c15, c2, 0" : "=r" (u));
+	u |= AURORA_CTRL_FW;		/* Set the FW bit */
+	asm volatile("mcr p15, 1, %0, c15, c2, 0" : : "r" (u));
+
+	isb();
+
+	l2c_enable(base, num_lock);
+}
+
+static void __init aurora_fixup(void __iomem *base, u32 cache_id,
+	struct outer_cache_fns *fns)
+{
+	sync_reg_offset = AURORA_SYNC_REG;
+}
+
+static void __init aurora_of_parse(const struct device_node *np,
+				u32 *aux_val, u32 *aux_mask)
+{
+	u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU;
+	u32 mask =  AURORA_ACR_REPLACEMENT_MASK;
+
+	of_property_read_u32(np, "cache-id-part",
+			&cache_id_part_number_from_dt);
+
+	/* Determine and save the write policy */
+	l2_wt_override = of_property_read_bool(np, "wt-override");
+
+	if (l2_wt_override) {
+		val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY;
+		mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK;
+	}
+
+	*aux_val &= ~mask;
+	*aux_val |= val;
+	*aux_mask &= ~mask;
+}
+
+static const struct l2c_init_data of_aurora_with_outer_data __initconst = {
+	.type = "Aurora",
+	.way_size_0 = SZ_4K,
+	.num_lock = 4,
+	.of_parse = aurora_of_parse,
+	.enable = l2c_enable,
+	.fixup = aurora_fixup,
+	.save  = aurora_save,
+	.configure = l2c_configure,
+	.unlock = l2c_unlock,
+	.outer_cache = {
+		.inv_range   = aurora_inv_range,
+		.clean_range = aurora_clean_range,
+		.flush_range = aurora_flush_range,
+		.flush_all   = aurora_flush_all,
+		.disable     = aurora_disable,
+		.sync	     = aurora_cache_sync,
+		.resume      = l2c_resume,
+	},
+};
+
+static const struct l2c_init_data of_aurora_no_outer_data __initconst = {
+	.type = "Aurora",
+	.way_size_0 = SZ_4K,
+	.num_lock = 4,
+	.of_parse = aurora_of_parse,
+	.enable = aurora_enable_no_outer,
+	.fixup = aurora_fixup,
+	.save  = aurora_save,
+	.configure = l2c_configure,
+	.unlock = l2c_unlock,
+	.outer_cache = {
+		.resume      = l2c_resume,
+	},
+};
+
+/*
+ * For certain Broadcom SoCs, depending on the address range, different offsets
+ * need to be added to the address before passing it to L2 for
+ * invalidation/clean/flush
+ *
+ * Section Address Range              Offset        EMI
+ *   1     0x00000000 - 0x3FFFFFFF    0x80000000    VC
+ *   2     0x40000000 - 0xBFFFFFFF    0x40000000    SYS
+ *   3     0xC0000000 - 0xFFFFFFFF    0x80000000    VC
+ *
+ * When the start and end addresses have crossed two different sections, we
+ * need to break the L2 operation into two, each within its own section.
+ * For example, if we need to invalidate a range that starts at 0xBFFF0000 and
+ * ends at 0xC0001000, we need to invalidate 1) 0xBFFF0000 - 0xBFFFFFFF and
+ * 2) 0xC0000000 - 0xC0001000
+ *
+ * Note 1:
+ * By breaking a single L2 operation into two, we may potentially suffer some
+ * performance hit, but keep in mind the cross section case is very rare
+ *
+ * Note 2:
+ * We do not need to handle the case when the start address is in
+ * Section 1 and the end address is in Section 3, since it is not a valid use
+ * case
+ *
+ * Note 3:
+ * Section 1 in practical terms can no longer be used on rev A2. Because of
+ * that the code does not need to handle section 1 at all.
+ *
+ */
+#define BCM_SYS_EMI_START_ADDR        0x40000000UL
+#define BCM_VC_EMI_SEC3_START_ADDR    0xC0000000UL
+
+#define BCM_SYS_EMI_OFFSET            0x40000000UL
+#define BCM_VC_EMI_OFFSET             0x80000000UL
+
+static inline int bcm_addr_is_sys_emi(unsigned long addr)
+{
+	return (addr >= BCM_SYS_EMI_START_ADDR) &&
+		(addr < BCM_VC_EMI_SEC3_START_ADDR);
+}
+
+static inline unsigned long bcm_l2_phys_addr(unsigned long addr)
+{
+	if (bcm_addr_is_sys_emi(addr))
+		return addr + BCM_SYS_EMI_OFFSET;
+	else
+		return addr + BCM_VC_EMI_OFFSET;
+}
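+
+/*
+ * Worked example for the table above (illustrative): a SYS EMI address such
+ * as 0xBFFF0000 gets the 0x40000000 offset, so the L2 sees 0xFFFF0000, while
+ * addresses at or above 0xC0000000 take the 0x80000000 VC EMI offset instead
+ * (plain 32-bit unsigned arithmetic, so section 3 sums wrap modulo 2^32).
+ */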
+
+static void bcm_inv_range(unsigned long start, unsigned long end)
+{
+	unsigned long new_start, new_end;
+
+	BUG_ON(start < BCM_SYS_EMI_START_ADDR);
+
+	if (unlikely(end <= start))
+		return;
+
+	new_start = bcm_l2_phys_addr(start);
+	new_end = bcm_l2_phys_addr(end);
+
+	/* normal case, no cross section between start and end */
+	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
+		l2c210_inv_range(new_start, new_end);
+		return;
+	}
+
+	/* The range crosses sections, so it can only be a crossing from
+	 * section 2 into section 3
+	 */
+	l2c210_inv_range(new_start,
+		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
+	l2c210_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
+		new_end);
+}
+
+static void bcm_clean_range(unsigned long start, unsigned long end)
+{
+	unsigned long new_start, new_end;
+
+	BUG_ON(start < BCM_SYS_EMI_START_ADDR);
+
+	if (unlikely(end <= start))
+		return;
+
+	new_start = bcm_l2_phys_addr(start);
+	new_end = bcm_l2_phys_addr(end);
+
+	/* normal case, no cross section between start and end */
+	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
+		l2c210_clean_range(new_start, new_end);
+		return;
+	}
+
+	/* The range crosses sections, so it can only be a crossing from
+	 * section 2 into section 3
+	 */
+	l2c210_clean_range(new_start,
+		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
+	l2c210_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
+		new_end);
+}
+
+static void bcm_flush_range(unsigned long start, unsigned long end)
+{
+	unsigned long new_start, new_end;
+
+	BUG_ON(start < BCM_SYS_EMI_START_ADDR);
+
+	if (unlikely(end <= start))
+		return;
+
+	if ((end - start) >= l2x0_size) {
+		outer_cache.flush_all();
+		return;
+	}
+
+	new_start = bcm_l2_phys_addr(start);
+	new_end = bcm_l2_phys_addr(end);
+
+	/* normal case, no cross section between start and end */
+	if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) {
+		l2c210_flush_range(new_start, new_end);
+		return;
+	}
+
+	/* The range crosses sections, so it can only be a crossing from
+	 * section 2 into section 3
+	 */
+	l2c210_flush_range(new_start,
+		bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1));
+	l2c210_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR),
+		new_end);
+}
+
+/* Broadcom L2C-310 is based on ARM's r3p2 or later and requires no fixups */
+static const struct l2c_init_data of_bcm_l2x0_data __initconst = {
+	.type = "BCM-L2C-310",
+	.way_size_0 = SZ_8K,
+	.num_lock = 8,
+	.of_parse = l2c310_of_parse,
+	.enable = l2c310_enable,
+	.save  = l2c310_save,
+	.configure = l2c310_configure,
+	.unlock = l2c310_unlock,
+	.outer_cache = {
+		.inv_range   = bcm_inv_range,
+		.clean_range = bcm_clean_range,
+		.flush_range = bcm_flush_range,
+		.flush_all   = l2c210_flush_all,
+		.disable     = l2c310_disable,
+		.sync        = l2c210_sync,
+		.resume      = l2c310_resume,
+	},
+};
+
+static void __init tauros3_save(void __iomem *base)
+{
+	l2c_save(base);
+
+	l2x0_saved_regs.aux2_ctrl =
+		readl_relaxed(base + TAUROS3_AUX2_CTRL);
+	l2x0_saved_regs.prefetch_ctrl =
+		readl_relaxed(base + L310_PREFETCH_CTRL);
+}
+
+static void tauros3_configure(void __iomem *base)
+{
+	l2c_configure(base);
+	writel_relaxed(l2x0_saved_regs.aux2_ctrl,
+		       base + TAUROS3_AUX2_CTRL);
+	writel_relaxed(l2x0_saved_regs.prefetch_ctrl,
+		       base + L310_PREFETCH_CTRL);
+}
+
+static const struct l2c_init_data of_tauros3_data __initconst = {
+	.type = "Tauros3",
+	.way_size_0 = SZ_8K,
+	.num_lock = 8,
+	.enable = l2c_enable,
+	.save  = tauros3_save,
+	.configure = tauros3_configure,
+	.unlock = l2c_unlock,
+	/* Tauros3 broadcasts L1 cache operations to L2 */
+	.outer_cache = {
+		.resume      = l2c_resume,
+	},
+};
+
+#define L2C_ID(name, fns) { .compatible = name, .data = (void *)&fns }
+static const struct of_device_id l2x0_ids[] __initconst = {
+	L2C_ID("arm,l210-cache", of_l2c210_data),
+	L2C_ID("arm,l220-cache", of_l2c220_data),
+	L2C_ID("arm,pl310-cache", of_l2c310_data),
+	L2C_ID("brcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
+	L2C_ID("marvell,aurora-outer-cache", of_aurora_with_outer_data),
+	L2C_ID("marvell,aurora-system-cache", of_aurora_no_outer_data),
+	L2C_ID("marvell,tauros3-cache", of_tauros3_data),
+	/* Deprecated IDs */
+	L2C_ID("bcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data),
+	{}
+};
+
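+/*
+ * Typical usage (illustrative): platform code with no auxiliary control
+ * overrides passes aux_val = 0 and aux_mask = ~0, e.g.
+ *
+ *	l2x0_of_init(0, ~0UL);
+ *
+ * so the device tree and the hardware defaults fully determine the L2
+ * configuration.
+ */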
+int __init l2x0_of_init(u32 aux_val, u32 aux_mask)
+{
+	const struct l2c_init_data *data;
+	struct device_node *np;
+	struct resource res;
+	u32 cache_id, old_aux;
+	u32 cache_level = 2;
+	bool nosync = false;
+
+	np = of_find_matching_node(NULL, l2x0_ids);
+	if (!np)
+		return -ENODEV;
+
+	if (of_address_to_resource(np, 0, &res))
+		return -ENODEV;
+
+	l2x0_base = ioremap(res.start, resource_size(&res));
+	if (!l2x0_base)
+		return -ENOMEM;
+
+	l2x0_saved_regs.phy_base = res.start;
+
+	data = of_match_node(l2x0_ids, np)->data;
+
+	if (of_device_is_compatible(np, "arm,pl310-cache") &&
+	    of_property_read_bool(np, "arm,io-coherent"))
+		data = &of_l2c310_coherent_data;
+
+	old_aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL);
+	if (old_aux != ((old_aux & aux_mask) | aux_val)) {
+		pr_warn("L2C: platform modifies aux control register: 0x%08x -> 0x%08x\n",
+		        old_aux, (old_aux & aux_mask) | aux_val);
+	} else if (aux_mask != ~0U && aux_val != 0) {
+		pr_alert("L2C: platform provided aux values match the hardware, so have no effect.  Please remove them.\n");
+	}
+
+	/* All L2 caches are unified, so this property should be specified */
+	if (!of_property_read_bool(np, "cache-unified"))
+		pr_err("L2C: device tree omits to specify unified cache\n");
+
+	if (of_property_read_u32(np, "cache-level", &cache_level))
+		pr_err("L2C: device tree omits to specify cache-level\n");
+
+	if (cache_level != 2)
+		pr_err("L2C: device tree specifies invalid cache level\n");
+
+	nosync = of_property_read_bool(np, "arm,outer-sync-disable");
+
+	/* Read back current (default) hardware configuration */
+	if (data->save)
+		data->save(l2x0_base);
+
+	/* L2 configuration can only be changed if the cache is disabled */
+	if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN))
+		if (data->of_parse)
+			data->of_parse(np, &aux_val, &aux_mask);
+
+	if (cache_id_part_number_from_dt)
+		cache_id = cache_id_part_number_from_dt;
+	else
+		cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID);
+
+	return __l2c_init(data, aux_val, aux_mask, cache_id, nosync);
+}
+#endif
diff --git a/arch/arm/mm/cache-nop.S b/arch/arm/mm/cache-nop.S
new file mode 100644
index 0000000..f1cc986
--- /dev/null
+++ b/arch/arm/mm/cache-nop.S
@@ -0,0 +1,51 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+
+#include "proc-macros.S"
+
+ENTRY(nop_flush_icache_all)
+	ret	lr
+ENDPROC(nop_flush_icache_all)
+
+	.globl nop_flush_kern_cache_all
+	.equ nop_flush_kern_cache_all, nop_flush_icache_all
+
+	.globl nop_flush_kern_cache_louis
+	.equ nop_flush_kern_cache_louis, nop_flush_icache_all
+
+	.globl nop_flush_user_cache_all
+	.equ nop_flush_user_cache_all, nop_flush_icache_all
+
+	.globl nop_flush_user_cache_range
+	.equ nop_flush_user_cache_range, nop_flush_icache_all
+
+	.globl nop_coherent_kern_range
+	.equ nop_coherent_kern_range, nop_flush_icache_all
+
+ENTRY(nop_coherent_user_range)
+	mov	r0, 0
+	ret	lr
+ENDPROC(nop_coherent_user_range)
+
+	.globl nop_flush_kern_dcache_area
+	.equ nop_flush_kern_dcache_area, nop_flush_icache_all
+
+	.globl nop_dma_flush_range
+	.equ nop_dma_flush_range, nop_flush_icache_all
+
+	.globl nop_dma_map_area
+	.equ nop_dma_map_area, nop_flush_icache_all
+
+	.globl nop_dma_unmap_area
+	.equ nop_dma_unmap_area, nop_flush_icache_all
+
+	__INITDATA
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions nop
diff --git a/arch/arm/mm/cache-tauros2.c b/arch/arm/mm/cache-tauros2.c
new file mode 100644
index 0000000..88255be
--- /dev/null
+++ b/arch/arm/mm/cache-tauros2.c
@@ -0,0 +1,306 @@
+/*
+ * arch/arm/mm/cache-tauros2.c - Tauros2 L2 cache controller support
+ *
+ * Copyright (C) 2008 Marvell Semiconductor
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ *
+ * References:
+ * - PJ1 CPU Core Datasheet,
+ *   Document ID MV-S104837-01, Rev 0.7, January 24 2008.
+ * - PJ4 CPU Core Datasheet,
+ *   Document ID MV-S105190-00, Rev 0.7, March 14 2008.
+ */
+
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <asm/cacheflush.h>
+#include <asm/cp15.h>
+#include <asm/cputype.h>
+#include <asm/hardware/cache-tauros2.h>
+
+/* CP15 PJ4 Control configuration register */
+#define CCR_L2C_PREFETCH_DISABLE	BIT(24)
+#define CCR_L2C_ECC_ENABLE		BIT(23)
+#define CCR_L2C_WAY7_4_DISABLE		BIT(21)
+#define CCR_L2C_BURST8_ENABLE		BIT(20)
+
+/*
+ * When Tauros2 is used on a CPU that supports the v7 hierarchical
+ * cache operations, the cache handling code in proc-v7.S takes care
+ * of everything, including handling DMA coherency.
+ *
+ * So, we only need to register outer cache operations here if we're
+ * being used on a pre-v7 CPU, and we only need to build support for
+ * outer cache operations into the kernel image if the kernel has been
+ * configured to support a pre-v7 CPU.
+ */
+#ifdef CONFIG_CPU_32v5
+/*
+ * Low-level cache maintenance operations.
+ */
+static inline void tauros2_clean_pa(unsigned long addr)
+{
+	__asm__("mcr p15, 1, %0, c7, c11, 3" : : "r" (addr));
+}
+
+static inline void tauros2_clean_inv_pa(unsigned long addr)
+{
+	__asm__("mcr p15, 1, %0, c7, c15, 3" : : "r" (addr));
+}
+
+static inline void tauros2_inv_pa(unsigned long addr)
+{
+	__asm__("mcr p15, 1, %0, c7, c7, 3" : : "r" (addr));
+}
+
+
+/*
+ * Linux primitives.
+ *
+ * Note that the end addresses passed to Linux primitives are
+ * noninclusive.
+ */
+#define CACHE_LINE_SIZE		32
+
+static void tauros2_inv_range(unsigned long start, unsigned long end)
+{
+	/*
+	 * Clean and invalidate the partial first cache line; a plain
+	 * invalidate could discard unrelated dirty data sharing the line.
+	 */
+	if (start & (CACHE_LINE_SIZE - 1)) {
+		tauros2_clean_inv_pa(start & ~(CACHE_LINE_SIZE - 1));
+		start = (start | (CACHE_LINE_SIZE - 1)) + 1;
+	}
+
+	/*
+	 * Clean and invalidate partial last cache line.
+	 */
+	if (end & (CACHE_LINE_SIZE - 1)) {
+		tauros2_clean_inv_pa(end & ~(CACHE_LINE_SIZE - 1));
+		end &= ~(CACHE_LINE_SIZE - 1);
+	}
+
+	/*
+	 * Invalidate all full cache lines between 'start' and 'end'.
+	 */
+	while (start < end) {
+		tauros2_inv_pa(start);
+		start += CACHE_LINE_SIZE;
+	}
+
+	dsb();
+}
+
+static void tauros2_clean_range(unsigned long start, unsigned long end)
+{
+	start &= ~(CACHE_LINE_SIZE - 1);
+	while (start < end) {
+		tauros2_clean_pa(start);
+		start += CACHE_LINE_SIZE;
+	}
+
+	dsb();
+}
+
+static void tauros2_flush_range(unsigned long start, unsigned long end)
+{
+	start &= ~(CACHE_LINE_SIZE - 1);
+	while (start < end) {
+		tauros2_clean_inv_pa(start);
+		start += CACHE_LINE_SIZE;
+	}
+
+	dsb();
+}
+
+static void tauros2_disable(void)
+{
+	__asm__ __volatile__ (
+	"mcr	p15, 1, %0, c7, c11, 0 @L2 Cache Clean All\n\t"
+	"mrc	p15, 0, %0, c1, c0, 0\n\t"
+	"bic	%0, %0, #(1 << 26)\n\t"
+	"mcr	p15, 0, %0, c1, c0, 0  @Disable L2 Cache\n\t"
+	: : "r" (0x0));
+}
+
+static void tauros2_resume(void)
+{
+	__asm__ __volatile__ (
+	"mcr	p15, 1, %0, c7, c7, 0 @L2 Cache Invalidate All\n\t"
+	"mrc	p15, 0, %0, c1, c0, 0\n\t"
+	"orr	%0, %0, #(1 << 26)\n\t"
+	"mcr	p15, 0, %0, c1, c0, 0 @Enable L2 Cache\n\t"
+	: : "r" (0x0));
+}
+#endif
+
+static inline u32 __init read_extra_features(void)
+{
+	u32 u;
+
+	__asm__("mrc p15, 1, %0, c15, c1, 0" : "=r" (u));
+
+	return u;
+}
+
+static inline void __init write_extra_features(u32 u)
+{
+	__asm__("mcr p15, 1, %0, c15, c1, 0" : : "r" (u));
+}
+
+static inline int __init cpuid_scheme(void)
+{
+	return !!((processor_id & 0x000f0000) == 0x000f0000);
+}
+
+static inline u32 __init read_mmfr3(void)
+{
+	u32 mmfr3;
+
+	__asm__("mrc p15, 0, %0, c0, c1, 7\n" : "=r" (mmfr3));
+
+	return mmfr3;
+}
+
+static inline u32 __init read_actlr(void)
+{
+	u32 actlr;
+
+	__asm__("mrc p15, 0, %0, c1, c0, 1\n" : "=r" (actlr));
+
+	return actlr;
+}
+
+static inline void __init write_actlr(u32 actlr)
+{
+	__asm__("mcr p15, 0, %0, c1, c0, 1\n" : : "r" (actlr));
+}
+
+static void enable_extra_feature(unsigned int features)
+{
+	u32 u;
+
+	u = read_extra_features();
+
+	if (features & CACHE_TAUROS2_PREFETCH_ON)
+		u &= ~CCR_L2C_PREFETCH_DISABLE;
+	else
+		u |= CCR_L2C_PREFETCH_DISABLE;
+	pr_info("Tauros2: %s L2 prefetch.\n",
+			(features & CACHE_TAUROS2_PREFETCH_ON)
+			? "Enabling" : "Disabling");
+
+	if (features & CACHE_TAUROS2_LINEFILL_BURST8)
+		u |= CCR_L2C_BURST8_ENABLE;
+	else
+		u &= ~CCR_L2C_BURST8_ENABLE;
+	pr_info("Tauros2: %s burst8 line fill.\n",
+			(features & CACHE_TAUROS2_LINEFILL_BURST8)
+			? "Enabling" : "Disabling");
+
+	write_extra_features(u);
+}
+
+static void __init tauros2_internal_init(unsigned int features)
+{
+	char *mode = NULL;
+
+	enable_extra_feature(features);
+
+#ifdef CONFIG_CPU_32v5
+	if ((processor_id & 0xff0f0000) == 0x56050000) {
+		u32 feat;
+
+		/*
+		 * v5 CPUs with Tauros2 have the L2 cache enable bit
+		 * located in the CPU Extra Features register.
+		 */
+		feat = read_extra_features();
+		if (!(feat & 0x00400000)) {
+			pr_info("Tauros2: Enabling L2 cache.\n");
+			write_extra_features(feat | 0x00400000);
+		}
+
+		mode = "ARMv5";
+		outer_cache.inv_range = tauros2_inv_range;
+		outer_cache.clean_range = tauros2_clean_range;
+		outer_cache.flush_range = tauros2_flush_range;
+		outer_cache.disable = tauros2_disable;
+		outer_cache.resume = tauros2_resume;
+	}
+#endif
+
+#ifdef CONFIG_CPU_32v7
+	/*
+	 * Check whether this CPU has support for the v7 hierarchical
+	 * cache ops.  (PJ4 is in its v7 personality mode if the MMFR3
+	 * register indicates support for the v7 hierarchical cache
+	 * ops.)
+	 *
+	 * (Although strictly speaking there may exist CPUs that
+	 * implement the v7 cache ops but are only ARMv6 CPUs (due to
+	 * not complying with all of the other ARMv7 requirements),
+	 * there are no real-life examples of Tauros2 being used on
+	 * such CPUs as of yet.)
+	 */
+	if (cpuid_scheme() && (read_mmfr3() & 0xf) == 1) {
+		u32 actlr;
+
+		/*
+		 * When Tauros2 is used in an ARMv7 system, the L2
+		 * enable bit is located in the Auxiliary System Control
+		 * Register (which is the only register allowed by the
+		 * ARMv7 spec to contain fine-grained cache control bits).
+		 */
+		actlr = read_actlr();
+		if (!(actlr & 0x00000002)) {
+			pr_info("Tauros2: Enabling L2 cache.\n");
+			write_actlr(actlr | 0x00000002);
+		}
+
+		mode = "ARMv7";
+	}
+#endif
+
+	if (mode == NULL) {
+		pr_crit("Tauros2: Unable to detect CPU mode.\n");
+		return;
+	}
+
+	pr_info("Tauros2: L2 cache support initialised "
+			 "in %s mode.\n", mode);
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id tauros2_ids[] __initconst = {
+	{ .compatible = "marvell,tauros2-cache"},
+	{}
+};
+#endif
+
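+/*
+ * For illustration: non-DT board code may request specific features, e.g.
+ *
+ *	tauros2_init(CACHE_TAUROS2_PREFETCH_ON | CACHE_TAUROS2_LINEFILL_BURST8);
+ *
+ * while DT platforms pass 0 and let the "marvell,tauros2-cache-features"
+ * property parsed below select them.
+ */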
+void __init tauros2_init(unsigned int features)
+{
+#ifdef CONFIG_OF
+	struct device_node *node;
+	int ret;
+	unsigned int f;
+
+	node = of_find_matching_node(NULL, tauros2_ids);
+	if (!node) {
+		pr_info("marvell,tauros2-cache not found, disabling it\n");
+	} else {
+		ret = of_property_read_u32(node, "marvell,tauros2-cache-features", &f);
+		if (ret) {
+			pr_info("marvell,tauros2-cache-features property not found, "
+				"disabling extra features\n");
+			features = 0;
+		} else
+			features = f;
+	}
+#endif
+	tauros2_internal_init(features);
+}
diff --git a/arch/arm/mm/cache-tauros3.h b/arch/arm/mm/cache-tauros3.h
new file mode 100644
index 0000000..02c0a97
--- /dev/null
+++ b/arch/arm/mm/cache-tauros3.h
@@ -0,0 +1,41 @@
+/*
+ * Marvell Tauros3 cache controller includes
+ *
+ * Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
+ *
+ * based on GPL'ed 2.6 kernel sources
+ *  (c) Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __ASM_ARM_HARDWARE_TAUROS3_H
+#define __ASM_ARM_HARDWARE_TAUROS3_H
+
+/*
+ * Marvell Tauros3 L2CC is compatible with PL310 r0p0
+ * but with PREFETCH_CTRL (r2p0) and an additional event counter.
+ * Also, there is AUX2_CTRL for some Marvell specific control.
+ */
+
+#define TAUROS3_EVENT_CNT2_CFG		0x224
+#define TAUROS3_EVENT_CNT2_VAL		0x228
+#define TAUROS3_INV_ALL			0x780
+#define TAUROS3_CLEAN_ALL		0x784
+#define TAUROS3_AUX2_CTRL		0x820
+
+/* Registers shifts and masks */
+#define TAUROS3_AUX2_CTRL_LINEFILL_BURST8_EN	(1 << 2)
+
+#endif
diff --git a/arch/arm/mm/cache-uniphier.c b/arch/arm/mm/cache-uniphier.c
new file mode 100644
index 0000000..f57b080
--- /dev/null
+++ b/arch/arm/mm/cache-uniphier.c
@@ -0,0 +1,504 @@
+/*
+ * Copyright (C) 2015-2016 Socionext Inc.
+ *   Author: Masahiro Yamada <yamada.masahiro@socionext.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt)		"uniphier: " fmt
+
+#include <linux/bitops.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/log2.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+#include <asm/hardware/cache-uniphier.h>
+#include <asm/outercache.h>
+
+/* control registers */
+#define UNIPHIER_SSCC		0x0	/* Control Register */
+#define    UNIPHIER_SSCC_BST			BIT(20)	/* UCWG burst read */
+#define    UNIPHIER_SSCC_ACT			BIT(19)	/* Inst-Data separate */
+#define    UNIPHIER_SSCC_WTG			BIT(18)	/* WT gathering on */
+#define    UNIPHIER_SSCC_PRD			BIT(17)	/* enable pre-fetch */
+#define    UNIPHIER_SSCC_ON			BIT(0)	/* enable cache */
+#define UNIPHIER_SSCLPDAWCR	0x30	/* Unified/Data Active Way Control */
+#define UNIPHIER_SSCLPIAWCR	0x34	/* Instruction Active Way Control */
+
+/* revision registers */
+#define UNIPHIER_SSCID		0x0	/* ID Register */
+
+/* operation registers */
+#define UNIPHIER_SSCOPE		0x244	/* Cache Operation Primitive Entry */
+#define    UNIPHIER_SSCOPE_CM_INV		0x0	/* invalidate */
+#define    UNIPHIER_SSCOPE_CM_CLEAN		0x1	/* clean */
+#define    UNIPHIER_SSCOPE_CM_FLUSH		0x2	/* flush */
+#define    UNIPHIER_SSCOPE_CM_SYNC		0x8	/* sync (drain bufs) */
+#define    UNIPHIER_SSCOPE_CM_FLUSH_PREFETCH	0x9	/* flush p-fetch buf */
+#define UNIPHIER_SSCOQM		0x248	/* Cache Operation Queue Mode */
+#define    UNIPHIER_SSCOQM_S_MASK		(0x3 << 17)
+#define    UNIPHIER_SSCOQM_S_RANGE		(0x0 << 17)
+#define    UNIPHIER_SSCOQM_S_ALL		(0x1 << 17)
+#define    UNIPHIER_SSCOQM_CE			BIT(15)	/* notify completion */
+#define    UNIPHIER_SSCOQM_CM_INV		0x0	/* invalidate */
+#define    UNIPHIER_SSCOQM_CM_CLEAN		0x1	/* clean */
+#define    UNIPHIER_SSCOQM_CM_FLUSH		0x2	/* flush */
+#define UNIPHIER_SSCOQAD	0x24c	/* Cache Operation Queue Address */
+#define UNIPHIER_SSCOQSZ	0x250	/* Cache Operation Queue Size */
+#define UNIPHIER_SSCOPPQSEF	0x25c	/* Cache Operation Queue Set Complete*/
+#define    UNIPHIER_SSCOPPQSEF_FE		BIT(1)
+#define    UNIPHIER_SSCOPPQSEF_OE		BIT(0)
+#define UNIPHIER_SSCOLPQS	0x260	/* Cache Operation Queue Status */
+#define    UNIPHIER_SSCOLPQS_EF			BIT(2)
+#define    UNIPHIER_SSCOLPQS_EST		BIT(1)
+#define    UNIPHIER_SSCOLPQS_QST		BIT(0)
+
+/* Is the operation region specified by address range? */
+#define UNIPHIER_SSCOQM_S_IS_RANGE(op) \
+		((op & UNIPHIER_SSCOQM_S_MASK) == UNIPHIER_SSCOQM_S_RANGE)
+
+/**
+ * uniphier_cache_data - UniPhier outer cache specific data
+ *
+ * @ctrl_base: virtual base address of control registers
+ * @rev_base: virtual base address of revision registers
+ * @op_base: virtual base address of operation registers
+ * @way_ctrl_base: virtual base address of active way control registers
+ * @way_mask: each bit specifies if the way is present
+ * @nsets: number of associativity sets
+ * @line_size: line size in bytes
+ * @range_op_max_size: max size that can be handled by a single range operation
+ * @list: list node to include this level in the whole cache hierarchy
+ */
+struct uniphier_cache_data {
+	void __iomem *ctrl_base;
+	void __iomem *rev_base;
+	void __iomem *op_base;
+	void __iomem *way_ctrl_base;
+	u32 way_mask;
+	u32 nsets;
+	u32 line_size;
+	u32 range_op_max_size;
+	struct list_head list;
+};
+
+/*
+ * List of the whole outer cache hierarchy.  This list is only modified during
+ * the early boot stage, so no mutex is taken for the access to the list.
+ */
+static LIST_HEAD(uniphier_cache_list);
+
+/**
+ * __uniphier_cache_sync - perform a sync point for a particular cache level
+ *
+ * @data: cache controller specific data
+ */
+static void __uniphier_cache_sync(struct uniphier_cache_data *data)
+{
+	/* This sequence need not be atomic.  Do not disable IRQ. */
+	writel_relaxed(UNIPHIER_SSCOPE_CM_SYNC,
+		       data->op_base + UNIPHIER_SSCOPE);
+	/* need a read back to confirm */
+	readl_relaxed(data->op_base + UNIPHIER_SSCOPE);
+}
+
+/**
+ * __uniphier_cache_maint_common - run a queue operation for a particular level
+ *
+ * @data: cache controller specific data
+ * @start: start address of range operation (don't care for "all" operation)
+ * @size: data size of range operation (don't care for "all" operation)
+ * @operation: flags to specify the desired cache operation
+ */
+static void __uniphier_cache_maint_common(struct uniphier_cache_data *data,
+					  unsigned long start,
+					  unsigned long size,
+					  u32 operation)
+{
+	unsigned long flags;
+
+	/*
+	 * No spin lock is necessary here because:
+	 *
+	 * [1] This outer cache controller is able to accept maintenance
+	 * operations from multiple CPUs at a time in an SMP system; if a
+	 * maintenance operation is under way and another operation is issued,
+	 * the new one is stored in the queue.  The controller performs one
+	 * operation after another.  If the queue is full, the status register,
+	 * UNIPHIER_SSCOPPQSEF, indicates that the queue registration has
+	 * failed.  The status registers, UNIPHIER_{SSCOPPQSEF, SSCOLPQS}, have
+	 * different instances for each CPU, i.e. each CPU can track the status
+	 * of the maintenance operations triggered by itself.
+	 *
+	 * [2] The cache command registers, UNIPHIER_{SSCOQM, SSCOQAD, SSCOQSZ,
+	 * SSCOQWN}, are shared between multiple CPUs, but the hardware still
+	 * guarantees the registration sequence is atomic; the write access to
+	 * them are arbitrated by the hardware.  The first accessor to the
+	 * register, UNIPHIER_SSCOQM, holds the access right and it is released
+	 * by reading the status register, UNIPHIER_SSCOPPQSEF.  While one CPU
+	 * is holding the access right, other CPUs fail to register operations.
+	 * One CPU should not hold the access right for a long time, so local
+	 * IRQs should be disabled during the following sequence.
+	 */
+	local_irq_save(flags);
+
+	/* clear the complete notification flag */
+	writel_relaxed(UNIPHIER_SSCOLPQS_EF, data->op_base + UNIPHIER_SSCOLPQS);
+
+	do {
+		/* set cache operation */
+		writel_relaxed(UNIPHIER_SSCOQM_CE | operation,
+			       data->op_base + UNIPHIER_SSCOQM);
+
+		/* set address range if needed */
+		if (likely(UNIPHIER_SSCOQM_S_IS_RANGE(operation))) {
+			writel_relaxed(start, data->op_base + UNIPHIER_SSCOQAD);
+			writel_relaxed(size, data->op_base + UNIPHIER_SSCOQSZ);
+		}
+	} while (unlikely(readl_relaxed(data->op_base + UNIPHIER_SSCOPPQSEF) &
+			  (UNIPHIER_SSCOPPQSEF_FE | UNIPHIER_SSCOPPQSEF_OE)));
+
+	/* wait until the operation is completed */
+	while (likely(readl_relaxed(data->op_base + UNIPHIER_SSCOLPQS) !=
+		      UNIPHIER_SSCOLPQS_EF))
+		cpu_relax();
+
+	local_irq_restore(flags);
+}
+
+static void __uniphier_cache_maint_all(struct uniphier_cache_data *data,
+				       u32 operation)
+{
+	__uniphier_cache_maint_common(data, 0, 0,
+				      UNIPHIER_SSCOQM_S_ALL | operation);
+
+	__uniphier_cache_sync(data);
+}
+
+static void __uniphier_cache_maint_range(struct uniphier_cache_data *data,
+					 unsigned long start, unsigned long end,
+					 u32 operation)
+{
+	unsigned long size;
+
+	/*
+	 * If the start address is not aligned,
+	 * perform a cache operation for the first cache-line
+	 */
+	start = start & ~(data->line_size - 1);
+
+	size = end - start;
+
+	if (unlikely(size >= (unsigned long)(-data->line_size))) {
+		/* the range effectively covers the whole address space;
+		 * use the "all" operation */
+		__uniphier_cache_maint_all(data, operation);
+		return;
+	}
+
+	/*
+	 * If the end address is not aligned,
+	 * perform a cache operation for the last cache-line
+	 */
+	size = ALIGN(size, data->line_size);
+
+	while (size) {
+		unsigned long chunk_size = min_t(unsigned long, size,
+						 data->range_op_max_size);
+
+		__uniphier_cache_maint_common(data, start, chunk_size,
+					UNIPHIER_SSCOQM_S_RANGE | operation);
+
+		start += chunk_size;
+		size -= chunk_size;
+	}
+
+	__uniphier_cache_sync(data);
+}
+
+static void __uniphier_cache_enable(struct uniphier_cache_data *data, bool on)
+{
+	u32 val = 0;
+
+	if (on)
+		val = UNIPHIER_SSCC_WTG | UNIPHIER_SSCC_PRD | UNIPHIER_SSCC_ON;
+
+	writel_relaxed(val, data->ctrl_base + UNIPHIER_SSCC);
+}
+
+static void __init __uniphier_cache_set_active_ways(
+					struct uniphier_cache_data *data)
+{
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu)
+		writel_relaxed(data->way_mask, data->way_ctrl_base + 4 * cpu);
+}
+
+static void uniphier_cache_maint_range(unsigned long start, unsigned long end,
+				       u32 operation)
+{
+	struct uniphier_cache_data *data;
+
+	list_for_each_entry(data, &uniphier_cache_list, list)
+		__uniphier_cache_maint_range(data, start, end, operation);
+}
+
+static void uniphier_cache_maint_all(u32 operation)
+{
+	struct uniphier_cache_data *data;
+
+	list_for_each_entry(data, &uniphier_cache_list, list)
+		__uniphier_cache_maint_all(data, operation);
+}
+
+static void uniphier_cache_inv_range(unsigned long start, unsigned long end)
+{
+	uniphier_cache_maint_range(start, end, UNIPHIER_SSCOQM_CM_INV);
+}
+
+static void uniphier_cache_clean_range(unsigned long start, unsigned long end)
+{
+	uniphier_cache_maint_range(start, end, UNIPHIER_SSCOQM_CM_CLEAN);
+}
+
+static void uniphier_cache_flush_range(unsigned long start, unsigned long end)
+{
+	uniphier_cache_maint_range(start, end, UNIPHIER_SSCOQM_CM_FLUSH);
+}
+
+static void __init uniphier_cache_inv_all(void)
+{
+	uniphier_cache_maint_all(UNIPHIER_SSCOQM_CM_INV);
+}
+
+static void uniphier_cache_flush_all(void)
+{
+	uniphier_cache_maint_all(UNIPHIER_SSCOQM_CM_FLUSH);
+}
+
+static void uniphier_cache_disable(void)
+{
+	struct uniphier_cache_data *data;
+
+	list_for_each_entry_reverse(data, &uniphier_cache_list, list)
+		__uniphier_cache_enable(data, false);
+
+	uniphier_cache_flush_all();
+}
+
+static void __init uniphier_cache_enable(void)
+{
+	struct uniphier_cache_data *data;
+
+	uniphier_cache_inv_all();
+
+	list_for_each_entry(data, &uniphier_cache_list, list) {
+		__uniphier_cache_enable(data, true);
+		__uniphier_cache_set_active_ways(data);
+	}
+}
+
+static void uniphier_cache_sync(void)
+{
+	struct uniphier_cache_data *data;
+
+	list_for_each_entry(data, &uniphier_cache_list, list)
+		__uniphier_cache_sync(data);
+}
+
+static const struct of_device_id uniphier_cache_match[] __initconst = {
+	{ .compatible = "socionext,uniphier-system-cache" },
+	{ /* sentinel */ }
+};
+
+static int __init __uniphier_cache_init(struct device_node *np,
+					unsigned int *cache_level)
+{
+	struct uniphier_cache_data *data;
+	u32 level, cache_size;
+	struct device_node *next_np;
+	int ret = 0;
+
+	if (!of_match_node(uniphier_cache_match, np)) {
+		pr_err("L%d: not compatible with uniphier cache\n",
+		       *cache_level);
+		return -EINVAL;
+	}
+
+	if (of_property_read_u32(np, "cache-level", &level)) {
+		pr_err("L%d: cache-level is not specified\n", *cache_level);
+		return -EINVAL;
+	}
+
+	if (level != *cache_level) {
+		pr_err("L%d: cache-level is unexpected value %d\n",
+		       *cache_level, level);
+		return -EINVAL;
+	}
+
+	if (!of_property_read_bool(np, "cache-unified")) {
+		pr_err("L%d: cache-unified is not specified\n", *cache_level);
+		return -EINVAL;
+	}
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	if (of_property_read_u32(np, "cache-line-size", &data->line_size) ||
+	    !is_power_of_2(data->line_size)) {
+		pr_err("L%d: cache-line-size is unspecified or invalid\n",
+		       *cache_level);
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (of_property_read_u32(np, "cache-sets", &data->nsets) ||
+	    !is_power_of_2(data->nsets)) {
+		pr_err("L%d: cache-sets is unspecified or invalid\n",
+		       *cache_level);
+		ret = -EINVAL;
+		goto err;
+	}
+
+	if (of_property_read_u32(np, "cache-size", &cache_size) ||
+	    cache_size == 0 || cache_size % (data->nsets * data->line_size)) {
+		pr_err("L%d: cache-size is unspecified or invalid\n",
+		       *cache_level);
+		ret = -EINVAL;
+		goto err;
+	}
+
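+	/*
+	 * For example, cache-size = 0x80000 (512 KiB), cache-sets = 512 and
+	 * cache-line-size = 128 give 8 ways, i.e. way_mask = GENMASK(7, 0).
+	 */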
+	data->way_mask = GENMASK(cache_size / data->nsets / data->line_size - 1,
+				 0);
+
+	data->ctrl_base = of_iomap(np, 0);
+	if (!data->ctrl_base) {
+		pr_err("L%d: failed to map control register\n", *cache_level);
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	data->rev_base = of_iomap(np, 1);
+	if (!data->rev_base) {
+		pr_err("L%d: failed to map revision register\n", *cache_level);
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	data->op_base = of_iomap(np, 2);
+	if (!data->op_base) {
+		pr_err("L%d: failed to map operation register\n", *cache_level);
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	data->way_ctrl_base = data->ctrl_base + 0xc00;
+
+	if (*cache_level == 2) {
+		u32 revision = readl(data->rev_base + UNIPHIER_SSCID);
+		/*
+		 * The size of range operation is limited to (1 << 22) or less
+		 * for PH-sLD8 or older SoCs.
+		 */
+		if (revision <= 0x16)
+			data->range_op_max_size = (u32)1 << 22;
+
+		/*
+		 * Unfortunately, the offset of the active way control base
+		 * varies from SoC to SoC.
+		 */
+		switch (revision) {
+		case 0x11:	/* sLD3 */
+			data->way_ctrl_base = data->ctrl_base + 0x870;
+			break;
+		case 0x12:	/* LD4 */
+		case 0x16:	/* sld8 */
+			data->way_ctrl_base = data->ctrl_base + 0x840;
+			break;
+		default:
+			break;
+		}
+	}
+
+	data->range_op_max_size -= data->line_size;
+
+	INIT_LIST_HEAD(&data->list);
+	list_add_tail(&data->list, &uniphier_cache_list); /* no mutex */
+
+	/*
+	 * OK, this level has been successfully initialized.  Look for the next
+	 * level cache.  Do not roll back even if the initialization of the
+	 * next level cache fails because we want to continue with available
+	 * cache levels.
+	 */
+	next_np = of_find_next_cache_node(np);
+	if (next_np) {
+		(*cache_level)++;
+		ret = __uniphier_cache_init(next_np, cache_level);
+	}
+	of_node_put(next_np);
+
+	return ret;
+err:
+	iounmap(data->op_base);
+	iounmap(data->rev_base);
+	iounmap(data->ctrl_base);
+	kfree(data);
+
+	return ret;
+}
+
+int __init uniphier_cache_init(void)
+{
+	struct device_node *np = NULL;
+	unsigned int cache_level;
+	int ret = 0;
+
+	/* look for level 2 cache */
+	while ((np = of_find_matching_node(np, uniphier_cache_match)))
+		if (!of_property_read_u32(np, "cache-level", &cache_level) &&
+		    cache_level == 2)
+			break;
+
+	if (!np)
+		return -ENODEV;
+
+	ret = __uniphier_cache_init(np, &cache_level);
+	of_node_put(np);
+
+	if (ret) {
+		/*
+		 * Error out only if L2 initialization fails.  Continue on
+		 * errors for L3 or outer levels because they are optional.
+		 */
+		if (cache_level == 2) {
+			pr_err("failed to initialize L2 cache\n");
+			return ret;
+		}
+
+		cache_level--;
+		ret = 0;
+	}
+
+	outer_cache.inv_range = uniphier_cache_inv_range;
+	outer_cache.clean_range = uniphier_cache_clean_range;
+	outer_cache.flush_range = uniphier_cache_flush_range;
+	outer_cache.flush_all = uniphier_cache_flush_all;
+	outer_cache.disable = uniphier_cache_disable;
+	outer_cache.sync = uniphier_cache_sync;
+
+	uniphier_cache_enable();
+
+	pr_info("enabled outer cache (cache level: %d)\n", cache_level);
+
+	return ret;
+}
diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S
new file mode 100644
index 0000000..91e3adf
--- /dev/null
+++ b/arch/arm/mm/cache-v4.S
@@ -0,0 +1,150 @@
+/*
+ *  linux/arch/arm/mm/cache-v4.S
+ *
+ *  Copyright (C) 1997-2002 Russell king
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/page.h>
+#include "proc-macros.S"
+
+/*
+ *	flush_icache_all()
+ *
+ *	Unconditionally clean and invalidate the entire icache.
+ */
+ENTRY(v4_flush_icache_all)
+	ret	lr
+ENDPROC(v4_flush_icache_all)
+
+/*
+ *	flush_user_cache_all()
+ *
+ *	Invalidate all cache entries in a particular address
+ *	space.
+ *
+ *	- mm	- mm_struct describing address space
+ */
+ENTRY(v4_flush_user_cache_all)
+	/* FALLTHROUGH */
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(v4_flush_kern_cache_all)
+#ifdef CONFIG_CPU_CP15
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c7, 0		@ flush ID cache
+	ret	lr
+#else
+	/* FALLTHROUGH */
+#endif
+
+/*
+ *	flush_user_cache_range(start, end, flags)
+ *
+ *	Invalidate a range of cache entries in the specified
+ *	address space.
+ *
+ *	- start - start address (may not be aligned)
+ *	- end	- end address (exclusive, may not be aligned)
+ *	- flags	- vm_area_struct flags describing address space
+ */
+ENTRY(v4_flush_user_cache_range)
+#ifdef CONFIG_CPU_CP15
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c7, 0		@ flush ID cache
+	ret	lr
+#else
+	/* FALLTHROUGH */
+#endif
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(v4_coherent_kern_range)
+	/* FALLTHROUGH */
+
+/*
+ *	coherent_user_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(v4_coherent_user_range)
+	mov	r0, #0
+	ret	lr
+
+/*
+ *	flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure no D cache aliasing occurs, either with itself or
+ *	the I cache
+ *
+ *	- addr	- kernel address
+ *	- size	- region size
+ */
+ENTRY(v4_flush_kern_dcache_area)
+	/* FALLTHROUGH */
+
+/*
+ *	dma_flush_range(start, end)
+ *
+ *	Clean and invalidate the specified virtual address range.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(v4_dma_flush_range)
+#ifdef CONFIG_CPU_CP15
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c7, 0		@ flush ID cache
+#endif
+	ret	lr
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(v4_dma_unmap_area)
+	teq	r2, #DMA_TO_DEVICE
+	bne	v4_dma_flush_range
+	/* FALLTHROUGH */
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(v4_dma_map_area)
+	ret	lr
+ENDPROC(v4_dma_unmap_area)
+ENDPROC(v4_dma_map_area)
+
+	.globl	v4_flush_kern_cache_louis
+	.equ	v4_flush_kern_cache_louis, v4_flush_kern_cache_all
+
+	__INITDATA
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions v4
diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S
new file mode 100644
index 0000000..a5084ec
--- /dev/null
+++ b/arch/arm/mm/cache-v4wb.S
@@ -0,0 +1,262 @@
+/*
+ *  linux/arch/arm/mm/cache-v4wb.S
+ *
+ *  Copyright (C) 1997-2002 Russell king
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+#include "proc-macros.S"
+
+/*
+ * The size of one data cache line.
+ */
+#define CACHE_DLINESIZE	32
+
+/*
+ * The total size of the data cache.
+ */
+#if defined(CONFIG_CPU_SA110)
+# define CACHE_DSIZE	16384
+#elif defined(CONFIG_CPU_SA1100)
+# define CACHE_DSIZE	8192
+#else
+# error Unknown cache size
+#endif
+
+/*
+ * This is the size at which it becomes more efficient to
+ * clean the whole cache, rather than using the individual
+ * cache line maintenance instructions.
+ *
+ *  Size  Clean (ticks) Dirty (ticks)
+ *   4096   21  20  21    53  55  54
+ *   8192   40  41  40   106 100 102
+ *  16384   77  77  76   140 140 138
+ *  32768  150 149 150   214 216 212 <---
+ *  65536  296 297 296   351 358 361
+ * 131072  591 591 591   656 657 651
+ *  Whole  132 136 132   221 217 207 <---
+ */
+#define CACHE_DLIMIT	(CACHE_DSIZE * 4)
+
+	.data
+	.align	2
+flush_base:
+	.long	FLUSH_BASE
+	.text
+
+/*
+ *	flush_icache_all()
+ *
+ *	Unconditionally clean and invalidate the entire icache.
+ */
+ENTRY(v4wb_flush_icache_all)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	ret	lr
+ENDPROC(v4wb_flush_icache_all)
+
+/*
+ *	flush_user_cache_all()
+ *
+ *	Clean and invalidate all cache entries in a particular address
+ *	space.
+ */
+ENTRY(v4wb_flush_user_cache_all)
+	/* FALLTHROUGH */
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(v4wb_flush_kern_cache_all)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+__flush_whole_cache:
+	ldr	r3, =flush_base
+	ldr	r1, [r3, #0]
+	eor	r1, r1, #CACHE_DSIZE
+	str	r1, [r3, #0]
+	add	r2, r1, #CACHE_DSIZE
+1:	ldr	r3, [r1], #32
+	cmp	r1, r2
+	blo	1b
+#ifdef FLUSH_BASE_MINICACHE
+	add	r2, r2, #FLUSH_BASE_MINICACHE - FLUSH_BASE
+	sub	r1, r2, #512			@ only 512 bytes
+1:	ldr	r3, [r1], #32
+	cmp	r1, r2
+	blo	1b
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain write buffer
+	ret	lr
+
+/*
+ *	flush_user_cache_range(start, end, flags)
+ *
+ *	Invalidate a range of cache entries in the specified
+ *	address space.
+ *
+ *	- start - start address (inclusive, page aligned)
+ *	- end	- end address (exclusive, page aligned)
+ *	- flags	- vm_area_struct flags describing address space
+ */
+ENTRY(v4wb_flush_user_cache_range)
+	mov	ip, #0
+	sub	r3, r1, r0			@ calculate total size
+	tst	r2, #VM_EXEC			@ executable region?
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+
+	cmp	r3, #CACHE_DLIMIT		@ total size >= limit?
+	bhs	__flush_whole_cache		@ flush whole D cache
+
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain write buffer
+	ret	lr
+
+/*
+ *	flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure no D cache aliasing occurs, either with itself or
+ *	the I cache
+ *
+ *	- addr	- kernel address
+ *	- size	- region size
+ */
+ENTRY(v4wb_flush_kern_dcache_area)
+	add	r1, r0, r1
+	/* fall through */
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(v4wb_coherent_kern_range)
+	/* fall through */
+
+/*
+ *	coherent_user_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(v4wb_coherent_user_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+
+/*
+ *	dma_inv_range(start, end)
+ *
+ *	Invalidate (discard) the specified virtual address range.
+ *	May not write back any entries.  If 'start' or 'end'
+ *	are not cache line aligned, those lines must be written
+ *	back.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+v4wb_dma_inv_range:
+	tst	r0, #CACHE_DLINESIZE - 1
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r0, c7, c10, 1		@ clean D entry
+	tst	r1, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r1, c7, c10, 1		@ clean D entry
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	ret	lr
+
+/*
+ *	dma_clean_range(start, end)
+ *
+ *	Clean (write back) the specified virtual address range.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+v4wb_dma_clean_range:
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	ret	lr
+
+/*
+ *	dma_flush_range(start, end)
+ *
+ *	Clean and invalidate the specified virtual address range.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ *
+ *	This is actually the same as v4wb_coherent_kern_range()
+ */
+	.globl	v4wb_dma_flush_range
+	.set	v4wb_dma_flush_range, v4wb_coherent_kern_range
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(v4wb_dma_map_area)
+	add	r1, r1, r0
+	cmp	r2, #DMA_TO_DEVICE
+	beq	v4wb_dma_clean_range
+	bcs	v4wb_dma_inv_range
+	b	v4wb_dma_flush_range
+ENDPROC(v4wb_dma_map_area)
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(v4wb_dma_unmap_area)
+	ret	lr
+ENDPROC(v4wb_dma_unmap_area)
+
+	.globl	v4wb_flush_kern_cache_louis
+	.equ	v4wb_flush_kern_cache_louis, v4wb_flush_kern_cache_all
+
+	__INITDATA
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions v4wb
diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S
new file mode 100644
index 0000000..a0982ce
--- /dev/null
+++ b/arch/arm/mm/cache-v4wt.S
@@ -0,0 +1,206 @@
+/*
+ *  linux/arch/arm/mm/cache-v4wt.S
+ *
+ *  Copyright (C) 1997-2002 Russell king
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ARMv4 write through cache operations support.
+ *
+ *  We assume that the write buffer is not enabled.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/page.h>
+#include "proc-macros.S"
+
+/*
+ * The size of one data cache line.
+ */
+#define CACHE_DLINESIZE	32
+
+/*
+ * The number of data cache segments.
+ */
+#define CACHE_DSEGMENTS	8
+
+/*
+ * The number of lines in a cache segment.
+ */
+#define CACHE_DENTRIES	64
+
+/*
+ * This is the size at which it becomes more efficient to
+ * clean the whole cache, rather than using the individual
+ * cache line maintenance instructions.
+ *
+ * *** This needs benchmarking
+ */
+#define CACHE_DLIMIT	16384
+
+/*
+ *	flush_icache_all()
+ *
+ *	Unconditionally clean and invalidate the entire icache.
+ */
+ENTRY(v4wt_flush_icache_all)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	ret	lr
+ENDPROC(v4wt_flush_icache_all)
+
+/*
+ *	flush_user_cache_all()
+ *
+ *	Invalidate all cache entries in a particular address
+ *	space.
+ */
+ENTRY(v4wt_flush_user_cache_all)
+	/* FALLTHROUGH */
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(v4wt_flush_kern_cache_all)
+	mov	r2, #VM_EXEC
+	mov	ip, #0
+__flush_whole_cache:
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, ip, c7, c6, 0		@ invalidate D cache
+	ret	lr
+
+/*
+ *	flush_user_cache_range(start, end, flags)
+ *
+ *	Clean and invalidate a range of cache entries in the specified
+ *	address space.
+ *
+ *	- start - start address (inclusive, page aligned)
+ *	- end	- end address (exclusive, page aligned)
+ *	- flags	- vm_area_struct flags describing address space
+ */
+ENTRY(v4wt_flush_user_cache_range)
+	sub	r3, r1, r0			@ calculate total size
+	cmp	r3, #CACHE_DLIMIT
+	bhs	__flush_whole_cache
+
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	ret	lr
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(v4wt_coherent_kern_range)
+	/* FALLTHROUGH */
+
+/*
+ *	coherent_user_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(v4wt_coherent_user_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	ret	lr
+
+/*
+ *	flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure no D cache aliasing occurs, either with itself or
+ *	the I cache
+ *
+ *	- addr	- kernel address
+ *	- size	- region size
+ */
+ENTRY(v4wt_flush_kern_dcache_area)
+	mov	r2, #0
+	mcr	p15, 0, r2, c7, c5, 0		@ invalidate I cache
+	add	r1, r0, r1
+	/* fallthrough */
+
+/*
+ *	dma_inv_range(start, end)
+ *
+ *	Invalidate (discard) the specified virtual address range.
+ *	May not write back any entries.  If 'start' or 'end'
+ *	are not cache line aligned, those lines must be written
+ *	back.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+v4wt_dma_inv_range:
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	ret	lr
+
+/*
+ *	dma_flush_range(start, end)
+ *
+ *	Clean and invalidate the specified virtual address range.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+	.globl	v4wt_dma_flush_range
+	.equ	v4wt_dma_flush_range, v4wt_dma_inv_range
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(v4wt_dma_unmap_area)
+	add	r1, r1, r0
+	teq	r2, #DMA_TO_DEVICE
+	bne	v4wt_dma_inv_range
+	/* FALLTHROUGH */
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(v4wt_dma_map_area)
+	ret	lr
+ENDPROC(v4wt_dma_unmap_area)
+ENDPROC(v4wt_dma_map_area)
+
+	.globl	v4wt_flush_kern_cache_louis
+	.equ	v4wt_flush_kern_cache_louis, v4wt_flush_kern_cache_all
+
+	__INITDATA
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions v4wt
diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S
new file mode 100644
index 0000000..2465995
--- /dev/null
+++ b/arch/arm/mm/cache-v6.S
@@ -0,0 +1,335 @@
+/*
+ *  linux/arch/arm/mm/cache-v6.S
+ *
+ *  Copyright (C) 2001 Deep Blue Solutions Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  This is the "shell" of the ARMv6 processor support.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/errno.h>
+#include <asm/unwind.h>
+
+#include "proc-macros.S"
+
+#define HARVARD_CACHE
+#define CACHE_LINE_SIZE		32
+#define D_CACHE_LINE_SIZE	32
+#define BTB_FLUSH_SIZE		8
+
+/*
+ *	v6_flush_icache_all()
+ *
+ *	Flush the whole I-cache.
+ *
+ *	ARM1136 erratum 411920 - Invalidate Instruction Cache operation can fail.
+ *	This erratum is present in 1136, 1156 and 1176. It does not affect the
+ *	MPCore.
+ *
+ *	Registers:
+ *	r0 - set to 0
+ *	r1 - corrupted
+ */
+ENTRY(v6_flush_icache_all)
+	mov	r0, #0
+#ifdef CONFIG_ARM_ERRATA_411920
+	mrs	r1, cpsr
+	cpsid	ifa				@ disable interrupts
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate entire I-cache
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate entire I-cache
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate entire I-cache
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate entire I-cache
+	msr	cpsr_cx, r1			@ restore interrupts
+	.rept	11				@ ARM Ltd recommends at least
+	nop					@ 11 NOPs
+	.endr
+#else
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I-cache
+#endif
+	ret	lr
+ENDPROC(v6_flush_icache_all)
+
+/*
+ *	v6_flush_kern_cache_all()
+ *
+ *	Flush the entire cache.
+ */
+ENTRY(v6_flush_kern_cache_all)
+	mov	r0, #0
+#ifdef HARVARD_CACHE
+	mcr	p15, 0, r0, c7, c14, 0		@ D cache clean+invalidate
+#ifndef CONFIG_ARM_ERRATA_411920
+	mcr	p15, 0, r0, c7, c5, 0		@ I+BTB cache invalidate
+#else
+	b	v6_flush_icache_all
+#endif
+#else
+	mcr	p15, 0, r0, c7, c15, 0		@ Cache clean+invalidate
+#endif
+	ret	lr
+
+/*
+ *	v6_flush_user_cache_all()
+ *
+ *	Flush all cache entries in a particular address space
+ *
+ *	- mm    - mm_struct describing address space
+ */
+ENTRY(v6_flush_user_cache_all)
+	/*FALLTHROUGH*/
+
+/*
+ *	v6_flush_user_cache_range(start, end, flags)
+ *
+ *	Flush a range of cache entries in the specified address space.
+ *
+ *	- start - start address (may not be aligned)
+ *	- end   - end address (exclusive, may not be aligned)
+ *	- flags	- vm_area_struct flags describing address space
+ *
+ *	It is assumed that:
+ *	- we have a VIPT cache.
+ */
+ENTRY(v6_flush_user_cache_range)
+	ret	lr
+
+/*
+ *	v6_coherent_kern_range(start,end)
+ *
+ *	Ensure that the I and D caches are coherent within specified
+ *	region.  This is typically used when code has been written to
+ *	a memory region, and will be executed.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ *
+ *	It is assumed that:
+ *	- the Icache does not read data from the write buffer
+ */
+ENTRY(v6_coherent_kern_range)
+	/* FALLTHROUGH */
+
+/*
+ *	v6_coherent_user_range(start,end)
+ *
+ *	Ensure that the I and D caches are coherent within specified
+ *	region.  This is typically used when code has been written to
+ *	a memory region, and will be executed.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ *
+ *	It is assumed that:
+ *	- the Icache does not read data from the write buffer
+ */
+ENTRY(v6_coherent_user_range)
+ UNWIND(.fnstart		)
+#ifdef HARVARD_CACHE
+	bic	r0, r0, #CACHE_LINE_SIZE - 1
+1:
+ USER(	mcr	p15, 0, r0, c7, c10, 1	)	@ clean D line
+	add	r0, r0, #CACHE_LINE_SIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mov	r0, #0
+#ifdef HARVARD_CACHE
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+#ifndef CONFIG_ARM_ERRATA_411920
+	mcr	p15, 0, r0, c7, c5, 0		@ I+BTB cache invalidate
+#else
+	b	v6_flush_icache_all
+#endif
+#else
+	mcr	p15, 0, r0, c7, c5, 6		@ invalidate BTB
+#endif
+	ret	lr
+
+/*
+ * Fault handling for the cache operation above. If the virtual address in r0
+ * isn't mapped, fail with -EFAULT.
+ */
+9001:
+	mov	r0, #-EFAULT
+	ret	lr
+ UNWIND(.fnend		)
+ENDPROC(v6_coherent_user_range)
+ENDPROC(v6_coherent_kern_range)
+
+/*
+ *	v6_flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure that the data held in the page kaddr is written back
+ *	to the page in question.
+ *
+ *	- addr	- kernel address
+ *	- size	- region size
+ */
+ENTRY(v6_flush_kern_dcache_area)
+	add	r1, r0, r1
+	bic	r0, r0, #D_CACHE_LINE_SIZE - 1
+1:
+#ifdef HARVARD_CACHE
+	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D line
+#else
+	mcr	p15, 0, r0, c7, c15, 1		@ clean & invalidate unified line
+#endif	
+	add	r0, r0, #D_CACHE_LINE_SIZE
+	cmp	r0, r1
+	blo	1b
+#ifdef HARVARD_CACHE
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4
+#endif
+	ret	lr
+
+
+/*
+ *	v6_dma_inv_range(start,end)
+ *
+ *	Invalidate the data cache within the specified region; we will
+ *	be performing a DMA operation in this region and we want to
+ *	purge old data in the cache.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+v6_dma_inv_range:
+#ifdef CONFIG_DMA_CACHE_RWFO
+	ldrb	r2, [r0]			@ read for ownership
+	strb	r2, [r0]			@ write for ownership
+#endif
+	tst	r0, #D_CACHE_LINE_SIZE - 1
+	bic	r0, r0, #D_CACHE_LINE_SIZE - 1
+#ifdef HARVARD_CACHE
+	mcrne	p15, 0, r0, c7, c10, 1		@ clean D line
+#else
+	mcrne	p15, 0, r0, c7, c11, 1		@ clean unified line
+#endif
+	tst	r1, #D_CACHE_LINE_SIZE - 1
+#ifdef CONFIG_DMA_CACHE_RWFO
+	ldrneb	r2, [r1, #-1]			@ read for ownership
+	strneb	r2, [r1, #-1]			@ write for ownership
+#endif
+	bic	r1, r1, #D_CACHE_LINE_SIZE - 1
+#ifdef HARVARD_CACHE
+	mcrne	p15, 0, r1, c7, c14, 1		@ clean & invalidate D line
+#else
+	mcrne	p15, 0, r1, c7, c15, 1		@ clean & invalidate unified line
+#endif
+1:
+#ifdef HARVARD_CACHE
+	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D line
+#else
+	mcr	p15, 0, r0, c7, c7, 1		@ invalidate unified line
+#endif
+	add	r0, r0, #D_CACHE_LINE_SIZE
+	cmp	r0, r1
+#ifdef CONFIG_DMA_CACHE_RWFO
+	ldrlo	r2, [r0]			@ read for ownership
+	strlo	r2, [r0]			@ write for ownership
+#endif
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	ret	lr
+
+/*
+ *	v6_dma_clean_range(start,end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+v6_dma_clean_range:
+	bic	r0, r0, #D_CACHE_LINE_SIZE - 1
+1:
+#ifdef CONFIG_DMA_CACHE_RWFO
+	ldr	r2, [r0]			@ read for ownership
+#endif
+#ifdef HARVARD_CACHE
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D line
+#else
+	mcr	p15, 0, r0, c7, c11, 1		@ clean unified line
+#endif
+	add	r0, r0, #D_CACHE_LINE_SIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	ret	lr
+
+/*
+ *	v6_dma_flush_range(start,end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+ENTRY(v6_dma_flush_range)
+#ifdef CONFIG_DMA_CACHE_RWFO
+	ldrb	r2, [r0]		@ read for ownership
+	strb	r2, [r0]		@ write for ownership
+#endif
+	bic	r0, r0, #D_CACHE_LINE_SIZE - 1
+1:
+#ifdef HARVARD_CACHE
+	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D line
+#else
+	mcr	p15, 0, r0, c7, c15, 1		@ clean & invalidate line
+#endif
+	add	r0, r0, #D_CACHE_LINE_SIZE
+	cmp	r0, r1
+#ifdef CONFIG_DMA_CACHE_RWFO
+	ldrlob	r2, [r0]			@ read for ownership
+	strlob	r2, [r0]			@ write for ownership
+#endif
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	ret	lr
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(v6_dma_map_area)
+	add	r1, r1, r0
+	teq	r2, #DMA_FROM_DEVICE
+	beq	v6_dma_inv_range
+#ifndef CONFIG_DMA_CACHE_RWFO
+	b	v6_dma_clean_range
+#else
+	teq	r2, #DMA_TO_DEVICE
+	beq	v6_dma_clean_range
+	b	v6_dma_flush_range
+#endif
+ENDPROC(v6_dma_map_area)
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(v6_dma_unmap_area)
+#ifndef CONFIG_DMA_CACHE_RWFO
+	add	r1, r1, r0
+	teq	r2, #DMA_TO_DEVICE
+	bne	v6_dma_inv_range
+#endif
+	ret	lr
+ENDPROC(v6_dma_unmap_area)
+
+	.globl	v6_flush_kern_cache_louis
+	.equ	v6_flush_kern_cache_louis, v6_flush_kern_cache_all
+
+	__INITDATA
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions v6
diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S
new file mode 100644
index 0000000..2149b47
--- /dev/null
+++ b/arch/arm/mm/cache-v7.S
@@ -0,0 +1,471 @@
+/*
+ *  linux/arch/arm/mm/cache-v7.S
+ *
+ *  Copyright (C) 2001 Deep Blue Solutions Ltd.
+ *  Copyright (C) 2005 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  This is the "shell" of the ARMv7 processor support.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/errno.h>
+#include <asm/unwind.h>
+#include <asm/hardware/cache-b15-rac.h>
+
+#include "proc-macros.S"
+
+/*
+ * The secondary kernel init calls v7_flush_dcache_all before it enables
+ * the L1; however, the L1 comes out of reset in an undefined state, so
+ * the clean + invalidate performed by v7_flush_dcache_all causes a bunch
+ * of cache lines with uninitialized data and uninitialized tags to get
+ * written out to memory, which does really unpleasant things to the main
+ * processor.  We fix this by performing an invalidate, rather than a
+ * clean + invalidate, before jumping into the kernel.
+ *
+ * This function is cloned from arch/arm/mach-tegra/headsmp.S, and needs
+ * to be called for both secondary core startup and primary core resume
+ * procedures.
+ */
+ENTRY(v7_invalidate_l1)
+       mov     r0, #0
+       mcr     p15, 2, r0, c0, c0, 0
+       mrc     p15, 1, r0, c0, c0, 0
+
+       movw    r1, #0x7fff
+       and     r2, r1, r0, lsr #13
+
+       movw    r1, #0x3ff
+
+       and     r3, r1, r0, lsr #3      @ NumWays - 1
+       add     r2, r2, #1              @ NumSets
+
+       and     r0, r0, #0x7
+       add     r0, r0, #4      @ SetShift
+
+       clz     r1, r3          @ WayShift
+       add     r4, r3, #1      @ NumWays
+1:     sub     r2, r2, #1      @ NumSets--
+       mov     r3, r4          @ Temp = NumWays
+2:     subs    r3, r3, #1      @ Temp--
+       mov     r5, r3, lsl r1
+       mov     r6, r2, lsl r0
+       orr     r5, r5, r6      @ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
+       mcr     p15, 0, r5, c7, c6, 2
+       bgt     2b
+       cmp     r2, #0
+       bgt     1b
+       dsb     st
+       isb
+       ret     lr
+ENDPROC(v7_invalidate_l1)
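+
+/*
+ * For reference (illustrative only, not part of the code above): the DCISW
+ * operand built by the loop is
+ *
+ *	(way << WayShift) | (set << SetShift) | (level << 1)
+ *
+ * where SetShift = CCSIDR[2:0] + 4 (log2 of the line length in bytes),
+ * WayShift = CLZ(NumWays - 1), and level is 0 here because only the L1
+ * data cache is invalidated.  The multi-level walk in v7_flush_dcache_all
+ * below builds the same operand for every data/unified level in CLIDR.
+ */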
+
+/*
+ *	v7_flush_icache_all()
+ *
+ *	Flush the whole I-cache.
+ *
+ *	Registers:
+ *	r0 - set to 0
+ */
+ENTRY(v7_flush_icache_all)
+	mov	r0, #0
+	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)		@ invalidate I-cache inner shareable
+	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)		@ I+BTB cache invalidate
+	ret	lr
+ENDPROC(v7_flush_icache_all)
+
+/*
+ *	v7_flush_dcache_louis()
+ *
+ *	Flush the D-cache up to the Level of Unification Inner Shareable
+ *
+ *	Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
+ */
+
+ENTRY(v7_flush_dcache_louis)
+	dmb					@ ensure ordering with previous memory accesses
+	mrc	p15, 1, r0, c0, c0, 1		@ read clidr, r0 = clidr
+ALT_SMP(mov	r3, r0, lsr #20)		@ move LoUIS into position
+ALT_UP(	mov	r3, r0, lsr #26)		@ move LoUU into position
+	ands	r3, r3, #7 << 1 		@ extract LoU*2 field from clidr
+	bne	start_flush_levels		@ LoU != 0, start flushing
+#ifdef CONFIG_ARM_ERRATA_643719
+ALT_SMP(mrc	p15, 0, r2, c0, c0, 0)		@ read main ID register
+ALT_UP(	ret	lr)				@ LoUU is zero, so nothing to do
+	movw	r1, #:lower16:(0x410fc090 >> 4)	@ ID of ARM Cortex A9 r0p?
+	movt	r1, #:upper16:(0x410fc090 >> 4)
+	teq	r1, r2, lsr #4			@ test for errata affected core and if so...
+	moveq	r3, #1 << 1			@   fix LoUIS value
+	beq	start_flush_levels		@   start flushing cache levels
+#endif
+	ret	lr
+ENDPROC(v7_flush_dcache_louis)
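+
+/*
+ * Illustrative C sketch (not built) of the erratum 643719 workaround above:
+ * on affected Cortex-A9 parts CLIDR.LoUIS reads as zero, so the LoUIS value
+ * is forced to 1 and the flush proceeds anyway.  read_midr() is an assumed
+ * stand-in for MRC p15, 0, <Rt>, c0, c0, 0; the >> 4 drops the minor
+ * revision field, matching any r0p? part.
+ *
+ *	if (louis == 0 &&
+ *	    (read_midr() >> 4) == (0x410fc090 >> 4))	// Cortex-A9 r0p?
+ *		louis = 1;				// flush at least L1
+ */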
+
+/*
+ *	v7_flush_dcache_all()
+ *
+ *	Flush the whole D-cache.
+ *
+ *	Corrupted registers: r0-r7, r9-r11 (r6 only in Thumb mode)
+ */
+ENTRY(v7_flush_dcache_all)
+	dmb					@ ensure ordering with previous memory accesses
+	mrc	p15, 1, r0, c0, c0, 1		@ read clidr
+	mov	r3, r0, lsr #23			@ move LoC into position
+	ands	r3, r3, #7 << 1			@ extract LoC*2 from clidr
+	beq	finished			@ if loc is 0, then no need to clean
+start_flush_levels:
+	mov	r10, #0				@ start clean at cache level 0
+flush_levels:
+	add	r2, r10, r10, lsr #1		@ work out 3x current cache level
+	mov	r1, r0, lsr r2			@ extract cache type bits from clidr
+	and	r1, r1, #7			@ mask off the bits for the current cache only
+	cmp	r1, #2				@ see what cache we have at this level
+	blt	skip				@ skip if no cache, or just i-cache
+#ifdef CONFIG_PREEMPT
+	save_and_disable_irqs_notrace r9	@ make cssr&csidr read atomic
+#endif
+	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
+	isb					@ isb to sync the new cssr&csidr
+	mrc	p15, 1, r1, c0, c0, 0		@ read the new csidr
+#ifdef CONFIG_PREEMPT
+	restore_irqs_notrace r9
+#endif
+	and	r2, r1, #7			@ extract the length of the cache lines
+	add	r2, r2, #4			@ add 4 (line length offset)
+	movw	r4, #0x3ff
+	ands	r4, r4, r1, lsr #3		@ find maximum way number (ways - 1)
+	clz	r5, r4				@ find bit position of way size increment
+	movw	r7, #0x7fff
+	ands	r7, r7, r1, lsr #13		@ extract maximum set number (sets - 1)
+loop1:
+	mov	r9, r7				@ create working copy of max index
+loop2:
+ ARM(	orr	r11, r10, r4, lsl r5	)	@ factor way and cache number into r11
+ THUMB(	lsl	r6, r4, r5		)
+ THUMB(	orr	r11, r10, r6		)	@ factor way and cache number into r11
+ ARM(	orr	r11, r11, r9, lsl r2	)	@ factor index number into r11
+ THUMB(	lsl	r6, r9, r2		)
+ THUMB(	orr	r11, r11, r6		)	@ factor index number into r11
+	mcr	p15, 0, r11, c7, c14, 2		@ clean & invalidate by set/way
+	subs	r9, r9, #1			@ decrement the index
+	bge	loop2
+	subs	r4, r4, #1			@ decrement the way
+	bge	loop1
+skip:
+	add	r10, r10, #2			@ increment cache number
+	cmp	r3, r10
+	bgt	flush_levels
+finished:
+	mov	r10, #0				@ switch back to cache level 0
+	mcr	p15, 2, r10, c0, c0, 0		@ select current cache level in cssr
+	dsb	st
+	isb
+	ret	lr
+ENDPROC(v7_flush_dcache_all)
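+
+/*
+ * Illustrative C sketch (not built) of the set/way walk performed above.
+ * read_clidr(), read_ccsidr(), select_cache_level() and dccisw() are
+ * assumed stand-ins for the MRC/MCR instructions used in the assembly.
+ *
+ *	u32 clidr = read_clidr();
+ *	u32 loc = (clidr >> 24) & 0x7;			// Level of Coherency
+ *
+ *	for (u32 level = 0; level < loc; level++) {
+ *		if (((clidr >> (level * 3)) & 0x7) < 2)
+ *			continue;			// no cache or I-cache only
+ *		select_cache_level(level << 1);		// CSSELR, then ISB
+ *		u32 ccsidr = read_ccsidr();
+ *		u32 set_shift = (ccsidr & 0x7) + 4;
+ *		u32 ways = ((ccsidr >> 3) & 0x3ff) + 1;
+ *		u32 sets = ((ccsidr >> 13) & 0x7fff) + 1;
+ *		u32 way_shift = __builtin_clz(ways - 1);  // CLZ; undefined in C for ways == 1
+ *		for (u32 way = 0; way < ways; way++)
+ *			for (u32 set = 0; set < sets; set++)
+ *				dccisw((way << way_shift) |
+ *				       (set << set_shift) | (level << 1));
+ *	}
+ *	// then DSB + ISB, as in the code above
+ */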
+
+/*
+ *	v7_flush_cache_all()
+ *	v7_flush_kern_cache_all()
+ *
+ *	Flush the entire cache system.
+ *	The data cache flush is achieved using atomic clean/invalidate
+ *	operations working outwards from the L1 cache, using Set/Way based
+ *	cache maintenance instructions.
+ *	The instruction cache can still be invalidated back to the point of
+ *	unification in a single instruction.
+ENTRY(v7_flush_kern_cache_all)
+ ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
+ THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)
+	bl	v7_flush_dcache_all
+	mov	r0, #0
+	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
+	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
+ ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
+ THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
+	ret	lr
+ENDPROC(v7_flush_kern_cache_all)
+
+/*
+ *	v7_flush_kern_cache_louis(void)
+ *
+ *	Flush the data cache up to Level of Unification Inner Shareable.
+ *	Invalidate the I-cache to the point of unification.
+ */
+ENTRY(v7_flush_kern_cache_louis)
+ ARM(	stmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
+ THUMB(	stmfd	sp!, {r4-r7, r9-r11, lr}	)
+	bl	v7_flush_dcache_louis
+	mov	r0, #0
+	ALT_SMP(mcr	p15, 0, r0, c7, c1, 0)	@ invalidate I-cache inner shareable
+	ALT_UP(mcr	p15, 0, r0, c7, c5, 0)	@ I+BTB cache invalidate
+ ARM(	ldmfd	sp!, {r4-r5, r7, r9-r11, lr}	)
+ THUMB(	ldmfd	sp!, {r4-r7, r9-r11, lr}	)
+	ret	lr
+ENDPROC(v7_flush_kern_cache_louis)
+
+/*
+ *	v7_flush_user_cache_all()
+ *
+ *	Flush all cache entries in a particular address space
+ *
+ *	- mm    - mm_struct describing address space
+ */
+ENTRY(v7_flush_user_cache_all)
+	/*FALLTHROUGH*/
+
+/*
+ *	v7_flush_user_cache_range(start, end, flags)
+ *
+ *	Flush a range of cache entries in the specified address space.
+ *
+ *	- start - start address (may not be aligned)
+ *	- end   - end address (exclusive, may not be aligned)
+ *	- flags	- vm_area_struct flags describing address space
+ *
+ *	It is assumed that:
+ *	- we have a VIPT cache.
+ */
+ENTRY(v7_flush_user_cache_range)
+	ret	lr
+ENDPROC(v7_flush_user_cache_all)
+ENDPROC(v7_flush_user_cache_range)
+
+/*
+ *	v7_coherent_kern_range(start,end)
+ *
+ *	Ensure that the I and D caches are coherent within specified
+ *	region.  This is typically used when code has been written to
+ *	a memory region, and will be executed.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ *
+ *	It is assumed that:
+ *	- the Icache does not read data from the write buffer
+ */
+ENTRY(v7_coherent_kern_range)
+	/* FALLTHROUGH */
+
+/*
+ *	v7_coherent_user_range(start,end)
+ *
+ *	Ensure that the I and D caches are coherent within specified
+ *	region.  This is typically used when code has been written to
+ *	a memory region, and will be executed.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ *
+ *	It is assumed that:
+ *	- the Icache does not read data from the write buffer
+ */
+ENTRY(v7_coherent_user_range)
+ UNWIND(.fnstart		)
+	dcache_line_size r2, r3
+	sub	r3, r2, #1
+	bic	r12, r0, r3
+#ifdef CONFIG_ARM_ERRATA_764369
+	ALT_SMP(W(dsb))
+	ALT_UP(W(nop))
+#endif
+1:
+ USER(	mcr	p15, 0, r12, c7, c11, 1	)	@ clean D line to the point of unification
+	add	r12, r12, r2
+	cmp	r12, r1
+	blo	1b
+	dsb	ishst
+	icache_line_size r2, r3
+	sub	r3, r2, #1
+	bic	r12, r0, r3
+2:
+ USER(	mcr	p15, 0, r12, c7, c5, 1	)	@ invalidate I line
+	add	r12, r12, r2
+	cmp	r12, r1
+	blo	2b
+	mov	r0, #0
+	ALT_SMP(mcr	p15, 0, r0, c7, c1, 6)	@ invalidate BTB Inner Shareable
+	ALT_UP(mcr	p15, 0, r0, c7, c5, 6)	@ invalidate BTB
+	dsb	ishst
+	isb
+	ret	lr
+
+/*
+ * Fault handling for the cache operation above. If the virtual address in r0
+ * isn't mapped, fail with -EFAULT.
+ */
+9001:
+#ifdef CONFIG_ARM_ERRATA_775420
+	dsb
+#endif
+	mov	r0, #-EFAULT
+	ret	lr
+ UNWIND(.fnend		)
+ENDPROC(v7_coherent_kern_range)
+ENDPROC(v7_coherent_user_range)
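+
+/*
+ * Illustrative C sketch (not built) of the two passes above; dccmvau(),
+ * icimvau() and bpiall() are assumed stand-ins for the MCRs.
+ *
+ *	for (addr = start & ~(dline - 1); addr < end; addr += dline)
+ *		dccmvau(addr);			// clean D line to PoU
+ *	dsb(ishst);
+ *	for (addr = start & ~(iline - 1); addr < end; addr += iline)
+ *		icimvau(addr);			// invalidate I line to PoU
+ *	bpiall();				// invalidate branch predictor
+ *	dsb(ishst);
+ *	isb();
+ *
+ * with dline/iline read from CTR via the dcache_line_size/icache_line_size
+ * macros, and each cache op wrapped in USER() so a fault returns -EFAULT.
+ */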
+
+/*
+ *	v7_flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure that the data held in the page kaddr is written back
+ *	to the page in question.
+ *
+ *	- addr	- kernel address
+ *	- size	- region size
+ */
+ENTRY(v7_flush_kern_dcache_area)
+	dcache_line_size r2, r3
+	add	r1, r0, r1
+	sub	r3, r2, #1
+	bic	r0, r0, r3
+#ifdef CONFIG_ARM_ERRATA_764369
+	ALT_SMP(W(dsb))
+	ALT_UP(W(nop))
+#endif
+1:
+	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D line / unified line
+	add	r0, r0, r2
+	cmp	r0, r1
+	blo	1b
+	dsb	st
+	ret	lr
+ENDPROC(v7_flush_kern_dcache_area)
+
+/*
+ *	v7_dma_inv_range(start,end)
+ *
+ *	Invalidate the data cache within the specified region; we will
+ *	be performing a DMA operation in this region and we want to
+ *	purge old data in the cache.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+v7_dma_inv_range:
+	dcache_line_size r2, r3
+	sub	r3, r2, #1
+	tst	r0, r3
+	bic	r0, r0, r3
+#ifdef CONFIG_ARM_ERRATA_764369
+	ALT_SMP(W(dsb))
+	ALT_UP(W(nop))
+#endif
+	mcrne	p15, 0, r0, c7, c14, 1		@ clean & invalidate D / U line
+	addne	r0, r0, r2
+
+	tst	r1, r3
+	bic	r1, r1, r3
+	mcrne	p15, 0, r1, c7, c14, 1		@ clean & invalidate D / U line
+	cmp	r0, r1
+1:
+	mcrlo	p15, 0, r0, c7, c6, 1		@ invalidate D / U line
+	addlo	r0, r0, r2
+	cmplo	r0, r1
+	blo	1b
+	dsb	st
+	ret	lr
+ENDPROC(v7_dma_inv_range)
+
+/*
+ *	v7_dma_clean_range(start,end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+v7_dma_clean_range:
+	dcache_line_size r2, r3
+	sub	r3, r2, #1
+	bic	r0, r0, r3
+#ifdef CONFIG_ARM_ERRATA_764369
+	ALT_SMP(W(dsb))
+	ALT_UP(W(nop))
+#endif
+1:
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D / U line
+	add	r0, r0, r2
+	cmp	r0, r1
+	blo	1b
+	dsb	st
+	ret	lr
+ENDPROC(v7_dma_clean_range)
+
+/*
+ *	v7_dma_flush_range(start,end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+ENTRY(v7_dma_flush_range)
+	dcache_line_size r2, r3
+	sub	r3, r2, #1
+	bic	r0, r0, r3
+#ifdef CONFIG_ARM_ERRATA_764369
+	ALT_SMP(W(dsb))
+	ALT_UP(W(nop))
+#endif
+1:
+	mcr	p15, 0, r0, c7, c14, 1		@ clean & invalidate D / U line
+	add	r0, r0, r2
+	cmp	r0, r1
+	blo	1b
+	dsb	st
+	ret	lr
+ENDPROC(v7_dma_flush_range)
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(v7_dma_map_area)
+	add	r1, r1, r0
+	teq	r2, #DMA_FROM_DEVICE
+	beq	v7_dma_inv_range
+	b	v7_dma_clean_range
+ENDPROC(v7_dma_map_area)
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(v7_dma_unmap_area)
+	add	r1, r1, r0
+	teq	r2, #DMA_TO_DEVICE
+	bne	v7_dma_inv_range
+	ret	lr
+ENDPROC(v7_dma_unmap_area)
+
+	__INITDATA
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions v7
+
+	/* The Broadcom Brahma-B15 read-ahead cache requires some modifications
+	 * to the v7_cache_fns; we only override the ones we need.
+	 */
+#ifndef CONFIG_CACHE_B15_RAC
+	globl_equ	b15_flush_kern_cache_all,	v7_flush_kern_cache_all
+#endif
+	globl_equ	b15_flush_icache_all,		v7_flush_icache_all
+	globl_equ	b15_flush_kern_cache_louis,	v7_flush_kern_cache_louis
+	globl_equ	b15_flush_user_cache_all,	v7_flush_user_cache_all
+	globl_equ	b15_flush_user_cache_range,	v7_flush_user_cache_range
+	globl_equ	b15_coherent_kern_range,	v7_coherent_kern_range
+	globl_equ	b15_coherent_user_range,	v7_coherent_user_range
+	globl_equ	b15_flush_kern_dcache_area,	v7_flush_kern_dcache_area
+
+	globl_equ	b15_dma_map_area,		v7_dma_map_area
+	globl_equ	b15_dma_unmap_area,		v7_dma_unmap_area
+	globl_equ	b15_dma_flush_range,		v7_dma_flush_range
+
+	define_cache_functions b15
diff --git a/arch/arm/mm/cache-v7m.S b/arch/arm/mm/cache-v7m.S
new file mode 100644
index 0000000..32aa2a2
--- /dev/null
+++ b/arch/arm/mm/cache-v7m.S
@@ -0,0 +1,457 @@
+/*
+ *  linux/arch/arm/mm/cache-v7m.S
+ *
+ *  Based on linux/arch/arm/mm/cache-v7.S
+ *
+ *  Copyright (C) 2001 Deep Blue Solutions Ltd.
+ *  Copyright (C) 2005 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  This is the "shell" of the ARMv7M processor support.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/errno.h>
+#include <asm/unwind.h>
+#include <asm/v7m.h>
+
+#include "proc-macros.S"
+
+/* Generic V7M read/write macros for memory mapped cache operations */
+.macro v7m_cache_read, rt, reg
+	movw	\rt, #:lower16:BASEADDR_V7M_SCB + \reg
+	movt	\rt, #:upper16:BASEADDR_V7M_SCB + \reg
+	ldr     \rt, [\rt]
+.endm
+
+.macro v7m_cacheop, rt, tmp, op, c = al
+	movw\c	\tmp, #:lower16:BASEADDR_V7M_SCB + \op
+	movt\c	\tmp, #:upper16:BASEADDR_V7M_SCB + \op
+	str\c	\rt, [\tmp]
+.endm
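+
+/*
+ * On v7-M there is no CP15; each maintenance operation is performed by
+ * storing the address (or set/way value) to a memory-mapped SCB register,
+ * which is all the macro above does.  Roughly, as C (illustrative only;
+ * the register offsets come from <asm/v7m.h>):
+ *
+ *	static inline void v7m_cacheop(u32 val, u32 scb_offset)
+ *	{
+ *		*(volatile u32 *)(BASEADDR_V7M_SCB + scb_offset) = val;
+ *	}
+ *
+ *	v7m_cacheop(addr, V7M_SCB_DCCMVAC);	// clean D line by MVA to PoC
+ */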
+
+
+.macro	read_ccsidr, rt
+	v7m_cache_read \rt, V7M_SCB_CCSIDR
+.endm
+
+.macro read_clidr, rt
+	v7m_cache_read \rt, V7M_SCB_CLIDR
+.endm
+
+.macro	write_csselr, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_CSSELR
+.endm
+
+/*
+ * dcisw: Invalidate data cache by set/way
+ */
+.macro dcisw, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_DCISW
+.endm
+
+/*
+ * dccisw: Clean and invalidate data cache by set/way
+ */
+.macro dccisw, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_DCCISW
+.endm
+
+/*
+ * dccimvac: Clean and invalidate data cache line by MVA to PoC.
+ */
+.irp    c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
+.macro dccimvac\c, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_DCCIMVAC, \c
+.endm
+.endr
+
+/*
+ * dcimvac: Invalidate data cache line by MVA to PoC
+ */
+.irp    c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
+.macro dcimvac\c, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_DCIMVAC, \c
+.endm
+.endr
+
+/*
+ * dccmvau: Clean data cache line by MVA to PoU
+ */
+.macro dccmvau, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_DCCMVAU
+.endm
+
+/*
+ * dccmvac: Clean data cache line by MVA to PoC
+ */
+.macro dccmvac,  rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_DCCMVAC
+.endm
+
+/*
+ * icimvau: Invalidate instruction caches by MVA to PoU
+ */
+.macro icimvau, rt, tmp
+	v7m_cacheop \rt, \tmp, V7M_SCB_ICIMVAU
+.endm
+
+/*
+ * Invalidate the icache, inner shareable if SMP, invalidate BTB for UP.
+ * rt data is ignored by ICIALLU(IS), so it can be used for the address
+ */
+.macro invalidate_icache, rt
+	v7m_cacheop \rt, \rt, V7M_SCB_ICIALLU
+	mov \rt, #0
+.endm
+
+/*
+ * Invalidate the BTB, inner shareable if SMP.
+ * rt data ignored by BPIALL, so it can be used for the address
+ */
+.macro invalidate_bp, rt
+	v7m_cacheop \rt, \rt, V7M_SCB_BPIALL
+	mov \rt, #0
+.endm
+
+ENTRY(v7m_invalidate_l1)
+	mov	r0, #0
+
+	write_csselr r0, r1
+	read_ccsidr r0
+
+	movw	r1, #0x7fff
+	and	r2, r1, r0, lsr #13
+
+	movw	r1, #0x3ff
+
+	and	r3, r1, r0, lsr #3      @ NumWays - 1
+	add	r2, r2, #1              @ NumSets
+
+	and	r0, r0, #0x7
+	add	r0, r0, #4      @ SetShift
+
+	clz	r1, r3          @ WayShift
+	add	r4, r3, #1      @ NumWays
+1:	sub	r2, r2, #1      @ NumSets--
+	mov	r3, r4          @ Temp = NumWays
+2:	subs	r3, r3, #1      @ Temp--
+	mov	r5, r3, lsl r1
+	mov	r6, r2, lsl r0
+	orr	r5, r5, r6      @ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
+	dcisw	r5, r6
+	bgt	2b
+	cmp	r2, #0
+	bgt	1b
+	dsb	st
+	isb
+	ret	lr
+ENDPROC(v7m_invalidate_l1)
+
+/*
+ *	v7m_flush_icache_all()
+ *
+ *	Flush the whole I-cache.
+ *
+ *	Registers:
+ *	r0 - set to 0
+ */
+ENTRY(v7m_flush_icache_all)
+	invalidate_icache r0
+	ret	lr
+ENDPROC(v7m_flush_icache_all)
+
+/*
+ *	v7m_flush_dcache_all()
+ *
+ *	Flush the whole D-cache.
+ *
+ *	Corrupted registers: r0-r7, r9-r11
+ */
+ENTRY(v7m_flush_dcache_all)
+	dmb					@ ensure ordering with previous memory accesses
+	read_clidr r0
+	mov	r3, r0, lsr #23			@ move LoC into position
+	ands	r3, r3, #7 << 1			@ extract LoC*2 from clidr
+	beq	finished			@ if loc is 0, then no need to clean
+start_flush_levels:
+	mov	r10, #0				@ start clean at cache level 0
+flush_levels:
+	add	r2, r10, r10, lsr #1		@ work out 3x current cache level
+	mov	r1, r0, lsr r2			@ extract cache type bits from clidr
+	and	r1, r1, #7			@ mask off the bits for the current cache only
+	cmp	r1, #2				@ see what cache we have at this level
+	blt	skip				@ skip if no cache, or just i-cache
+#ifdef CONFIG_PREEMPT
+	save_and_disable_irqs_notrace r9	@ make cssr&csidr read atomic
+#endif
+	write_csselr r10, r1			@ set current cache level
+	isb					@ isb to sync the new cssr&csidr
+	read_ccsidr r1				@ read the new csidr
+#ifdef CONFIG_PREEMPT
+	restore_irqs_notrace r9
+#endif
+	and	r2, r1, #7			@ extract the length of the cache lines
+	add	r2, r2, #4			@ add 4 (line length offset)
+	movw	r4, #0x3ff
+	ands	r4, r4, r1, lsr #3		@ find maximum way number (ways - 1)
+	clz	r5, r4				@ find bit position of way size increment
+	movw	r7, #0x7fff
+	ands	r7, r7, r1, lsr #13		@ extract maximum set number (sets - 1)
+loop1:
+	mov	r9, r7				@ create working copy of max index
+loop2:
+	lsl	r6, r4, r5
+	orr	r11, r10, r6			@ factor way and cache number into r11
+	lsl	r6, r9, r2
+	orr	r11, r11, r6			@ factor index number into r11
+	dccisw	r11, r6				@ clean/invalidate by set/way
+	subs	r9, r9, #1			@ decrement the index
+	bge	loop2
+	subs	r4, r4, #1			@ decrement the way
+	bge	loop1
+skip:
+	add	r10, r10, #2			@ increment cache number
+	cmp	r3, r10
+	bgt	flush_levels
+finished:
+	mov	r10, #0				@ switch back to cache level 0
+	write_csselr r10, r3			@ select current cache level in cssr
+	dsb	st
+	isb
+	ret	lr
+ENDPROC(v7m_flush_dcache_all)
+
+/*
+ *	v7m_flush_kern_cache_all()
+ *
+ *	Flush the entire cache system.
+ *	The data cache flush is achieved using atomic clean/invalidate
+ *	operations working outwards from the L1 cache, using Set/Way based
+ *	cache maintenance instructions.
+ *	The instruction cache can still be invalidated back to the point of
+ *	unification in a single instruction.
+ */
+ENTRY(v7m_flush_kern_cache_all)
+	stmfd	sp!, {r4-r7, r9-r11, lr}
+	bl	v7m_flush_dcache_all
+	invalidate_icache r0
+	ldmfd	sp!, {r4-r7, r9-r11, lr}
+	ret	lr
+ENDPROC(v7m_flush_kern_cache_all)
+
+/*
+ *	v7m_flush_user_cache_all()
+ *
+ *	Flush all cache entries in a particular address space
+ *
+ *	- mm    - mm_struct describing address space
+ */
+ENTRY(v7m_flush_user_cache_all)
+	/*FALLTHROUGH*/
+
+/*
+ *	v7m_flush_user_cache_range(start, end, flags)
+ *
+ *	Flush a range of cache entries in the specified address space.
+ *
+ *	- start - start address (may not be aligned)
+ *	- end   - end address (exclusive, may not be aligned)
+ *	- flags	- vm_area_struct flags describing address space
+ *
+ *	It is assumed that:
+ *	- we have a VIPT cache.
+ */
+ENTRY(v7m_flush_user_cache_range)
+	ret	lr
+ENDPROC(v7m_flush_user_cache_all)
+ENDPROC(v7m_flush_user_cache_range)
+
+/*
+ *	v7m_coherent_kern_range(start,end)
+ *
+ *	Ensure that the I and D caches are coherent within specified
+ *	region.  This is typically used when code has been written to
+ *	a memory region, and will be executed.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ *
+ *	It is assumed that:
+ *	- the Icache does not read data from the write buffer
+ */
+ENTRY(v7m_coherent_kern_range)
+	/* FALLTHROUGH */
+
+/*
+ *	v7m_coherent_user_range(start,end)
+ *
+ *	Ensure that the I and D caches are coherent within specified
+ *	region.  This is typically used when code has been written to
+ *	a memory region, and will be executed.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ *
+ *	It is assumed that:
+ *	- the Icache does not read data from the write buffer
+ */
+ENTRY(v7m_coherent_user_range)
+ UNWIND(.fnstart		)
+	dcache_line_size r2, r3
+	sub	r3, r2, #1
+	bic	r12, r0, r3
+1:
+/*
+ * We use an open-coded version of dccmvau here, otherwise USER() would
+ * point at the movw instruction.
+ */
+	dccmvau	r12, r3
+	add	r12, r12, r2
+	cmp	r12, r1
+	blo	1b
+	dsb	ishst
+	icache_line_size r2, r3
+	sub	r3, r2, #1
+	bic	r12, r0, r3
+2:
+	icimvau r12, r3
+	add	r12, r12, r2
+	cmp	r12, r1
+	blo	2b
+	invalidate_bp r0
+	dsb	ishst
+	isb
+	ret	lr
+ UNWIND(.fnend		)
+ENDPROC(v7m_coherent_kern_range)
+ENDPROC(v7m_coherent_user_range)
+
+/*
+ *	v7m_flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure that the data held in the page kaddr is written back
+ *	to the page in question.
+ *
+ *	- addr	- kernel address
+ *	- size	- region size
+ */
+ENTRY(v7m_flush_kern_dcache_area)
+	dcache_line_size r2, r3
+	add	r1, r0, r1
+	sub	r3, r2, #1
+	bic	r0, r0, r3
+1:
+	dccimvac r0, r3		@ clean & invalidate D line / unified line
+	add	r0, r0, r2
+	cmp	r0, r1
+	blo	1b
+	dsb	st
+	ret	lr
+ENDPROC(v7m_flush_kern_dcache_area)
+
+/*
+ *	v7m_dma_inv_range(start,end)
+ *
+ *	Invalidate the data cache within the specified region; we will
+ *	be performing a DMA operation in this region and we want to
+ *	purge old data in the cache.
+ *
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+v7m_dma_inv_range:
+	dcache_line_size r2, r3
+	sub	r3, r2, #1
+	tst	r0, r3
+	bic	r0, r0, r3
+	dccimvacne r0, r3
+	addne	r0, r0, r2
+	subne	r3, r2, #1	@ restore r3, corrupted by v7m's dccimvac
+	tst	r1, r3
+	bic	r1, r1, r3
+	dccimvacne r1, r3
+	cmp	r0, r1
+1:
+	dcimvaclo r0, r3
+	addlo	r0, r0, r2
+	cmplo	r0, r1
+	blo	1b
+	dsb	st
+	ret	lr
+ENDPROC(v7m_dma_inv_range)
+
+/*
+ *	v7m_dma_clean_range(start,end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+v7m_dma_clean_range:
+	dcache_line_size r2, r3
+	sub	r3, r2, #1
+	bic	r0, r0, r3
+1:
+	dccmvac r0, r3			@ clean D / U line
+	add	r0, r0, r2
+	cmp	r0, r1
+	blo	1b
+	dsb	st
+	ret	lr
+ENDPROC(v7m_dma_clean_range)
+
+/*
+ *	v7m_dma_flush_range(start,end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+ENTRY(v7m_dma_flush_range)
+	dcache_line_size r2, r3
+	sub	r3, r2, #1
+	bic	r0, r0, r3
+1:
+	dccimvac r0, r3			 @ clean & invalidate D / U line
+	add	r0, r0, r2
+	cmp	r0, r1
+	blo	1b
+	dsb	st
+	ret	lr
+ENDPROC(v7m_dma_flush_range)
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(v7m_dma_map_area)
+	add	r1, r1, r0
+	teq	r2, #DMA_FROM_DEVICE
+	beq	v7m_dma_inv_range
+	b	v7m_dma_clean_range
+ENDPROC(v7m_dma_map_area)
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(v7m_dma_unmap_area)
+	add	r1, r1, r0
+	teq	r2, #DMA_TO_DEVICE
+	bne	v7m_dma_inv_range
+	ret	lr
+ENDPROC(v7m_dma_unmap_area)
+
+	.globl	v7m_flush_kern_cache_louis
+	.equ	v7m_flush_kern_cache_louis, v7m_flush_kern_cache_all
+
+	__INITDATA
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions v7m
diff --git a/arch/arm/mm/cache-xsc3l2.c b/arch/arm/mm/cache-xsc3l2.c
new file mode 100644
index 0000000..6c3edeb
--- /dev/null
+++ b/arch/arm/mm/cache-xsc3l2.c
@@ -0,0 +1,220 @@
+/*
+ * arch/arm/mm/cache-xsc3l2.c - XScale3 L2 cache controller support
+ *
+ * Copyright (C) 2007 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <asm/cp15.h>
+#include <asm/cputype.h>
+#include <asm/cacheflush.h>
+
+#define CR_L2	(1 << 26)
+
+#define CACHE_LINE_SIZE		32
+#define CACHE_LINE_SHIFT	5
+#define CACHE_WAY_PER_SET	8
+
+#define CACHE_WAY_SIZE(l2ctype)	(8192 << (((l2ctype) >> 8) & 0xf))
+#define CACHE_SET_SIZE(l2ctype)	(CACHE_WAY_SIZE(l2ctype) >> CACHE_LINE_SHIFT)
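+
+/*
+ * Worked example (illustrative): an L2CTYPE value whose way-size field
+ * ((l2ctype >> 8) & 0xf) is 3 gives CACHE_WAY_SIZE = 8192 << 3 = 64 KiB per
+ * way, so CACHE_SET_SIZE = 64 KiB / 32 = 2048 lines per way, which is the
+ * set count iterated by the set/way loops below.
+ */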
+
+static inline int xsc3_l2_present(void)
+{
+	unsigned long l2ctype;
+
+	__asm__("mrc p15, 1, %0, c0, c0, 1" : "=r" (l2ctype));
+
+	return !!(l2ctype & 0xf8);
+}
+
+static inline void xsc3_l2_clean_mva(unsigned long addr)
+{
+	__asm__("mcr p15, 1, %0, c7, c11, 1" : : "r" (addr));
+}
+
+static inline void xsc3_l2_inv_mva(unsigned long addr)
+{
+	__asm__("mcr p15, 1, %0, c7, c7, 1" : : "r" (addr));
+}
+
+static inline void xsc3_l2_inv_all(void)
+{
+	unsigned long l2ctype, set_way;
+	int set, way;
+
+	__asm__("mrc p15, 1, %0, c0, c0, 1" : "=r" (l2ctype));
+
+	for (set = 0; set < CACHE_SET_SIZE(l2ctype); set++) {
+		for (way = 0; way < CACHE_WAY_PER_SET; way++) {
+			set_way = (way << 29) | (set << 5);
+			__asm__("mcr p15, 1, %0, c7, c11, 2" : : "r"(set_way));
+		}
+	}
+
+	dsb();
+}
+
+static inline void l2_unmap_va(unsigned long va)
+{
+#ifdef CONFIG_HIGHMEM
+	if (va != -1)
+		kunmap_atomic((void *)va);
+#endif
+}
+
+static inline unsigned long l2_map_va(unsigned long pa, unsigned long prev_va)
+{
+#ifdef CONFIG_HIGHMEM
+	unsigned long va = prev_va & PAGE_MASK;
+	unsigned long pa_offset = pa << (32 - PAGE_SHIFT);
+	if (unlikely(pa_offset < (prev_va << (32 - PAGE_SHIFT)))) {
+		/*
+		 * Switching to a new page.  Because cache ops are
+		 * using virtual addresses only, we must put a mapping
+		 * in place for it.
+		 */
+		l2_unmap_va(prev_va);
+		va = (unsigned long)kmap_atomic_pfn(pa >> PAGE_SHIFT);
+	}
+	return va + (pa_offset >> (32 - PAGE_SHIFT));
+#else
+	return __phys_to_virt(pa);
+#endif
+}
+
+static void xsc3_l2_inv_range(unsigned long start, unsigned long end)
+{
+	unsigned long vaddr;
+
+	if (start == 0 && end == -1ul) {
+		xsc3_l2_inv_all();
+		return;
+	}
+
+	vaddr = -1;  /* to force the first mapping */
+
+	/*
+	 * Clean and invalidate partial first cache line.
+	 */
+	if (start & (CACHE_LINE_SIZE - 1)) {
+		vaddr = l2_map_va(start & ~(CACHE_LINE_SIZE - 1), vaddr);
+		xsc3_l2_clean_mva(vaddr);
+		xsc3_l2_inv_mva(vaddr);
+		start = (start | (CACHE_LINE_SIZE - 1)) + 1;
+	}
+
+	/*
+	 * Invalidate all full cache lines between 'start' and 'end'.
+	 */
+	while (start < (end & ~(CACHE_LINE_SIZE - 1))) {
+		vaddr = l2_map_va(start, vaddr);
+		xsc3_l2_inv_mva(vaddr);
+		start += CACHE_LINE_SIZE;
+	}
+
+	/*
+	 * Clean and invalidate partial last cache line.
+	 */
+	if (start < end) {
+		vaddr = l2_map_va(start, vaddr);
+		xsc3_l2_clean_mva(vaddr);
+		xsc3_l2_inv_mva(vaddr);
+	}
+
+	l2_unmap_va(vaddr);
+
+	dsb();
+}
+
+static void xsc3_l2_clean_range(unsigned long start, unsigned long end)
+{
+	unsigned long vaddr;
+
+	vaddr = -1;  /* to force the first mapping */
+
+	start &= ~(CACHE_LINE_SIZE - 1);
+	while (start < end) {
+		vaddr = l2_map_va(start, vaddr);
+		xsc3_l2_clean_mva(vaddr);
+		start += CACHE_LINE_SIZE;
+	}
+
+	l2_unmap_va(vaddr);
+
+	dsb();
+}
+
+/*
+ * Optimized L2 flush-all operation using set/way format
+ */
+static inline void xsc3_l2_flush_all(void)
+{
+	unsigned long l2ctype, set_way;
+	int set, way;
+
+	__asm__("mrc p15, 1, %0, c0, c0, 1" : "=r" (l2ctype));
+
+	for (set = 0; set < CACHE_SET_SIZE(l2ctype); set++) {
+		for (way = 0; way < CACHE_WAY_PER_SET; way++) {
+			set_way = (way << 29) | (set << 5);
+			__asm__("mcr p15, 1, %0, c7, c15, 2" : : "r"(set_way));
+		}
+	}
+
+	dsb();
+}
+
+static void xsc3_l2_flush_range(unsigned long start, unsigned long end)
+{
+	unsigned long vaddr;
+
+	if (start == 0 && end == -1ul) {
+		xsc3_l2_flush_all();
+		return;
+	}
+
+	vaddr = -1;  /* to force the first mapping */
+
+	start &= ~(CACHE_LINE_SIZE - 1);
+	while (start < end) {
+		vaddr = l2_map_va(start, vaddr);
+		xsc3_l2_clean_mva(vaddr);
+		xsc3_l2_inv_mva(vaddr);
+		start += CACHE_LINE_SIZE;
+	}
+
+	l2_unmap_va(vaddr);
+
+	dsb();
+}
+
+static int __init xsc3_l2_init(void)
+{
+	if (!cpu_is_xsc3() || !xsc3_l2_present())
+		return 0;
+
+	if (get_cr() & CR_L2) {
+		pr_info("XScale3 L2 cache enabled.\n");
+		xsc3_l2_inv_all();
+
+		outer_cache.inv_range = xsc3_l2_inv_range;
+		outer_cache.clean_range = xsc3_l2_clean_range;
+		outer_cache.flush_range = xsc3_l2_flush_range;
+	}
+
+	return 0;
+}
+core_initcall(xsc3_l2_init);
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
new file mode 100644
index 0000000..c8c8b9e
--- /dev/null
+++ b/arch/arm/mm/context.c
@@ -0,0 +1,280 @@
+/*
+ *  linux/arch/arm/mm/context.c
+ *
+ *  Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved.
+ *  Copyright (C) 2012 ARM Limited
+ *
+ *  Author: Will Deacon <will.deacon@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
+
+#include <asm/mmu_context.h>
+#include <asm/smp_plat.h>
+#include <asm/thread_notify.h>
+#include <asm/tlbflush.h>
+#include <asm/proc-fns.h>
+
+/*
+ * On ARMv6, we have the following structure in the Context ID:
+ *
+ * 31                         7          0
+ * +-------------------------+-----------+
+ * |      process ID         |   ASID    |
+ * +-------------------------+-----------+
+ * |              context ID             |
+ * +-------------------------------------+
+ *
+ * The ASID is used to tag entries in the CPU caches and TLBs.
+ * The context ID is used by debuggers and trace logic, and
+ * should be unique within all running processes.
+ *
+ * In big endian operation, the two 32 bit words are swapped if accessed
+ * by non-64-bit operations.
+ */
+#define ASID_FIRST_VERSION	(1ULL << ASID_BITS)
+#define NUM_USER_ASIDS		ASID_FIRST_VERSION
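+
+/*
+ * Illustrative example (not part of the code): with ASID_BITS == 8, a
+ * 64-bit context.id value splits as
+ *
+ *	hw_asid    = id & ~ASID_MASK;	// bits [7:0], programmed into the MMU
+ *	generation = id &  ASID_MASK;	// bits [63:8], rollover count
+ *
+ * A context is current only while its generation matches asid_generation;
+ * ASID_FIRST_VERSION (1 << ASID_BITS) is therefore both the generation
+ * increment and the number of hardware ASIDs per generation.
+ */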
+
+static DEFINE_RAW_SPINLOCK(cpu_asid_lock);
+static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION);
+static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS);
+
+static DEFINE_PER_CPU(atomic64_t, active_asids);
+static DEFINE_PER_CPU(u64, reserved_asids);
+static cpumask_t tlb_flush_pending;
+
+#ifdef CONFIG_ARM_ERRATA_798181
+void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm,
+			     cpumask_t *mask)
+{
+	int cpu;
+	unsigned long flags;
+	u64 context_id, asid;
+
+	raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+	context_id = mm->context.id.counter;
+	for_each_online_cpu(cpu) {
+		if (cpu == this_cpu)
+			continue;
+		/*
+		 * We only need to send an IPI if the other CPUs are
+		 * running the same ASID as the one being invalidated.
+		 */
+		asid = per_cpu(active_asids, cpu).counter;
+		if (asid == 0)
+			asid = per_cpu(reserved_asids, cpu);
+		if (context_id == asid)
+			cpumask_set_cpu(cpu, mask);
+	}
+	raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
+}
+#endif
+
+#ifdef CONFIG_ARM_LPAE
+/*
+ * With LPAE, the ASID and page tables are updated atomically, so there is
+ * no need for a reserved set of tables (the active ASID tracking prevents
+ * any issues across a rollover).
+ */
+#define cpu_set_reserved_ttbr0()
+#else
+static void cpu_set_reserved_ttbr0(void)
+{
+	u32 ttb;
+	/*
+	 * Copy TTBR1 into TTBR0.
+	 * This points at swapper_pg_dir, which contains only global
+	 * entries so any speculative walks are perfectly safe.
+	 */
+	asm volatile(
+	"	mrc	p15, 0, %0, c2, c0, 1		@ read TTBR1\n"
+	"	mcr	p15, 0, %0, c2, c0, 0		@ set TTBR0\n"
+	: "=r" (ttb));
+	isb();
+}
+#endif
+
+#ifdef CONFIG_PID_IN_CONTEXTIDR
+static int contextidr_notifier(struct notifier_block *unused, unsigned long cmd,
+			       void *t)
+{
+	u32 contextidr;
+	pid_t pid;
+	struct thread_info *thread = t;
+
+	if (cmd != THREAD_NOTIFY_SWITCH)
+		return NOTIFY_DONE;
+
+	pid = task_pid_nr(thread->task) << ASID_BITS;
+	asm volatile(
+	"	mrc	p15, 0, %0, c13, c0, 1\n"
+	"	and	%0, %0, %2\n"
+	"	orr	%0, %0, %1\n"
+	"	mcr	p15, 0, %0, c13, c0, 1\n"
+	: "=r" (contextidr), "+r" (pid)
+	: "I" (~ASID_MASK));
+	isb();
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block contextidr_notifier_block = {
+	.notifier_call = contextidr_notifier,
+};
+
+static int __init contextidr_notifier_init(void)
+{
+	return thread_register_notifier(&contextidr_notifier_block);
+}
+arch_initcall(contextidr_notifier_init);
+#endif
+
+static void flush_context(unsigned int cpu)
+{
+	int i;
+	u64 asid;
+
+	/* Update the list of reserved ASIDs and the ASID bitmap. */
+	bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
+	for_each_possible_cpu(i) {
+		asid = atomic64_xchg(&per_cpu(active_asids, i), 0);
+		/*
+		 * If this CPU has already been through a
+		 * rollover, but hasn't run another task in
+		 * the meantime, we must preserve its reserved
+		 * ASID, as this is the only trace we have of
+		 * the process it is still running.
+		 */
+		if (asid == 0)
+			asid = per_cpu(reserved_asids, i);
+		__set_bit(asid & ~ASID_MASK, asid_map);
+		per_cpu(reserved_asids, i) = asid;
+	}
+
+	/* Queue a TLB invalidate and flush the I-cache if necessary. */
+	cpumask_setall(&tlb_flush_pending);
+
+	if (icache_is_vivt_asid_tagged())
+		__flush_icache_all();
+}
+
+static bool check_update_reserved_asid(u64 asid, u64 newasid)
+{
+	int cpu;
+	bool hit = false;
+
+	/*
+	 * Iterate over the set of reserved ASIDs looking for a match.
+	 * If we find one, then we can update our mm to use newasid
+	 * (i.e. the same ASID in the current generation) but we can't
+	 * exit the loop early, since we need to ensure that all copies
+	 * of the old ASID are updated to reflect the mm. Failure to do
+	 * so could result in us missing the reserved ASID in a future
+	 * generation.
+	 */
+	for_each_possible_cpu(cpu) {
+		if (per_cpu(reserved_asids, cpu) == asid) {
+			hit = true;
+			per_cpu(reserved_asids, cpu) = newasid;
+		}
+	}
+
+	return hit;
+}
+
+static u64 new_context(struct mm_struct *mm, unsigned int cpu)
+{
+	static u32 cur_idx = 1;
+	u64 asid = atomic64_read(&mm->context.id);
+	u64 generation = atomic64_read(&asid_generation);
+
+	if (asid != 0) {
+		u64 newasid = generation | (asid & ~ASID_MASK);
+
+		/*
+		 * If our current ASID was active during a rollover, we
+		 * can continue to use it and this was just a false alarm.
+		 */
+		if (check_update_reserved_asid(asid, newasid))
+			return newasid;
+
+		/*
+		 * We had a valid ASID in a previous life, so try to re-use
+		 * it if possible.
+		 */
+		asid &= ~ASID_MASK;
+		if (!__test_and_set_bit(asid, asid_map))
+			return newasid;
+	}
+
+	/*
+	 * Allocate a free ASID. If we can't find one, take a note of the
+	 * currently active ASIDs and mark the TLBs as requiring flushes.
+	 * We always count from ASID #1, as we reserve ASID #0 to switch
+	 * via TTBR0 and to avoid speculative page table walks from hitting
+	 * in any partial walk caches, which could be populated from
+	 * overlapping level-1 descriptors used to map both the module
+	 * area and the userspace stack.
+	 */
+	asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
+	if (asid == NUM_USER_ASIDS) {
+		generation = atomic64_add_return(ASID_FIRST_VERSION,
+						 &asid_generation);
+		flush_context(cpu);
+		asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, 1);
+	}
+
+	__set_bit(asid, asid_map);
+	cur_idx = asid;
+	cpumask_clear(mm_cpumask(mm));
+	return asid | generation;
+}
+
+void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)
+{
+	unsigned long flags;
+	unsigned int cpu = smp_processor_id();
+	u64 asid;
+
+	if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq))
+		__check_vmalloc_seq(mm);
+
+	/*
+	 * We cannot update the pgd and the ASID atomically with classic
+	 * MMU, so switch exclusively to global mappings to avoid
+	 * speculative page table walking with the wrong TTBR.
+	 */
+	cpu_set_reserved_ttbr0();
+
+	asid = atomic64_read(&mm->context.id);
+	if (!((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS)
+	    && atomic64_xchg(&per_cpu(active_asids, cpu), asid))
+		goto switch_mm_fastpath;
+
+	raw_spin_lock_irqsave(&cpu_asid_lock, flags);
+	/* Check that our ASID belongs to the current generation. */
+	asid = atomic64_read(&mm->context.id);
+	if ((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS) {
+		asid = new_context(mm, cpu);
+		atomic64_set(&mm->context.id, asid);
+	}
+
+	if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) {
+		local_flush_bp_all();
+		local_flush_tlb_all();
+	}
+
+	atomic64_set(&per_cpu(active_asids, cpu), asid);
+	cpumask_set_cpu(cpu, mm_cpumask(mm));
+	raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
+
+switch_mm_fastpath:
+	cpu_switch_mm(mm->pgd, mm);
+}
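+
+/*
+ * Note on the fastpath test above (illustrative): the expression
+ *
+ *	(asid ^ atomic64_read(&asid_generation)) >> ASID_BITS
+ *
+ * is zero only when the generation bits match, i.e. the mm's ASID was
+ * allocated in the current generation.  The atomic64_xchg() on active_asids
+ * must also return non-zero: a zero means a concurrent rollover has cleared
+ * it, so the slow path under cpu_asid_lock is taken instead.
+ */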
diff --git a/arch/arm/mm/copypage-fa.c b/arch/arm/mm/copypage-fa.c
new file mode 100644
index 0000000..d130a5e
--- /dev/null
+++ b/arch/arm/mm/copypage-fa.c
@@ -0,0 +1,86 @@
+/*
+ *  linux/arch/arm/lib/copypage-fa.S
+ *
+ *  Copyright (C) 2005 Faraday Corp.
+ *  Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on copypage-v4wb.S:
+ *  Copyright (C) 1995-1999 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/highmem.h>
+
+/*
+ * Faraday optimised copy_user_page
+ */
+static void __naked
+fa_copy_user_page(void *kto, const void *kfrom)
+{
+	asm("\
+	stmfd	sp!, {r4, lr}			@ 2\n\
+	mov	r2, %0				@ 1\n\
+1:	ldmia	r1!, {r3, r4, ip, lr}		@ 4\n\
+	stmia	r0, {r3, r4, ip, lr}		@ 4\n\
+	mcr	p15, 0, r0, c7, c14, 1		@ 1   clean and invalidate D line\n\
+	add	r0, r0, #16			@ 1\n\
+	ldmia	r1!, {r3, r4, ip, lr}		@ 4\n\
+	stmia	r0, {r3, r4, ip, lr}		@ 4\n\
+	mcr	p15, 0, r0, c7, c14, 1		@ 1   clean and invalidate D line\n\
+	add	r0, r0, #16			@ 1\n\
+	subs	r2, r2, #1			@ 1\n\
+	bne	1b				@ 1\n\
+	mcr	p15, 0, r2, c7, c10, 4		@ 1   drain WB\n\
+	ldmfd	sp!, {r4, pc}			@ 3"
+	:
+	: "I" (PAGE_SIZE / 32));
+}
+
+void fa_copy_user_highpage(struct page *to, struct page *from,
+	unsigned long vaddr, struct vm_area_struct *vma)
+{
+	void *kto, *kfrom;
+
+	kto = kmap_atomic(to);
+	kfrom = kmap_atomic(from);
+	fa_copy_user_page(kto, kfrom);
+	kunmap_atomic(kfrom);
+	kunmap_atomic(kto);
+}
+
+/*
+ * Faraday optimised clear_user_page
+ *
+ * Same story as above.
+ */
+void fa_clear_user_highpage(struct page *page, unsigned long vaddr)
+{
+	void *ptr, *kaddr = kmap_atomic(page);
+	asm volatile("\
+	mov	r1, %2				@ 1\n\
+	mov	r2, #0				@ 1\n\
+	mov	r3, #0				@ 1\n\
+	mov	ip, #0				@ 1\n\
+	mov	lr, #0				@ 1\n\
+1:	stmia	%0, {r2, r3, ip, lr}		@ 4\n\
+	mcr	p15, 0, %0, c7, c14, 1		@ 1   clean and invalidate D line\n\
+	add	%0, %0, #16			@ 1\n\
+	stmia	%0, {r2, r3, ip, lr}		@ 4\n\
+	mcr	p15, 0, %0, c7, c14, 1		@ 1   clean and invalidate D line\n\
+	add	%0, %0, #16			@ 1\n\
+	subs	r1, r1, #1			@ 1\n\
+	bne	1b				@ 1\n\
+	mcr	p15, 0, r1, c7, c10, 4		@ 1   drain WB"
+	: "=r" (ptr)
+	: "0" (kaddr), "I" (PAGE_SIZE / 32)
+	: "r1", "r2", "r3", "ip", "lr");
+	kunmap_atomic(kaddr);
+}
+
+struct cpu_user_fns fa_user_fns __initdata = {
+	.cpu_clear_user_highpage = fa_clear_user_highpage,
+	.cpu_copy_user_highpage	= fa_copy_user_highpage,
+};
diff --git a/arch/arm/mm/copypage-feroceon.c b/arch/arm/mm/copypage-feroceon.c
new file mode 100644
index 0000000..49ee0c1
--- /dev/null
+++ b/arch/arm/mm/copypage-feroceon.c
@@ -0,0 +1,112 @@
+/*
+ *  linux/arch/arm/mm/copypage-feroceon.S
+ *
+ *  Copyright (C) 2008 Marvell Semiconductors
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This handles copy_user_highpage and clear_user_page on Feroceon
+ * more optimally than the generic implementations.
+ */
+#include <linux/init.h>
+#include <linux/highmem.h>
+
+static void __naked
+feroceon_copy_user_page(void *kto, const void *kfrom)
+{
+	asm("\
+	stmfd	sp!, {r4-r9, lr}		\n\
+	mov	ip, %2				\n\
+1:	mov	lr, r1				\n\
+	ldmia	r1!, {r2 - r9}			\n\
+	pld	[lr, #32]			\n\
+	pld	[lr, #64]			\n\
+	pld	[lr, #96]			\n\
+	pld	[lr, #128]			\n\
+	pld	[lr, #160]			\n\
+	pld	[lr, #192]			\n\
+	pld	[lr, #224]			\n\
+	stmia	r0, {r2 - r9}			\n\
+	ldmia	r1!, {r2 - r9}			\n\
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D line\n\
+	add	r0, r0, #32			\n\
+	stmia	r0, {r2 - r9}			\n\
+	ldmia	r1!, {r2 - r9}			\n\
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D line\n\
+	add	r0, r0, #32			\n\
+	stmia	r0, {r2 - r9}			\n\
+	ldmia	r1!, {r2 - r9}			\n\
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D line\n\
+	add	r0, r0, #32			\n\
+	stmia	r0, {r2 - r9}			\n\
+	ldmia	r1!, {r2 - r9}			\n\
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D line\n\
+	add	r0, r0, #32			\n\
+	stmia	r0, {r2 - r9}			\n\
+	ldmia	r1!, {r2 - r9}			\n\
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D line\n\
+	add	r0, r0, #32			\n\
+	stmia	r0, {r2 - r9}			\n\
+	ldmia	r1!, {r2 - r9}			\n\
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D line\n\
+	add	r0, r0, #32			\n\
+	stmia	r0, {r2 - r9}			\n\
+	ldmia	r1!, {r2 - r9}			\n\
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D line\n\
+	add	r0, r0, #32			\n\
+	stmia	r0, {r2 - r9}			\n\
+	subs	ip, ip, #(32 * 8)		\n\
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D line\n\
+	add	r0, r0, #32			\n\
+	bne	1b				\n\
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB\n\
+	ldmfd	sp!, {r4-r9, pc}"
+	:
+	: "r" (kto), "r" (kfrom), "I" (PAGE_SIZE));
+}
+
+void feroceon_copy_user_highpage(struct page *to, struct page *from,
+	unsigned long vaddr, struct vm_area_struct *vma)
+{
+	void *kto, *kfrom;
+
+	kto = kmap_atomic(to);
+	kfrom = kmap_atomic(from);
+	flush_cache_page(vma, vaddr, page_to_pfn(from));
+	feroceon_copy_user_page(kto, kfrom);
+	kunmap_atomic(kfrom);
+	kunmap_atomic(kto);
+}
+
+void feroceon_clear_user_highpage(struct page *page, unsigned long vaddr)
+{
+	void *ptr, *kaddr = kmap_atomic(page);
+	asm volatile ("\
+	mov	r1, %2				\n\
+	mov	r2, #0				\n\
+	mov	r3, #0				\n\
+	mov	r4, #0				\n\
+	mov	r5, #0				\n\
+	mov	r6, #0				\n\
+	mov	r7, #0				\n\
+	mov	ip, #0				\n\
+	mov	lr, #0				\n\
+1:	stmia	%0, {r2-r7, ip, lr}		\n\
+	subs	r1, r1, #1			\n\
+	mcr	p15, 0, %0, c7, c14, 1		@ clean and invalidate D line\n\
+	add	%0, %0, #32			\n\
+	bne	1b				\n\
+	mcr	p15, 0, r1, c7, c10, 4		@ drain WB"
+	: "=r" (ptr)
+	: "0" (kaddr), "I" (PAGE_SIZE / 32)
+	: "r1", "r2", "r3", "r4", "r5", "r6", "r7", "ip", "lr");
+	kunmap_atomic(kaddr);
+}
+
+struct cpu_user_fns feroceon_user_fns __initdata = {
+	.cpu_clear_user_highpage = feroceon_clear_user_highpage,
+	.cpu_copy_user_highpage	= feroceon_copy_user_highpage,
+};
+
diff --git a/arch/arm/mm/copypage-v4mc.c b/arch/arm/mm/copypage-v4mc.c
new file mode 100644
index 0000000..0224416
--- /dev/null
+++ b/arch/arm/mm/copypage-v4mc.c
@@ -0,0 +1,115 @@
+/*
+ *  linux/arch/arm/lib/copypage-armv4mc.S
+ *
+ *  Copyright (C) 1995-2005 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This handles the mini data cache, as found on SA11x0 and XScale
+ * processors.  When we copy a user page, we map it in such a way
+ * that accesses to this page will not touch the main data cache, but
+ * will be cached in the mini data cache.  This prevents us thrashing
+ * the main data cache on page faults.
+ */
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+
+#include "mm.h"
+
+#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
+				  L_PTE_MT_MINICACHE)
+
+static DEFINE_RAW_SPINLOCK(minicache_lock);
+
+/*
+ * ARMv4 mini-dcache optimised copy_user_highpage
+ *
+ * We flush the destination cache lines just before we write the data into the
+ * corresponding address.  Since the Dcache is read-allocate, this removes the
+ * Dcache aliasing issue.  The writes will be forwarded to the write buffer,
+ * and merged as appropriate.
+ *
+ * Note: We rely on all ARMv4 processors implementing the "invalidate D line"
+ * instruction.  If your processor does not supply this, you have to write your
+ * own copy_user_highpage that does the right thing.
+ */
+static void __naked
+mc_copy_user_page(void *from, void *to)
+{
+	asm volatile(
+	"stmfd	sp!, {r4, lr}			@ 2\n\
+	mov	r4, %2				@ 1\n\
+	ldmia	%0!, {r2, r3, ip, lr}		@ 4\n\
+1:	mcr	p15, 0, %1, c7, c6, 1		@ 1   invalidate D line\n\
+	stmia	%1!, {r2, r3, ip, lr}		@ 4\n\
+	ldmia	%0!, {r2, r3, ip, lr}		@ 4+1\n\
+	stmia	%1!, {r2, r3, ip, lr}		@ 4\n\
+	ldmia	%0!, {r2, r3, ip, lr}		@ 4\n\
+	mcr	p15, 0, %1, c7, c6, 1		@ 1   invalidate D line\n\
+	stmia	%1!, {r2, r3, ip, lr}		@ 4\n\
+	ldmia	%0!, {r2, r3, ip, lr}		@ 4\n\
+	subs	r4, r4, #1			@ 1\n\
+	stmia	%1!, {r2, r3, ip, lr}		@ 4\n\
+	ldmneia	%0!, {r2, r3, ip, lr}		@ 4\n\
+	bne	1b				@ 1\n\
+	ldmfd	sp!, {r4, pc}			@ 3"
+	:
+	: "r" (from), "r" (to), "I" (PAGE_SIZE / 64));
+}
+
+void v4_mc_copy_user_highpage(struct page *to, struct page *from,
+	unsigned long vaddr, struct vm_area_struct *vma)
+{
+	void *kto = kmap_atomic(to);
+
+	if (!test_and_set_bit(PG_dcache_clean, &from->flags))
+		__flush_dcache_page(page_mapping_file(from), from);
+
+	raw_spin_lock(&minicache_lock);
+
+	set_top_pte(COPYPAGE_MINICACHE, mk_pte(from, minicache_pgprot));
+
+	mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto);
+
+	raw_spin_unlock(&minicache_lock);
+
+	kunmap_atomic(kto);
+}
+
+/*
+ * ARMv4 optimised clear_user_page
+ */
+void v4_mc_clear_user_highpage(struct page *page, unsigned long vaddr)
+{
+	void *ptr, *kaddr = kmap_atomic(page);
+	asm volatile("\
+	mov	r1, %2				@ 1\n\
+	mov	r2, #0				@ 1\n\
+	mov	r3, #0				@ 1\n\
+	mov	ip, #0				@ 1\n\
+	mov	lr, #0				@ 1\n\
+1:	mcr	p15, 0, %0, c7, c6, 1		@ 1   invalidate D line\n\
+	stmia	%0!, {r2, r3, ip, lr}		@ 4\n\
+	stmia	%0!, {r2, r3, ip, lr}		@ 4\n\
+	mcr	p15, 0, %0, c7, c6, 1		@ 1   invalidate D line\n\
+	stmia	%0!, {r2, r3, ip, lr}		@ 4\n\
+	stmia	%0!, {r2, r3, ip, lr}		@ 4\n\
+	subs	r1, r1, #1			@ 1\n\
+	bne	1b				@ 1"
+	: "=r" (ptr)
+	: "0" (kaddr), "I" (PAGE_SIZE / 64)
+	: "r1", "r2", "r3", "ip", "lr");
+	kunmap_atomic(kaddr);
+}
+
+struct cpu_user_fns v4_mc_user_fns __initdata = {
+	.cpu_clear_user_highpage = v4_mc_clear_user_highpage,
+	.cpu_copy_user_highpage	= v4_mc_copy_user_highpage,
+};
diff --git a/arch/arm/mm/copypage-v4wb.c b/arch/arm/mm/copypage-v4wb.c
new file mode 100644
index 0000000..067d0fd
--- /dev/null
+++ b/arch/arm/mm/copypage-v4wb.c
@@ -0,0 +1,95 @@
+/*
+ *  linux/arch/arm/mm/copypage-v4wb.c
+ *
+ *  Copyright (C) 1995-1999 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/highmem.h>
+
+/*
+ * ARMv4 optimised copy_user_highpage
+ *
+ * We flush the destination cache lines just before we write the data into the
+ * corresponding address.  Since the Dcache is read-allocate, this removes the
+ * Dcache aliasing issue.  The writes will be forwarded to the write buffer,
+ * and merged as appropriate.
+ *
+ * Note: We rely on all ARMv4 processors implementing the "invalidate D line"
+ * instruction.  If your processor does not supply this, you have to write your
+ * own copy_user_highpage that does the right thing.
+ */
+static void __naked
+v4wb_copy_user_page(void *kto, const void *kfrom)
+{
+	asm("\
+	stmfd	sp!, {r4, lr}			@ 2\n\
+	mov	r2, %2				@ 1\n\
+	ldmia	r1!, {r3, r4, ip, lr}		@ 4\n\
+1:	mcr	p15, 0, r0, c7, c6, 1		@ 1   invalidate D line\n\
+	stmia	r0!, {r3, r4, ip, lr}		@ 4\n\
+	ldmia	r1!, {r3, r4, ip, lr}		@ 4+1\n\
+	stmia	r0!, {r3, r4, ip, lr}		@ 4\n\
+	ldmia	r1!, {r3, r4, ip, lr}		@ 4\n\
+	mcr	p15, 0, r0, c7, c6, 1		@ 1   invalidate D line\n\
+	stmia	r0!, {r3, r4, ip, lr}		@ 4\n\
+	ldmia	r1!, {r3, r4, ip, lr}		@ 4\n\
+	subs	r2, r2, #1			@ 1\n\
+	stmia	r0!, {r3, r4, ip, lr}		@ 4\n\
+	ldmneia	r1!, {r3, r4, ip, lr}		@ 4\n\
+	bne	1b				@ 1\n\
+	mcr	p15, 0, r1, c7, c10, 4		@ 1   drain WB\n\
+	ldmfd	 sp!, {r4, pc}			@ 3"
+	:
+	: "r" (kto), "r" (kfrom), "I" (PAGE_SIZE / 64));
+}
+
+void v4wb_copy_user_highpage(struct page *to, struct page *from,
+	unsigned long vaddr, struct vm_area_struct *vma)
+{
+	void *kto, *kfrom;
+
+	kto = kmap_atomic(to);
+	kfrom = kmap_atomic(from);
+	flush_cache_page(vma, vaddr, page_to_pfn(from));
+	v4wb_copy_user_page(kto, kfrom);
+	kunmap_atomic(kfrom);
+	kunmap_atomic(kto);
+}
+
+/*
+ * ARMv4 optimised clear_user_page
+ *
+ * Same story as above.
+ */
+void v4wb_clear_user_highpage(struct page *page, unsigned long vaddr)
+{
+	void *ptr, *kaddr = kmap_atomic(page);
+	asm volatile("\
+	mov	r1, %2				@ 1\n\
+	mov	r2, #0				@ 1\n\
+	mov	r3, #0				@ 1\n\
+	mov	ip, #0				@ 1\n\
+	mov	lr, #0				@ 1\n\
+1:	mcr	p15, 0, %0, c7, c6, 1		@ 1   invalidate D line\n\
+	stmia	%0!, {r2, r3, ip, lr}		@ 4\n\
+	stmia	%0!, {r2, r3, ip, lr}		@ 4\n\
+	mcr	p15, 0, %0, c7, c6, 1		@ 1   invalidate D line\n\
+	stmia	%0!, {r2, r3, ip, lr}		@ 4\n\
+	stmia	%0!, {r2, r3, ip, lr}		@ 4\n\
+	subs	r1, r1, #1			@ 1\n\
+	bne	1b				@ 1\n\
+	mcr	p15, 0, r1, c7, c10, 4		@ 1   drain WB"
+	: "=r" (ptr)
+	: "0" (kaddr), "I" (PAGE_SIZE / 64)
+	: "r1", "r2", "r3", "ip", "lr");
+	kunmap_atomic(kaddr);
+}
+
+struct cpu_user_fns v4wb_user_fns __initdata = {
+	.cpu_clear_user_highpage = v4wb_clear_user_highpage,
+	.cpu_copy_user_highpage	= v4wb_copy_user_highpage,
+};
diff --git a/arch/arm/mm/copypage-v4wt.c b/arch/arm/mm/copypage-v4wt.c
new file mode 100644
index 0000000..b85c5da
--- /dev/null
+++ b/arch/arm/mm/copypage-v4wt.c
@@ -0,0 +1,88 @@
+/*
+ *  linux/arch/arm/mm/copypage-v4wt.S
+ *
+ *  Copyright (C) 1995-1999 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  This is for CPUs with a writethrough cache and 'flush ID cache' is
+ *  the only supported cache operation.
+ */
+#include <linux/init.h>
+#include <linux/highmem.h>
+
+/*
+ * ARMv4 optimised copy_user_highpage
+ *
+ * Since we have writethrough caches, we don't have to worry about
+ * dirty data in the cache.  However, we do have to ensure that
+ * subsequent reads are up to date.
+ */
+static void __naked
+v4wt_copy_user_page(void *kto, const void *kfrom)
+{
+	asm("\
+	stmfd	sp!, {r4, lr}			@ 2\n\
+	mov	r2, %2				@ 1\n\
+	ldmia	r1!, {r3, r4, ip, lr}		@ 4\n\
+1:	stmia	r0!, {r3, r4, ip, lr}		@ 4\n\
+	ldmia	r1!, {r3, r4, ip, lr}		@ 4+1\n\
+	stmia	r0!, {r3, r4, ip, lr}		@ 4\n\
+	ldmia	r1!, {r3, r4, ip, lr}		@ 4\n\
+	stmia	r0!, {r3, r4, ip, lr}		@ 4\n\
+	ldmia	r1!, {r3, r4, ip, lr}		@ 4\n\
+	subs	r2, r2, #1			@ 1\n\
+	stmia	r0!, {r3, r4, ip, lr}		@ 4\n\
+	ldmneia	r1!, {r3, r4, ip, lr}		@ 4\n\
+	bne	1b				@ 1\n\
+	mcr	p15, 0, r2, c7, c7, 0		@ flush ID cache\n\
+	ldmfd	sp!, {r4, pc}			@ 3"
+	:
+	: "r" (kto), "r" (kfrom), "I" (PAGE_SIZE / 64));
+}
+
+void v4wt_copy_user_highpage(struct page *to, struct page *from,
+	unsigned long vaddr, struct vm_area_struct *vma)
+{
+	void *kto, *kfrom;
+
+	kto = kmap_atomic(to);
+	kfrom = kmap_atomic(from);
+	v4wt_copy_user_page(kto, kfrom);
+	kunmap_atomic(kfrom);
+	kunmap_atomic(kto);
+}
+
+/*
+ * ARMv4 optimised clear_user_page
+ *
+ * Same story as above.
+ */
+void v4wt_clear_user_highpage(struct page *page, unsigned long vaddr)
+{
+	void *ptr, *kaddr = kmap_atomic(page);
+	asm volatile("\
+	mov	r1, %2				@ 1\n\
+	mov	r2, #0				@ 1\n\
+	mov	r3, #0				@ 1\n\
+	mov	ip, #0				@ 1\n\
+	mov	lr, #0				@ 1\n\
+1:	stmia	%0!, {r2, r3, ip, lr}		@ 4\n\
+	stmia	%0!, {r2, r3, ip, lr}		@ 4\n\
+	stmia	%0!, {r2, r3, ip, lr}		@ 4\n\
+	stmia	%0!, {r2, r3, ip, lr}		@ 4\n\
+	subs	r1, r1, #1			@ 1\n\
+	bne	1b				@ 1\n\
+	mcr	p15, 0, r2, c7, c7, 0		@ flush ID cache"
+	: "=r" (ptr)
+	: "0" (kaddr), "I" (PAGE_SIZE / 64)
+	: "r1", "r2", "r3", "ip", "lr");
+	kunmap_atomic(kaddr);
+}
+
+struct cpu_user_fns v4wt_user_fns __initdata = {
+	.cpu_clear_user_highpage = v4wt_clear_user_highpage,
+	.cpu_copy_user_highpage	= v4wt_copy_user_highpage,
+};
diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c
new file mode 100644
index 0000000..a698e57
--- /dev/null
+++ b/arch/arm/mm/copypage-v6.c
@@ -0,0 +1,140 @@
+/*
+ *  linux/arch/arm/mm/copypage-v6.c
+ *
+ *  Copyright (C) 2002 Deep Blue Solutions Ltd, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+
+#include <asm/pgtable.h>
+#include <asm/shmparam.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+#include <asm/cachetype.h>
+
+#include "mm.h"
+
+#if SHMLBA > 16384
+#error FIX ME
+#endif
+
+static DEFINE_RAW_SPINLOCK(v6_lock);
+
+/*
+ * Copy the user page.  No aliasing to deal with so we can just
+ * attack the kernel's existing mapping of these pages.
+ */
+static void v6_copy_user_highpage_nonaliasing(struct page *to,
+	struct page *from, unsigned long vaddr, struct vm_area_struct *vma)
+{
+	void *kto, *kfrom;
+
+	kfrom = kmap_atomic(from);
+	kto = kmap_atomic(to);
+	copy_page(kto, kfrom);
+	kunmap_atomic(kto);
+	kunmap_atomic(kfrom);
+}
+
+/*
+ * Clear the user page.  No aliasing to deal with so we can just
+ * attack the kernel's existing mapping of this page.
+ */
+static void v6_clear_user_highpage_nonaliasing(struct page *page, unsigned long vaddr)
+{
+	void *kaddr = kmap_atomic(page);
+	clear_page(kaddr);
+	kunmap_atomic(kaddr);
+}
+
+/*
+ * Discard data in the kernel mapping for the new page.
+ * FIXME: this relies on the MCRR instruction being supported.
+ */
+static void discard_old_kernel_data(void *kto)
+{
+	__asm__("mcrr	p15, 0, %1, %0, c6	@ 0xec401f06"
+	   :
+	   : "r" (kto),
+	     "r" ((unsigned long)kto + PAGE_SIZE - 1)
+	   : "cc");
+}
+
+/*
+ * Copy the page, taking account of the cache colour.
+ */
+static void v6_copy_user_highpage_aliasing(struct page *to,
+	struct page *from, unsigned long vaddr, struct vm_area_struct *vma)
+{
+	unsigned int offset = CACHE_COLOUR(vaddr);
+	unsigned long kfrom, kto;
+
+	if (!test_and_set_bit(PG_dcache_clean, &from->flags))
+		__flush_dcache_page(page_mapping_file(from), from);
+
+	/* FIXME: not highmem safe */
+	discard_old_kernel_data(page_address(to));
+
+	/*
+	 * Now copy the page using the same cache colour as the
+	 * page's ultimate destination.
+	 */
+	raw_spin_lock(&v6_lock);
+
+	kfrom = COPYPAGE_V6_FROM + (offset << PAGE_SHIFT);
+	kto   = COPYPAGE_V6_TO + (offset << PAGE_SHIFT);
+
+	set_top_pte(kfrom, mk_pte(from, PAGE_KERNEL));
+	set_top_pte(kto, mk_pte(to, PAGE_KERNEL));
+
+	copy_page((void *)kto, (void *)kfrom);
+
+	raw_spin_unlock(&v6_lock);
+}
+
+/*
+ * Clear the user page.  We need to deal with the aliasing issues,
+ * so remap the kernel page into the same cache colour as the user
+ * page.
+ */
+static void v6_clear_user_highpage_aliasing(struct page *page, unsigned long vaddr)
+{
+	unsigned long to = COPYPAGE_V6_TO + (CACHE_COLOUR(vaddr) << PAGE_SHIFT);
+
+	/* FIXME: not highmem safe */
+	discard_old_kernel_data(page_address(page));
+
+	/*
+	 * Now clear the page using the same cache colour as
+	 * the page's ultimate destination.
+	 */
+	raw_spin_lock(&v6_lock);
+
+	set_top_pte(to, mk_pte(page, PAGE_KERNEL));
+	clear_page((void *)to);
+
+	raw_spin_unlock(&v6_lock);
+}
+
+struct cpu_user_fns v6_user_fns __initdata = {
+	.cpu_clear_user_highpage = v6_clear_user_highpage_nonaliasing,
+	.cpu_copy_user_highpage	= v6_copy_user_highpage_nonaliasing,
+};
+
+static int __init v6_userpage_init(void)
+{
+	if (cache_is_vipt_aliasing()) {
+		cpu_user.cpu_clear_user_highpage = v6_clear_user_highpage_aliasing;
+		cpu_user.cpu_copy_user_highpage = v6_copy_user_highpage_aliasing;
+	}
+
+	return 0;
+}
+
+core_initcall(v6_userpage_init);
diff --git a/arch/arm/mm/copypage-xsc3.c b/arch/arm/mm/copypage-xsc3.c
new file mode 100644
index 0000000..03a2042
--- /dev/null
+++ b/arch/arm/mm/copypage-xsc3.c
@@ -0,0 +1,114 @@
+/*
+ *  linux/arch/arm/mm/copypage-xsc3.S
+ *
+ *  Copyright (C) 2004 Intel Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Adapted for 3rd gen XScale core, no more mini-dcache
+ * Author: Matt Gilbert (matthew.m.gilbert@intel.com)
+ */
+#include <linux/init.h>
+#include <linux/highmem.h>
+
+/*
+ * General note:
+ *  We don't really want write-allocate cache behaviour for these functions
+ *  since that will just eat through 8K of the cache.
+ */
+
+/*
+ * XSC3 optimised copy_user_highpage
+ *  r0 = destination
+ *  r1 = source
+ *
+ * The source page may have some clean entries in the cache already, but we
+ * can safely ignore them - break_cow() will flush them out of the cache
+ * if we eventually end up using our copied page.
+ *
+ */
+static void __naked
+xsc3_mc_copy_user_page(void *kto, const void *kfrom)
+{
+	asm("\
+	stmfd	sp!, {r4, r5, lr}		\n\
+	mov	lr, %2				\n\
+						\n\
+	pld	[r1, #0]			\n\
+	pld	[r1, #32]			\n\
+1:	pld	[r1, #64]			\n\
+	pld	[r1, #96]			\n\
+						\n\
+2:	ldrd	r2, [r1], #8			\n\
+	mov	ip, r0				\n\
+	ldrd	r4, [r1], #8			\n\
+	mcr	p15, 0, ip, c7, c6, 1		@ invalidate\n\
+	strd	r2, [r0], #8			\n\
+	ldrd	r2, [r1], #8			\n\
+	strd	r4, [r0], #8			\n\
+	ldrd	r4, [r1], #8			\n\
+	strd	r2, [r0], #8			\n\
+	strd	r4, [r0], #8			\n\
+	ldrd	r2, [r1], #8			\n\
+	mov	ip, r0				\n\
+	ldrd	r4, [r1], #8			\n\
+	mcr	p15, 0, ip, c7, c6, 1		@ invalidate\n\
+	strd	r2, [r0], #8			\n\
+	ldrd	r2, [r1], #8			\n\
+	subs	lr, lr, #1			\n\
+	strd	r4, [r0], #8			\n\
+	ldrd	r4, [r1], #8			\n\
+	strd	r2, [r0], #8			\n\
+	strd	r4, [r0], #8			\n\
+	bgt	1b				\n\
+	beq	2b				\n\
+						\n\
+	ldmfd	sp!, {r4, r5, pc}"
+	:
+	: "r" (kto), "r" (kfrom), "I" (PAGE_SIZE / 64 - 1));
+}
+
+void xsc3_mc_copy_user_highpage(struct page *to, struct page *from,
+	unsigned long vaddr, struct vm_area_struct *vma)
+{
+	void *kto, *kfrom;
+
+	kto = kmap_atomic(to);
+	kfrom = kmap_atomic(from);
+	flush_cache_page(vma, vaddr, page_to_pfn(from));
+	xsc3_mc_copy_user_page(kto, kfrom);
+	kunmap_atomic(kfrom);
+	kunmap_atomic(kto);
+}
+
+/*
+ * XScale optimised clear_user_page
+ *  r0 = destination
+ *  r1 = virtual user address of ultimate destination page
+ */
+void xsc3_mc_clear_user_highpage(struct page *page, unsigned long vaddr)
+{
+	void *ptr, *kaddr = kmap_atomic(page);
+	asm volatile ("\
+	mov	r1, %2				\n\
+	mov	r2, #0				\n\
+	mov	r3, #0				\n\
+1:	mcr	p15, 0, %0, c7, c6, 1		@ invalidate line\n\
+	strd	r2, [%0], #8			\n\
+	strd	r2, [%0], #8			\n\
+	strd	r2, [%0], #8			\n\
+	strd	r2, [%0], #8			\n\
+	subs	r1, r1, #1			\n\
+	bne	1b"
+	: "=r" (ptr)
+	: "0" (kaddr), "I" (PAGE_SIZE / 32)
+	: "r1", "r2", "r3");
+	kunmap_atomic(kaddr);
+}
+
+struct cpu_user_fns xsc3_mc_user_fns __initdata = {
+	.cpu_clear_user_highpage = xsc3_mc_clear_user_highpage,
+	.cpu_copy_user_highpage	= xsc3_mc_copy_user_highpage,
+};
diff --git a/arch/arm/mm/copypage-xscale.c b/arch/arm/mm/copypage-xscale.c
new file mode 100644
index 0000000..9797237
--- /dev/null
+++ b/arch/arm/mm/copypage-xscale.c
@@ -0,0 +1,135 @@
+/*
+ *  linux/arch/arm/lib/copypage-xscale.S
+ *
+ *  Copyright (C) 1995-2005 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This handles the mini data cache, as found on SA11x0 and XScale
+ * processors.  When we copy a user page, we map it in such a way
+ * that accesses to this page will not touch the main data cache, but
+ * will be cached in the mini data cache.  This prevents us thrashing
+ * the main data cache on page faults.
+ */
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+
+#include "mm.h"
+
+#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
+				  L_PTE_MT_MINICACHE)
+
+static DEFINE_RAW_SPINLOCK(minicache_lock);
+
+/*
+ * XScale mini-dcache optimised copy_user_highpage
+ *
+ * We flush the destination cache lines just before we write the data into the
+ * corresponding address.  Since the Dcache is read-allocate, this removes the
+ * Dcache aliasing issue.  The writes will be forwarded to the write buffer,
+ * and merged as appropriate.
+ */
+static void __naked
+mc_copy_user_page(void *from, void *to)
+{
+	/*
+	 * Strangely enough, best performance is achieved
+	 * when prefetching destination as well.  (NP)
+	 */
+	asm volatile(
+	"stmfd	sp!, {r4, r5, lr}		\n\
+	mov	lr, %2				\n\
+	pld	[r0, #0]			\n\
+	pld	[r0, #32]			\n\
+	pld	[r1, #0]			\n\
+	pld	[r1, #32]			\n\
+1:	pld	[r0, #64]			\n\
+	pld	[r0, #96]			\n\
+	pld	[r1, #64]			\n\
+	pld	[r1, #96]			\n\
+2:	ldrd	r2, [r0], #8			\n\
+	ldrd	r4, [r0], #8			\n\
+	mov	ip, r1				\n\
+	strd	r2, [r1], #8			\n\
+	ldrd	r2, [r0], #8			\n\
+	strd	r4, [r1], #8			\n\
+	ldrd	r4, [r0], #8			\n\
+	strd	r2, [r1], #8			\n\
+	strd	r4, [r1], #8			\n\
+	mcr	p15, 0, ip, c7, c10, 1		@ clean D line\n\
+	ldrd	r2, [r0], #8			\n\
+	mcr	p15, 0, ip, c7, c6, 1		@ invalidate D line\n\
+	ldrd	r4, [r0], #8			\n\
+	mov	ip, r1				\n\
+	strd	r2, [r1], #8			\n\
+	ldrd	r2, [r0], #8			\n\
+	strd	r4, [r1], #8			\n\
+	ldrd	r4, [r0], #8			\n\
+	strd	r2, [r1], #8			\n\
+	strd	r4, [r1], #8			\n\
+	mcr	p15, 0, ip, c7, c10, 1		@ clean D line\n\
+	subs	lr, lr, #1			\n\
+	mcr	p15, 0, ip, c7, c6, 1		@ invalidate D line\n\
+	bgt	1b				\n\
+	beq	2b				\n\
+	ldmfd	sp!, {r4, r5, pc}		"
+	:
+	: "r" (from), "r" (to), "I" (PAGE_SIZE / 64 - 1));
+}
+
+void xscale_mc_copy_user_highpage(struct page *to, struct page *from,
+	unsigned long vaddr, struct vm_area_struct *vma)
+{
+	void *kto = kmap_atomic(to);
+
+	if (!test_and_set_bit(PG_dcache_clean, &from->flags))
+		__flush_dcache_page(page_mapping_file(from), from);
+
+	raw_spin_lock(&minicache_lock);
+
+	set_top_pte(COPYPAGE_MINICACHE, mk_pte(from, minicache_pgprot));
+
+	mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto);
+
+	raw_spin_unlock(&minicache_lock);
+
+	kunmap_atomic(kto);
+}
+
+/*
+ * XScale optimised clear_user_page
+ */
+void
+xscale_mc_clear_user_highpage(struct page *page, unsigned long vaddr)
+{
+	void *ptr, *kaddr = kmap_atomic(page);
+	asm volatile(
+	"mov	r1, %2				\n\
+	mov	r2, #0				\n\
+	mov	r3, #0				\n\
+1:	mov	ip, %0				\n\
+	strd	r2, [%0], #8			\n\
+	strd	r2, [%0], #8			\n\
+	strd	r2, [%0], #8			\n\
+	strd	r2, [%0], #8			\n\
+	mcr	p15, 0, ip, c7, c10, 1		@ clean D line\n\
+	subs	r1, r1, #1			\n\
+	mcr	p15, 0, ip, c7, c6, 1		@ invalidate D line\n\
+	bne	1b"
+	: "=r" (ptr)
+	: "0" (kaddr), "I" (PAGE_SIZE / 32)
+	: "r1", "r2", "r3", "ip");
+	kunmap_atomic(kaddr);
+}
+
+struct cpu_user_fns xscale_mc_user_fns __initdata = {
+	.cpu_clear_user_highpage = xscale_mc_clear_user_highpage,
+	.cpu_copy_user_highpage	= xscale_mc_copy_user_highpage,
+};
diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c
new file mode 100644
index 0000000..f448a06
--- /dev/null
+++ b/arch/arm/mm/dma-mapping-nommu.c
@@ -0,0 +1,243 @@
+/*
+ *  Based on linux/arch/arm/mm/dma-mapping.c
+ *
+ *  Copyright (C) 2000-2004 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/dma-direct.h>
+#include <linux/scatterlist.h>
+
+#include <asm/cachetype.h>
+#include <asm/cacheflush.h>
+#include <asm/outercache.h>
+#include <asm/cp15.h>
+
+#include "dma.h"
+
+/*
+ *  dma_direct_ops is used if
+ *   - the MMU/MPU is off
+ *   - the CPU is v7m without cache support
+ *   - the device is coherent
+ *  otherwise arm_nommu_dma_ops is used.
+ *
+ *  arm_nommu_dma_ops relies on consistent DMA memory (please refer to
+ *  [1] for how to declare such memory).
+ *
+ *  [1] Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
+ */
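+
+/*
+ * Illustrative sketch (hypothetical driver code): a caller that can
+ * tolerate non-consistent memory would pass DMA_ATTR_NON_CONSISTENT so
+ * the allocation below can fall back to the generic allocator.  Assumes
+ * <linux/dma-mapping.h>.
+ */
+static inline void *example_nommu_alloc(struct device *dev, size_t size,
+					dma_addr_t *handle)
+{
+	/* may be serviced by dma_direct_alloc() rather than the pool */
+	return dma_alloc_attrs(dev, size, handle, GFP_KERNEL,
+			       DMA_ATTR_NON_CONSISTENT);
+}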
+
+static void *arm_nommu_dma_alloc(struct device *dev, size_t size,
+				 dma_addr_t *dma_handle, gfp_t gfp,
+				 unsigned long attrs)
+{
+	void *ret;
+
+	/*
+	 * Try the generic allocator first if the caller indicates that
+	 * consistency is not required.
+	 */
+
+	if (attrs & DMA_ATTR_NON_CONSISTENT)
+		return dma_direct_alloc(dev, size, dma_handle, gfp, attrs);
+
+	ret = dma_alloc_from_global_coherent(size, dma_handle);
+
+	/*
+	 * dma_alloc_from_global_coherent() may fail because:
+	 *
+	 * - no consistent DMA region has been defined, so we can't
+	 *   continue.
+	 * - there is no space left in the consistent DMA region, and we
+	 *   can only fall back to the generic allocator when the caller
+	 *   indicates that consistency is not required.
+	 */
+
+	WARN_ON_ONCE(ret == NULL);
+	return ret;
+}
+
+static void arm_nommu_dma_free(struct device *dev, size_t size,
+			       void *cpu_addr, dma_addr_t dma_addr,
+			       unsigned long attrs)
+{
+	if (attrs & DMA_ATTR_NON_CONSISTENT) {
+		dma_direct_free(dev, size, cpu_addr, dma_addr, attrs);
+	} else {
+		int ret = dma_release_from_global_coherent(get_order(size),
+							   cpu_addr);
+
+		WARN_ON_ONCE(ret == 0);
+	}
+}
+
+static int arm_nommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+			      void *cpu_addr, dma_addr_t dma_addr, size_t size,
+			      unsigned long attrs)
+{
+	int ret;
+
+	if (dma_mmap_from_global_coherent(vma, cpu_addr, size, &ret))
+		return ret;
+
+	return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+
+static void __dma_page_cpu_to_dev(phys_addr_t paddr, size_t size,
+				  enum dma_data_direction dir)
+{
+	dmac_map_area(__va(paddr), size, dir);
+
+	if (dir == DMA_FROM_DEVICE)
+		outer_inv_range(paddr, paddr + size);
+	else
+		outer_clean_range(paddr, paddr + size);
+}
+
+static void __dma_page_dev_to_cpu(phys_addr_t paddr, size_t size,
+				  enum dma_data_direction dir)
+{
+	if (dir != DMA_TO_DEVICE) {
+		outer_inv_range(paddr, paddr + size);
+		dmac_unmap_area(__va(paddr), size, dir);
+	}
+}
+
+static dma_addr_t arm_nommu_dma_map_page(struct device *dev, struct page *page,
+					 unsigned long offset, size_t size,
+					 enum dma_data_direction dir,
+					 unsigned long attrs)
+{
+	dma_addr_t handle = page_to_phys(page) + offset;
+
+	__dma_page_cpu_to_dev(handle, size, dir);
+
+	return handle;
+}
+
+static void arm_nommu_dma_unmap_page(struct device *dev, dma_addr_t handle,
+				     size_t size, enum dma_data_direction dir,
+				     unsigned long attrs)
+{
+	__dma_page_dev_to_cpu(handle, size, dir);
+}
+
+static int arm_nommu_dma_map_sg(struct device *dev, struct scatterlist *sgl,
+				int nents, enum dma_data_direction dir,
+				unsigned long attrs)
+{
+	int i;
+	struct scatterlist *sg;
+
+	for_each_sg(sgl, sg, nents, i) {
+		sg_dma_address(sg) = sg_phys(sg);
+		sg_dma_len(sg) = sg->length;
+		__dma_page_cpu_to_dev(sg_dma_address(sg), sg_dma_len(sg), dir);
+	}
+
+	return nents;
+}
+
+static void arm_nommu_dma_unmap_sg(struct device *dev, struct scatterlist *sgl,
+				   int nents, enum dma_data_direction dir,
+				   unsigned long attrs)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(sgl, sg, nents, i)
+		__dma_page_dev_to_cpu(sg_dma_address(sg), sg_dma_len(sg), dir);
+}
+
+static void arm_nommu_dma_sync_single_for_device(struct device *dev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	__dma_page_cpu_to_dev(handle, size, dir);
+}
+
+static void arm_nommu_dma_sync_single_for_cpu(struct device *dev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	__dma_page_dev_to_cpu(handle, size, dir);
+}
+
+static void arm_nommu_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
+					     int nents, enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(sgl, sg, nents, i)
+		__dma_page_cpu_to_dev(sg_dma_address(sg), sg_dma_len(sg), dir);
+}
+
+static void arm_nommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
+					  int nents, enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(sgl, sg, nents, i)
+		__dma_page_dev_to_cpu(sg_dma_address(sg), sg_dma_len(sg), dir);
+}
+
+const struct dma_map_ops arm_nommu_dma_ops = {
+	.alloc			= arm_nommu_dma_alloc,
+	.free			= arm_nommu_dma_free,
+	.mmap			= arm_nommu_dma_mmap,
+	.map_page		= arm_nommu_dma_map_page,
+	.unmap_page		= arm_nommu_dma_unmap_page,
+	.map_sg			= arm_nommu_dma_map_sg,
+	.unmap_sg		= arm_nommu_dma_unmap_sg,
+	.sync_single_for_device	= arm_nommu_dma_sync_single_for_device,
+	.sync_single_for_cpu	= arm_nommu_dma_sync_single_for_cpu,
+	.sync_sg_for_device	= arm_nommu_dma_sync_sg_for_device,
+	.sync_sg_for_cpu	= arm_nommu_dma_sync_sg_for_cpu,
+};
+EXPORT_SYMBOL(arm_nommu_dma_ops);
+
+static const struct dma_map_ops *arm_nommu_get_dma_map_ops(bool coherent)
+{
+	return coherent ? &dma_direct_ops : &arm_nommu_dma_ops;
+}
+
+void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+			const struct iommu_ops *iommu, bool coherent)
+{
+	const struct dma_map_ops *dma_ops;
+
+	if (IS_ENABLED(CONFIG_CPU_V7M)) {
+		/*
+		 * Cache support for v7m is optional, so it can be treated as
+		 * coherent if no cache has been detected. Note that it is not
+		 * enough to check whether the MPU is in use, since in the
+		 * absence of an MPU the system memory map is used.
+		 */
+		dev->archdata.dma_coherent = (cacheid) ? coherent : true;
+	} else {
+		/*
+		 * Assume coherent DMA in case MMU/MPU has not been set up.
+		 */
+		dev->archdata.dma_coherent = (get_cr() & CR_M) ? coherent : true;
+	}
+
+	dma_ops = arm_nommu_get_dma_map_ops(dev->archdata.dma_coherent);
+
+	set_dma_ops(dev, dma_ops);
+}
+
+void arch_teardown_dma_ops(struct device *dev)
+{
+}
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
new file mode 100644
index 0000000..1cb9c0f
--- /dev/null
+++ b/arch/arm/mm/dma-mapping.c
@@ -0,0 +1,2403 @@
+/*
+ *  linux/arch/arm/mm/dma-mapping.c
+ *
+ *  Copyright (C) 2000-2004 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  DMA uncached mapping support.
+ */
+#include <linux/bootmem.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/genalloc.h>
+#include <linux/gfp.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma-contiguous.h>
+#include <linux/highmem.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+#include <linux/iommu.h>
+#include <linux/io.h>
+#include <linux/vmalloc.h>
+#include <linux/sizes.h>
+#include <linux/cma.h>
+
+#include <asm/memory.h>
+#include <asm/highmem.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/mach/arch.h>
+#include <asm/dma-iommu.h>
+#include <asm/mach/map.h>
+#include <asm/system_info.h>
+#include <asm/dma-contiguous.h>
+
+#include "dma.h"
+#include "mm.h"
+
+struct arm_dma_alloc_args {
+	struct device *dev;
+	size_t size;
+	gfp_t gfp;
+	pgprot_t prot;
+	const void *caller;
+	bool want_vaddr;
+	int coherent_flag;
+};
+
+struct arm_dma_free_args {
+	struct device *dev;
+	size_t size;
+	void *cpu_addr;
+	struct page *page;
+	bool want_vaddr;
+};
+
+#define NORMAL	    0
+#define COHERENT    1
+
+struct arm_dma_allocator {
+	void *(*alloc)(struct arm_dma_alloc_args *args,
+		       struct page **ret_page);
+	void (*free)(struct arm_dma_free_args *args);
+};
+
+struct arm_dma_buffer {
+	struct list_head list;
+	void *virt;
+	struct arm_dma_allocator *allocator;
+};
+
+static LIST_HEAD(arm_dma_bufs);
+static DEFINE_SPINLOCK(arm_dma_bufs_lock);
+
+static struct arm_dma_buffer *arm_dma_buffer_find(void *virt)
+{
+	struct arm_dma_buffer *buf, *found = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&arm_dma_bufs_lock, flags);
+	list_for_each_entry(buf, &arm_dma_bufs, list) {
+		if (buf->virt == virt) {
+			list_del(&buf->list);
+			found = buf;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&arm_dma_bufs_lock, flags);
+	return found;
+}
+
+/*
+ * The DMA API is built upon the notion of "buffer ownership".  A buffer
+ * is either exclusively owned by the CPU (and therefore may be accessed
+ * by it) or exclusively owned by the DMA device.  These helper functions
+ * represent the transitions between these two ownership states.
+ *
+ * Note, however, that on later ARMs, this notion does not work due to
+ * speculative prefetches.  We model our approach on the assumption that
+ * the CPU does do speculative prefetches, which means we clean caches
+ * before transfers and delay cache invalidation until transfer completion.
+ *
+ */
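+
+/*
+ * Illustrative sketch (hypothetical driver code) of the ownership model
+ * above: the CPU hands the buffer to the device with a map and reclaims
+ * it with the matching unmap.
+ */
+static inline int example_ownership_cycle(struct device *dev,
+					  struct page *page)
+{
+	dma_addr_t handle;
+
+	/* CPU -> device: cache maintenance for the transfer happens here */
+	handle = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, handle))
+		return -ENOMEM;
+
+	/* ... the device DMAs into the buffer here ... */
+
+	/* device -> CPU: the delayed invalidate happens in the unmap */
+	dma_unmap_page(dev, handle, PAGE_SIZE, DMA_FROM_DEVICE);
+	return 0;
+}
+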
+static void __dma_page_cpu_to_dev(struct page *, unsigned long,
+		size_t, enum dma_data_direction);
+static void __dma_page_dev_to_cpu(struct page *, unsigned long,
+		size_t, enum dma_data_direction);
+
+/**
+ * arm_dma_map_page - map a portion of a page for streaming DMA
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @page: page that buffer resides in
+ * @offset: offset into page for start of buffer
+ * @size: size of buffer to map
+ * @dir: DMA transfer direction
+ *
+ * Ensure that any data held in the cache is appropriately discarded
+ * or written back.
+ *
+ * The device owns this memory once this call has completed.  The CPU
+ * can regain ownership by calling dma_unmap_page().
+ */
+static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page,
+	     unsigned long offset, size_t size, enum dma_data_direction dir,
+	     unsigned long attrs)
+{
+	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
+		__dma_page_cpu_to_dev(page, offset, size, dir);
+	return pfn_to_dma(dev, page_to_pfn(page)) + offset;
+}
+
+static dma_addr_t arm_coherent_dma_map_page(struct device *dev, struct page *page,
+	     unsigned long offset, size_t size, enum dma_data_direction dir,
+	     unsigned long attrs)
+{
+	return pfn_to_dma(dev, page_to_pfn(page)) + offset;
+}
+
+/**
+ * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page()
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @handle: DMA address of buffer
+ * @size: size of buffer (same as passed to dma_map_page)
+ * @dir: DMA transfer direction (same as passed to dma_map_page)
+ *
+ * Unmap a page streaming mode DMA translation.  The handle and size
+ * must match what was provided in the previous dma_map_page() call.
+ * All other usages are undefined.
+ *
+ * After this call, reads by the CPU from the buffer are guaranteed to see
+ * whatever the device wrote there.
+ */
+static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
+		__dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)),
+				      handle & ~PAGE_MASK, size, dir);
+}
+
+static void arm_dma_sync_single_for_cpu(struct device *dev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	unsigned int offset = handle & (PAGE_SIZE - 1);
+	struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset));
+	__dma_page_dev_to_cpu(page, offset, size, dir);
+}
+
+static void arm_dma_sync_single_for_device(struct device *dev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	unsigned int offset = handle & (PAGE_SIZE - 1);
+	struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset));
+	__dma_page_cpu_to_dev(page, offset, size, dir);
+}
+
+static int arm_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	return dma_addr == ARM_MAPPING_ERROR;
+}
+
+const struct dma_map_ops arm_dma_ops = {
+	.alloc			= arm_dma_alloc,
+	.free			= arm_dma_free,
+	.mmap			= arm_dma_mmap,
+	.get_sgtable		= arm_dma_get_sgtable,
+	.map_page		= arm_dma_map_page,
+	.unmap_page		= arm_dma_unmap_page,
+	.map_sg			= arm_dma_map_sg,
+	.unmap_sg		= arm_dma_unmap_sg,
+	.sync_single_for_cpu	= arm_dma_sync_single_for_cpu,
+	.sync_single_for_device	= arm_dma_sync_single_for_device,
+	.sync_sg_for_cpu	= arm_dma_sync_sg_for_cpu,
+	.sync_sg_for_device	= arm_dma_sync_sg_for_device,
+	.mapping_error		= arm_dma_mapping_error,
+	.dma_supported		= arm_dma_supported,
+};
+EXPORT_SYMBOL(arm_dma_ops);
+
+static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
+	dma_addr_t *handle, gfp_t gfp, unsigned long attrs);
+static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr,
+				  dma_addr_t handle, unsigned long attrs);
+static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+		 void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		 unsigned long attrs);
+
+const struct dma_map_ops arm_coherent_dma_ops = {
+	.alloc			= arm_coherent_dma_alloc,
+	.free			= arm_coherent_dma_free,
+	.mmap			= arm_coherent_dma_mmap,
+	.get_sgtable		= arm_dma_get_sgtable,
+	.map_page		= arm_coherent_dma_map_page,
+	.map_sg			= arm_dma_map_sg,
+	.mapping_error		= arm_dma_mapping_error,
+	.dma_supported		= arm_dma_supported,
+};
+EXPORT_SYMBOL(arm_coherent_dma_ops);
+
+static int __dma_supported(struct device *dev, u64 mask, bool warn)
+{
+	unsigned long max_dma_pfn;
+
+	/*
+	 * If the mask allows for more memory than we can address,
+	 * and we actually have that much memory, then we must
+	 * indicate that DMA to this device is not supported.
+	 */
+	if (sizeof(mask) != sizeof(dma_addr_t) &&
+	    mask > (dma_addr_t)~0 &&
+	    dma_to_pfn(dev, ~0) < max_pfn - 1) {
+		if (warn) {
+			dev_warn(dev, "Coherent DMA mask %#llx is larger than dma_addr_t allows\n",
+				 mask);
+			dev_warn(dev, "Driver did not use or check the return value from dma_set_coherent_mask()?\n");
+		}
+		return 0;
+	}
+
+	max_dma_pfn = min(max_pfn, arm_dma_pfn_limit);
+
+	/*
+	 * Translate the device's DMA mask to a PFN limit.  This
+	 * PFN number includes the page which we can DMA to.
+	 */
+	if (dma_to_pfn(dev, mask) < max_dma_pfn) {
+		if (warn)
+			dev_warn(dev, "Coherent DMA mask %#llx (pfn %#lx-%#lx) covers a smaller range of system memory than the DMA zone pfn 0x0-%#lx\n",
+				 mask,
+				 dma_to_pfn(dev, 0), dma_to_pfn(dev, mask) + 1,
+				 max_dma_pfn + 1);
+		return 0;
+	}
+
+	return 1;
+}
+
+static u64 get_coherent_dma_mask(struct device *dev)
+{
+	u64 mask = (u64)DMA_BIT_MASK(32);
+
+	if (dev) {
+		mask = dev->coherent_dma_mask;
+
+		/*
+		 * Sanity check the DMA mask - it must be non-zero, and
+		 * must be able to be satisfied by a DMA allocation.
+		 */
+		if (mask == 0) {
+			dev_warn(dev, "coherent DMA mask is unset\n");
+			return 0;
+		}
+
+		if (!__dma_supported(dev, mask, true))
+			return 0;
+	}
+
+	return mask;
+}
+
+static void __dma_clear_buffer(struct page *page, size_t size, int coherent_flag)
+{
+	/*
+	 * Ensure that the allocated pages are zeroed, and that any data
+	 * lurking in the kernel direct-mapped region is invalidated.
+	 */
+	if (PageHighMem(page)) {
+		phys_addr_t base = __pfn_to_phys(page_to_pfn(page));
+		phys_addr_t end = base + size;
+		while (size > 0) {
+			void *ptr = kmap_atomic(page);
+			memset(ptr, 0, PAGE_SIZE);
+			if (coherent_flag != COHERENT)
+				dmac_flush_range(ptr, ptr + PAGE_SIZE);
+			kunmap_atomic(ptr);
+			page++;
+			size -= PAGE_SIZE;
+		}
+		if (coherent_flag != COHERENT)
+			outer_flush_range(base, end);
+	} else {
+		void *ptr = page_address(page);
+		memset(ptr, 0, size);
+		if (coherent_flag != COHERENT) {
+			dmac_flush_range(ptr, ptr + size);
+			outer_flush_range(__pa(ptr), __pa(ptr) + size);
+		}
+	}
+}
+
+/*
+ * Allocate a DMA buffer for 'dev' of size 'size' using the
+ * specified gfp mask.  Note that 'size' must be page aligned.
+ */
+static struct page *__dma_alloc_buffer(struct device *dev, size_t size,
+				       gfp_t gfp, int coherent_flag)
+{
+	unsigned long order = get_order(size);
+	struct page *page, *p, *e;
+
+	page = alloc_pages(gfp, order);
+	if (!page)
+		return NULL;
+
+	/*
+	 * Now split the huge page and free the excess pages
+	 */
+	split_page(page, order);
+	for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++)
+		__free_page(p);
+
+	__dma_clear_buffer(page, size, coherent_flag);
+
+	return page;
+}
+
+/*
+ * Free a DMA buffer.  'size' must be page aligned.
+ */
+static void __dma_free_buffer(struct page *page, size_t size)
+{
+	struct page *e = page + (size >> PAGE_SHIFT);
+
+	while (page < e) {
+		__free_page(page);
+		page++;
+	}
+}
+
+static void *__alloc_from_contiguous(struct device *dev, size_t size,
+				     pgprot_t prot, struct page **ret_page,
+				     const void *caller, bool want_vaddr,
+				     int coherent_flag, gfp_t gfp);
+
+static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
+				 pgprot_t prot, struct page **ret_page,
+				 const void *caller, bool want_vaddr);
+
+static void *
+__dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot,
+	const void *caller)
+{
+	/*
+	 * DMA allocations can be mapped to user space, so let's
+	 * set the VM_USERMAP flag too.
+	 */
+	return dma_common_contiguous_remap(page, size,
+			VM_ARM_DMA_CONSISTENT | VM_USERMAP,
+			prot, caller);
+}
+
+static void __dma_free_remap(void *cpu_addr, size_t size)
+{
+	dma_common_free_remap(cpu_addr, size,
+			VM_ARM_DMA_CONSISTENT | VM_USERMAP);
+}
+
+#define DEFAULT_DMA_COHERENT_POOL_SIZE	SZ_256K
+static struct gen_pool *atomic_pool __ro_after_init;
+
+static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;
+
+static int __init early_coherent_pool(char *p)
+{
+	atomic_pool_size = memparse(p, &p);
+	return 0;
+}
+early_param("coherent_pool", early_coherent_pool);
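+
+/*
+ * For example, booting with "coherent_pool=2M" on the kernel command line
+ * replaces the 256 KiB default with a 2 MiB atomic pool.
+ */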
+
+/*
+ * Initialise the coherent pool for atomic allocations.
+ */
+static int __init atomic_pool_init(void)
+{
+	pgprot_t prot = pgprot_dmacoherent(PAGE_KERNEL);
+	gfp_t gfp = GFP_KERNEL | GFP_DMA;
+	struct page *page;
+	void *ptr;
+
+	atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
+	if (!atomic_pool)
+		goto out;
+	/*
+	 * The atomic pool is only used for non-coherent allocations
+	 * so we must pass NORMAL for coherent_flag.
+	 */
+	if (dev_get_cma_area(NULL))
+		ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot,
+				      &page, atomic_pool_init, true, NORMAL,
+				      GFP_KERNEL);
+	else
+		ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot,
+					   &page, atomic_pool_init, true);
+	if (ptr) {
+		int ret;
+
+		ret = gen_pool_add_virt(atomic_pool, (unsigned long)ptr,
+					page_to_phys(page),
+					atomic_pool_size, -1);
+		if (ret)
+			goto destroy_genpool;
+
+		gen_pool_set_algo(atomic_pool,
+				gen_pool_first_fit_order_align,
+				NULL);
+		pr_info("DMA: preallocated %zu KiB pool for atomic coherent allocations\n",
+		       atomic_pool_size / 1024);
+		return 0;
+	}
+
+destroy_genpool:
+	gen_pool_destroy(atomic_pool);
+	atomic_pool = NULL;
+out:
+	pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
+	       atomic_pool_size / 1024);
+	return -ENOMEM;
+}
+/*
+ * CMA is activated by core_initcall, so we must be called after it.
+ */
+postcore_initcall(atomic_pool_init);
+
+struct dma_contig_early_reserve {
+	phys_addr_t base;
+	unsigned long size;
+};
+
+static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata;
+
+static int dma_mmu_remap_num __initdata;
+
+void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
+{
+	dma_mmu_remap[dma_mmu_remap_num].base = base;
+	dma_mmu_remap[dma_mmu_remap_num].size = size;
+	dma_mmu_remap_num++;
+}
+
+void __init dma_contiguous_remap(void)
+{
+	int i;
+	for (i = 0; i < dma_mmu_remap_num; i++) {
+		phys_addr_t start = dma_mmu_remap[i].base;
+		phys_addr_t end = start + dma_mmu_remap[i].size;
+		struct map_desc map;
+		unsigned long addr;
+
+		if (end > arm_lowmem_limit)
+			end = arm_lowmem_limit;
+		if (start >= end)
+			continue;
+
+		map.pfn = __phys_to_pfn(start);
+		map.virtual = __phys_to_virt(start);
+		map.length = end - start;
+		map.type = MT_MEMORY_DMA_READY;
+
+		/*
+		 * Clear previous low-memory mapping to ensure that the
+		 * TLB does not see any conflicting entries, then flush
+		 * the TLB of the old entries before creating new mappings.
+		 *
+		 * This ensures that any speculatively loaded TLB entries
+		 * (even though they may be rare) cannot cause any problems,
+		 * and ensures that this code is architecturally compliant.
+		 */
+		for (addr = __phys_to_virt(start); addr < __phys_to_virt(end);
+		     addr += PMD_SIZE)
+			pmd_clear(pmd_off_k(addr));
+
+		flush_tlb_kernel_range(__phys_to_virt(start),
+				       __phys_to_virt(end));
+
+		iotable_init(&map, 1);
+	}
+}
+
+static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr,
+			    void *data)
+{
+	struct page *page = virt_to_page(addr);
+	pgprot_t prot = *(pgprot_t *)data;
+
+	set_pte_ext(pte, mk_pte(page, prot), 0);
+	return 0;
+}
+
+static void __dma_remap(struct page *page, size_t size, pgprot_t prot)
+{
+	unsigned long start = (unsigned long) page_address(page);
+	unsigned long end = start + size;
+
+	apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot);
+	flush_tlb_kernel_range(start, end);
+}
+
+static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp,
+				 pgprot_t prot, struct page **ret_page,
+				 const void *caller, bool want_vaddr)
+{
+	struct page *page;
+	void *ptr = NULL;
+	/*
+	 * __alloc_remap_buffer is only called when the device is
+	 * non-coherent
+	 */
+	page = __dma_alloc_buffer(dev, size, gfp, NORMAL);
+	if (!page)
+		return NULL;
+	if (!want_vaddr)
+		goto out;
+
+	ptr = __dma_alloc_remap(page, size, gfp, prot, caller);
+	if (!ptr) {
+		__dma_free_buffer(page, size);
+		return NULL;
+	}
+
+ out:
+	*ret_page = page;
+	return ptr;
+}
+
+static void *__alloc_from_pool(size_t size, struct page **ret_page)
+{
+	unsigned long val;
+	void *ptr = NULL;
+
+	if (!atomic_pool) {
+		WARN(1, "coherent pool not initialised!\n");
+		return NULL;
+	}
+
+	val = gen_pool_alloc(atomic_pool, size);
+	if (val) {
+		phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);
+
+		*ret_page = phys_to_page(phys);
+		ptr = (void *)val;
+	}
+
+	return ptr;
+}
+
+static bool __in_atomic_pool(void *start, size_t size)
+{
+	return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
+}
+
+static int __free_from_pool(void *start, size_t size)
+{
+	if (!__in_atomic_pool(start, size))
+		return 0;
+
+	gen_pool_free(atomic_pool, (unsigned long)start, size);
+
+	return 1;
+}
+
+static void *__alloc_from_contiguous(struct device *dev, size_t size,
+				     pgprot_t prot, struct page **ret_page,
+				     const void *caller, bool want_vaddr,
+				     int coherent_flag, gfp_t gfp)
+{
+	unsigned long order = get_order(size);
+	size_t count = size >> PAGE_SHIFT;
+	struct page *page;
+	void *ptr = NULL;
+
+	page = dma_alloc_from_contiguous(dev, count, order, gfp & __GFP_NOWARN);
+	if (!page)
+		return NULL;
+
+	__dma_clear_buffer(page, size, coherent_flag);
+
+	if (!want_vaddr)
+		goto out;
+
+	if (PageHighMem(page)) {
+		ptr = __dma_alloc_remap(page, size, GFP_KERNEL, prot, caller);
+		if (!ptr) {
+			dma_release_from_contiguous(dev, page, count);
+			return NULL;
+		}
+	} else {
+		__dma_remap(page, size, prot);
+		ptr = page_address(page);
+	}
+
+ out:
+	*ret_page = page;
+	return ptr;
+}
+
+static void __free_from_contiguous(struct device *dev, struct page *page,
+				   void *cpu_addr, size_t size, bool want_vaddr)
+{
+	if (want_vaddr) {
+		if (PageHighMem(page))
+			__dma_free_remap(cpu_addr, size);
+		else
+			__dma_remap(page, size, PAGE_KERNEL);
+	}
+	dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT);
+}
+
+static inline pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot)
+{
+	prot = (attrs & DMA_ATTR_WRITE_COMBINE) ?
+			pgprot_writecombine(prot) :
+			pgprot_dmacoherent(prot);
+	return prot;
+}
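+
+/*
+ * Illustrative sketch (hypothetical driver code): DMA_ATTR_WRITE_COMBINE
+ * reaches the pgprot choice above via dma_alloc_wc(), which is simply
+ * dma_alloc_attrs() with that attribute set.
+ */
+static inline void *example_alloc_wc(struct device *dev, size_t size,
+				     dma_addr_t *handle)
+{
+	return dma_alloc_wc(dev, size, handle, GFP_KERNEL);
+}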
+
+static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp,
+				   struct page **ret_page)
+{
+	struct page *page;
+	/* __alloc_simple_buffer is only called when the device is coherent */
+	page = __dma_alloc_buffer(dev, size, gfp, COHERENT);
+	if (!page)
+		return NULL;
+
+	*ret_page = page;
+	return page_address(page);
+}
+
+static void *simple_allocator_alloc(struct arm_dma_alloc_args *args,
+				    struct page **ret_page)
+{
+	return __alloc_simple_buffer(args->dev, args->size, args->gfp,
+				     ret_page);
+}
+
+static void simple_allocator_free(struct arm_dma_free_args *args)
+{
+	__dma_free_buffer(args->page, args->size);
+}
+
+static struct arm_dma_allocator simple_allocator = {
+	.alloc = simple_allocator_alloc,
+	.free = simple_allocator_free,
+};
+
+static void *cma_allocator_alloc(struct arm_dma_alloc_args *args,
+				 struct page **ret_page)
+{
+	return __alloc_from_contiguous(args->dev, args->size, args->prot,
+				       ret_page, args->caller,
+				       args->want_vaddr, args->coherent_flag,
+				       args->gfp);
+}
+
+static void cma_allocator_free(struct arm_dma_free_args *args)
+{
+	__free_from_contiguous(args->dev, args->page, args->cpu_addr,
+			       args->size, args->want_vaddr);
+}
+
+static struct arm_dma_allocator cma_allocator = {
+	.alloc = cma_allocator_alloc,
+	.free = cma_allocator_free,
+};
+
+static void *pool_allocator_alloc(struct arm_dma_alloc_args *args,
+				  struct page **ret_page)
+{
+	return __alloc_from_pool(args->size, ret_page);
+}
+
+static void pool_allocator_free(struct arm_dma_free_args *args)
+{
+	__free_from_pool(args->cpu_addr, args->size);
+}
+
+static struct arm_dma_allocator pool_allocator = {
+	.alloc = pool_allocator_alloc,
+	.free = pool_allocator_free,
+};
+
+static void *remap_allocator_alloc(struct arm_dma_alloc_args *args,
+				   struct page **ret_page)
+{
+	return __alloc_remap_buffer(args->dev, args->size, args->gfp,
+				    args->prot, ret_page, args->caller,
+				    args->want_vaddr);
+}
+
+static void remap_allocator_free(struct arm_dma_free_args *args)
+{
+	if (args->want_vaddr)
+		__dma_free_remap(args->cpu_addr, args->size);
+
+	__dma_free_buffer(args->page, args->size);
+}
+
+static struct arm_dma_allocator remap_allocator = {
+	.alloc = remap_allocator_alloc,
+	.free = remap_allocator_free,
+};
+
+static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+			 gfp_t gfp, pgprot_t prot, bool is_coherent,
+			 unsigned long attrs, const void *caller)
+{
+	u64 mask = get_coherent_dma_mask(dev);
+	struct page *page = NULL;
+	void *addr;
+	bool allowblock, cma;
+	struct arm_dma_buffer *buf;
+	struct arm_dma_alloc_args args = {
+		.dev = dev,
+		.size = PAGE_ALIGN(size),
+		.gfp = gfp,
+		.prot = prot,
+		.caller = caller,
+		.want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0),
+		.coherent_flag = is_coherent ? COHERENT : NORMAL,
+	};
+
+#ifdef CONFIG_DMA_API_DEBUG
+	u64 limit = (mask + 1) & ~mask;
+	if (limit && size >= limit) {
+		dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n",
+			size, mask);
+		return NULL;
+	}
+#endif
+
+	if (!mask)
+		return NULL;
+
+	buf = kzalloc(sizeof(*buf),
+		      gfp & ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM));
+	if (!buf)
+		return NULL;
+
+	if (mask < 0xffffffffULL)
+		gfp |= GFP_DMA;
+
+	/*
+	 * Following is a work-around (a.k.a. hack) to prevent pages
+	 * with __GFP_COMP being passed to split_page() which cannot
+	 * handle them.  The real problem is that this flag probably
+	 * should be 0 on ARM as it is not supported on this
+	 * platform; see CONFIG_HUGETLBFS.
+	 */
+	gfp &= ~(__GFP_COMP);
+	args.gfp = gfp;
+
+	*handle = ARM_MAPPING_ERROR;
+	allowblock = gfpflags_allow_blocking(gfp);
+	cma = allowblock ? dev_get_cma_area(dev) : false;
+
+	if (cma)
+		buf->allocator = &cma_allocator;
+	else if (is_coherent)
+		buf->allocator = &simple_allocator;
+	else if (allowblock)
+		buf->allocator = &remap_allocator;
+	else
+		buf->allocator = &pool_allocator;
+
+	addr = buf->allocator->alloc(&args, &page);
+
+	if (page) {
+		unsigned long flags;
+
+		*handle = pfn_to_dma(dev, page_to_pfn(page));
+		buf->virt = args.want_vaddr ? addr : page;
+
+		spin_lock_irqsave(&arm_dma_bufs_lock, flags);
+		list_add(&buf->list, &arm_dma_bufs);
+		spin_unlock_irqrestore(&arm_dma_bufs_lock, flags);
+	} else {
+		kfree(buf);
+	}
+
+	return args.want_vaddr ? addr : page;
+}
+
+/*
+ * Allocate DMA-coherent memory space and return both the kernel remapped
+ * virtual and bus address for that space.
+ */
+void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
+		    gfp_t gfp, unsigned long attrs)
+{
+	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL);
+
+	return __dma_alloc(dev, size, handle, gfp, prot, false,
+			   attrs, __builtin_return_address(0));
+}
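+
+/*
+ * Illustrative sketch (hypothetical driver code): the usual entry point
+ * into the allocator above is dma_alloc_coherent(), which hands back the
+ * kernel virtual address and the bus address at once.
+ */
+static inline int example_coherent_ring(struct device *dev)
+{
+	dma_addr_t handle;
+	void *ring = dma_alloc_coherent(dev, SZ_4K, &handle, GFP_KERNEL);
+
+	if (!ring)
+		return -ENOMEM;
+	/* program "handle" into the device, use "ring" from the CPU ... */
+	dma_free_coherent(dev, SZ_4K, ring, handle);
+	return 0;
+}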
+
+static void *arm_coherent_dma_alloc(struct device *dev, size_t size,
+	dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
+{
+	return __dma_alloc(dev, size, handle, gfp, PAGE_KERNEL, true,
+			   attrs, __builtin_return_address(0));
+}
+
+static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+		 void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		 unsigned long attrs)
+{
+	int ret = -ENXIO;
+	unsigned long nr_vma_pages = vma_pages(vma);
+	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned long pfn = dma_to_pfn(dev, dma_addr);
+	unsigned long off = vma->vm_pgoff;
+
+	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
+		return ret;
+
+	if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
+		ret = remap_pfn_range(vma, vma->vm_start,
+				      pfn + off,
+				      vma->vm_end - vma->vm_start,
+				      vma->vm_page_prot);
+	}
+
+	return ret;
+}
+
+/*
+ * Create userspace mapping for the DMA-coherent memory.
+ */
+static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+		 void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		 unsigned long attrs)
+{
+	return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
+}
+
+int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
+		 void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		 unsigned long attrs)
+{
+	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
+	return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
+}
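+
+/*
+ * Illustrative sketch (hypothetical driver code): a driver's fops->mmap
+ * would reach the handler above through dma_mmap_coherent(), passing the
+ * cpu_addr/handle pair it obtained from dma_alloc_coherent().
+ */
+static inline int example_mmap(struct device *dev, struct vm_area_struct *vma,
+			       void *cpu_addr, dma_addr_t handle, size_t size)
+{
+	return dma_mmap_coherent(dev, vma, cpu_addr, handle, size);
+}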
+
+/*
+ * Free a buffer as defined by the above mapping.
+ */
+static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
+			   dma_addr_t handle, unsigned long attrs,
+			   bool is_coherent)
+{
+	struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
+	struct arm_dma_buffer *buf;
+	struct arm_dma_free_args args = {
+		.dev = dev,
+		.size = PAGE_ALIGN(size),
+		.cpu_addr = cpu_addr,
+		.page = page,
+		.want_vaddr = ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0),
+	};
+
+	buf = arm_dma_buffer_find(cpu_addr);
+	if (WARN(!buf, "Freeing invalid buffer %p\n", cpu_addr))
+		return;
+
+	buf->allocator->free(&args);
+	kfree(buf);
+}
+
+void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
+		  dma_addr_t handle, unsigned long attrs)
+{
+	__arm_dma_free(dev, size, cpu_addr, handle, attrs, false);
+}
+
+static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr,
+				  dma_addr_t handle, unsigned long attrs)
+{
+	__arm_dma_free(dev, size, cpu_addr, handle, attrs, true);
+}
+
+/*
+ * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems
+ * that the intention is to allow exporting memory allocated via the
+ * coherent DMA APIs through the dma_buf API, which only accepts a
+ * scatter-gather table.  This presents a couple of problems:
+ * 1. Not all memory allocated via the coherent DMA APIs is backed by
+ *    a struct page
+ * 2. Passing coherent DMA memory into the streaming APIs is not allowed
+ *    as we will try to flush the memory through a different alias to that
+ *    actually being used (and the flushes are redundant).
+ */
+int arm_dma_get_sgtable(struct device *dev, struct sg_table *sgt,
+		 void *cpu_addr, dma_addr_t handle, size_t size,
+		 unsigned long attrs)
+{
+	unsigned long pfn = dma_to_pfn(dev, handle);
+	struct page *page;
+	int ret;
+
+	/* If the PFN is not valid, we do not have a struct page */
+	if (!pfn_valid(pfn))
+		return -ENXIO;
+
+	page = pfn_to_page(pfn);
+
+	ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+	if (unlikely(ret))
+		return ret;
+
+	sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
+	return 0;
+}
+
+static void dma_cache_maint_page(struct page *page, unsigned long offset,
+	size_t size, enum dma_data_direction dir,
+	void (*op)(const void *, size_t, int))
+{
+	unsigned long pfn;
+	size_t left = size;
+
+	pfn = page_to_pfn(page) + offset / PAGE_SIZE;
+	offset %= PAGE_SIZE;
+
+	/*
+	 * A single sg entry may refer to multiple physically contiguous
+	 * pages.  But we still need to process highmem pages individually.
+	 * If highmem is not configured then the bulk of this loop gets
+	 * optimized out.
+	 */
+	do {
+		size_t len = left;
+		void *vaddr;
+
+		page = pfn_to_page(pfn);
+
+		if (PageHighMem(page)) {
+			if (len + offset > PAGE_SIZE)
+				len = PAGE_SIZE - offset;
+
+			if (cache_is_vipt_nonaliasing()) {
+				vaddr = kmap_atomic(page);
+				op(vaddr + offset, len, dir);
+				kunmap_atomic(vaddr);
+			} else {
+				vaddr = kmap_high_get(page);
+				if (vaddr) {
+					op(vaddr + offset, len, dir);
+					kunmap_high(page);
+				}
+			}
+		} else {
+			vaddr = page_address(page) + offset;
+			op(vaddr, len, dir);
+		}
+		offset = 0;
+		pfn++;
+		left -= len;
+	} while (left);
+}
+
+/*
+ * Make an area consistent for devices.
+ * Note: Drivers should NOT use this function directly, as it will break
+ * platforms with CONFIG_DMABOUNCE.
+ * Use the driver DMA support - see dma-mapping.h (dma_sync_*)
+ */
+static void __dma_page_cpu_to_dev(struct page *page, unsigned long off,
+	size_t size, enum dma_data_direction dir)
+{
+	phys_addr_t paddr;
+
+	dma_cache_maint_page(page, off, size, dir, dmac_map_area);
+
+	paddr = page_to_phys(page) + off;
+	if (dir == DMA_FROM_DEVICE) {
+		outer_inv_range(paddr, paddr + size);
+	} else {
+		outer_clean_range(paddr, paddr + size);
+	}
+	/* FIXME: non-speculating: flush on bidirectional mappings? */
+}
+
+static void __dma_page_dev_to_cpu(struct page *page, unsigned long off,
+	size_t size, enum dma_data_direction dir)
+{
+	phys_addr_t paddr = page_to_phys(page) + off;
+
+	/* FIXME: non-speculating: not required */
+	/* in any case, don't bother invalidating if DMA to device */
+	if (dir != DMA_TO_DEVICE) {
+		outer_inv_range(paddr, paddr + size);
+
+		dma_cache_maint_page(page, off, size, dir, dmac_unmap_area);
+	}
+
+	/*
+	 * Mark the D-cache clean for these pages to avoid extra flushing.
+	 */
+	if (dir != DMA_TO_DEVICE && size >= PAGE_SIZE) {
+		unsigned long pfn;
+		size_t left = size;
+
+		pfn = page_to_pfn(page) + off / PAGE_SIZE;
+		off %= PAGE_SIZE;
+		if (off) {
+			pfn++;
+			left -= PAGE_SIZE - off;
+		}
+		while (left >= PAGE_SIZE) {
+			page = pfn_to_page(pfn++);
+			set_bit(PG_dcache_clean, &page->flags);
+			left -= PAGE_SIZE;
+		}
+	}
+}
+
+/**
+ * arm_dma_map_sg - map a set of SG buffers for streaming mode DMA
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @sg: list of buffers
+ * @nents: number of buffers to map
+ * @dir: DMA transfer direction
+ *
+ * Map a set of buffers described by scatterlist in streaming mode for DMA.
+ * This is the scatter-gather version of the dma_map_single interface.
+ * Here the scatter gather list elements are each tagged with the
+ * appropriate dma address and length.  They are obtained via
+ * sg_dma_{address,length}.
+ *
+ * Device ownership issues as mentioned for dma_map_single are the same
+ * here.
+ */
+int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+		enum dma_data_direction dir, unsigned long attrs)
+{
+	const struct dma_map_ops *ops = get_dma_ops(dev);
+	struct scatterlist *s;
+	int i, j;
+
+	for_each_sg(sg, s, nents, i) {
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+		s->dma_length = s->length;
+#endif
+		s->dma_address = ops->map_page(dev, sg_page(s), s->offset,
+						s->length, dir, attrs);
+		if (dma_mapping_error(dev, s->dma_address))
+			goto bad_mapping;
+	}
+	return nents;
+
+ bad_mapping:
+	for_each_sg(sg, s, i, j)
+		ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs);
+	return 0;
+}
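+
+/*
+ * Illustrative sketch (hypothetical driver code): after a successful
+ * dma_map_sg(), each segment's device address and length are read back
+ * with sg_dma_address()/sg_dma_len(), exactly as the mapper set them.
+ */
+static inline void example_map_sg(struct device *dev,
+				  struct scatterlist *sgl, int nents)
+{
+	struct scatterlist *s;
+	int i, count;
+
+	count = dma_map_sg(dev, sgl, nents, DMA_TO_DEVICE);
+	if (!count)
+		return;
+	for_each_sg(sgl, s, count, i) {
+		/* hand sg_dma_address(s) and sg_dma_len(s) to the device */
+	}
+	dma_unmap_sg(dev, sgl, nents, DMA_TO_DEVICE);
+}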
+
+/**
+ * arm_dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @sg: list of buffers
+ * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ *
+ * Unmap a set of streaming mode DMA translations.  Again, CPU access
+ * rules concerning calls here are the same as for dma_unmap_single().
+ */
+void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
+		enum dma_data_direction dir, unsigned long attrs)
+{
+	const struct dma_map_ops *ops = get_dma_ops(dev);
+	struct scatterlist *s;
+
+	int i;
+
+	for_each_sg(sg, s, nents, i)
+		ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs);
+}
+
+/**
+ * arm_dma_sync_sg_for_cpu
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @sg: list of buffers
+ * @nents: number of buffers to map (returned from dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ */
+void arm_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+			int nents, enum dma_data_direction dir)
+{
+	const struct dma_map_ops *ops = get_dma_ops(dev);
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i)
+		ops->sync_single_for_cpu(dev, sg_dma_address(s), s->length,
+					 dir);
+}
+
+/**
+ * arm_dma_sync_sg_for_device
+ * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices
+ * @sg: list of buffers
+ * @nents: number of buffers to map (returned from dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ */
+void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+			int nents, enum dma_data_direction dir)
+{
+	const struct dma_map_ops *ops = get_dma_ops(dev);
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i)
+		ops->sync_single_for_device(dev, sg_dma_address(s), s->length,
+					    dir);
+}
+
+/*
+ * Return whether the given device DMA address mask can be supported
+ * properly.  For example, if your device can only drive the low 24-bits
+ * during bus mastering, then you would pass 0x00ffffff as the mask
+ * to this function.
+ */
+int arm_dma_supported(struct device *dev, u64 mask)
+{
+	return __dma_supported(dev, mask, false);
+}
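+
+/*
+ * Illustrative sketch (hypothetical driver code) matching the 24-bit
+ * example above: declaring the narrow mask at probe time routes the
+ * check through arm_dma_supported().
+ */
+static inline int example_probe_mask(struct device *dev)
+{
+	return dma_set_mask_and_coherent(dev, DMA_BIT_MASK(24));
+}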
+
+static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent)
+{
+	return coherent ? &arm_coherent_dma_ops : &arm_dma_ops;
+}
+
+#ifdef CONFIG_ARM_DMA_USE_IOMMU
+
+static int __dma_info_to_prot(enum dma_data_direction dir, unsigned long attrs)
+{
+	int prot = 0;
+
+	if (attrs & DMA_ATTR_PRIVILEGED)
+		prot |= IOMMU_PRIV;
+
+	switch (dir) {
+	case DMA_BIDIRECTIONAL:
+		return prot | IOMMU_READ | IOMMU_WRITE;
+	case DMA_TO_DEVICE:
+		return prot | IOMMU_READ;
+	case DMA_FROM_DEVICE:
+		return prot | IOMMU_WRITE;
+	default:
+		return prot;
+	}
+}
+
+/* IOMMU */
+
+static int extend_iommu_mapping(struct dma_iommu_mapping *mapping);
+
+static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping,
+				      size_t size)
+{
+	unsigned int order = get_order(size);
+	unsigned int align = 0;
+	unsigned int count, start;
+	size_t mapping_size = mapping->bits << PAGE_SHIFT;
+	unsigned long flags;
+	dma_addr_t iova;
+	int i;
+
+	if (order > CONFIG_ARM_DMA_IOMMU_ALIGNMENT)
+		order = CONFIG_ARM_DMA_IOMMU_ALIGNMENT;
+
+	count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	align = (1 << order) - 1;
+
+	spin_lock_irqsave(&mapping->lock, flags);
+	for (i = 0; i < mapping->nr_bitmaps; i++) {
+		start = bitmap_find_next_zero_area(mapping->bitmaps[i],
+				mapping->bits, 0, count, align);
+
+		if (start > mapping->bits)
+			continue;
+
+		bitmap_set(mapping->bitmaps[i], start, count);
+		break;
+	}
+
+	/*
+	 * No unused range found. Try to extend the existing mapping
+	 * and perform a second attempt to reserve an IO virtual
+	 * address range of size bytes.
+	 */
+	if (i == mapping->nr_bitmaps) {
+		if (extend_iommu_mapping(mapping)) {
+			spin_unlock_irqrestore(&mapping->lock, flags);
+			return ARM_MAPPING_ERROR;
+		}
+
+		start = bitmap_find_next_zero_area(mapping->bitmaps[i],
+				mapping->bits, 0, count, align);
+
+		if (start > mapping->bits) {
+			spin_unlock_irqrestore(&mapping->lock, flags);
+			return ARM_MAPPING_ERROR;
+		}
+
+		bitmap_set(mapping->bitmaps[i], start, count);
+	}
+	spin_unlock_irqrestore(&mapping->lock, flags);
+
+	iova = mapping->base + (mapping_size * i);
+	iova += start << PAGE_SHIFT;
+
+	return iova;
+}
+
+static inline void __free_iova(struct dma_iommu_mapping *mapping,
+			       dma_addr_t addr, size_t size)
+{
+	unsigned int start, count;
+	size_t mapping_size = mapping->bits << PAGE_SHIFT;
+	unsigned long flags;
+	dma_addr_t bitmap_base;
+	u32 bitmap_index;
+
+	if (!size)
+		return;
+
+	bitmap_index = (u32) (addr - mapping->base) / (u32) mapping_size;
+	BUG_ON(addr < mapping->base || bitmap_index > mapping->extensions);
+
+	bitmap_base = mapping->base + mapping_size * bitmap_index;
+
+	start = (addr - bitmap_base) >>	PAGE_SHIFT;
+
+	if (addr + size > bitmap_base + mapping_size) {
+		/*
+		 * The address range to be freed reaches into the iova
+		 * range of the next bitmap. This should not happen as
+		 * we don't allow this in __alloc_iova (at the
+		 * moment).
+		 */
+		BUG();
+	} else
+		count = size >> PAGE_SHIFT;
+
+	spin_lock_irqsave(&mapping->lock, flags);
+	bitmap_clear(mapping->bitmaps[bitmap_index], start, count);
+	spin_unlock_irqrestore(&mapping->lock, flags);
+}
+
+/* We'll try 2M, 1M, 64K, and finally 4K; array must end with 0! */
+static const int iommu_order_array[] = { 9, 8, 4, 0 };
+
+static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
+					  gfp_t gfp, unsigned long attrs,
+					  int coherent_flag)
+{
+	struct page **pages;
+	int count = size >> PAGE_SHIFT;
+	int array_size = count * sizeof(struct page *);
+	int i = 0;
+	int order_idx = 0;
+
+	if (array_size <= PAGE_SIZE)
+		pages = kzalloc(array_size, GFP_KERNEL);
+	else
+		pages = vzalloc(array_size);
+	if (!pages)
+		return NULL;
+
+	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
+		unsigned long order = get_order(size);
+		struct page *page;
+
+		page = dma_alloc_from_contiguous(dev, count, order,
+						 gfp & __GFP_NOWARN);
+		if (!page)
+			goto error;
+
+		__dma_clear_buffer(page, size, coherent_flag);
+
+		for (i = 0; i < count; i++)
+			pages[i] = page + i;
+
+		return pages;
+	}
+
+	/* Go straight to 4K chunks if caller says it's OK. */
+	if (attrs & DMA_ATTR_ALLOC_SINGLE_PAGES)
+		order_idx = ARRAY_SIZE(iommu_order_array) - 1;
+
+	/*
+	 * IOMMU can map any pages, so highmem can also be used here.
+	 */
+	gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
+
+	while (count) {
+		int j, order;
+
+		order = iommu_order_array[order_idx];
+
+		/* Drop down when we get small */
+		if (__fls(count) < order) {
+			order_idx++;
+			continue;
+		}
+
+		if (order) {
+			/* See if it's easy to allocate a high-order chunk */
+			pages[i] = alloc_pages(gfp | __GFP_NORETRY, order);
+
+			/* Go down a notch at first sign of pressure */
+			if (!pages[i]) {
+				order_idx++;
+				continue;
+			}
+		} else {
+			pages[i] = alloc_pages(gfp, 0);
+			if (!pages[i])
+				goto error;
+		}
+
+		if (order) {
+			split_page(pages[i], order);
+			j = 1 << order;
+			while (--j)
+				pages[i + j] = pages[i] + j;
+		}
+
+		__dma_clear_buffer(pages[i], PAGE_SIZE << order, coherent_flag);
+		i += 1 << order;
+		count -= 1 << order;
+	}
+
+	return pages;
+error:
+	while (i--)
+		if (pages[i])
+			__free_pages(pages[i], 0);
+	kvfree(pages);
+	return NULL;
+}
+
+static int __iommu_free_buffer(struct device *dev, struct page **pages,
+			       size_t size, unsigned long attrs)
+{
+	int count = size >> PAGE_SHIFT;
+	int i;
+
+	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
+		dma_release_from_contiguous(dev, pages[0], count);
+	} else {
+		for (i = 0; i < count; i++)
+			if (pages[i])
+				__free_pages(pages[i], 0);
+	}
+
+	kvfree(pages);
+	return 0;
+}
+
+/*
+ * Create a CPU mapping for the specified pages
+ */
+static void *
+__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot,
+		    const void *caller)
+{
+	return dma_common_pages_remap(pages, size,
+			VM_ARM_DMA_CONSISTENT | VM_USERMAP, prot, caller);
+}
+
+/*
+ * Create a mapping in device IO address space for the specified pages
+ */
+static dma_addr_t
+__iommu_create_mapping(struct device *dev, struct page **pages, size_t size,
+		       unsigned long attrs)
+{
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
+	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	dma_addr_t dma_addr, iova;
+	int i;
+
+	dma_addr = __alloc_iova(mapping, size);
+	if (dma_addr == ARM_MAPPING_ERROR)
+		return dma_addr;
+
+	iova = dma_addr;
+	for (i = 0; i < count; ) {
+		int ret;
+
+		unsigned int next_pfn = page_to_pfn(pages[i]) + 1;
+		phys_addr_t phys = page_to_phys(pages[i]);
+		unsigned int len, j;
+
+		for (j = i + 1; j < count; j++, next_pfn++)
+			if (page_to_pfn(pages[j]) != next_pfn)
+				break;
+
+		len = (j - i) << PAGE_SHIFT;
+		ret = iommu_map(mapping->domain, iova, phys, len,
+				__dma_info_to_prot(DMA_BIDIRECTIONAL, attrs));
+		if (ret < 0)
+			goto fail;
+		iova += len;
+		i = j;
+	}
+	return dma_addr;
+fail:
+	iommu_unmap(mapping->domain, dma_addr, iova-dma_addr);
+	__free_iova(mapping, dma_addr, size);
+	return ARM_MAPPING_ERROR;
+}
+
+static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size)
+{
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
+
+	/*
+	 * add optional in-page offset from iova to size and align
+	 * result to page size
+	 */
+	size = PAGE_ALIGN((iova & ~PAGE_MASK) + size);
+	iova &= PAGE_MASK;
+
+	iommu_unmap(mapping->domain, iova, size);
+	__free_iova(mapping, iova, size);
+	return 0;
+}
+
+static struct page **__atomic_get_pages(void *addr)
+{
+	struct page *page;
+	phys_addr_t phys;
+
+	phys = gen_pool_virt_to_phys(atomic_pool, (unsigned long)addr);
+	page = phys_to_page(phys);
+
+	return (struct page **)page;
+}
+
+static struct page **__iommu_get_pages(void *cpu_addr, unsigned long attrs)
+{
+	struct vm_struct *area;
+
+	if (__in_atomic_pool(cpu_addr, PAGE_SIZE))
+		return __atomic_get_pages(cpu_addr);
+
+	if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
+		return cpu_addr;
+
+	area = find_vm_area(cpu_addr);
+	if (area && (area->flags & VM_ARM_DMA_CONSISTENT))
+		return area->pages;
+	return NULL;
+}
+
+static void *__iommu_alloc_simple(struct device *dev, size_t size, gfp_t gfp,
+				  dma_addr_t *handle, int coherent_flag,
+				  unsigned long attrs)
+{
+	struct page *page;
+	void *addr;
+
+	if (coherent_flag == COHERENT)
+		addr = __alloc_simple_buffer(dev, size, gfp, &page);
+	else
+		addr = __alloc_from_pool(size, &page);
+	if (!addr)
+		return NULL;
+
+	*handle = __iommu_create_mapping(dev, &page, size, attrs);
+	if (*handle == ARM_MAPPING_ERROR)
+		goto err_mapping;
+
+	return addr;
+
+err_mapping:
+	__free_from_pool(addr, size);
+	return NULL;
+}
+
+static void __iommu_free_atomic(struct device *dev, void *cpu_addr,
+			dma_addr_t handle, size_t size, int coherent_flag)
+{
+	__iommu_remove_mapping(dev, handle, size);
+	if (coherent_flag == COHERENT)
+		__dma_free_buffer(virt_to_page(cpu_addr), size);
+	else
+		__free_from_pool(cpu_addr, size);
+}
+
+static void *__arm_iommu_alloc_attrs(struct device *dev, size_t size,
+	    dma_addr_t *handle, gfp_t gfp, unsigned long attrs,
+	    int coherent_flag)
+{
+	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL);
+	struct page **pages;
+	void *addr = NULL;
+
+	*handle = ARM_MAPPING_ERROR;
+	size = PAGE_ALIGN(size);
+
+	if (coherent_flag == COHERENT || !gfpflags_allow_blocking(gfp))
+		return __iommu_alloc_simple(dev, size, gfp, handle,
+					    coherent_flag, attrs);
+
+	/*
+	 * Following is a work-around (a.k.a. hack) to prevent pages
+	 * with __GFP_COMP being passed to split_page() which cannot
+	 * handle them.  The real problem is that this flag probably
+	 * should be 0 on ARM as it is not supported on this
+	 * platform; see CONFIG_HUGETLBFS.
+	 */
+	gfp &= ~(__GFP_COMP);
+
+	pages = __iommu_alloc_buffer(dev, size, gfp, attrs, coherent_flag);
+	if (!pages)
+		return NULL;
+
+	*handle = __iommu_create_mapping(dev, pages, size, attrs);
+	if (*handle == ARM_MAPPING_ERROR)
+		goto err_buffer;
+
+	if (attrs & DMA_ATTR_NO_KERNEL_MAPPING)
+		return pages;
+
+	addr = __iommu_alloc_remap(pages, size, gfp, prot,
+				   __builtin_return_address(0));
+	if (!addr)
+		goto err_mapping;
+
+	return addr;
+
+err_mapping:
+	__iommu_remove_mapping(dev, *handle, size);
+err_buffer:
+	__iommu_free_buffer(dev, pages, size, attrs);
+	return NULL;
+}
+
+static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
+	    dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
+{
+	return __arm_iommu_alloc_attrs(dev, size, handle, gfp, attrs, NORMAL);
+}
+
+static void *arm_coherent_iommu_alloc_attrs(struct device *dev, size_t size,
+		    dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
+{
+	return __arm_iommu_alloc_attrs(dev, size, handle, gfp, attrs, COHERENT);
+}
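+
+/*
+ * Illustrative sketch (assumed driver code, not part of this file): once
+ * a device has been attached to an IOMMU mapping, the two allocators
+ * above are reached through the generic DMA API:
+ *
+ *	dma_addr_t dma;
+ *	void *cpu = dma_alloc_coherent(dev, SZ_64K, &dma, GFP_KERNEL);
+ *
+ *	if (!cpu)
+ *		return -ENOMEM;
+ *	...
+ *	dma_free_coherent(dev, SZ_64K, cpu, dma);
+ */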
+
+static int __arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
+		    void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		    unsigned long attrs)
+{
+	unsigned long uaddr = vma->vm_start;
+	unsigned long usize = vma->vm_end - vma->vm_start;
+	struct page **pages = __iommu_get_pages(cpu_addr, attrs);
+	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned long off = vma->vm_pgoff;
+
+	if (!pages)
+		return -ENXIO;
+
+	if (off >= nr_pages || (usize >> PAGE_SHIFT) > nr_pages - off)
+		return -ENXIO;
+
+	pages += off;
+
+	do {
+		int ret = vm_insert_page(vma, uaddr, *pages++);
+		if (ret) {
+			pr_err("Remapping memory failed: %d\n", ret);
+			return ret;
+		}
+		uaddr += PAGE_SIZE;
+		usize -= PAGE_SIZE;
+	} while (usize > 0);
+
+	return 0;
+}
+
+static int arm_iommu_mmap_attrs(struct device *dev,
+		struct vm_area_struct *vma, void *cpu_addr,
+		dma_addr_t dma_addr, size_t size, unsigned long attrs)
+{
+	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
+
+	return __arm_iommu_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, attrs);
+}
+
+static int arm_coherent_iommu_mmap_attrs(struct device *dev,
+		struct vm_area_struct *vma, void *cpu_addr,
+		dma_addr_t dma_addr, size_t size, unsigned long attrs)
+{
+	return __arm_iommu_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, attrs);
+}
+
+/*
+ * Free a buffer as defined by the above mapping.
+ * Must not be called with IRQs disabled.
+ */
+void __arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
+	dma_addr_t handle, unsigned long attrs, int coherent_flag)
+{
+	struct page **pages;
+	size = PAGE_ALIGN(size);
+
+	if (coherent_flag == COHERENT || __in_atomic_pool(cpu_addr, size)) {
+		__iommu_free_atomic(dev, cpu_addr, handle, size, coherent_flag);
+		return;
+	}
+
+	pages = __iommu_get_pages(cpu_addr, attrs);
+	if (!pages) {
+		WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
+		return;
+	}
+
+	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) == 0) {
+		dma_common_free_remap(cpu_addr, size,
+			VM_ARM_DMA_CONSISTENT | VM_USERMAP);
+	}
+
+	__iommu_remove_mapping(dev, handle, size);
+	__iommu_free_buffer(dev, pages, size, attrs);
+}
+
+void arm_iommu_free_attrs(struct device *dev, size_t size,
+		    void *cpu_addr, dma_addr_t handle, unsigned long attrs)
+{
+	__arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, NORMAL);
+}
+
+void arm_coherent_iommu_free_attrs(struct device *dev, size_t size,
+		    void *cpu_addr, dma_addr_t handle, unsigned long attrs)
+{
+	__arm_iommu_free_attrs(dev, size, cpu_addr, handle, attrs, COHERENT);
+}
+
+static int arm_iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
+				 void *cpu_addr, dma_addr_t dma_addr,
+				 size_t size, unsigned long attrs)
+{
+	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	struct page **pages = __iommu_get_pages(cpu_addr, attrs);
+
+	if (!pages)
+		return -ENXIO;
+
+	return sg_alloc_table_from_pages(sgt, pages, count, 0, size,
+					 GFP_KERNEL);
+}
+
+/*
+ * Map a part of the scatter-gather list into contiguous IO address space
+ */
+static int __map_sg_chunk(struct device *dev, struct scatterlist *sg,
+			  size_t size, dma_addr_t *handle,
+			  enum dma_data_direction dir, unsigned long attrs,
+			  bool is_coherent)
+{
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
+	dma_addr_t iova, iova_base;
+	int ret = 0;
+	unsigned int count;
+	struct scatterlist *s;
+	int prot;
+
+	size = PAGE_ALIGN(size);
+	*handle = ARM_MAPPING_ERROR;
+
+	iova_base = iova = __alloc_iova(mapping, size);
+	if (iova == ARM_MAPPING_ERROR)
+		return -ENOMEM;
+
+	for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s)) {
+		phys_addr_t phys = page_to_phys(sg_page(s));
+		unsigned int len = PAGE_ALIGN(s->offset + s->length);
+
+		if (!is_coherent && (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
+			__dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
+
+		prot = __dma_info_to_prot(dir, attrs);
+
+		ret = iommu_map(mapping->domain, iova, phys, len, prot);
+		if (ret < 0)
+			goto fail;
+		count += len >> PAGE_SHIFT;
+		iova += len;
+	}
+	*handle = iova_base;
+
+	return 0;
+fail:
+	iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE);
+	__free_iova(mapping, iova_base, size);
+	return ret;
+}
+
+static int __iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+		     enum dma_data_direction dir, unsigned long attrs,
+		     bool is_coherent)
+{
+	struct scatterlist *s = sg, *dma = sg, *start = sg;
+	int i, count = 0;
+	unsigned int offset = s->offset;
+	unsigned int size = s->offset + s->length;
+	unsigned int max = dma_get_max_seg_size(dev);
+
+	for (i = 1; i < nents; i++) {
+		s = sg_next(s);
+
+		s->dma_address = ARM_MAPPING_ERROR;
+		s->dma_length = 0;
+
+		if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) {
+			if (__map_sg_chunk(dev, start, size, &dma->dma_address,
+			    dir, attrs, is_coherent) < 0)
+				goto bad_mapping;
+
+			dma->dma_address += offset;
+			dma->dma_length = size - offset;
+
+			size = offset = s->offset;
+			start = s;
+			dma = sg_next(dma);
+			count += 1;
+		}
+		size += s->length;
+	}
+	if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir, attrs,
+		is_coherent) < 0)
+		goto bad_mapping;
+
+	dma->dma_address += offset;
+	dma->dma_length = size - offset;
+
+	return count+1;
+
+bad_mapping:
+	for_each_sg(sg, s, count, i)
+		__iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s));
+	return 0;
+}
+
+/**
+ * arm_coherent_iommu_map_sg - map a set of SG buffers for streaming mode DMA
+ * @dev: valid struct device pointer
+ * @sg: list of buffers
+ * @nents: number of buffers to map
+ * @dir: DMA transfer direction
+ *
+ * Map a set of I/O-coherent buffers described by a scatterlist in streaming
+ * mode for DMA. The scatter-gather list elements are merged together (if
+ * possible) and tagged with the appropriate dma address and length. They are
+ * obtained via sg_dma_{address,length}.
+ */
+int arm_coherent_iommu_map_sg(struct device *dev, struct scatterlist *sg,
+		int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+	return __iommu_map_sg(dev, sg, nents, dir, attrs, true);
+}
+
+/**
+ * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA
+ * @dev: valid struct device pointer
+ * @sg: list of buffers
+ * @nents: number of buffers to map
+ * @dir: DMA transfer direction
+ *
+ * Map a set of buffers described by a scatterlist in streaming mode for DMA.
+ * The scatter-gather list elements are merged together (if possible) and
+ * tagged with the appropriate dma address and length. They are obtained via
+ * sg_dma_{address,length}.
+ */
+int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg,
+		int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+	return __iommu_map_sg(dev, sg, nents, dir, attrs, false);
+}
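+
+/*
+ * Illustrative sketch (assumed driver code): with these ops installed a
+ * driver calls the generic dma_map_sg() and consumes the merged segments
+ * via sg_dma_address()/sg_dma_len():
+ *
+ *	struct scatterlist *s;
+ *	int i, count;
+ *
+ *	count = dma_map_sg(dev, sgl, nents, DMA_TO_DEVICE);
+ *	for_each_sg(sgl, s, count, i)
+ *		hw_queue_desc(sg_dma_address(s), sg_dma_len(s));
+ *	dma_unmap_sg(dev, sgl, nents, DMA_TO_DEVICE);
+ *
+ * (hw_queue_desc() is a hypothetical device-specific helper)
+ */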
+
+static void __iommu_unmap_sg(struct device *dev, struct scatterlist *sg,
+		int nents, enum dma_data_direction dir,
+		unsigned long attrs, bool is_coherent)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		if (sg_dma_len(s))
+			__iommu_remove_mapping(dev, sg_dma_address(s),
+					       sg_dma_len(s));
+		if (!is_coherent && (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
+			__dma_page_dev_to_cpu(sg_page(s), s->offset,
+					      s->length, dir);
+	}
+}
+
+/**
+ * arm_coherent_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
+ * @dev: valid struct device pointer
+ * @sg: list of buffers
+ * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ *
+ * Unmap a set of streaming mode DMA translations.  Again, CPU access
+ * rules concerning calls here are the same as for dma_unmap_single().
+ */
+void arm_coherent_iommu_unmap_sg(struct device *dev, struct scatterlist *sg,
+		int nents, enum dma_data_direction dir,
+		unsigned long attrs)
+{
+	__iommu_unmap_sg(dev, sg, nents, dir, attrs, true);
+}
+
+/**
+ * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg
+ * @dev: valid struct device pointer
+ * @sg: list of buffers
+ * @nents: number of buffers to unmap (same as was passed to dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ *
+ * Unmap a set of streaming mode DMA translations.  Again, CPU access
+ * rules concerning calls here are the same as for dma_unmap_single().
+ */
+void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
+			enum dma_data_direction dir,
+			unsigned long attrs)
+{
+	__iommu_unmap_sg(dev, sg, nents, dir, attrs, false);
+}
+
+/**
+ * arm_iommu_sync_sg_for_cpu
+ * @dev: valid struct device pointer
+ * @sg: list of buffers
+ * @nents: number of buffers to sync (same as was passed to dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ */
+void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+			int nents, enum dma_data_direction dir)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i)
+		__dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir);
+}
+
+/**
+ * arm_iommu_sync_sg_for_device
+ * @dev: valid struct device pointer
+ * @sg: list of buffers
+ * @nents: number of buffers to sync (same as was passed to dma_map_sg)
+ * @dir: DMA transfer direction (same as was passed to dma_map_sg)
+ */
+void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+			int nents, enum dma_data_direction dir)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i)
+		__dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir);
+}
+
+/**
+ * arm_coherent_iommu_map_page
+ * @dev: valid struct device pointer
+ * @page: page that buffer resides in
+ * @offset: offset into page for start of buffer
+ * @size: size of buffer to map
+ * @dir: DMA transfer direction
+ *
+ * Coherent IOMMU aware version of arm_dma_map_page()
+ */
+static dma_addr_t arm_coherent_iommu_map_page(struct device *dev, struct page *page,
+	     unsigned long offset, size_t size, enum dma_data_direction dir,
+	     unsigned long attrs)
+{
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
+	dma_addr_t dma_addr;
+	int ret, prot, len = PAGE_ALIGN(size + offset);
+
+	dma_addr = __alloc_iova(mapping, len);
+	if (dma_addr == ARM_MAPPING_ERROR)
+		return dma_addr;
+
+	prot = __dma_info_to_prot(dir, attrs);
+
+	ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, prot);
+	if (ret < 0)
+		goto fail;
+
+	return dma_addr + offset;
+fail:
+	__free_iova(mapping, dma_addr, len);
+	return ARM_MAPPING_ERROR;
+}
+
+/**
+ * arm_iommu_map_page
+ * @dev: valid struct device pointer
+ * @page: page that buffer resides in
+ * @offset: offset into page for start of buffer
+ * @size: size of buffer to map
+ * @dir: DMA transfer direction
+ *
+ * IOMMU aware version of arm_dma_map_page()
+ */
+static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page,
+	     unsigned long offset, size_t size, enum dma_data_direction dir,
+	     unsigned long attrs)
+{
+	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
+		__dma_page_cpu_to_dev(page, offset, size, dir);
+
+	return arm_coherent_iommu_map_page(dev, page, offset, size, dir, attrs);
+}
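+
+/*
+ * Illustrative sketch: a single page reaches this path through the
+ * generic API, e.g.
+ *
+ *	dma_addr_t dma = dma_map_page(dev, page, 0, PAGE_SIZE,
+ *				      DMA_FROM_DEVICE);
+ *
+ *	if (dma_mapping_error(dev, dma))
+ *		return -ENOMEM;
+ */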
+
+/**
+ * arm_coherent_iommu_unmap_page
+ * @dev: valid struct device pointer
+ * @handle: DMA address of buffer
+ * @size: size of buffer (same as passed to dma_map_page)
+ * @dir: DMA transfer direction (same as passed to dma_map_page)
+ *
+ * Coherent IOMMU aware version of arm_dma_unmap_page()
+ */
+static void arm_coherent_iommu_unmap_page(struct device *dev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
+	dma_addr_t iova = handle & PAGE_MASK;
+	int offset = handle & ~PAGE_MASK;
+	int len = PAGE_ALIGN(size + offset);
+
+	if (!iova)
+		return;
+
+	iommu_unmap(mapping->domain, iova, len);
+	__free_iova(mapping, iova, len);
+}
+
+/**
+ * arm_iommu_unmap_page
+ * @dev: valid struct device pointer
+ * @handle: DMA address of buffer
+ * @size: size of buffer (same as passed to dma_map_page)
+ * @dir: DMA transfer direction (same as passed to dma_map_page)
+ *
+ * IOMMU aware version of arm_dma_unmap_page()
+ */
+static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle,
+		size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
+	dma_addr_t iova = handle & PAGE_MASK;
+	struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
+	int offset = handle & ~PAGE_MASK;
+	int len = PAGE_ALIGN(size + offset);
+
+	if (!iova)
+		return;
+
+	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
+		__dma_page_dev_to_cpu(page, offset, size, dir);
+
+	iommu_unmap(mapping->domain, iova, len);
+	__free_iova(mapping, iova, len);
+}
+
+/**
+ * arm_iommu_map_resource - map a device resource for DMA
+ * @dev: valid struct device pointer
+ * @phys_addr: physical address of resource
+ * @size: size of resource to map
+ * @dir: DMA transfer direction
+ */
+static dma_addr_t arm_iommu_map_resource(struct device *dev,
+		phys_addr_t phys_addr, size_t size,
+		enum dma_data_direction dir, unsigned long attrs)
+{
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
+	dma_addr_t dma_addr;
+	int ret, prot;
+	phys_addr_t addr = phys_addr & PAGE_MASK;
+	unsigned int offset = phys_addr & ~PAGE_MASK;
+	size_t len = PAGE_ALIGN(size + offset);
+
+	dma_addr = __alloc_iova(mapping, len);
+	if (dma_addr == ARM_MAPPING_ERROR)
+		return dma_addr;
+
+	prot = __dma_info_to_prot(dir, attrs) | IOMMU_MMIO;
+
+	ret = iommu_map(mapping->domain, dma_addr, addr, len, prot);
+	if (ret < 0)
+		goto fail;
+
+	return dma_addr + offset;
+fail:
+	__free_iova(mapping, dma_addr, len);
+	return ARM_MAPPING_ERROR;
+}
+
+/**
+ * arm_iommu_unmap_resource - unmap a device DMA resource
+ * @dev: valid struct device pointer
+ * @dma_handle: DMA address of resource
+ * @size: size of resource to unmap
+ * @dir: DMA transfer direction
+ */
+static void arm_iommu_unmap_resource(struct device *dev, dma_addr_t dma_handle,
+		size_t size, enum dma_data_direction dir,
+		unsigned long attrs)
+{
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
+	dma_addr_t iova = dma_handle & PAGE_MASK;
+	unsigned int offset = dma_handle & ~PAGE_MASK;
+	size_t len = PAGE_ALIGN(size + offset);
+
+	if (!iova)
+		return;
+
+	iommu_unmap(mapping->domain, iova, len);
+	__free_iova(mapping, iova, len);
+}
+
+static void arm_iommu_sync_single_for_cpu(struct device *dev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
+	dma_addr_t iova = handle & PAGE_MASK;
+	struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
+	unsigned int offset = handle & ~PAGE_MASK;
+
+	if (!iova)
+		return;
+
+	__dma_page_dev_to_cpu(page, offset, size, dir);
+}
+
+static void arm_iommu_sync_single_for_device(struct device *dev,
+		dma_addr_t handle, size_t size, enum dma_data_direction dir)
+{
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
+	dma_addr_t iova = handle & PAGE_MASK;
+	struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova));
+	unsigned int offset = handle & ~PAGE_MASK;
+
+	if (!iova)
+		return;
+
+	__dma_page_cpu_to_dev(page, offset, size, dir);
+}
+
+const struct dma_map_ops iommu_ops = {
+	.alloc		= arm_iommu_alloc_attrs,
+	.free		= arm_iommu_free_attrs,
+	.mmap		= arm_iommu_mmap_attrs,
+	.get_sgtable	= arm_iommu_get_sgtable,
+
+	.map_page		= arm_iommu_map_page,
+	.unmap_page		= arm_iommu_unmap_page,
+	.sync_single_for_cpu	= arm_iommu_sync_single_for_cpu,
+	.sync_single_for_device	= arm_iommu_sync_single_for_device,
+
+	.map_sg			= arm_iommu_map_sg,
+	.unmap_sg		= arm_iommu_unmap_sg,
+	.sync_sg_for_cpu	= arm_iommu_sync_sg_for_cpu,
+	.sync_sg_for_device	= arm_iommu_sync_sg_for_device,
+
+	.map_resource		= arm_iommu_map_resource,
+	.unmap_resource		= arm_iommu_unmap_resource,
+
+	.mapping_error		= arm_dma_mapping_error,
+	.dma_supported		= arm_dma_supported,
+};
+
+const struct dma_map_ops iommu_coherent_ops = {
+	.alloc		= arm_coherent_iommu_alloc_attrs,
+	.free		= arm_coherent_iommu_free_attrs,
+	.mmap		= arm_coherent_iommu_mmap_attrs,
+	.get_sgtable	= arm_iommu_get_sgtable,
+
+	.map_page	= arm_coherent_iommu_map_page,
+	.unmap_page	= arm_coherent_iommu_unmap_page,
+
+	.map_sg		= arm_coherent_iommu_map_sg,
+	.unmap_sg	= arm_coherent_iommu_unmap_sg,
+
+	.map_resource	= arm_iommu_map_resource,
+	.unmap_resource	= arm_iommu_unmap_resource,
+
+	.mapping_error		= arm_dma_mapping_error,
+	.dma_supported		= arm_dma_supported,
+};
+
+/**
+ * arm_iommu_create_mapping
+ * @bus: pointer to the bus holding the client device (for IOMMU calls)
+ * @base: start address of the valid IO address space
+ * @size: maximum size of the valid IO address space
+ *
+ * Creates a mapping structure which holds information about used/unused
+ * IO address ranges; this is required to perform memory allocation and
+ * mapping with the IOMMU-aware functions.
+ *
+ * The client device needs to be attached to the mapping with the
+ * arm_iommu_attach_device() function.
+ */
+struct dma_iommu_mapping *
+arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, u64 size)
+{
+	unsigned int bits = size >> PAGE_SHIFT;
+	unsigned int bitmap_size = BITS_TO_LONGS(bits) * sizeof(long);
+	struct dma_iommu_mapping *mapping;
+	int extensions = 1;
+	int err = -ENOMEM;
+
+	/* currently only 32-bit DMA address space is supported */
+	if (size > DMA_BIT_MASK(32) + 1)
+		return ERR_PTR(-ERANGE);
+
+	if (!bitmap_size)
+		return ERR_PTR(-EINVAL);
+
+	if (bitmap_size > PAGE_SIZE) {
+		extensions = bitmap_size / PAGE_SIZE;
+		bitmap_size = PAGE_SIZE;
+	}
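+	/*
+	 * Illustrative example: size = SZ_1G with 4K pages gives
+	 * bits = 256Ki, i.e. a 32K bitmap, so eight 4K bitmap
+	 * extensions are used instead, allocated on demand by
+	 * extend_iommu_mapping().
+	 */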
+
+	mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL);
+	if (!mapping)
+		goto err;
+
+	mapping->bitmap_size = bitmap_size;
+	mapping->bitmaps = kcalloc(extensions, sizeof(unsigned long *),
+				   GFP_KERNEL);
+	if (!mapping->bitmaps)
+		goto err2;
+
+	mapping->bitmaps[0] = kzalloc(bitmap_size, GFP_KERNEL);
+	if (!mapping->bitmaps[0])
+		goto err3;
+
+	mapping->nr_bitmaps = 1;
+	mapping->extensions = extensions;
+	mapping->base = base;
+	mapping->bits = BITS_PER_BYTE * bitmap_size;
+
+	spin_lock_init(&mapping->lock);
+
+	mapping->domain = iommu_domain_alloc(bus);
+	if (!mapping->domain)
+		goto err4;
+
+	kref_init(&mapping->kref);
+	return mapping;
+err4:
+	kfree(mapping->bitmaps[0]);
+err3:
+	kfree(mapping->bitmaps);
+err2:
+	kfree(mapping);
+err:
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(arm_iommu_create_mapping);
+
+static void release_iommu_mapping(struct kref *kref)
+{
+	int i;
+	struct dma_iommu_mapping *mapping =
+		container_of(kref, struct dma_iommu_mapping, kref);
+
+	iommu_domain_free(mapping->domain);
+	for (i = 0; i < mapping->nr_bitmaps; i++)
+		kfree(mapping->bitmaps[i]);
+	kfree(mapping->bitmaps);
+	kfree(mapping);
+}
+
+static int extend_iommu_mapping(struct dma_iommu_mapping *mapping)
+{
+	int next_bitmap;
+
+	if (mapping->nr_bitmaps >= mapping->extensions)
+		return -EINVAL;
+
+	next_bitmap = mapping->nr_bitmaps;
+	mapping->bitmaps[next_bitmap] = kzalloc(mapping->bitmap_size,
+						GFP_ATOMIC);
+	if (!mapping->bitmaps[next_bitmap])
+		return -ENOMEM;
+
+	mapping->nr_bitmaps++;
+
+	return 0;
+}
+
+void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping)
+{
+	if (mapping)
+		kref_put(&mapping->kref, release_iommu_mapping);
+}
+EXPORT_SYMBOL_GPL(arm_iommu_release_mapping);
+
+static int __arm_iommu_attach_device(struct device *dev,
+				     struct dma_iommu_mapping *mapping)
+{
+	int err;
+
+	err = iommu_attach_device(mapping->domain, dev);
+	if (err)
+		return err;
+
+	kref_get(&mapping->kref);
+	to_dma_iommu_mapping(dev) = mapping;
+
+	pr_debug("Attached IOMMU controller to %s device.\n", dev_name(dev));
+	return 0;
+}
+
+/**
+ * arm_iommu_attach_device
+ * @dev: valid struct device pointer
+ * @mapping: io address space mapping structure (returned from
+ *	arm_iommu_create_mapping)
+ *
+ * Attaches the specified IO address space mapping to the provided device.
+ * This replaces the dma operations (dma_map_ops pointer) with the
+ * IOMMU aware version.
+ *
+ * More than one client might be attached to the same io address space
+ * mapping.
+ */
+int arm_iommu_attach_device(struct device *dev,
+			    struct dma_iommu_mapping *mapping)
+{
+	int err;
+
+	err = __arm_iommu_attach_device(dev, mapping);
+	if (err)
+		return err;
+
+	set_dma_ops(dev, &iommu_ops);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(arm_iommu_attach_device);
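+
+/*
+ * Illustrative sketch (assumed driver/platform code, not part of this
+ * file): create a 128M IO address space at 0x80000000 and attach the
+ * device to it:
+ *
+ *	struct dma_iommu_mapping *mapping;
+ *
+ *	mapping = arm_iommu_create_mapping(&platform_bus_type,
+ *					   0x80000000, SZ_128M);
+ *	if (IS_ERR(mapping))
+ *		return PTR_ERR(mapping);
+ *
+ *	if (arm_iommu_attach_device(dev, mapping)) {
+ *		arm_iommu_release_mapping(mapping);
+ *		return -ENODEV;
+ *	}
+ *
+ * After this, DMA API calls on the device go through iommu_ops above.
+ */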
+
+/**
+ * arm_iommu_detach_device
+ * @dev: valid struct device pointer
+ *
+ * Detaches the provided device from a previously attached mapping and
+ * restores the default dma operations (dma_map_ops pointer).
+ */
+void arm_iommu_detach_device(struct device *dev)
+{
+	struct dma_iommu_mapping *mapping;
+
+	mapping = to_dma_iommu_mapping(dev);
+	if (!mapping) {
+		dev_warn(dev, "Not attached\n");
+		return;
+	}
+
+	iommu_detach_device(mapping->domain, dev);
+	kref_put(&mapping->kref, release_iommu_mapping);
+	to_dma_iommu_mapping(dev) = NULL;
+	set_dma_ops(dev, arm_get_dma_map_ops(dev->archdata.dma_coherent));
+
+	pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev));
+}
+EXPORT_SYMBOL_GPL(arm_iommu_detach_device);
+
+static const struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent)
+{
+	return coherent ? &iommu_coherent_ops : &iommu_ops;
+}
+
+static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
+				    const struct iommu_ops *iommu)
+{
+	struct dma_iommu_mapping *mapping;
+
+	if (!iommu)
+		return false;
+
+	mapping = arm_iommu_create_mapping(dev->bus, dma_base, size);
+	if (IS_ERR(mapping)) {
+		pr_warn("Failed to create %llu-byte IOMMU mapping for device %s\n",
+				size, dev_name(dev));
+		return false;
+	}
+
+	if (__arm_iommu_attach_device(dev, mapping)) {
+		pr_warn("Failed to attached device %s to IOMMU_mapping\n",
+				dev_name(dev));
+		arm_iommu_release_mapping(mapping);
+		return false;
+	}
+
+	return true;
+}
+
+static void arm_teardown_iommu_dma_ops(struct device *dev)
+{
+	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
+
+	if (!mapping)
+		return;
+
+	arm_iommu_detach_device(dev);
+	arm_iommu_release_mapping(mapping);
+}
+
+#else
+
+static bool arm_setup_iommu_dma_ops(struct device *dev, u64 dma_base, u64 size,
+				    const struct iommu_ops *iommu)
+{
+	return false;
+}
+
+static void arm_teardown_iommu_dma_ops(struct device *dev) { }
+
+#define arm_get_iommu_dma_map_ops arm_get_dma_map_ops
+
+#endif	/* CONFIG_ARM_DMA_USE_IOMMU */
+
+void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+			const struct iommu_ops *iommu, bool coherent)
+{
+	const struct dma_map_ops *dma_ops;
+
+	dev->archdata.dma_coherent = coherent;
+
+	/*
+	 * Don't override the dma_ops if they have already been set. Ideally
+	 * this should be the only location where dma_ops are set, remove this
+	 * check when all other callers of set_dma_ops will have disappeared.
+	 */
+	if (dev->dma_ops)
+		return;
+
+	if (arm_setup_iommu_dma_ops(dev, dma_base, size, iommu))
+		dma_ops = arm_get_iommu_dma_map_ops(coherent);
+	else
+		dma_ops = arm_get_dma_map_ops(coherent);
+
+	set_dma_ops(dev, dma_ops);
+
+#ifdef CONFIG_XEN
+	if (xen_initial_domain()) {
+		dev->archdata.dev_dma_ops = dev->dma_ops;
+		dev->dma_ops = xen_dma_ops;
+	}
+#endif
+	dev->archdata.dma_ops_setup = true;
+}
+
+void arch_teardown_dma_ops(struct device *dev)
+{
+	if (!dev->archdata.dma_ops_setup)
+		return;
+
+	arm_teardown_iommu_dma_ops(dev);
+}
diff --git a/arch/arm/mm/dma.h b/arch/arm/mm/dma.h
new file mode 100644
index 0000000..aaef64b
--- /dev/null
+++ b/arch/arm/mm/dma.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef DMA_H
+#define DMA_H
+
+#include <asm/glue-cache.h>
+
+#ifndef MULTI_CACHE
+#define dmac_map_area			__glue(_CACHE,_dma_map_area)
+#define dmac_unmap_area 		__glue(_CACHE,_dma_unmap_area)
+
+/*
+ * These are private to the dma-mapping API.  Do not use directly.
+ * Their sole purpose is to ensure that data held in the cache
+ * is visible to DMA, or data written by DMA to system memory is
+ * visible to the CPU.
+ */
+extern void dmac_map_area(const void *, size_t, int);
+extern void dmac_unmap_area(const void *, size_t, int);
+
+#else
+
+/*
+ * These are private to the dma-mapping API.  Do not use directly.
+ * Their sole purpose is to ensure that data held in the cache
+ * is visible to DMA, or data written by DMA to system memory is
+ * visible to the CPU.
+ */
+#define dmac_map_area			cpu_cache.dma_map_area
+#define dmac_unmap_area 		cpu_cache.dma_unmap_area
+
+#endif
+
+#endif
diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c
new file mode 100644
index 0000000..084779c
--- /dev/null
+++ b/arch/arm/mm/dump.c
@@ -0,0 +1,456 @@
+/*
+ * Debug helper to dump the current kernel pagetables of the system
+ * so that we can see what the various memory ranges are set to.
+ *
+ * Derived from x86 implementation:
+ * (C) Copyright 2008 Intel Corporation
+ *
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/seq_file.h>
+
+#include <asm/domain.h>
+#include <asm/fixmap.h>
+#include <asm/memory.h>
+#include <asm/pgtable.h>
+#include <asm/ptdump.h>
+
+static struct addr_marker address_markers[] = {
+	{ MODULES_VADDR,	"Modules" },
+	{ PAGE_OFFSET,		"Kernel Mapping" },
+	{ 0,			"vmalloc() Area" },
+	{ VMALLOC_END,		"vmalloc() End" },
+	{ FIXADDR_START,	"Fixmap Area" },
+	{ VECTORS_BASE,	"Vectors" },
+	{ VECTORS_BASE + PAGE_SIZE * 2, "Vectors End" },
+	{ -1,			NULL },
+};
+
+#define pt_dump_seq_printf(m, fmt, args...) \
+({                      \
+	if (m)					\
+		seq_printf(m, fmt, ##args);	\
+})
+
+#define pt_dump_seq_puts(m, fmt)    \
+({						\
+	if (m)					\
+		seq_printf(m, fmt);	\
+})
+
+struct pg_state {
+	struct seq_file *seq;
+	const struct addr_marker *marker;
+	unsigned long start_address;
+	unsigned level;
+	u64 current_prot;
+	bool check_wx;
+	unsigned long wx_pages;
+	const char *current_domain;
+};
+
+struct prot_bits {
+	u64		mask;
+	u64		val;
+	const char	*set;
+	const char	*clear;
+	bool		ro_bit;
+	bool		nx_bit;
+};
+
+static const struct prot_bits pte_bits[] = {
+	{
+		.mask	= L_PTE_USER,
+		.val	= L_PTE_USER,
+		.set	= "USR",
+		.clear	= "   ",
+	}, {
+		.mask	= L_PTE_RDONLY,
+		.val	= L_PTE_RDONLY,
+		.set	= "ro",
+		.clear	= "RW",
+		.ro_bit	= true,
+	}, {
+		.mask	= L_PTE_XN,
+		.val	= L_PTE_XN,
+		.set	= "NX",
+		.clear	= "x ",
+		.nx_bit	= true,
+	}, {
+		.mask	= L_PTE_SHARED,
+		.val	= L_PTE_SHARED,
+		.set	= "SHD",
+		.clear	= "   ",
+	}, {
+		.mask	= L_PTE_MT_MASK,
+		.val	= L_PTE_MT_UNCACHED,
+		.set	= "SO/UNCACHED",
+	}, {
+		.mask	= L_PTE_MT_MASK,
+		.val	= L_PTE_MT_BUFFERABLE,
+		.set	= "MEM/BUFFERABLE/WC",
+	}, {
+		.mask	= L_PTE_MT_MASK,
+		.val	= L_PTE_MT_WRITETHROUGH,
+		.set	= "MEM/CACHED/WT",
+	}, {
+		.mask	= L_PTE_MT_MASK,
+		.val	= L_PTE_MT_WRITEBACK,
+		.set	= "MEM/CACHED/WBRA",
+#ifndef CONFIG_ARM_LPAE
+	}, {
+		.mask	= L_PTE_MT_MASK,
+		.val	= L_PTE_MT_MINICACHE,
+		.set	= "MEM/MINICACHE",
+#endif
+	}, {
+		.mask	= L_PTE_MT_MASK,
+		.val	= L_PTE_MT_WRITEALLOC,
+		.set	= "MEM/CACHED/WBWA",
+	}, {
+		.mask	= L_PTE_MT_MASK,
+		.val	= L_PTE_MT_DEV_SHARED,
+		.set	= "DEV/SHARED",
+#ifndef CONFIG_ARM_LPAE
+	}, {
+		.mask	= L_PTE_MT_MASK,
+		.val	= L_PTE_MT_DEV_NONSHARED,
+		.set	= "DEV/NONSHARED",
+#endif
+	}, {
+		.mask	= L_PTE_MT_MASK,
+		.val	= L_PTE_MT_DEV_WC,
+		.set	= "DEV/WC",
+	}, {
+		.mask	= L_PTE_MT_MASK,
+		.val	= L_PTE_MT_DEV_CACHED,
+		.set	= "DEV/CACHED",
+	},
+};
+
+static const struct prot_bits section_bits[] = {
+#ifdef CONFIG_ARM_LPAE
+	{
+		.mask	= PMD_SECT_USER,
+		.val	= PMD_SECT_USER,
+		.set	= "USR",
+	}, {
+		.mask	= L_PMD_SECT_RDONLY | PMD_SECT_AP2,
+		.val	= L_PMD_SECT_RDONLY | PMD_SECT_AP2,
+		.set	= "ro",
+		.clear	= "RW",
+		.ro_bit	= true,
+#elif __LINUX_ARM_ARCH__ >= 6
+	{
+		.mask	= PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE,
+		.val	= PMD_SECT_APX | PMD_SECT_AP_WRITE,
+		.set	= "    ro",
+		.ro_bit	= true,
+	}, {
+		.mask	= PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE,
+		.val	= PMD_SECT_AP_WRITE,
+		.set	= "    RW",
+	}, {
+		.mask	= PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE,
+		.val	= PMD_SECT_AP_READ,
+		.set	= "USR ro",
+	}, {
+		.mask	= PMD_SECT_APX | PMD_SECT_AP_READ | PMD_SECT_AP_WRITE,
+		.val	= PMD_SECT_AP_READ | PMD_SECT_AP_WRITE,
+		.set	= "USR RW",
+#else /* ARMv4/ARMv5  */
+	/* These are approximate */
+	{
+		.mask   = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE,
+		.val    = 0,
+		.set    = "    ro",
+		.ro_bit	= true,
+	}, {
+		.mask   = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE,
+		.val    = PMD_SECT_AP_WRITE,
+		.set    = "    RW",
+	}, {
+		.mask   = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE,
+		.val    = PMD_SECT_AP_READ,
+		.set    = "USR ro",
+	}, {
+		.mask   = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE,
+		.val    = PMD_SECT_AP_READ | PMD_SECT_AP_WRITE,
+		.set    = "USR RW",
+#endif
+	}, {
+		.mask	= PMD_SECT_XN,
+		.val	= PMD_SECT_XN,
+		.set	= "NX",
+		.clear	= "x ",
+		.nx_bit	= true,
+	}, {
+		.mask	= PMD_SECT_S,
+		.val	= PMD_SECT_S,
+		.set	= "SHD",
+		.clear	= "   ",
+	},
+};
+
+struct pg_level {
+	const struct prot_bits *bits;
+	size_t num;
+	u64 mask;
+	const struct prot_bits *ro_bit;
+	const struct prot_bits *nx_bit;
+};
+
+static struct pg_level pg_level[] = {
+	{
+	}, { /* pgd */
+	}, { /* pud */
+	}, { /* pmd */
+		.bits	= section_bits,
+		.num	= ARRAY_SIZE(section_bits),
+	}, { /* pte */
+		.bits	= pte_bits,
+		.num	= ARRAY_SIZE(pte_bits),
+	},
+};
+
+static void dump_prot(struct pg_state *st, const struct prot_bits *bits, size_t num)
+{
+	unsigned i;
+
+	for (i = 0; i < num; i++, bits++) {
+		const char *s;
+
+		if ((st->current_prot & bits->mask) == bits->val)
+			s = bits->set;
+		else
+			s = bits->clear;
+
+		if (s)
+			pt_dump_seq_printf(st->seq, " %s", s);
+	}
+}
+
+static void note_prot_wx(struct pg_state *st, unsigned long addr)
+{
+	if (!st->check_wx)
+		return;
+	if ((st->current_prot & pg_level[st->level].ro_bit->mask) ==
+				pg_level[st->level].ro_bit->val)
+		return;
+	if ((st->current_prot & pg_level[st->level].nx_bit->mask) ==
+				pg_level[st->level].nx_bit->val)
+		return;
+
+	WARN_ONCE(1, "arm/mm: Found insecure W+X mapping at address %pS\n",
+			(void *)st->start_address);
+
+	st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
+}
+
+static void note_page(struct pg_state *st, unsigned long addr,
+		      unsigned int level, u64 val, const char *domain)
+{
+	static const char units[] = "KMGTPE";
+	u64 prot = val & pg_level[level].mask;
+
+	if (!st->level) {
+		st->level = level;
+		st->current_prot = prot;
+		st->current_domain = domain;
+		pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+	} else if (prot != st->current_prot || level != st->level ||
+		   domain != st->current_domain ||
+		   addr >= st->marker[1].start_address) {
+		const char *unit = units;
+		unsigned long delta;
+
+		if (st->current_prot) {
+			note_prot_wx(st, addr);
+			pt_dump_seq_printf(st->seq, "0x%08lx-0x%08lx   ",
+				   st->start_address, addr);
+
+			delta = (addr - st->start_address) >> 10;
+			while (!(delta & 1023) && unit[1]) {
+				delta >>= 10;
+				unit++;
+			}
+			pt_dump_seq_printf(st->seq, "%9lu%c", delta, *unit);
+			if (st->current_domain)
+				pt_dump_seq_printf(st->seq, " %s",
+							st->current_domain);
+			if (pg_level[st->level].bits)
+				dump_prot(st, pg_level[st->level].bits, pg_level[st->level].num);
+			pt_dump_seq_printf(st->seq, "\n");
+		}
+
+		if (addr >= st->marker[1].start_address) {
+			st->marker++;
+			pt_dump_seq_printf(st->seq, "---[ %s ]---\n",
+							st->marker->name);
+		}
+		st->start_address = addr;
+		st->current_prot = prot;
+		st->current_domain = domain;
+		st->level = level;
+	}
+}
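+
+/*
+ * note_page() emits output lines of the form (illustrative):
+ *
+ *	---[ Kernel Mapping ]---
+ *	0xc0000000-0xc0800000           8M KERNEL  RW x  SHD MEM/CACHED/WBWA
+ *
+ * i.e. address range, human-readable size, optional domain, then the
+ * decoded protection bits for this level.
+ */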
+
+static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start,
+		     const char *domain)
+{
+	pte_t *pte = pte_offset_kernel(pmd, 0);
+	unsigned long addr;
+	unsigned i;
+
+	for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+		addr = start + i * PAGE_SIZE;
+		note_page(st, addr, 4, pte_val(*pte), domain);
+	}
+}
+
+static const char *get_domain_name(pmd_t *pmd)
+{
+#ifndef CONFIG_ARM_LPAE
+	switch (pmd_val(*pmd) & PMD_DOMAIN_MASK) {
+	case PMD_DOMAIN(DOMAIN_KERNEL):
+		return "KERNEL ";
+	case PMD_DOMAIN(DOMAIN_USER):
+		return "USER   ";
+	case PMD_DOMAIN(DOMAIN_IO):
+		return "IO     ";
+	case PMD_DOMAIN(DOMAIN_VECTORS):
+		return "VECTORS";
+	default:
+		return "unknown";
+	}
+#endif
+	return NULL;
+}
+
+static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
+{
+	pmd_t *pmd = pmd_offset(pud, 0);
+	unsigned long addr;
+	unsigned i;
+	const char *domain;
+
+	for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
+		addr = start + i * PMD_SIZE;
+		domain = get_domain_name(pmd);
+		if (pmd_none(*pmd) || pmd_large(*pmd) || !pmd_present(*pmd))
+			note_page(st, addr, 3, pmd_val(*pmd), domain);
+		else
+			walk_pte(st, pmd, addr, domain);
+
+		if (SECTION_SIZE < PMD_SIZE && pmd_large(pmd[1])) {
+			addr += SECTION_SIZE;
+			pmd++;
+			domain = get_domain_name(pmd);
+			note_page(st, addr, 3, pmd_val(*pmd), domain);
+		}
+	}
+}
+
+static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
+{
+	pud_t *pud = pud_offset(pgd, 0);
+	unsigned long addr;
+	unsigned i;
+
+	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+		addr = start + i * PUD_SIZE;
+		if (!pud_none(*pud)) {
+			walk_pmd(st, pud, addr);
+		} else {
+			note_page(st, addr, 2, pud_val(*pud), NULL);
+		}
+	}
+}
+
+static void walk_pgd(struct pg_state *st, struct mm_struct *mm,
+			unsigned long start)
+{
+	pgd_t *pgd = pgd_offset(mm, 0UL);
+	unsigned i;
+	unsigned long addr;
+
+	for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
+		addr = start + i * PGDIR_SIZE;
+		if (!pgd_none(*pgd)) {
+			walk_pud(st, pgd, addr);
+		} else {
+			note_page(st, addr, 1, pgd_val(*pgd), NULL);
+		}
+	}
+}
+
+void ptdump_walk_pgd(struct seq_file *m, struct ptdump_info *info)
+{
+	struct pg_state st = {
+		.seq = m,
+		.marker = info->markers,
+		.check_wx = false,
+	};
+
+	walk_pgd(&st, info->mm, info->base_addr);
+	note_page(&st, 0, 0, 0, NULL);
+}
+
+static void ptdump_initialize(void)
+{
+	unsigned i, j;
+
+	for (i = 0; i < ARRAY_SIZE(pg_level); i++)
+		if (pg_level[i].bits)
+			for (j = 0; j < pg_level[i].num; j++) {
+				pg_level[i].mask |= pg_level[i].bits[j].mask;
+				if (pg_level[i].bits[j].ro_bit)
+					pg_level[i].ro_bit = &pg_level[i].bits[j];
+				if (pg_level[i].bits[j].nx_bit)
+					pg_level[i].nx_bit = &pg_level[i].bits[j];
+			}
+
+	address_markers[2].start_address = VMALLOC_START;
+}
+
+static struct ptdump_info kernel_ptdump_info = {
+	.mm = &init_mm,
+	.markers = address_markers,
+	.base_addr = 0,
+};
+
+void ptdump_check_wx(void)
+{
+	struct pg_state st = {
+		.seq = NULL,
+		.marker = (struct addr_marker[]) {
+			{ 0, NULL},
+			{ -1, NULL},
+		},
+		.check_wx = true,
+	};
+
+	walk_pgd(&st, &init_mm, 0);
+	note_page(&st, 0, 0, 0, NULL);
+	if (st.wx_pages)
+		pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n",
+			st.wx_pages);
+	else
+		pr_info("Checked W+X mappings: passed, no W+X pages found\n");
+}
+
+static int ptdump_init(void)
+{
+	ptdump_initialize();
+	return ptdump_debugfs_register(&kernel_ptdump_info,
+					"kernel_page_tables");
+}
+__initcall(ptdump_init);
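+
+/*
+ * With debugfs mounted at the usual location, the dump can then be read
+ * from userspace (illustrative):
+ *
+ *	# cat /sys/kernel/debug/kernel_page_tables
+ */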
diff --git a/arch/arm/mm/extable.c b/arch/arm/mm/extable.c
new file mode 100644
index 0000000..fc33564
--- /dev/null
+++ b/arch/arm/mm/extable.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  linux/arch/arm/mm/extable.c
+ */
+#include <linux/extable.h>
+#include <linux/uaccess.h>
+
+int fixup_exception(struct pt_regs *regs)
+{
+	const struct exception_table_entry *fixup;
+
+	fixup = search_exception_tables(instruction_pointer(regs));
+	if (fixup) {
+		regs->ARM_pc = fixup->fixup;
+#ifdef CONFIG_THUMB2_KERNEL
+		/* Clear the IT state to avoid nasty surprises in the fixup */
+		regs->ARM_cpsr &= ~PSR_IT_MASK;
+#endif
+	}
+
+	return fixup != NULL;
+}
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
new file mode 100644
index 0000000..4d75dae
--- /dev/null
+++ b/arch/arm/mm/fault-armv.c
@@ -0,0 +1,269 @@
+/*
+ *  linux/arch/arm/mm/fault-armv.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *  Modifications for ARM processor (c) 1995-2002 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/bitops.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/pagemap.h>
+#include <linux/gfp.h>
+
+#include <asm/bugs.h>
+#include <asm/cacheflush.h>
+#include <asm/cachetype.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+#include "mm.h"
+
+static pteval_t shared_pte_mask = L_PTE_MT_BUFFERABLE;
+
+#if __LINUX_ARM_ARCH__ < 6
+/*
+ * We take the easy way out of this problem - we make the
+ * PTE uncacheable.  However, we leave the write buffer on.
+ *
+ * Note that the pte lock held when calling update_mmu_cache must also
+ * guard the pte (somewhere else in the same mm) that we modify here.
+ * Therefore those configurations which might call adjust_pte (those
+ * without CONFIG_CPU_CACHE_VIPT) cannot support split page_table_lock.
+ */
+static int do_adjust_pte(struct vm_area_struct *vma, unsigned long address,
+	unsigned long pfn, pte_t *ptep)
+{
+	pte_t entry = *ptep;
+	int ret;
+
+	/*
+	 * If this page is present, it's actually being shared.
+	 */
+	ret = pte_present(entry);
+
+	/*
+	 * If this page isn't present, or is already set up to
+	 * fault (i.e. is old), we can safely ignore any issues.
+	 */
+	if (ret && (pte_val(entry) & L_PTE_MT_MASK) != shared_pte_mask) {
+		flush_cache_page(vma, address, pfn);
+		outer_flush_range((pfn << PAGE_SHIFT),
+				  (pfn << PAGE_SHIFT) + PAGE_SIZE);
+		pte_val(entry) &= ~L_PTE_MT_MASK;
+		pte_val(entry) |= shared_pte_mask;
+		set_pte_at(vma->vm_mm, address, ptep, entry);
+		flush_tlb_page(vma, address);
+	}
+
+	return ret;
+}
+
+#if USE_SPLIT_PTE_PTLOCKS
+/*
+ * If we are using split PTE locks, then we need to take the page
+ * lock here.  Otherwise we are using shared mm->page_table_lock
+ * which is already locked, thus cannot take it.
+ */
+static inline void do_pte_lock(spinlock_t *ptl)
+{
+	/*
+	 * Use nested version here to indicate that we are already
+	 * holding one similar spinlock.
+	 */
+	spin_lock_nested(ptl, SINGLE_DEPTH_NESTING);
+}
+
+static inline void do_pte_unlock(spinlock_t *ptl)
+{
+	spin_unlock(ptl);
+}
+#else /* !USE_SPLIT_PTE_PTLOCKS */
+static inline void do_pte_lock(spinlock_t *ptl) {}
+static inline void do_pte_unlock(spinlock_t *ptl) {}
+#endif /* USE_SPLIT_PTE_PTLOCKS */
+
+static int adjust_pte(struct vm_area_struct *vma, unsigned long address,
+	unsigned long pfn)
+{
+	spinlock_t *ptl;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	int ret;
+
+	pgd = pgd_offset(vma->vm_mm, address);
+	if (pgd_none_or_clear_bad(pgd))
+		return 0;
+
+	pud = pud_offset(pgd, address);
+	if (pud_none_or_clear_bad(pud))
+		return 0;
+
+	pmd = pmd_offset(pud, address);
+	if (pmd_none_or_clear_bad(pmd))
+		return 0;
+
+	/*
+	 * This is called while another page table is mapped, so we
+	 * must use the nested version.  This also means we need to
+	 * open-code the spin-locking.
+	 */
+	ptl = pte_lockptr(vma->vm_mm, pmd);
+	pte = pte_offset_map(pmd, address);
+	do_pte_lock(ptl);
+
+	ret = do_adjust_pte(vma, address, pfn, pte);
+
+	do_pte_unlock(ptl);
+	pte_unmap(pte);
+
+	return ret;
+}
+
+static void
+make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
+	unsigned long addr, pte_t *ptep, unsigned long pfn)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct vm_area_struct *mpnt;
+	unsigned long offset;
+	pgoff_t pgoff;
+	int aliases = 0;
+
+	pgoff = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
+
+	/*
+	 * If we have any shared mappings that are in the same mm
+	 * space, then we need to handle them specially to maintain
+	 * cache coherency.
+	 */
+	flush_dcache_mmap_lock(mapping);
+	vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
+		/*
+		 * If this VMA is not in our MM, we can ignore it.
+		 * Note that we intentionally mask out the VMA
+		 * that we are fixing up.
+		 */
+		if (mpnt->vm_mm != mm || mpnt == vma)
+			continue;
+		if (!(mpnt->vm_flags & VM_MAYSHARE))
+			continue;
+		offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
+		aliases += adjust_pte(mpnt, mpnt->vm_start + offset, pfn);
+	}
+	flush_dcache_mmap_unlock(mapping);
+	if (aliases)
+		do_adjust_pte(vma, addr, pfn, ptep);
+}
+
+/*
+ * Take care of architecture specific things when placing a new PTE into
+ * a page table, or changing an existing PTE.  Basically, there are two
+ * things that we need to take care of:
+ *
+ *  1. If PG_dcache_clean is not set for the page, we need to ensure
+ *     that any cache entries for the kernel's virtual memory
+ *     range are written back to the page.
+ *  2. If we have multiple shared mappings of the same space in
+ *     an object, we need to deal with the cache aliasing issues.
+ *
+ * Note that the pte lock will be held.
+ */
+void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr,
+	pte_t *ptep)
+{
+	unsigned long pfn = pte_pfn(*ptep);
+	struct address_space *mapping;
+	struct page *page;
+
+	if (!pfn_valid(pfn))
+		return;
+
+	/*
+	 * The zero page is never written to, so never has any dirty
+	 * cache lines, and therefore never needs to be flushed.
+	 */
+	page = pfn_to_page(pfn);
+	if (page == ZERO_PAGE(0))
+		return;
+
+	mapping = page_mapping_file(page);
+	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
+		__flush_dcache_page(mapping, page);
+	if (mapping) {
+		if (cache_is_vivt())
+			make_coherent(mapping, vma, addr, ptep, pfn);
+		else if (vma->vm_flags & VM_EXEC)
+			__flush_icache_all();
+	}
+}
+#endif	/* __LINUX_ARM_ARCH__ < 6 */
+
+/*
+ * Check whether the write buffer has physical address aliasing
+ * issues.  If it has, we need to avoid them for the case where
+ * we have several shared mappings of the same object in user
+ * space.
+ */
+static int __init check_writebuffer(unsigned long *p1, unsigned long *p2)
+{
+	register unsigned long zero = 0, one = 1, val;
+
+	local_irq_disable();
+	mb();
+	*p1 = one;
+	mb();
+	*p2 = zero;
+	mb();
+	val = *p1;
+	mb();
+	local_irq_enable();
+	return val != zero;
+}
+
+void __init check_writebuffer_bugs(void)
+{
+	struct page *page;
+	const char *reason;
+	unsigned long v = 1;
+
+	pr_info("CPU: Testing write buffer coherency: ");
+
+	page = alloc_page(GFP_KERNEL);
+	if (page) {
+		unsigned long *p1, *p2;
+		pgprot_t prot = __pgprot_modify(PAGE_KERNEL,
+					L_PTE_MT_MASK, L_PTE_MT_BUFFERABLE);
+
+		p1 = vmap(&page, 1, VM_IOREMAP, prot);
+		p2 = vmap(&page, 1, VM_IOREMAP, prot);
+
+		if (p1 && p2) {
+			v = check_writebuffer(p1, p2);
+			reason = "enabling work-around";
+		} else {
+			reason = "unable to map memory\n";
+		}
+
+		vunmap(p1);
+		vunmap(p2);
+		put_page(page);
+	} else {
+		reason = "unable to grab page\n";
+	}
+
+	if (v) {
+		pr_cont("failed, %s\n", reason);
+		shared_pte_mask = L_PTE_MT_UNCACHED;
+	} else {
+		pr_cont("ok\n");
+	}
+}
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
new file mode 100644
index 0000000..3232afb
--- /dev/null
+++ b/arch/arm/mm/fault.c
@@ -0,0 +1,652 @@
+/*
+ *  linux/arch/arm/mm/fault.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *  Modifications for ARM processor (c) 1995-2004 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/extable.h>
+#include <linux/signal.h>
+#include <linux/mm.h>
+#include <linux/hardirq.h>
+#include <linux/init.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+#include <linux/page-flags.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/debug.h>
+#include <linux/highmem.h>
+#include <linux/perf_event.h>
+
+#include <asm/pgtable.h>
+#include <asm/system_misc.h>
+#include <asm/system_info.h>
+#include <asm/tlbflush.h>
+
+#include "fault.h"
+
+#ifdef CONFIG_MMU
+
+#ifdef CONFIG_KPROBES
+static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr)
+{
+	int ret = 0;
+
+	if (!user_mode(regs)) {
+		/* kprobe_running() needs smp_processor_id() */
+		preempt_disable();
+		if (kprobe_running() && kprobe_fault_handler(regs, fsr))
+			ret = 1;
+		preempt_enable();
+	}
+
+	return ret;
+}
+#else
+static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr)
+{
+	return 0;
+}
+#endif
+
+/*
+ * This is useful to dump out the page tables associated with
+ * 'addr' in mm 'mm'.
+ */
+void show_pte(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+
+	if (!mm)
+		mm = &init_mm;
+
+	pr_alert("pgd = %p\n", mm->pgd);
+	pgd = pgd_offset(mm, addr);
+	pr_alert("[%08lx] *pgd=%08llx",
+			addr, (long long)pgd_val(*pgd));
+
+	do {
+		pud_t *pud;
+		pmd_t *pmd;
+		pte_t *pte;
+
+		if (pgd_none(*pgd))
+			break;
+
+		if (pgd_bad(*pgd)) {
+			pr_cont("(bad)");
+			break;
+		}
+
+		pud = pud_offset(pgd, addr);
+		if (PTRS_PER_PUD != 1)
+			pr_cont(", *pud=%08llx", (long long)pud_val(*pud));
+
+		if (pud_none(*pud))
+			break;
+
+		if (pud_bad(*pud)) {
+			pr_cont("(bad)");
+			break;
+		}
+
+		pmd = pmd_offset(pud, addr);
+		if (PTRS_PER_PMD != 1)
+			pr_cont(", *pmd=%08llx", (long long)pmd_val(*pmd));
+
+		if (pmd_none(*pmd))
+			break;
+
+		if (pmd_bad(*pmd)) {
+			pr_cont("(bad)");
+			break;
+		}
+
+		/* We must not map this if we have highmem enabled */
+		if (PageHighMem(pfn_to_page(pmd_val(*pmd) >> PAGE_SHIFT)))
+			break;
+
+		pte = pte_offset_map(pmd, addr);
+		pr_cont(", *pte=%08llx", (long long)pte_val(*pte));
+#ifndef CONFIG_ARM_LPAE
+		pr_cont(", *ppte=%08llx",
+		       (long long)pte_val(pte[PTE_HWTABLE_PTRS]));
+#endif
+		pte_unmap(pte);
+	} while(0);
+
+	pr_cont("\n");
+}
+#else					/* CONFIG_MMU */
+void show_pte(struct mm_struct *mm, unsigned long addr)
+{ }
+#endif					/* CONFIG_MMU */
+
+/*
+ * Oops.  The kernel tried to access some page that wasn't present.
+ */
+static void
+__do_kernel_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
+		  struct pt_regs *regs)
+{
+	/*
+	 * Are we prepared to handle this kernel fault?
+	 */
+	if (fixup_exception(regs))
+		return;
+
+	/*
+	 * No handler, we'll have to terminate things with extreme prejudice.
+	 */
+	bust_spinlocks(1);
+	pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
+		 (addr < PAGE_SIZE) ? "NULL pointer dereference" :
+		 "paging request", addr);
+
+	show_pte(mm, addr);
+	die("Oops", regs, fsr);
+	bust_spinlocks(0);
+	do_exit(SIGKILL);
+}
+
+/*
+ * Something tried to access memory that isn't in our memory map..
+ * User mode accesses just cause a SIGSEGV
+ */
+static void
+__do_user_fault(struct task_struct *tsk, unsigned long addr,
+		unsigned int fsr, unsigned int sig, int code,
+		struct pt_regs *regs)
+{
+	struct siginfo si;
+
+	if (addr > TASK_SIZE)
+		harden_branch_predictor();
+
+	clear_siginfo(&si);
+
+#ifdef CONFIG_DEBUG_USER
+	if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) ||
+	    ((user_debug & UDBG_BUS)  && (sig == SIGBUS))) {
+		printk(KERN_DEBUG "%s: unhandled page fault (%d) at 0x%08lx, code 0x%03x\n",
+		       tsk->comm, sig, addr, fsr);
+		show_pte(tsk->mm, addr);
+		show_regs(regs);
+	}
+#endif
+
+	tsk->thread.address = addr;
+	tsk->thread.error_code = fsr;
+	tsk->thread.trap_no = 14;
+	si.si_signo = sig;
+	si.si_errno = 0;
+	si.si_code = code;
+	si.si_addr = (void __user *)addr;
+	force_sig_info(sig, &si, tsk);
+}
+
+void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+{
+	struct task_struct *tsk = current;
+	struct mm_struct *mm = tsk->active_mm;
+
+	/*
+	 * If we are in kernel mode at this point, we
+	 * have no context to handle this fault with.
+	 */
+	if (user_mode(regs))
+		__do_user_fault(tsk, addr, fsr, SIGSEGV, SEGV_MAPERR, regs);
+	else
+		__do_kernel_fault(mm, addr, fsr, regs);
+}
+
+#ifdef CONFIG_MMU
+#define VM_FAULT_BADMAP		0x010000
+#define VM_FAULT_BADACCESS	0x020000
+
+/*
+ * Check that the permissions on the VMA allow for the fault which occurred.
+ * If we encountered a write fault, we must have write permission, otherwise
+ * we allow any permission.
+ */
+static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma)
+{
+	unsigned int mask = VM_READ | VM_WRITE | VM_EXEC;
+
+	if (fsr & FSR_WRITE)
+		mask = VM_WRITE;
+	if (fsr & FSR_LNX_PF)
+		mask = VM_EXEC;
+
+	return vma->vm_flags & mask ? false : true;
+}
+
+static vm_fault_t __kprobes
+__do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
+		unsigned int flags, struct task_struct *tsk)
+{
+	struct vm_area_struct *vma;
+	vm_fault_t fault;
+
+	vma = find_vma(mm, addr);
+	fault = VM_FAULT_BADMAP;
+	if (unlikely(!vma))
+		goto out;
+	if (unlikely(vma->vm_start > addr))
+		goto check_stack;
+
+	/*
+	 * Ok, we have a good vm_area for this
+	 * memory access, so we can handle it.
+	 */
+good_area:
+	if (access_error(fsr, vma)) {
+		fault = VM_FAULT_BADACCESS;
+		goto out;
+	}
+
+	return handle_mm_fault(vma, addr & PAGE_MASK, flags);
+
+check_stack:
+	/* Don't allow expansion below FIRST_USER_ADDRESS */
+	if (vma->vm_flags & VM_GROWSDOWN &&
+	    addr >= FIRST_USER_ADDRESS && !expand_stack(vma, addr))
+		goto good_area;
+out:
+	return fault;
+}
+
+static int __kprobes
+do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+{
+	struct task_struct *tsk;
+	struct mm_struct *mm;
+	int sig, code;
+	vm_fault_t fault;
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+
+	if (notify_page_fault(regs, fsr))
+		return 0;
+
+	tsk = current;
+	mm  = tsk->mm;
+
+	/* Enable interrupts if they were enabled in the parent context. */
+	if (interrupts_enabled(regs))
+		local_irq_enable();
+
+	/*
+	 * If we're in an interrupt or have no user
+	 * context, we must not take the fault..
+	 */
+	if (faulthandler_disabled() || !mm)
+		goto no_context;
+
+	if (user_mode(regs))
+		flags |= FAULT_FLAG_USER;
+	if (fsr & FSR_WRITE)
+		flags |= FAULT_FLAG_WRITE;
+
+	/*
+	 * As per x86, we may deadlock here.  However, since the kernel only
+	 * validly references user space from well-defined areas of the code,
+	 * we can bug out early if the fault came from code which shouldn't be
+	 * touching user space.
+	 */
+	if (!down_read_trylock(&mm->mmap_sem)) {
+		if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc))
+			goto no_context;
+retry:
+		down_read(&mm->mmap_sem);
+	} else {
+		/*
+		 * The above down_read_trylock() might have succeeded in
+		 * which case, we'll have missed the might_sleep() from
+		 * down_read()
+		 */
+		might_sleep();
+#ifdef CONFIG_DEBUG_VM
+		if (!user_mode(regs) &&
+		    !search_exception_tables(regs->ARM_pc))
+			goto no_context;
+#endif
+	}
+
+	fault = __do_page_fault(mm, addr, fsr, flags, tsk);
+
+	/* If we need to retry but a fatal signal is pending, handle the
+	 * signal first. We do not need to release the mmap_sem because
+	 * it would already be released in __lock_page_or_retry in
+	 * mm/filemap.c. */
+	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
+		if (!user_mode(regs))
+			goto no_context;
+		return 0;
+	}
+
+	/*
+	 * Major/minor page fault accounting is only done on the
+	 * initial attempt. If we go through a retry, it is extremely
+	 * likely that the page will be found in page cache at that point.
+	 */
+
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+	if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) {
+		if (fault & VM_FAULT_MAJOR) {
+			tsk->maj_flt++;
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
+					regs, addr);
+		} else {
+			tsk->min_flt++;
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
+					regs, addr);
+		}
+		if (fault & VM_FAULT_RETRY) {
+			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
+			 * of starvation. */
+			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
+			goto retry;
+		}
+	}
+
+	up_read(&mm->mmap_sem);
+
+	/*
+	 * Handle the "normal" case first - VM_FAULT_MAJOR
+	 */
+	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
+		return 0;
+
+	/*
+	 * If we are in kernel mode at this point, we
+	 * have no context to handle this fault with.
+	 */
+	if (!user_mode(regs))
+		goto no_context;
+
+	if (fault & VM_FAULT_OOM) {
+		/*
+		 * We ran out of memory, call the OOM killer, and return to
+		 * userspace (which will retry the fault, or kill us if we
+		 * got oom-killed)
+		 */
+		pagefault_out_of_memory();
+		return 0;
+	}
+
+	if (fault & VM_FAULT_SIGBUS) {
+		/*
+		 * We had some memory, but were unable to
+		 * successfully fix up this page fault.
+		 */
+		sig = SIGBUS;
+		code = BUS_ADRERR;
+	} else {
+		/*
+		 * Something tried to access memory that
+		 * isn't in our memory map..
+		 */
+		sig = SIGSEGV;
+		code = fault == VM_FAULT_BADACCESS ?
+			SEGV_ACCERR : SEGV_MAPERR;
+	}
+
+	__do_user_fault(tsk, addr, fsr, sig, code, regs);
+	return 0;
+
+no_context:
+	__do_kernel_fault(mm, addr, fsr, regs);
+	return 0;
+}
+#else					/* CONFIG_MMU */
+static int
+do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+{
+	return 0;
+}
+#endif					/* CONFIG_MMU */
+
+/*
+ * First Level Translation Fault Handler
+ *
+ * We enter here because the first level page table doesn't contain
+ * a valid entry for the address.
+ *
+ * If the address is in kernel space (>= TASK_SIZE), then we are
+ * probably faulting in the vmalloc() area.
+ *
+ * If the init_task's first level page tables contain the relevant
+ * entry, we copy it to this task.  If not, we send the process
+ * a signal, fixup the exception, or oops the kernel.
+ *
+ * NOTE! We MUST NOT take any locks for this case. We may be in an
+ * interrupt or a critical region, and should only copy the information
+ * from the master page table, nothing more.
+ */
+#ifdef CONFIG_MMU
+static int __kprobes
+do_translation_fault(unsigned long addr, unsigned int fsr,
+		     struct pt_regs *regs)
+{
+	unsigned int index;
+	pgd_t *pgd, *pgd_k;
+	pud_t *pud, *pud_k;
+	pmd_t *pmd, *pmd_k;
+
+	if (addr < TASK_SIZE)
+		return do_page_fault(addr, fsr, regs);
+
+	if (user_mode(regs))
+		goto bad_area;
+
+	index = pgd_index(addr);
+
+	pgd = cpu_get_pgd() + index;
+	pgd_k = init_mm.pgd + index;
+
+	if (pgd_none(*pgd_k))
+		goto bad_area;
+	if (!pgd_present(*pgd))
+		set_pgd(pgd, *pgd_k);
+
+	pud = pud_offset(pgd, addr);
+	pud_k = pud_offset(pgd_k, addr);
+
+	if (pud_none(*pud_k))
+		goto bad_area;
+	if (!pud_present(*pud))
+		set_pud(pud, *pud_k);
+
+	pmd = pmd_offset(pud, addr);
+	pmd_k = pmd_offset(pud_k, addr);
+
+#ifdef CONFIG_ARM_LPAE
+	/*
+	 * Only one hardware entry per PMD with LPAE.
+	 */
+	index = 0;
+#else
+	/*
+	 * On ARM one Linux PGD entry contains two hardware entries (see the
+	 * page table layout in pgtable.h). We normally guarantee that we
+	 * always fill both L1 entries. But create_mapping() doesn't follow
+	 * this rule. It can create individual L1 entries, so here we have to
+	 * check pmd_none() for the entry that actually corresponds to the
+	 * address, not just for the first of the pair.
+	 */
+	index = (addr >> SECTION_SHIFT) & 1;
+#endif
+	if (pmd_none(pmd_k[index]))
+		goto bad_area;
+
+	copy_pmd(pmd, pmd_k);
+	return 0;
+
+bad_area:
+	do_bad_area(addr, fsr, regs);
+	return 0;
+}
+#else					/* CONFIG_MMU */
+static int
+do_translation_fault(unsigned long addr, unsigned int fsr,
+		     struct pt_regs *regs)
+{
+	return 0;
+}
+#endif					/* CONFIG_MMU */
+
+/*
+ * Some section permission faults need to be handled gracefully.
+ * They can happen due to a __{get,put}_user during an oops.
+ */
+#ifndef CONFIG_ARM_LPAE
+static int
+do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+{
+	do_bad_area(addr, fsr, regs);
+	return 0;
+}
+#endif /* CONFIG_ARM_LPAE */
+
+/*
+ * This abort handler always returns "fault".
+ */
+static int
+do_bad(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+{
+	return 1;
+}
+
+struct fsr_info {
+	int	(*fn)(unsigned long addr, unsigned int fsr, struct pt_regs *regs);
+	int	sig;
+	int	code;
+	const char *name;
+};
+
+/* FSR definition */
+#ifdef CONFIG_ARM_LPAE
+#include "fsr-3level.c"
+#else
+#include "fsr-2level.c"
+#endif
+
+void __init
+hook_fault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
+		int sig, int code, const char *name)
+{
+	if (nr < 0 || nr >= ARRAY_SIZE(fsr_info))
+		BUG();
+
+	fsr_info[nr].fn   = fn;
+	fsr_info[nr].sig  = sig;
+	fsr_info[nr].code = code;
+	fsr_info[nr].name = name;
+}
+
+/*
+ * Dispatch a data abort to the relevant handler.
+ */
+asmlinkage void
+do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
+{
+	const struct fsr_info *inf = fsr_info + fsr_fs(fsr);
+	struct siginfo info;
+
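+	/* FSR_LNX_PF marks prefetch aborts; keep it clear for data aborts. */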
+	if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs))
+		return;
+
+	pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n",
+		inf->name, fsr, addr);
+	show_pte(current->mm, addr);
+
+	clear_siginfo(&info);
+	info.si_signo = inf->sig;
+	info.si_errno = 0;
+	info.si_code  = inf->code;
+	info.si_addr  = (void __user *)addr;
+	arm_notify_die("", regs, &info, fsr, 0);
+}
+
+void __init
+hook_ifault_code(int nr, int (*fn)(unsigned long, unsigned int, struct pt_regs *),
+		 int sig, int code, const char *name)
+{
+	if (nr < 0 || nr >= ARRAY_SIZE(ifsr_info))
+		BUG();
+
+	ifsr_info[nr].fn   = fn;
+	ifsr_info[nr].sig  = sig;
+	ifsr_info[nr].code = code;
+	ifsr_info[nr].name = name;
+}
+
+asmlinkage void
+do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs)
+{
+	const struct fsr_info *inf = ifsr_info + fsr_fs(ifsr);
+	struct siginfo info;
+
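+	/* Tag as a prefetch abort (FSR_LNX_PF) for the shared fault handlers. */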
+	if (!inf->fn(addr, ifsr | FSR_LNX_PF, regs))
+		return;
+
+	pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n",
+		inf->name, ifsr, addr);
+
+	clear_siginfo(&info);
+	info.si_signo = inf->sig;
+	info.si_errno = 0;
+	info.si_code  = inf->code;
+	info.si_addr  = (void __user *)addr;
+	arm_notify_die("", regs, &info, ifsr, 0);
+}
+
+/*
+ * Abort handler to be used only during first unmasking of asynchronous aborts
+ * on the boot CPU. This makes sure that the machine will not die if the
+ * firmware/bootloader left an imprecise abort pending for us to trip over.
+ */
+static int __init early_abort_handler(unsigned long addr, unsigned int fsr,
+				      struct pt_regs *regs)
+{
+	pr_warn("Hit pending asynchronous external abort (FSR=0x%08x) during "
+		"first unmask, this is most likely caused by a "
+		"firmware/bootloader bug.\n", fsr);
+
+	return 0;
+}
+
+void __init early_abt_enable(void)
+{
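+	/*
+	 * Route asynchronous external aborts to a benign handler while
+	 * they are unmasked for the first time, then restore do_bad().
+	 */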
+	fsr_info[FSR_FS_AEA].fn = early_abort_handler;
+	local_abt_enable();
+	fsr_info[FSR_FS_AEA].fn = do_bad;
+}
+
+#ifndef CONFIG_ARM_LPAE
+static int __init exceptions_init(void)
+{
+	if (cpu_architecture() >= CPU_ARCH_ARMv6) {
+		hook_fault_code(4, do_translation_fault, SIGSEGV, SEGV_MAPERR,
+				"I-cache maintenance fault");
+	}
+
+	if (cpu_architecture() >= CPU_ARCH_ARMv7) {
+		/*
+		 * TODO: Access flag faults introduced in ARMv6K.
+		 * Runtime check for 'K' extension is needed
+		 */
+		hook_fault_code(3, do_bad, SIGSEGV, SEGV_MAPERR,
+				"section access flag fault");
+		hook_fault_code(6, do_bad, SIGSEGV, SEGV_MAPERR,
+				"section access flag fault");
+	}
+
+	return 0;
+}
+
+arch_initcall(exceptions_init);
+#endif
diff --git a/arch/arm/mm/fault.h b/arch/arm/mm/fault.h
new file mode 100644
index 0000000..c063708
--- /dev/null
+++ b/arch/arm/mm/fault.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARCH_ARM_FAULT_H
+#define __ARCH_ARM_FAULT_H
+
+/*
+ * Fault status register encodings.  We steal bit 31 for our own purposes.
+ */
+#define FSR_LNX_PF		(1 << 31)
+#define FSR_WRITE		(1 << 11)
+#define FSR_FS4			(1 << 10)
+#define FSR_FS3_0		(15)
+#define FSR_FS5_0		(0x3f)
+
+#ifdef CONFIG_ARM_LPAE
+#define FSR_FS_AEA		17
+
+static inline int fsr_fs(unsigned int fsr)
+{
+	return fsr & FSR_FS5_0;
+}
+#else
+#define FSR_FS_AEA		22
+
+static inline int fsr_fs(unsigned int fsr)
+{
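+	/* Combine FS[3:0] with FS[4] (bit 10) into a 5-bit status index. */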
+	return (fsr & FSR_FS3_0) | (fsr & FSR_FS4) >> 6;
+}
+#endif
+
+void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs);
+void early_abt_enable(void);
+
+#endif	/* __ARCH_ARM_FAULT_H */
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
new file mode 100644
index 0000000..5846962
--- /dev/null
+++ b/arch/arm/mm/flush.c
@@ -0,0 +1,423 @@
+/*
+ *  linux/arch/arm/mm/flush.c
+ *
+ *  Copyright (C) 1995-2002 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cachetype.h>
+#include <asm/highmem.h>
+#include <asm/smp_plat.h>
+#include <asm/tlbflush.h>
+#include <linux/hugetlb.h>
+
+#include "mm.h"
+
+#ifdef CONFIG_ARM_HEAVY_MB
+void (*soc_mb)(void);
+
+void arm_heavy_mb(void)
+{
+#ifdef CONFIG_OUTER_CACHE_SYNC
+	if (outer_cache.sync)
+		outer_cache.sync();
+#endif
+	if (soc_mb)
+		soc_mb();
+}
+EXPORT_SYMBOL(arm_heavy_mb);
+#endif
+
+#ifdef CONFIG_CPU_CACHE_VIPT
+
+static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr)
+{
+	unsigned long to = FLUSH_ALIAS_START + (CACHE_COLOUR(vaddr) << PAGE_SHIFT);
+	const int zero = 0;
+
+	set_top_pte(to, pfn_pte(pfn, PAGE_KERNEL));
+
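+	/* Clean+invalidate the alias range, then drain the write buffer. */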
+	asm(	"mcrr	p15, 0, %1, %0, c14\n"
+	"	mcr	p15, 0, %2, c7, c10, 4"
+	    :
+	    : "r" (to), "r" (to + PAGE_SIZE - 1), "r" (zero)
+	    : "cc");
+}
+
+static void flush_icache_alias(unsigned long pfn, unsigned long vaddr, unsigned long len)
+{
+	unsigned long va = FLUSH_ALIAS_START + (CACHE_COLOUR(vaddr) << PAGE_SHIFT);
+	unsigned long offset = vaddr & (PAGE_SIZE - 1);
+	unsigned long to;
+
+	set_top_pte(va, pfn_pte(pfn, PAGE_KERNEL));
+	to = va + offset;
+	flush_icache_range(to, to + len);
+}
+
+void flush_cache_mm(struct mm_struct *mm)
+{
+	if (cache_is_vivt()) {
+		vivt_flush_cache_mm(mm);
+		return;
+	}
+
+	if (cache_is_vipt_aliasing()) {
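+		/* Clean+invalidate the entire D-cache and drain the write buffer. */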
+		asm(	"mcr	p15, 0, %0, c7, c14, 0\n"
+		"	mcr	p15, 0, %0, c7, c10, 4"
+		    :
+		    : "r" (0)
+		    : "cc");
+	}
+}
+
+void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
+{
+	if (cache_is_vivt()) {
+		vivt_flush_cache_range(vma, start, end);
+		return;
+	}
+
+	if (cache_is_vipt_aliasing()) {
+		asm(	"mcr	p15, 0, %0, c7, c14, 0\n"
+		"	mcr	p15, 0, %0, c7, c10, 4"
+		    :
+		    : "r" (0)
+		    : "cc");
+	}
+
+	if (vma->vm_flags & VM_EXEC)
+		__flush_icache_all();
+}
+
+void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn)
+{
+	if (cache_is_vivt()) {
+		vivt_flush_cache_page(vma, user_addr, pfn);
+		return;
+	}
+
+	if (cache_is_vipt_aliasing()) {
+		flush_pfn_alias(pfn, user_addr);
+		__flush_icache_all();
+	}
+
+	if (vma->vm_flags & VM_EXEC && icache_is_vivt_asid_tagged())
+		__flush_icache_all();
+}
+
+#else
+#define flush_pfn_alias(pfn,vaddr)		do { } while (0)
+#define flush_icache_alias(pfn,vaddr,len)	do { } while (0)
+#endif
+
+#define FLAG_PA_IS_EXEC 1
+#define FLAG_PA_CORE_IN_MM 2
+
+static void flush_ptrace_access_other(void *args)
+{
+	__flush_icache_all();
+}
+
+static inline
+void __flush_ptrace_access(struct page *page, unsigned long uaddr, void *kaddr,
+			   unsigned long len, unsigned int flags)
+{
+	if (cache_is_vivt()) {
+		if (flags & FLAG_PA_CORE_IN_MM) {
+			unsigned long addr = (unsigned long)kaddr;
+			__cpuc_coherent_kern_range(addr, addr + len);
+		}
+		return;
+	}
+
+	if (cache_is_vipt_aliasing()) {
+		flush_pfn_alias(page_to_pfn(page), uaddr);
+		__flush_icache_all();
+		return;
+	}
+
+	/* VIPT non-aliasing D-cache */
+	if (flags & FLAG_PA_IS_EXEC) {
+		unsigned long addr = (unsigned long)kaddr;
+		if (icache_is_vipt_aliasing())
+			flush_icache_alias(page_to_pfn(page), uaddr, len);
+		else
+			__cpuc_coherent_kern_range(addr, addr + len);
+		if (cache_ops_need_broadcast())
+			smp_call_function(flush_ptrace_access_other,
+					  NULL, 1);
+	}
+}
+
+static
+void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
+			 unsigned long uaddr, void *kaddr, unsigned long len)
+{
+	unsigned int flags = 0;
+	if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm)))
+		flags |= FLAG_PA_CORE_IN_MM;
+	if (vma->vm_flags & VM_EXEC)
+		flags |= FLAG_PA_IS_EXEC;
+	__flush_ptrace_access(page, uaddr, kaddr, len, flags);
+}
+
+void flush_uprobe_xol_access(struct page *page, unsigned long uaddr,
+			     void *kaddr, unsigned long len)
+{
+	unsigned int flags = FLAG_PA_CORE_IN_MM|FLAG_PA_IS_EXEC;
+
+	__flush_ptrace_access(page, uaddr, kaddr, len, flags);
+}
+
+/*
+ * Copy user data from/to a page which is mapped into a different
+ * process's address space.  Really, we want to allow our "user
+ * space" model to handle this.
+ *
+ * Note that this code needs to run on the current CPU.
+ */
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+		       unsigned long uaddr, void *dst, const void *src,
+		       unsigned long len)
+{
+#ifdef CONFIG_SMP
+	preempt_disable();
+#endif
+	memcpy(dst, src, len);
+	flush_ptrace_access(vma, page, uaddr, dst, len);
+#ifdef CONFIG_SMP
+	preempt_enable();
+#endif
+}
+
+void __flush_dcache_page(struct address_space *mapping, struct page *page)
+{
+	/*
+	 * Writeback any data associated with the kernel mapping of this
+	 * page.  This ensures that data in the physical page is mutually
+	 * coherent with the kernel's mapping.
+	 */
+	if (!PageHighMem(page)) {
+		size_t page_size = PAGE_SIZE << compound_order(page);
+		__cpuc_flush_dcache_area(page_address(page), page_size);
+	} else {
+		unsigned long i;
+		if (cache_is_vipt_nonaliasing()) {
+			for (i = 0; i < (1 << compound_order(page)); i++) {
+				void *addr = kmap_atomic(page + i);
+				__cpuc_flush_dcache_area(addr, PAGE_SIZE);
+				kunmap_atomic(addr);
+			}
+		} else {
+			for (i = 0; i < (1 << compound_order(page)); i++) {
+				void *addr = kmap_high_get(page + i);
+				if (addr) {
+					__cpuc_flush_dcache_area(addr, PAGE_SIZE);
+					kunmap_high(page + i);
+				}
+			}
+		}
+	}
+
+	/*
+	 * If this is a page cache page, and we have an aliasing VIPT cache,
+	 * we only need to do one flush - which would be at the relevant
+	 * userspace colour, which is congruent with page->index.
+	 */
+	if (mapping && cache_is_vipt_aliasing())
+		flush_pfn_alias(page_to_pfn(page),
+				page->index << PAGE_SHIFT);
+}
+
+static void __flush_dcache_aliases(struct address_space *mapping, struct page *page)
+{
+	struct mm_struct *mm = current->active_mm;
+	struct vm_area_struct *mpnt;
+	pgoff_t pgoff;
+
+	/*
+	 * There are possible user space mappings of this page:
+	 * - VIVT cache: we need to also write back and invalidate all user
+	 *   data in the current VM view associated with this page.
+	 * - aliasing VIPT: we only need to find one mapping of this page.
+	 */
+	pgoff = page->index;
+
+	flush_dcache_mmap_lock(mapping);
+	vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
+		unsigned long offset;
+
+		/*
+		 * If this VMA is not in our MM, we can ignore it.
+		 */
+		if (mpnt->vm_mm != mm)
+			continue;
+		if (!(mpnt->vm_flags & VM_MAYSHARE))
+			continue;
+		offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
+		flush_cache_page(mpnt, mpnt->vm_start + offset, page_to_pfn(page));
+	}
+	flush_dcache_mmap_unlock(mapping);
+}
+
+#if __LINUX_ARM_ARCH__ >= 6
+void __sync_icache_dcache(pte_t pteval)
+{
+	unsigned long pfn;
+	struct page *page;
+	struct address_space *mapping;
+
+	if (cache_is_vipt_nonaliasing() && !pte_exec(pteval))
+		/* only flush non-aliasing VIPT caches for exec mappings */
+		return;
+	pfn = pte_pfn(pteval);
+	if (!pfn_valid(pfn))
+		return;
+
+	page = pfn_to_page(pfn);
+	if (cache_is_vipt_aliasing())
+		mapping = page_mapping_file(page);
+	else
+		mapping = NULL;
+
+	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
+		__flush_dcache_page(mapping, page);
+
+	if (pte_exec(pteval))
+		__flush_icache_all();
+}
+#endif
+
+/*
+ * Ensure cache coherency between kernel mapping and userspace mapping
+ * of this page.
+ *
+ * We have three cases to consider:
+ *  - VIPT non-aliasing cache: fully coherent so nothing required.
+ *  - VIVT: fully aliasing, so we need to handle every alias in our
+ *          current VM view.
+ *  - VIPT aliasing: need to handle one alias in our current VM view.
+ *
+ * If we need to handle aliasing:
+ *  If the page only exists in the page cache and there are no user
+ *  space mappings, we can be lazy and remember that we may have dirty
+ *  kernel cache lines for later.  Otherwise, we assume we have
+ *  aliasing mappings.
+ *
+ * Note that we disable the lazy flush for SMP configurations where
+ * the cache maintenance operations are not automatically broadcast.
+ */
+void flush_dcache_page(struct page *page)
+{
+	struct address_space *mapping;
+
+	/*
+	 * The zero page is never written to, so never has any dirty
+	 * cache lines, and therefore never needs to be flushed.
+	 */
+	if (page == ZERO_PAGE(0))
+		return;
+
+	if (!cache_ops_need_broadcast() && cache_is_vipt_nonaliasing()) {
+		if (test_bit(PG_dcache_clean, &page->flags))
+			clear_bit(PG_dcache_clean, &page->flags);
+		return;
+	}
+
+	mapping = page_mapping_file(page);
+
+	if (!cache_ops_need_broadcast() &&
+	    mapping && !page_mapcount(page))
+		clear_bit(PG_dcache_clean, &page->flags);
+	else {
+		__flush_dcache_page(mapping, page);
+		if (mapping && cache_is_vivt())
+			__flush_dcache_aliases(mapping, page);
+		else if (mapping)
+			__flush_icache_all();
+		set_bit(PG_dcache_clean, &page->flags);
+	}
+}
+EXPORT_SYMBOL(flush_dcache_page);
+
+/*
+ * Ensure cache coherency for the kernel mapping of this page. We can
+ * assume that the page is pinned via kmap.
+ *
+ * If the page only exists in the page cache and there are no user
+ * space mappings, this is a no-op since the page was already marked
+ * dirty at creation.  Otherwise, we need to flush the dirty kernel
+ * cache lines directly.
+ */
+void flush_kernel_dcache_page(struct page *page)
+{
+	if (cache_is_vivt() || cache_is_vipt_aliasing()) {
+		struct address_space *mapping;
+
+		mapping = page_mapping_file(page);
+
+		if (!mapping || mapping_mapped(mapping)) {
+			void *addr;
+
+			addr = page_address(page);
+			/*
+			 * kmap_atomic() doesn't set the page virtual
+			 * address for highmem pages, and
+			 * kunmap_atomic() takes care of cache
+			 * flushing already.
+			 */
+			if (!IS_ENABLED(CONFIG_HIGHMEM) || addr)
+				__cpuc_flush_dcache_area(addr, PAGE_SIZE);
+		}
+	}
+}
+EXPORT_SYMBOL(flush_kernel_dcache_page);
+
+/*
+ * Flush an anonymous page so that users of get_user_pages()
+ * can safely access the data.  The expected sequence is:
+ *
+ *  get_user_pages()
+ *    -> flush_anon_page
+ *  memcpy() to/from page
+ *  if written to page, flush_dcache_page()
+ */
+void __flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
+{
+	unsigned long pfn;
+
+	/* VIPT non-aliasing caches need do nothing */
+	if (cache_is_vipt_nonaliasing())
+		return;
+
+	/*
+	 * Write back and invalidate userspace mapping.
+	 */
+	pfn = page_to_pfn(page);
+	if (cache_is_vivt()) {
+		flush_cache_page(vma, vmaddr, pfn);
+	} else {
+		/*
+		 * For aliasing VIPT, we can flush an alias of the
+		 * userspace address only.
+		 */
+		flush_pfn_alias(pfn, vmaddr);
+		__flush_icache_all();
+	}
+
+	/*
+	 * Invalidate kernel mapping.  No data should be contained
+	 * in this mapping of the page.  FIXME: this is overkill
+	 * since we actually ask for a write-back and invalidate.
+	 */
+	__cpuc_flush_dcache_area(page_address(page), PAGE_SIZE);
+}
diff --git a/arch/arm/mm/fsr-2level.c b/arch/arm/mm/fsr-2level.c
new file mode 100644
index 0000000..f2be951
--- /dev/null
+++ b/arch/arm/mm/fsr-2level.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+static struct fsr_info fsr_info[] = {
+	/*
+	 * The following are the standard ARMv3 and ARMv4 aborts.  ARMv5
+	 * defines these to be "precise" aborts.
+	 */
+	{ do_bad,		SIGSEGV, 0,		"vector exception"		   },
+	{ do_bad,		SIGBUS,	 BUS_ADRALN,	"alignment exception"		   },
+	{ do_bad,		SIGKILL, 0,		"terminal exception"		   },
+	{ do_bad,		SIGBUS,	 BUS_ADRALN,	"alignment exception"		   },
+	{ do_bad,		SIGBUS,	 0,		"external abort on linefetch"	   },
+	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"section translation fault"	   },
+	{ do_bad,		SIGBUS,	 0,		"external abort on linefetch"	   },
+	{ do_page_fault,	SIGSEGV, SEGV_MAPERR,	"page translation fault"	   },
+	{ do_bad,		SIGBUS,	 0,		"external abort on non-linefetch"  },
+	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"section domain fault"		   },
+	{ do_bad,		SIGBUS,	 0,		"external abort on non-linefetch"  },
+	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"page domain fault"		   },
+	{ do_bad,		SIGBUS,	 0,		"external abort on translation"	   },
+	{ do_sect_fault,	SIGSEGV, SEGV_ACCERR,	"section permission fault"	   },
+	{ do_bad,		SIGBUS,	 0,		"external abort on translation"	   },
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"page permission fault"		   },
+	/*
+	 * The following are "imprecise" aborts, which are signalled by bit
+	 * 10 of the FSR, and may not be recoverable.  These are only
+	 * supported if the CPU abort handler supports bit 10.
+	 */
+	{ do_bad,		SIGBUS,  0,		"unknown 16"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 17"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 18"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 19"			   },
+	{ do_bad,		SIGBUS,  0,		"lock abort"			   }, /* xscale */
+	{ do_bad,		SIGBUS,  0,		"unknown 21"			   },
+	{ do_bad,		SIGBUS,  BUS_OBJERR,	"imprecise external abort"	   }, /* xscale */
+	{ do_bad,		SIGBUS,  0,		"unknown 23"			   },
+	{ do_bad,		SIGBUS,  0,		"dcache parity error"		   }, /* xscale */
+	{ do_bad,		SIGBUS,  0,		"unknown 25"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 26"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 27"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 28"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 29"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 30"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 31"			   },
+};
+
+static struct fsr_info ifsr_info[] = {
+	{ do_bad,		SIGBUS,  0,		"unknown 0"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 1"			   },
+	{ do_bad,		SIGBUS,  0,		"debug event"			   },
+	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"section access flag fault"	   },
+	{ do_bad,		SIGBUS,  0,		"unknown 4"			   },
+	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"section translation fault"	   },
+	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"page access flag fault"	   },
+	{ do_page_fault,	SIGSEGV, SEGV_MAPERR,	"page translation fault"	   },
+	{ do_bad,		SIGBUS,	 0,		"external abort on non-linefetch"  },
+	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"section domain fault"		   },
+	{ do_bad,		SIGBUS,  0,		"unknown 10"			   },
+	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"page domain fault"		   },
+	{ do_bad,		SIGBUS,	 0,		"external abort on translation"	   },
+	{ do_sect_fault,	SIGSEGV, SEGV_ACCERR,	"section permission fault"	   },
+	{ do_bad,		SIGBUS,	 0,		"external abort on translation"	   },
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"page permission fault"		   },
+	{ do_bad,		SIGBUS,  0,		"unknown 16"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 17"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 18"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 19"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 20"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 21"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 22"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 23"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 24"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 25"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 26"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 27"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 28"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 29"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 30"			   },
+	{ do_bad,		SIGBUS,  0,		"unknown 31"			   },
+};
diff --git a/arch/arm/mm/fsr-3level.c b/arch/arm/mm/fsr-3level.c
new file mode 100644
index 0000000..d0ae296
--- /dev/null
+++ b/arch/arm/mm/fsr-3level.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+static struct fsr_info fsr_info[] = {
+	{ do_bad,		SIGBUS,  0,		"unknown 0"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 1"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 2"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 3"			},
+	{ do_bad,		SIGBUS,  0,		"reserved translation fault"	},
+	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 1 translation fault"	},
+	{ do_translation_fault,	SIGSEGV, SEGV_MAPERR,	"level 2 translation fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_MAPERR,	"level 3 translation fault"	},
+	{ do_bad,		SIGBUS,  0,		"reserved access flag fault"	},
+	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"level 1 access flag fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 access flag fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 access flag fault"	},
+	{ do_bad,		SIGBUS,  0,		"reserved permission fault"	},
+	{ do_bad,		SIGSEGV, SEGV_ACCERR,	"level 1 permission fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 2 permission fault"	},
+	{ do_page_fault,	SIGSEGV, SEGV_ACCERR,	"level 3 permission fault"	},
+	{ do_bad,		SIGBUS,  0,		"synchronous external abort"	},
+	{ do_bad,		SIGBUS,  0,		"asynchronous external abort"	},
+	{ do_bad,		SIGBUS,  0,		"unknown 18"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 19"			},
+	{ do_bad,		SIGBUS,  0,		"synchronous abort (translation table walk)" },
+	{ do_bad,		SIGBUS,  0,		"synchronous abort (translation table walk)" },
+	{ do_bad,		SIGBUS,  0,		"synchronous abort (translation table walk)" },
+	{ do_bad,		SIGBUS,  0,		"synchronous abort (translation table walk)" },
+	{ do_bad,		SIGBUS,  0,		"synchronous parity error"	},
+	{ do_bad,		SIGBUS,  0,		"asynchronous parity error"	},
+	{ do_bad,		SIGBUS,  0,		"unknown 26"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 27"			},
+	{ do_bad,		SIGBUS,  0,		"synchronous parity error (translation table walk)" },
+	{ do_bad,		SIGBUS,  0,		"synchronous parity error (translation table walk)" },
+	{ do_bad,		SIGBUS,  0,		"synchronous parity error (translation table walk)" },
+	{ do_bad,		SIGBUS,  0,		"synchronous parity error (translation table walk)" },
+	{ do_bad,		SIGBUS,  0,		"unknown 32"			},
+	{ do_bad,		SIGBUS,  BUS_ADRALN,	"alignment fault"		},
+	{ do_bad,		SIGBUS,  0,		"debug event"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 35"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 36"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 37"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 38"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 39"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 40"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 41"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 42"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 43"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 44"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 45"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 46"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 47"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 48"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 49"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 50"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 51"			},
+	{ do_bad,		SIGBUS,  0,		"implementation fault (lockdown abort)" },
+	{ do_bad,		SIGBUS,  0,		"unknown 53"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 54"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 55"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 56"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 57"			},
+	{ do_bad,		SIGBUS,  0,		"implementation fault (coprocessor abort)" },
+	{ do_bad,		SIGBUS,  0,		"unknown 59"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 60"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 61"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 62"			},
+	{ do_bad,		SIGBUS,  0,		"unknown 63"			},
+};
+
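+/*
+ * With LPAE the IFSR uses the same status encoding as the DFSR, so a
+ * single table serves both prefetch and data aborts.
+ */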
+#define ifsr_info	fsr_info
diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
new file mode 100644
index 0000000..d02f818
--- /dev/null
+++ b/arch/arm/mm/highmem.c
@@ -0,0 +1,149 @@
+/*
+ * arch/arm/mm/highmem.c -- ARM highmem support
+ *
+ * Author:	Nicolas Pitre
+ * Created:	September 8, 2008
+ * Copyright:	Marvell Semiconductors Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/highmem.h>
+#include <linux/interrupt.h>
+#include <asm/fixmap.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include "mm.h"
+
+static inline void set_fixmap_pte(int idx, pte_t pte)
+{
+	unsigned long vaddr = __fix_to_virt(idx);
+	pte_t *ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr);
+
+	set_pte_ext(ptep, pte, 0);
+	local_flush_tlb_kernel_page(vaddr);
+}
+
+static inline pte_t get_fixmap_pte(unsigned long vaddr)
+{
+	pte_t *ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr);
+
+	return *ptep;
+}
+
+void *kmap(struct page *page)
+{
+	might_sleep();
+	if (!PageHighMem(page))
+		return page_address(page);
+	return kmap_high(page);
+}
+EXPORT_SYMBOL(kmap);
+
+void kunmap(struct page *page)
+{
+	BUG_ON(in_interrupt());
+	if (!PageHighMem(page))
+		return;
+	kunmap_high(page);
+}
+EXPORT_SYMBOL(kunmap);
+
+void *kmap_atomic(struct page *page)
+{
+	unsigned int idx;
+	unsigned long vaddr;
+	void *kmap;
+	int type;
+
+	preempt_disable();
+	pagefault_disable();
+	if (!PageHighMem(page))
+		return page_address(page);
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+	/*
+	 * There is no cache coherency issue when the cache is not VIVT, so
+	 * force dedicated kmap usage in that case for better debugging.
+	 */
+	if (!cache_is_vivt())
+		kmap = NULL;
+	else
+#endif
+		kmap = kmap_high_get(page);
+	if (kmap)
+		return kmap;
+
+	type = kmap_atomic_idx_push();
+
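+	/* Each CPU gets its own window of KM_TYPE_NR fixmap slots. */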
+	idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
+	vaddr = __fix_to_virt(idx);
+#ifdef CONFIG_DEBUG_HIGHMEM
+	/*
+	 * With debugging enabled, kunmap_atomic forces that entry to 0.
+	 * Make sure it was indeed properly unmapped.
+	 */
+	BUG_ON(!pte_none(get_fixmap_pte(vaddr)));
+#endif
+	/*
+	 * When debugging is off, kunmap_atomic leaves the previous mapping
+	 * in place, so the contained TLB flush ensures the TLB is updated
+	 * with the new mapping.
+	 */
+	set_fixmap_pte(idx, mk_pte(page, kmap_prot));
+
+	return (void *)vaddr;
+}
+EXPORT_SYMBOL(kmap_atomic);
+
+void __kunmap_atomic(void *kvaddr)
+{
+	unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
+	int idx, type;
+
+	if (kvaddr >= (void *)FIXADDR_START) {
+		type = kmap_atomic_idx();
+		idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
+
+		if (cache_is_vivt())
+			__cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE);
+#ifdef CONFIG_DEBUG_HIGHMEM
+		BUG_ON(vaddr != __fix_to_virt(idx));
+		set_fixmap_pte(idx, __pte(0));
+#else
+		(void) idx;  /* to kill a warning */
+#endif
+		kmap_atomic_idx_pop();
+	} else if (vaddr >= PKMAP_ADDR(0) && vaddr < PKMAP_ADDR(LAST_PKMAP)) {
+		/* this address was obtained through kmap_high_get() */
+		kunmap_high(pte_page(pkmap_page_table[PKMAP_NR(vaddr)]));
+	}
+	pagefault_enable();
+	preempt_enable();
+}
+EXPORT_SYMBOL(__kunmap_atomic);
+
+void *kmap_atomic_pfn(unsigned long pfn)
+{
+	unsigned long vaddr;
+	int idx, type;
+	struct page *page = pfn_to_page(pfn);
+
+	preempt_disable();
+	pagefault_disable();
+	if (!PageHighMem(page))
+		return page_address(page);
+
+	type = kmap_atomic_idx_push();
+	idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id();
+	vaddr = __fix_to_virt(idx);
+#ifdef CONFIG_DEBUG_HIGHMEM
+	BUG_ON(!pte_none(get_fixmap_pte(vaddr)));
+#endif
+	set_fixmap_pte(idx, pfn_pte(pfn, kmap_prot));
+
+	return (void *)vaddr;
+}
diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c
new file mode 100644
index 0000000..fcafb52
--- /dev/null
+++ b/arch/arm/mm/hugetlbpage.c
@@ -0,0 +1,47 @@
+/*
+ * arch/arm/mm/hugetlbpage.c
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * Based on arch/x86/include/asm/hugetlb.h and Bill Carson's patches
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/err.h>
+#include <linux/sysctl.h>
+#include <asm/mman.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+
+/*
+ * On ARM, huge pages are backed by pmd's rather than pte's, so we do a lot
+ * of type casting from pmd_t * to pte_t *.
+ */
+
+int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
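+/*
+ * A huge page is a non-empty PMD that maps a section directly rather
+ * than pointing to a page table.
+ */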
+int pmd_huge(pmd_t pmd)
+{
+	return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
+}
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
new file mode 100644
index 0000000..1d1edd0
--- /dev/null
+++ b/arch/arm/mm/idmap.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/mm_types.h>
+
+#include <asm/cputype.h>
+#include <asm/idmap.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/sections.h>
+#include <asm/system_info.h>
+
+/*
+ * Note: accesses outside of the kernel image and the identity map area
+ * are not supported on any CPU using the idmap tables as its current
+ * page tables.
+ */
+pgd_t *idmap_pgd __ro_after_init;
+long long arch_phys_to_idmap_offset __ro_after_init;
+
+#ifdef CONFIG_ARM_LPAE
+static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end,
+	unsigned long prot)
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	if (pud_none_or_clear_bad(pud) || (pud_val(*pud) & L_PGD_SWAPPER)) {
+		pmd = pmd_alloc_one(&init_mm, addr);
+		if (!pmd) {
+			pr_warn("Failed to allocate identity pmd.\n");
+			return;
+		}
+		/*
+		 * Copy the original PMD to ensure that the PMD entries for
+		 * the kernel image are preserved.
+		 */
+		if (!pud_none(*pud))
+			memcpy(pmd, pmd_offset(pud, 0),
+			       PTRS_PER_PMD * sizeof(pmd_t));
+		pud_populate(&init_mm, pud, pmd);
+		pmd += pmd_index(addr);
+	} else
+		pmd = pmd_offset(pud, addr);
+
+	do {
+		next = pmd_addr_end(addr, end);
+		*pmd = __pmd((addr & PMD_MASK) | prot);
+		flush_pmd_entry(pmd);
+	} while (pmd++, addr = next, addr != end);
+}
+#else	/* !CONFIG_ARM_LPAE */
+static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end,
+	unsigned long prot)
+{
+	pmd_t *pmd = pmd_offset(pud, addr);
+
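+	/* Fill both halves of the Linux PMD with back-to-back section mappings. */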
+	addr = (addr & PMD_MASK) | prot;
+	pmd[0] = __pmd(addr);
+	addr += SECTION_SIZE;
+	pmd[1] = __pmd(addr);
+	flush_pmd_entry(pmd);
+}
+#endif	/* CONFIG_ARM_LPAE */
+
+static void idmap_add_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
+	unsigned long prot)
+{
+	pud_t *pud = pud_offset(pgd, addr);
+	unsigned long next;
+
+	do {
+		next = pud_addr_end(addr, end);
+		idmap_add_pmd(pud, addr, next, prot);
+	} while (pud++, addr = next, addr != end);
+}
+
+static void identity_mapping_add(pgd_t *pgd, const char *text_start,
+				 const char *text_end, unsigned long prot)
+{
+	unsigned long addr, end;
+	unsigned long next;
+
+	addr = virt_to_idmap(text_start);
+	end = virt_to_idmap(text_end);
+	pr_info("Setting up static identity map for 0x%lx - 0x%lx\n", addr, end);
+
+	prot |= PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AF;
+
+	if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale_family())
+		prot |= PMD_BIT4;
+
+	pgd += pgd_index(addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		idmap_add_pud(pgd, addr, next, prot);
+	} while (pgd++, addr = next, addr != end);
+}
+
+extern char  __idmap_text_start[], __idmap_text_end[];
+
+static int __init init_static_idmap(void)
+{
+	idmap_pgd = pgd_alloc(&init_mm);
+	if (!idmap_pgd)
+		return -ENOMEM;
+
+	identity_mapping_add(idmap_pgd, __idmap_text_start,
+			     __idmap_text_end, 0);
+
+	/* Flush L1 for the hardware to see this page table content */
+	flush_cache_louis();
+
+	return 0;
+}
+early_initcall(init_static_idmap);
+
+/*
+ * In order to soft-boot, we need to switch to a 1:1 mapping for the
+ * cpu_reset functions. This will then ensure that we have predictable
+ * results when turning off the mmu.
+ */
+void setup_mm_for_reboot(void)
+{
+	/* Switch to the identity mapping. */
+	cpu_switch_mm(idmap_pgd, &init_mm);
+	local_flush_bp_all();
+
+#ifdef CONFIG_CPU_HAS_ASID
+	/*
+	 * We don't have a clean ASID for the identity mapping, which
+	 * may clash with virtual addresses of the previous page tables
+	 * and may therefore still be present in the TLB.
+	 */
+	local_flush_tlb_all();
+#endif
+}
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
new file mode 100644
index 0000000..0cc8e04
--- /dev/null
+++ b/arch/arm/mm/init.c
@@ -0,0 +1,803 @@
+/*
+ *  linux/arch/arm/mm/init.c
+ *
+ *  Copyright (C) 1995-2005 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/swap.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/mman.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/task.h>
+#include <linux/export.h>
+#include <linux/nodemask.h>
+#include <linux/initrd.h>
+#include <linux/of_fdt.h>
+#include <linux/highmem.h>
+#include <linux/gfp.h>
+#include <linux/memblock.h>
+#include <linux/dma-contiguous.h>
+#include <linux/sizes.h>
+#include <linux/stop_machine.h>
+
+#include <asm/cp15.h>
+#include <asm/mach-types.h>
+#include <asm/memblock.h>
+#include <asm/memory.h>
+#include <asm/prom.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/system_info.h>
+#include <asm/tlb.h>
+#include <asm/fixmap.h>
+#include <asm/ptdump.h>
+
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+
+#include "mm.h"
+
+#ifdef CONFIG_CPU_CP15_MMU
+unsigned long __init __clear_cr(unsigned long mask)
+{
+	cr_alignment = cr_alignment & ~mask;
+	return cr_alignment;
+}
+#endif
+
+static phys_addr_t phys_initrd_start __initdata = 0;
+static unsigned long phys_initrd_size __initdata = 0;
+
+static int __init early_initrd(char *p)
+{
+	phys_addr_t start;
+	unsigned long size;
+	char *endp;
+
+	start = memparse(p, &endp);
+	if (*endp == ',') {
+		size = memparse(endp + 1, NULL);
+
+		phys_initrd_start = start;
+		phys_initrd_size = size;
+	}
+	return 0;
+}
+early_param("initrd", early_initrd);
+
+static int __init parse_tag_initrd(const struct tag *tag)
+{
+	pr_warn("ATAG_INITRD is deprecated; "
+		"please update your bootloader.\n");
+	phys_initrd_start = __virt_to_phys(tag->u.initrd.start);
+	phys_initrd_size = tag->u.initrd.size;
+	return 0;
+}
+
+__tagtable(ATAG_INITRD, parse_tag_initrd);
+
+static int __init parse_tag_initrd2(const struct tag *tag)
+{
+	phys_initrd_start = tag->u.initrd.start;
+	phys_initrd_size = tag->u.initrd.size;
+	return 0;
+}
+
+__tagtable(ATAG_INITRD2, parse_tag_initrd2);
+
+static void __init find_limits(unsigned long *min, unsigned long *max_low,
+			       unsigned long *max_high)
+{
+	*max_low = PFN_DOWN(memblock_get_current_limit());
+	*min = PFN_UP(memblock_start_of_DRAM());
+	*max_high = PFN_DOWN(memblock_end_of_DRAM());
+}
+
+#ifdef CONFIG_ZONE_DMA
+
+phys_addr_t arm_dma_zone_size __read_mostly;
+EXPORT_SYMBOL(arm_dma_zone_size);
+
+/*
+ * The DMA mask corresponding to the maximum bus address allocatable
+ * using GFP_DMA.  The default here places no restriction on DMA
+ * allocations.  This must be the smallest DMA mask in the system,
+ * so a successful GFP_DMA allocation will always satisfy this.
+ */
+phys_addr_t arm_dma_limit;
+unsigned long arm_dma_pfn_limit;
+
+static void __init arm_adjust_dma_zone(unsigned long *size, unsigned long *hole,
+	unsigned long dma_size)
+{
+	if (size[0] <= dma_size)
+		return;
+
+	size[ZONE_NORMAL] = size[0] - dma_size;
+	size[ZONE_DMA] = dma_size;
+	hole[ZONE_NORMAL] = hole[0];
+	hole[ZONE_DMA] = 0;
+}
+#endif
+
+void __init setup_dma_zone(const struct machine_desc *mdesc)
+{
+#ifdef CONFIG_ZONE_DMA
+	if (mdesc->dma_zone_size) {
+		arm_dma_zone_size = mdesc->dma_zone_size;
+		arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1;
+	} else
+		arm_dma_limit = 0xffffffff;
+	arm_dma_pfn_limit = arm_dma_limit >> PAGE_SHIFT;
+#endif
+}
+
+static void __init zone_sizes_init(unsigned long min, unsigned long max_low,
+	unsigned long max_high)
+{
+	unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
+	struct memblock_region *reg;
+
+	/*
+	 * initialise the zones.
+	 */
+	memset(zone_size, 0, sizeof(zone_size));
+
+	/*
+	 * The memory size has already been determined.  If we need
+	 * to do anything fancy with the allocation of this memory
+	 * to the zones, now is the time to do it.
+	 */
+	zone_size[0] = max_low - min;
+#ifdef CONFIG_HIGHMEM
+	zone_size[ZONE_HIGHMEM] = max_high - max_low;
+#endif
+
+	/*
+	 * Calculate the size of the holes.
+	 *  holes = node_size - sum(bank_sizes)
+	 */
+	memcpy(zhole_size, zone_size, sizeof(zhole_size));
+	for_each_memblock(memory, reg) {
+		unsigned long start = memblock_region_memory_base_pfn(reg);
+		unsigned long end = memblock_region_memory_end_pfn(reg);
+
+		if (start < max_low) {
+			unsigned long low_end = min(end, max_low);
+			zhole_size[0] -= low_end - start;
+		}
+#ifdef CONFIG_HIGHMEM
+		if (end > max_low) {
+			unsigned long high_start = max(start, max_low);
+			zhole_size[ZONE_HIGHMEM] -= end - high_start;
+		}
+#endif
+	}
+
+#ifdef CONFIG_ZONE_DMA
+	/*
+	 * Adjust the sizes according to any special requirements for
+	 * this machine type.
+	 */
+	if (arm_dma_zone_size)
+		arm_adjust_dma_zone(zone_size, zhole_size,
+			arm_dma_zone_size >> PAGE_SHIFT);
+#endif
+
+	free_area_init_node(0, zone_size, min, zhole_size);
+}
+
+#ifdef CONFIG_HAVE_ARCH_PFN_VALID
+int pfn_valid(unsigned long pfn)
+{
+	return memblock_is_map_memory(__pfn_to_phys(pfn));
+}
+EXPORT_SYMBOL(pfn_valid);
+#endif
+
+#ifndef CONFIG_SPARSEMEM
+static void __init arm_memory_present(void)
+{
+}
+#else
+static void __init arm_memory_present(void)
+{
+	struct memblock_region *reg;
+
+	for_each_memblock(memory, reg)
+		memory_present(0, memblock_region_memory_base_pfn(reg),
+			       memblock_region_memory_end_pfn(reg));
+}
+#endif
+
+static bool arm_memblock_steal_permitted = true;
+
+phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align)
+{
+	phys_addr_t phys;
+
+	BUG_ON(!arm_memblock_steal_permitted);
+
+	phys = memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ANYWHERE);
+	memblock_free(phys, size);
+	memblock_remove(phys, size);
+
+	return phys;
+}
+
+static void __init arm_initrd_init(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+	phys_addr_t start;
+	unsigned long size;
+
+	/* FDT scan will populate initrd_start */
+	if (initrd_start && !phys_initrd_size) {
+		phys_initrd_start = __virt_to_phys(initrd_start);
+		phys_initrd_size = initrd_end - initrd_start;
+	}
+
+	initrd_start = initrd_end = 0;
+
+	if (!phys_initrd_size)
+		return;
+
+	/*
+	 * Round the memory region to page boundaries as per free_initrd_mem().
+	 * This allows us to detect whether the pages overlapping the initrd
+	 * are in use, but more importantly, reserves the entire set of pages
+	 * as we don't want these pages allocated for other purposes.
+	 */
+	start = round_down(phys_initrd_start, PAGE_SIZE);
+	size = phys_initrd_size + (phys_initrd_start - start);
+	size = round_up(size, PAGE_SIZE);
+
+	if (!memblock_is_region_memory(start, size)) {
+		pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region - disabling initrd\n",
+		       (u64)start, size);
+		return;
+	}
+
+	if (memblock_is_region_reserved(start, size)) {
+		pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region - disabling initrd\n",
+		       (u64)start, size);
+		return;
+	}
+
+	memblock_reserve(start, size);
+
+	/* Now convert initrd to virtual addresses */
+	initrd_start = __phys_to_virt(phys_initrd_start);
+	initrd_end = initrd_start + phys_initrd_size;
+#endif
+}
+
+void __init arm_memblock_init(const struct machine_desc *mdesc)
+{
+	/* Register the kernel text, kernel data and initrd with memblock. */
+	memblock_reserve(__pa(KERNEL_START), KERNEL_END - KERNEL_START);
+
+	arm_initrd_init();
+
+	arm_mm_memblock_reserve();
+
+	/* reserve any platform specific memblock areas */
+	if (mdesc->reserve)
+		mdesc->reserve();
+
+	early_init_fdt_reserve_self();
+	early_init_fdt_scan_reserved_mem();
+
+	/* reserve memory for DMA contiguous allocations */
+	dma_contiguous_reserve(arm_dma_limit);
+
+	arm_memblock_steal_permitted = false;
+	memblock_dump_all();
+}
+
+void __init bootmem_init(void)
+{
+	unsigned long min, max_low, max_high;
+
+	memblock_allow_resize();
+	max_low = max_high = 0;
+
+	find_limits(&min, &max_low, &max_high);
+
+	early_memtest((phys_addr_t)min << PAGE_SHIFT,
+		      (phys_addr_t)max_low << PAGE_SHIFT);
+
+	/*
+	 * Sparsemem tries to allocate bootmem in memory_present(),
+	 * so must be done after the fixed reservations
+	 */
+	arm_memory_present();
+
+	/*
+	 * sparse_init() needs the bootmem allocator up and running.
+	 */
+	sparse_init();
+
+	/*
+	 * Now free the memory - free_area_init_node needs
+	 * the sparse mem_map arrays initialized by sparse_init()
+	 * for memmap_init_zone(), otherwise all PFNs are invalid.
+	 */
+	zone_sizes_init(min, max_low, max_high);
+
+	/*
+	 * This doesn't seem to be used by the Linux memory manager any
+	 * more, but is used by ll_rw_block.  If we can get rid of it, we
+	 * also get rid of some of the stuff above as well.
+	 */
+	min_low_pfn = min;
+	max_low_pfn = max_low;
+	max_pfn = max_high;
+}
+
+/*
+ * Poison init memory with an undefined instruction (ARM) or a branch to an
+ * undefined instruction (Thumb).
+ */
+static inline void poison_init_mem(void *s, size_t count)
+{
+	u32 *p = (u32 *)s;
+	for (; count != 0; count -= 4)
+		*p++ = 0xe7fddef0;
+}
+
+static inline void
+free_memmap(unsigned long start_pfn, unsigned long end_pfn)
+{
+	struct page *start_pg, *end_pg;
+	phys_addr_t pg, pgend;
+
+	/*
+	 * Convert start_pfn/end_pfn to a struct page pointer.
+	 */
+	start_pg = pfn_to_page(start_pfn - 1) + 1;
+	end_pg = pfn_to_page(end_pfn - 1) + 1;
+
+	/*
+	 * Convert to physical addresses, and
+	 * round start upwards and end downwards.
+	 */
+	pg = PAGE_ALIGN(__pa(start_pg));
+	pgend = __pa(end_pg) & PAGE_MASK;
+
+	/*
+	 * If there are free pages between these,
+	 * free the section of the memmap array.
+	 */
+	if (pg < pgend)
+		memblock_free_early(pg, pgend - pg);
+}
+
+/*
+ * The mem_map array can get very big.  Free the unused area of the memory map.
+ */
+static void __init free_unused_memmap(void)
+{
+	unsigned long start, prev_end = 0;
+	struct memblock_region *reg;
+
+	/*
+	 * This relies on each bank being in address order.
+	 * The banks are sorted previously in bootmem_init().
+	 */
+	for_each_memblock(memory, reg) {
+		start = memblock_region_memory_base_pfn(reg);
+
+#ifdef CONFIG_SPARSEMEM
+		/*
+		 * Take care not to free memmap entries that don't exist
+		 * due to SPARSEMEM sections which aren't present.
+		 */
+		start = min(start,
+				 ALIGN(prev_end, PAGES_PER_SECTION));
+#else
+		/*
+		 * Align down here since the VM subsystem insists that the
+		 * memmap entries are valid from the bank start aligned to
+		 * MAX_ORDER_NR_PAGES.
+		 */
+		start = round_down(start, MAX_ORDER_NR_PAGES);
+#endif
+		/*
+		 * If we had a previous bank, and there is a space
+		 * between the current bank and the previous, free it.
+		 */
+		if (prev_end && prev_end < start)
+			free_memmap(prev_end, start);
+
+		/*
+		 * Align up here since the VM subsystem insists that the
+		 * memmap entries are valid from the bank end aligned to
+		 * MAX_ORDER_NR_PAGES.
+		 */
+		prev_end = ALIGN(memblock_region_memory_end_pfn(reg),
+				 MAX_ORDER_NR_PAGES);
+	}
+
+#ifdef CONFIG_SPARSEMEM
+	if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION))
+		free_memmap(prev_end,
+			    ALIGN(prev_end, PAGES_PER_SECTION));
+#endif
+}
+
+#ifdef CONFIG_HIGHMEM
+static inline void free_area_high(unsigned long pfn, unsigned long end)
+{
+	for (; pfn < end; pfn++)
+		free_highmem_page(pfn_to_page(pfn));
+}
+#endif
+
+static void __init free_highpages(void)
+{
+#ifdef CONFIG_HIGHMEM
+	unsigned long max_low = max_low_pfn;
+	struct memblock_region *mem, *res;
+
+	/* set highmem page free */
+	for_each_memblock(memory, mem) {
+		unsigned long start = memblock_region_memory_base_pfn(mem);
+		unsigned long end = memblock_region_memory_end_pfn(mem);
+
+		/* Ignore complete lowmem entries */
+		if (end <= max_low)
+			continue;
+
+		if (memblock_is_nomap(mem))
+			continue;
+
+		/* Truncate partial highmem entries */
+		if (start < max_low)
+			start = max_low;
+
+		/* Find and exclude any reserved regions */
+		for_each_memblock(reserved, res) {
+			unsigned long res_start, res_end;
+
+			res_start = memblock_region_reserved_base_pfn(res);
+			res_end = memblock_region_reserved_end_pfn(res);
+
+			if (res_end < start)
+				continue;
+			if (res_start < start)
+				res_start = start;
+			if (res_start > end)
+				res_start = end;
+			if (res_end > end)
+				res_end = end;
+			if (res_start != start)
+				free_area_high(start, res_start);
+			start = res_end;
+			if (start == end)
+				break;
+		}
+
+		/* And now free anything which remains */
+		if (start < end)
+			free_area_high(start, end);
+	}
+#endif
+}
+
+/*
+ * mem_init() marks the free areas in the mem_map and tells us how much
+ * memory is free.  This is done after various parts of the system have
+ * claimed their memory after the kernel image.
+ */
+void __init mem_init(void)
+{
+#ifdef CONFIG_HAVE_TCM
+	/* These pointers are filled in on TCM detection */
+	extern u32 dtcm_end;
+	extern u32 itcm_end;
+#endif
+
+	set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
+
+	/* this will put all unused low memory onto the freelists */
+	free_unused_memmap();
+	free_all_bootmem();
+
+#ifdef CONFIG_SA1111
+	/* now that our DMA memory is actually so designated, we can free it */
+	free_reserved_area(__va(PHYS_OFFSET), swapper_pg_dir, -1, NULL);
+#endif
+
+	free_highpages();
+
+	mem_init_print_info(NULL);
+
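+/* Layout printout helpers: base, top, and size in kB/MB. */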
+#define MLK(b, t) b, t, ((t) - (b)) >> 10
+#define MLM(b, t) b, t, ((t) - (b)) >> 20
+#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
+
+	pr_notice("Virtual kernel memory layout:\n"
+			"    vector  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+#ifdef CONFIG_HAVE_TCM
+			"    DTCM    : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+			"    ITCM    : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+#endif
+			"    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+			"    vmalloc : 0x%08lx - 0x%08lx   (%4ld MB)\n"
+			"    lowmem  : 0x%08lx - 0x%08lx   (%4ld MB)\n"
+#ifdef CONFIG_HIGHMEM
+			"    pkmap   : 0x%08lx - 0x%08lx   (%4ld MB)\n"
+#endif
+#ifdef CONFIG_MODULES
+			"    modules : 0x%08lx - 0x%08lx   (%4ld MB)\n"
+#endif
+			"      .text : 0x%p" " - 0x%p" "   (%4td kB)\n"
+			"      .init : 0x%p" " - 0x%p" "   (%4td kB)\n"
+			"      .data : 0x%p" " - 0x%p" "   (%4td kB)\n"
+			"       .bss : 0x%p" " - 0x%p" "   (%4td kB)\n",
+
+			MLK(VECTORS_BASE, VECTORS_BASE + PAGE_SIZE),
+#ifdef CONFIG_HAVE_TCM
+			MLK(DTCM_OFFSET, (unsigned long) dtcm_end),
+			MLK(ITCM_OFFSET, (unsigned long) itcm_end),
+#endif
+			MLK(FIXADDR_START, FIXADDR_END),
+			MLM(VMALLOC_START, VMALLOC_END),
+			MLM(PAGE_OFFSET, (unsigned long)high_memory),
+#ifdef CONFIG_HIGHMEM
+			MLM(PKMAP_BASE, (PKMAP_BASE) + (LAST_PKMAP) *
+				(PAGE_SIZE)),
+#endif
+#ifdef CONFIG_MODULES
+			MLM(MODULES_VADDR, MODULES_END),
+#endif
+
+			MLK_ROUNDUP(_text, _etext),
+			MLK_ROUNDUP(__init_begin, __init_end),
+			MLK_ROUNDUP(_sdata, _edata),
+			MLK_ROUNDUP(__bss_start, __bss_stop));
+
+#undef MLK
+#undef MLM
+#undef MLK_ROUNDUP
+
+	/*
+	 * Check boundaries twice: Some fundamental inconsistencies can
+	 * be detected at build time already.
+	 */
+#ifdef CONFIG_MMU
+	BUILD_BUG_ON(TASK_SIZE				> MODULES_VADDR);
+	BUG_ON(TASK_SIZE 				> MODULES_VADDR);
+#endif
+
+#ifdef CONFIG_HIGHMEM
+	BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > PAGE_OFFSET);
+	BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE	> PAGE_OFFSET);
+#endif
+}
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+struct section_perm {
+	const char *name;
+	unsigned long start;
+	unsigned long end;
+	pmdval_t mask;
+	pmdval_t prot;
+	pmdval_t clear;
+};
+
+/* First section-aligned location at or after __start_rodata. */
+extern char __start_rodata_section_aligned[];
+
+static struct section_perm nx_perms[] = {
+	/* Make page tables, etc. before _stext RW (set NX). */
+	{
+		.name	= "pre-text NX",
+		.start	= PAGE_OFFSET,
+		.end	= (unsigned long)_stext,
+		.mask	= ~PMD_SECT_XN,
+		.prot	= PMD_SECT_XN,
+	},
+	/* Make init RW (set NX). */
+	{
+		.name	= "init NX",
+		.start	= (unsigned long)__init_begin,
+		.end	= (unsigned long)_sdata,
+		.mask	= ~PMD_SECT_XN,
+		.prot	= PMD_SECT_XN,
+	},
+	/* Make rodata NX (set RO in ro_perms below). */
+	{
+		.name	= "rodata NX",
+		.start  = (unsigned long)__start_rodata_section_aligned,
+		.end    = (unsigned long)__init_begin,
+		.mask   = ~PMD_SECT_XN,
+		.prot   = PMD_SECT_XN,
+	},
+};
+
+static struct section_perm ro_perms[] = {
+	/* Make kernel code and rodata RX (set RO). */
+	{
+		.name	= "text/rodata RO",
+		.start  = (unsigned long)_stext,
+		.end    = (unsigned long)__init_begin,
+#ifdef CONFIG_ARM_LPAE
+		.mask   = ~(L_PMD_SECT_RDONLY | PMD_SECT_AP2),
+		.prot   = L_PMD_SECT_RDONLY | PMD_SECT_AP2,
+#else
+		.mask   = ~(PMD_SECT_APX | PMD_SECT_AP_WRITE),
+		.prot   = PMD_SECT_APX | PMD_SECT_AP_WRITE,
+		.clear  = PMD_SECT_AP_WRITE,
+#endif
+	},
+};
+
+/*
+ * Updates section permissions only for the current mm (sections are
+ * copied into each mm). During startup, this is the init_mm. It is only
+ * safe to call this with preemption disabled, as under stop_machine().
+ */
+static inline void section_update(unsigned long addr, pmdval_t mask,
+				  pmdval_t prot, struct mm_struct *mm)
+{
+	pmd_t *pmd;
+
+	pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr);
+
+#ifdef CONFIG_ARM_LPAE
+	pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot);
+#else
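+	/*
+	 * Each Linux PMD covers two hardware sections; update the half
+	 * that maps this address.
+	 */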
+	if (addr & SECTION_SIZE)
+		pmd[1] = __pmd((pmd_val(pmd[1]) & mask) | prot);
+	else
+		pmd[0] = __pmd((pmd_val(pmd[0]) & mask) | prot);
+#endif
+	flush_pmd_entry(pmd);
+	local_flush_tlb_kernel_range(addr, addr + SECTION_SIZE);
+}
+
+/* Make sure extended page tables are in use. */
+static inline bool arch_has_strict_perms(void)
+{
+	if (cpu_architecture() < CPU_ARCH_ARMv6)
+		return false;
+
+	return !!(get_cr() & CR_XP);
+}
+
+void set_section_perms(struct section_perm *perms, int n, bool set,
+			struct mm_struct *mm)
+{
+	size_t i;
+	unsigned long addr;
+
+	if (!arch_has_strict_perms())
+		return;
+
+	for (i = 0; i < n; i++) {
+		if (!IS_ALIGNED(perms[i].start, SECTION_SIZE) ||
+		    !IS_ALIGNED(perms[i].end, SECTION_SIZE)) {
+			pr_err("BUG: %s section %lx-%lx not aligned to %lx\n",
+				perms[i].name, perms[i].start, perms[i].end,
+				SECTION_SIZE);
+			continue;
+		}
+
+		for (addr = perms[i].start;
+		     addr < perms[i].end;
+		     addr += SECTION_SIZE)
+			section_update(addr, perms[i].mask,
+				set ? perms[i].prot : perms[i].clear, mm);
+	}
+
+}
+
+/**
+ * update_sections_early() is intended to be called only through the
+ * stop_machine framework and is executed by only one CPU while all other
+ * CPUs spin and wait, so no locking is required in this function.
+ */
+static void update_sections_early(struct section_perm perms[], int n)
+{
+	struct task_struct *t, *s;
+
+	for_each_process(t) {
+		if (t->flags & PF_KTHREAD)
+			continue;
+		for_each_thread(t, s)
+			set_section_perms(perms, n, true, s->mm);
+	}
+	set_section_perms(perms, n, true, current->active_mm);
+	set_section_perms(perms, n, true, &init_mm);
+}
+
+static int __fix_kernmem_perms(void *unused)
+{
+	update_sections_early(nx_perms, ARRAY_SIZE(nx_perms));
+	return 0;
+}
+
+static void fix_kernmem_perms(void)
+{
+	stop_machine(__fix_kernmem_perms, NULL, NULL);
+}
+
+static int __mark_rodata_ro(void *unused)
+{
+	update_sections_early(ro_perms, ARRAY_SIZE(ro_perms));
+	return 0;
+}
+
+static int kernel_set_to_readonly __read_mostly;
+
+void mark_rodata_ro(void)
+{
+	kernel_set_to_readonly = 1;
+	stop_machine(__mark_rodata_ro, NULL, NULL);
+	debug_checkwx();
+}
+
+void set_kernel_text_rw(void)
+{
+	if (!kernel_set_to_readonly)
+		return;
+
+	set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), false,
+				current->active_mm);
+}
+
+void set_kernel_text_ro(void)
+{
+	if (!kernel_set_to_readonly)
+		return;
+
+	set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), true,
+				current->active_mm);
+}
+
+#else
+static inline void fix_kernmem_perms(void) { }
+#endif /* CONFIG_STRICT_KERNEL_RWX */
+
+void free_initmem(void)
+{
+	fix_kernmem_perms();
+
+	poison_init_mem(__init_begin, __init_end - __init_begin);
+	if (!machine_is_integrator() && !machine_is_cintegrator())
+		free_initmem_default(-1);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+
+static int keep_initrd;
+
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+	if (!keep_initrd) {
+		if (start == initrd_start)
+			start = round_down(start, PAGE_SIZE);
+		if (end == initrd_end)
+			end = round_up(end, PAGE_SIZE);
+
+		poison_init_mem((void *)start, PAGE_ALIGN(end) - start);
+		free_reserved_area((void *)start, (void *)end, -1, "initrd");
+	}
+}
+
+static int __init keepinitrd_setup(char *__unused)
+{
+	keep_initrd = 1;
+	return 1;
+}
+
+__setup("keepinitrd", keepinitrd_setup);
+#endif
diff --git a/arch/arm/mm/iomap.c b/arch/arm/mm/iomap.c
new file mode 100644
index 0000000..091ddc5
--- /dev/null
+++ b/arch/arm/mm/iomap.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  linux/arch/arm/mm/iomap.c
+ *
+ * Map IO port and PCI memory spaces so that {read,write}[bwl] can
+ * be used to access this memory.
+ */
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/ioport.h>
+#include <linux/io.h>
+
+unsigned long vga_base;
+EXPORT_SYMBOL(vga_base);
+
+#ifdef __io
+void __iomem *ioport_map(unsigned long port, unsigned int nr)
+{
+	return __io(port);
+}
+EXPORT_SYMBOL(ioport_map);
+
+void ioport_unmap(void __iomem *addr)
+{
+}
+EXPORT_SYMBOL(ioport_unmap);
+#endif
+
+#ifdef CONFIG_PCI
+unsigned long pcibios_min_io = 0x1000;
+EXPORT_SYMBOL(pcibios_min_io);
+
+unsigned long pcibios_min_mem = 0x01000000;
+EXPORT_SYMBOL(pcibios_min_mem);
+
+void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
+{
+	if ((unsigned long)addr >= VMALLOC_START &&
+	    (unsigned long)addr < VMALLOC_END)
+		iounmap(addr);
+}
+EXPORT_SYMBOL(pci_iounmap);
+#endif
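+
+/*
+ * A minimal usage sketch (hypothetical driver, illustration only): the
+ * cookie returned by ioport_map() is what the generic ioreadN()/iowriteN()
+ * accessors expect.
+ */
+#if 0
+static int example_probe(void)
+{
+	void __iomem *regs = ioport_map(0x3f8, 8);	/* assumed port range */
+
+	if (!regs)
+		return -ENOMEM;
+	iowrite8(0x80, regs + 3);	/* poke an example register */
+	ioport_unmap(regs);
+	return 0;
+}
+#endif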
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
new file mode 100644
index 0000000..5bf9443
--- /dev/null
+++ b/arch/arm/mm/ioremap.c
@@ -0,0 +1,499 @@
+/*
+ *  linux/arch/arm/mm/ioremap.c
+ *
+ * Re-map IO memory to kernel address space so that we can access it.
+ *
+ * (C) Copyright 1995 1996 Linus Torvalds
+ *
+ * Hacked for ARM by Phil Blundell <philb@gnu.org>
+ * Hacked to allow all architectures to build, and various cleanups
+ * by Russell King
+ *
+ * This allows a driver to remap an arbitrary region of bus memory into
+ * virtual space.  One should *only* use readl, writel, memcpy_toio and
+ * so on with such remapped areas.
+ *
+ * Because the ARM only has a 32-bit address space we can't address the
+ * whole of the (physical) PCI space at once.  PCI huge-mode addressing
+ * allows us to circumvent this restriction by splitting PCI space into
+ * two 2GB chunks and mapping only one at a time into processor memory.
+ * We use MMU protection domains to trap any attempt to access the bank
+ * that is not currently mapped.  (This isn't fully implemented yet.)
+ */
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/io.h>
+#include <linux/sizes.h>
+
+#include <asm/cp15.h>
+#include <asm/cputype.h>
+#include <asm/cacheflush.h>
+#include <asm/early_ioremap.h>
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/system_info.h>
+
+#include <asm/mach/map.h>
+#include <asm/mach/pci.h>
+#include "mm.h"
+
+
+LIST_HEAD(static_vmlist);
+
+static struct static_vm *find_static_vm_paddr(phys_addr_t paddr,
+			size_t size, unsigned int mtype)
+{
+	struct static_vm *svm;
+	struct vm_struct *vm;
+
+	list_for_each_entry(svm, &static_vmlist, list) {
+		vm = &svm->vm;
+		if (!(vm->flags & VM_ARM_STATIC_MAPPING))
+			continue;
+		if ((vm->flags & VM_ARM_MTYPE_MASK) != VM_ARM_MTYPE(mtype))
+			continue;
+
+		if (vm->phys_addr > paddr ||
+			paddr + size - 1 > vm->phys_addr + vm->size - 1)
+			continue;
+
+		return svm;
+	}
+
+	return NULL;
+}
+
+struct static_vm *find_static_vm_vaddr(void *vaddr)
+{
+	struct static_vm *svm;
+	struct vm_struct *vm;
+
+	list_for_each_entry(svm, &static_vmlist, list) {
+		vm = &svm->vm;
+
+		/* static_vmlist is ascending order */
+		if (vm->addr > vaddr)
+			break;
+
+		if (vm->addr <= vaddr && vm->addr + vm->size > vaddr)
+			return svm;
+	}
+
+	return NULL;
+}
+
+void __init add_static_vm_early(struct static_vm *svm)
+{
+	struct static_vm *curr_svm;
+	struct vm_struct *vm;
+	void *vaddr;
+
+	vm = &svm->vm;
+	vm_area_add_early(vm);
+	vaddr = vm->addr;
+
+	list_for_each_entry(curr_svm, &static_vmlist, list) {
+		vm = &curr_svm->vm;
+
+		if (vm->addr > vaddr)
+			break;
+	}
+	list_add_tail(&svm->list, &curr_svm->list);
+}
+
+int ioremap_page(unsigned long virt, unsigned long phys,
+		 const struct mem_type *mtype)
+{
+	return ioremap_page_range(virt, virt + PAGE_SIZE, phys,
+				  __pgprot(mtype->prot_pte));
+}
+EXPORT_SYMBOL(ioremap_page);
+
+void __check_vmalloc_seq(struct mm_struct *mm)
+{
+	unsigned int seq;
+
+	do {
+		seq = init_mm.context.vmalloc_seq;
+		memcpy(pgd_offset(mm, VMALLOC_START),
+		       pgd_offset_k(VMALLOC_START),
+		       sizeof(pgd_t) * (pgd_index(VMALLOC_END) -
+					pgd_index(VMALLOC_START)));
+		mm->context.vmalloc_seq = seq;
+	} while (seq != init_mm.context.vmalloc_seq);
+}
+
+#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE)
+/*
+ * Section support is unsafe on SMP - If you iounmap and ioremap a region,
+ * the other CPUs will not see this change until their next context switch.
+ * Meanwhile, (eg) if an interrupt comes in on one of those other CPUs
+ * which requires the new ioremap'd region to be referenced, the CPU will
+ * reference the _old_ region.
+ *
+ * Note that get_vm_area_caller() allocates a guard 4K page, so we need to
+ * mask the size back to 1MB aligned or we will overflow in the loop below.
+ */
+static void unmap_area_sections(unsigned long virt, unsigned long size)
+{
+	unsigned long addr = virt, end = virt + (size & ~(SZ_1M - 1));
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmdp;
+
+	flush_cache_vunmap(addr, end);
+	pgd = pgd_offset_k(addr);
+	pud = pud_offset(pgd, addr);
+	pmdp = pmd_offset(pud, addr);
+	do {
+		pmd_t pmd = *pmdp;
+
+		if (!pmd_none(pmd)) {
+			/*
+			 * Clear the PMD from the page table, and
+			 * increment the vmalloc sequence so others
+			 * notice this change.
+			 *
+			 * Note: this is still racy on SMP machines.
+			 */
+			pmd_clear(pmdp);
+			init_mm.context.vmalloc_seq++;
+
+			/*
+			 * Free the page table, if there was one.
+			 */
+			if ((pmd_val(pmd) & PMD_TYPE_MASK) == PMD_TYPE_TABLE)
+				pte_free_kernel(&init_mm, pmd_page_vaddr(pmd));
+		}
+
+		addr += PMD_SIZE;
+		pmdp += 2;
+	} while (addr < end);
+
+	/*
+	 * Ensure that the active_mm is up to date - we want to
+	 * catch any use-after-iounmap cases.
+	 */
+	if (current->active_mm->context.vmalloc_seq != init_mm.context.vmalloc_seq)
+		__check_vmalloc_seq(current->active_mm);
+
+	flush_tlb_kernel_range(virt, end);
+}
+
+static int
+remap_area_sections(unsigned long virt, unsigned long pfn,
+		    size_t size, const struct mem_type *type)
+{
+	unsigned long addr = virt, end = virt + size;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	/*
+	 * Remove and free any PTE-based mapping, and
+	 * sync the current kernel mapping.
+	 */
+	unmap_area_sections(virt, size);
+
+	pgd = pgd_offset_k(addr);
+	pud = pud_offset(pgd, addr);
+	pmd = pmd_offset(pud, addr);
+	do {
+		pmd[0] = __pmd(__pfn_to_phys(pfn) | type->prot_sect);
+		pfn += SZ_1M >> PAGE_SHIFT;
+		pmd[1] = __pmd(__pfn_to_phys(pfn) | type->prot_sect);
+		pfn += SZ_1M >> PAGE_SHIFT;
+		flush_pmd_entry(pmd);
+
+		addr += PMD_SIZE;
+		pmd += 2;
+	} while (addr < end);
+
+	return 0;
+}
+
+static int
+remap_area_supersections(unsigned long virt, unsigned long pfn,
+			 size_t size, const struct mem_type *type)
+{
+	unsigned long addr = virt, end = virt + size;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	/*
+	 * Remove and free any PTE-based mapping, and
+	 * sync the current kernel mapping.
+	 */
+	unmap_area_sections(virt, size);
+
+	pgd = pgd_offset_k(virt);
+	pud = pud_offset(pgd, addr);
+	pmd = pmd_offset(pud, addr);
+	do {
+		unsigned long super_pmd_val, i;
+
+		super_pmd_val = __pfn_to_phys(pfn) | type->prot_sect |
+				PMD_SECT_SUPER;
+		super_pmd_val |= ((pfn >> (32 - PAGE_SHIFT)) & 0xf) << 20;
+
+		for (i = 0; i < 8; i++) {
+			pmd[0] = __pmd(super_pmd_val);
+			pmd[1] = __pmd(super_pmd_val);
+			flush_pmd_entry(pmd);
+
+			addr += PMD_SIZE;
+			pmd += 2;
+		}
+
+		pfn += SUPERSECTION_SIZE >> PAGE_SHIFT;
+	} while (addr < end);
+
+	return 0;
+}
+#endif
+
+static void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn,
+	unsigned long offset, size_t size, unsigned int mtype, void *caller)
+{
+	const struct mem_type *type;
+	int err;
+	unsigned long addr;
+	struct vm_struct *area;
+	phys_addr_t paddr = __pfn_to_phys(pfn);
+
+#ifndef CONFIG_ARM_LPAE
+	/*
+	 * High mappings must be supersection aligned
+	 */
+	if (pfn >= 0x100000 && (paddr & ~SUPERSECTION_MASK))
+		return NULL;
+#endif
+
+	type = get_mem_type(mtype);
+	if (!type)
+		return NULL;
+
+	/*
+	 * Page align the mapping size, taking account of any offset.
+	 */
+	size = PAGE_ALIGN(offset + size);
+
+	/*
+	 * Try to reuse one of the static mappings whenever possible.
+	 */
+	if (size && !(sizeof(phys_addr_t) == 4 && pfn >= 0x100000)) {
+		struct static_vm *svm;
+
+		svm = find_static_vm_paddr(paddr, size, mtype);
+		if (svm) {
+			addr = (unsigned long)svm->vm.addr;
+			addr += paddr - svm->vm.phys_addr;
+			return (void __iomem *) (offset + addr);
+		}
+	}
+
+	/*
+	 * Don't allow RAM to be mapped with mismatched attributes - this
+	 * causes problems with ARMv6+
+	 */
+	if (WARN_ON(pfn_valid(pfn) && mtype != MT_MEMORY_RW))
+		return NULL;
+
+	area = get_vm_area_caller(size, VM_IOREMAP, caller);
+	if (!area)
+		return NULL;
+	addr = (unsigned long)area->addr;
+	area->phys_addr = paddr;
+
+#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE)
+	if (DOMAIN_IO == 0 &&
+	    (((cpu_architecture() >= CPU_ARCH_ARMv6) && (get_cr() & CR_XP)) ||
+	       cpu_is_xsc3()) && pfn >= 0x100000 &&
+	       !((paddr | size | addr) & ~SUPERSECTION_MASK)) {
+		area->flags |= VM_ARM_SECTION_MAPPING;
+		err = remap_area_supersections(addr, pfn, size, type);
+	} else if (!((paddr | size | addr) & ~PMD_MASK)) {
+		area->flags |= VM_ARM_SECTION_MAPPING;
+		err = remap_area_sections(addr, pfn, size, type);
+	} else
+#endif
+		err = ioremap_page_range(addr, addr + size, paddr,
+					 __pgprot(type->prot_pte));
+
+	if (err) {
+		vunmap((void *)addr);
+		return NULL;
+	}
+
+	flush_cache_vmap(addr, addr + size);
+	return (void __iomem *) (offset + addr);
+}
+
+void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size,
+	unsigned int mtype, void *caller)
+{
+	phys_addr_t last_addr;
+	unsigned long offset = phys_addr & ~PAGE_MASK;
+	unsigned long pfn = __phys_to_pfn(phys_addr);
+
+	/*
+	 * Don't allow wraparound or zero size
+	 */
+	last_addr = phys_addr + size - 1;
+	if (!size || last_addr < phys_addr)
+		return NULL;
+
+	return __arm_ioremap_pfn_caller(pfn, offset, size, mtype,
+			caller);
+}
+
+/*
+ * Remap an arbitrary physical address space into the kernel virtual
+ * address space. Needed when the kernel wants to access high addresses
+ * directly.
+ *
+ * NOTE! We need to allow non-page-aligned mappings too: we will obviously
+ * have to convert them into an offset in a page-aligned mapping, but the
+ * caller shouldn't need to know that small detail.
+ */
+void __iomem *
+__arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size,
+		  unsigned int mtype)
+{
+	return __arm_ioremap_pfn_caller(pfn, offset, size, mtype,
+					__builtin_return_address(0));
+}
+EXPORT_SYMBOL(__arm_ioremap_pfn);
+
+void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t,
+				      unsigned int, void *) =
+	__arm_ioremap_caller;
+
+void __iomem *ioremap(resource_size_t res_cookie, size_t size)
+{
+	return arch_ioremap_caller(res_cookie, size, MT_DEVICE,
+				   __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap);
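+
+/*
+ * A minimal driver-side sketch (hypothetical device and addresses, shown
+ * for illustration only): map the bus region once, use readl()/writel() on
+ * the returned cookie, and unmap when done.
+ */
+#if 0
+static int example_init(void)
+{
+	void __iomem *base = ioremap(0x40000000, SZ_4K);	/* assumed MMIO window */
+	u32 id;
+
+	if (!base)
+		return -ENOMEM;
+	id = readl(base);		/* read an ID register */
+	writel(0x1, base + 0x4);	/* enable the block */
+	iounmap(base);
+	return id ? 0 : -ENODEV;
+}
+#endif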
+
+void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size)
+	__alias(ioremap_cached);
+
+void __iomem *ioremap_cached(resource_size_t res_cookie, size_t size)
+{
+	return arch_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED,
+				   __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_cache);
+EXPORT_SYMBOL(ioremap_cached);
+
+void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size)
+{
+	return arch_ioremap_caller(res_cookie, size, MT_DEVICE_WC,
+				   __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_wc);
+
+/*
+ * Remap an arbitrary physical address space into the kernel virtual
+ * address space as memory. Needed when the kernel wants to execute
+ * code in external memory. This is needed, for example, when reprogramming
+ * source clocks that would affect normal memory. Please see
+ * CONFIG_GENERIC_ALLOCATOR for allocating external memory.
+ */
+void __iomem *
+__arm_ioremap_exec(phys_addr_t phys_addr, size_t size, bool cached)
+{
+	unsigned int mtype;
+
+	if (cached)
+		mtype = MT_MEMORY_RWX;
+	else
+		mtype = MT_MEMORY_RWX_NONCACHED;
+
+	return __arm_ioremap_caller(phys_addr, size, mtype,
+			__builtin_return_address(0));
+}
+
+void *arch_memremap_wb(phys_addr_t phys_addr, size_t size)
+{
+	return (__force void *)arch_ioremap_caller(phys_addr, size,
+						   MT_MEMORY_RW,
+						   __builtin_return_address(0));
+}
+
+void __iounmap(volatile void __iomem *io_addr)
+{
+	void *addr = (void *)(PAGE_MASK & (unsigned long)io_addr);
+	struct static_vm *svm;
+
+	/* If this is a static mapping, we must leave it alone */
+	svm = find_static_vm_vaddr(addr);
+	if (svm)
+		return;
+
+#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE)
+	{
+		struct vm_struct *vm;
+
+		vm = find_vm_area(addr);
+
+		/*
+		 * If this is a section based mapping we need to handle it
+		 * specially as the VM subsystem does not know how to handle
+		 * such a beast.
+		 */
+		if (vm && (vm->flags & VM_ARM_SECTION_MAPPING))
+			unmap_area_sections((unsigned long)vm->addr, vm->size);
+	}
+#endif
+
+	vunmap(addr);
+}
+
+void (*arch_iounmap)(volatile void __iomem *) = __iounmap;
+
+void iounmap(volatile void __iomem *cookie)
+{
+	arch_iounmap(cookie);
+}
+EXPORT_SYMBOL(iounmap);
+
+#ifdef CONFIG_PCI
+static int pci_ioremap_mem_type = MT_DEVICE;
+
+void pci_ioremap_set_mem_type(int mem_type)
+{
+	pci_ioremap_mem_type = mem_type;
+}
+
+int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr)
+{
+	BUG_ON(offset + SZ_64K - 1 > IO_SPACE_LIMIT);
+
+	return ioremap_page_range(PCI_IO_VIRT_BASE + offset,
+				  PCI_IO_VIRT_BASE + offset + SZ_64K,
+				  phys_addr,
+				  __pgprot(get_mem_type(pci_ioremap_mem_type)->prot_pte));
+}
+EXPORT_SYMBOL_GPL(pci_ioremap_io);
+
+void __iomem *pci_remap_cfgspace(resource_size_t res_cookie, size_t size)
+{
+	return arch_ioremap_caller(res_cookie, size, MT_UNCACHED,
+				   __builtin_return_address(0));
+}
+EXPORT_SYMBOL_GPL(pci_remap_cfgspace);
+#endif
+
+/*
+ * Must be called after early_fixmap_init
+ */
+void __init early_ioremap_init(void)
+{
+	early_ioremap_setup();
+}
diff --git a/arch/arm/mm/l2c-common.c b/arch/arm/mm/l2c-common.c
new file mode 100644
index 0000000..10a3cf2
--- /dev/null
+++ b/arch/arm/mm/l2c-common.c
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2010 ARM Ltd.
+ * Written by Catalin Marinas <catalin.marinas@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/bug.h>
+#include <linux/smp.h>
+#include <asm/outercache.h>
+
+void outer_disable(void)
+{
+	WARN_ON(!irqs_disabled());
+	WARN_ON(num_online_cpus() > 1);
+
+	if (outer_cache.disable)
+		outer_cache.disable();
+}
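+
+/*
+ * A sketch of the expected calling context (hypothetical caller, shown for
+ * illustration only): the outer cache must be cleaned before it is disabled,
+ * on the last running CPU and with interrupts off, e.g. late in a shutdown
+ * or kexec path:
+ *
+ *	local_irq_disable();
+ *	outer_flush_all();
+ *	outer_disable();
+ */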
diff --git a/arch/arm/mm/l2c-l2x0-resume.S b/arch/arm/mm/l2c-l2x0-resume.S
new file mode 100644
index 0000000..fc01f1b
--- /dev/null
+++ b/arch/arm/mm/l2c-l2x0-resume.S
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * L2C-310 early resume code.  This can be used by platforms to restore
+ * the settings of their L2 cache controller before restoring the
+ * processor state.
+ *
+ * This code can only be used if you are running in the secure world.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/hardware/cache-l2x0.h>
+
+	.text
+
+ENTRY(l2c310_early_resume)
+	adr	r0, 1f
+	ldr	r2, [r0]
+	add	r0, r2, r0
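+	@ r0 now holds the runtime address of l2x0_saved_regs: the literal at
+	@ 1f stores the link-time offset "l2x0_saved_regs - .", so adding it
+	@ to the literal's own runtime address works even before the MMU is on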
+
+	ldmia	r0, {r1, r2, r3, r4, r5, r6, r7, r8}
+	@ r1 = phys address of L2C-310 controller
+	@ r2 = aux_ctrl
+	@ r3 = tag_latency
+	@ r4 = data_latency
+	@ r5 = filter_start
+	@ r6 = filter_end
+	@ r7 = prefetch_ctrl
+	@ r8 = pwr_ctrl
+
+	@ Check that the address has been initialised
+	teq	r1, #0
+	reteq	lr
+
+	@ The prefetch and power control registers are revision dependent
+	@ and can be written whether or not the L2 cache is enabled
+	ldr	r0, [r1, #L2X0_CACHE_ID]
+	and	r0, r0, #L2X0_CACHE_ID_RTL_MASK
+	cmp	r0, #L310_CACHE_ID_RTL_R2P0
+	strcs	r7, [r1, #L310_PREFETCH_CTRL]
+	cmp	r0, #L310_CACHE_ID_RTL_R3P0
+	strcs	r8, [r1, #L310_POWER_CTRL]
+
+	@ Don't setup the L2 cache if it is already enabled
+	ldr	r0, [r1, #L2X0_CTRL]
+	tst	r0, #L2X0_CTRL_EN
+	retne	lr
+
+	str	r3, [r1, #L310_TAG_LATENCY_CTRL]
+	str	r4, [r1, #L310_DATA_LATENCY_CTRL]
+	str	r6, [r1, #L310_ADDR_FILTER_END]
+	str	r5, [r1, #L310_ADDR_FILTER_START]
+
+	str	r2, [r1, #L2X0_AUX_CTRL]
+	mov	r9, #L2X0_CTRL_EN
+	str	r9, [r1, #L2X0_CTRL]
+	ret	lr
+ENDPROC(l2c310_early_resume)
+
+	.align
+1:	.long	l2x0_saved_regs - .
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h
new file mode 100644
index 0000000..6b045c6
--- /dev/null
+++ b/arch/arm/mm/mm.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef CONFIG_MMU
+#include <linux/list.h>
+#include <linux/vmalloc.h>
+
+#include <asm/pgtable.h>
+
+/* the upper-most page table pointer */
+extern pmd_t *top_pmd;
+
+/*
+ * 0xffff8000 to 0xffffffff is reserved for any ARM architecture
+ * specific hacks for copying pages efficiently, while 0xffff4000
+ * is reserved for VIPT aliasing flushing by generic code.
+ *
+ * Note that we don't allow VIPT aliasing caches with SMP.
+ */
+#define COPYPAGE_MINICACHE	0xffff8000
+#define COPYPAGE_V6_FROM	0xffff8000
+#define COPYPAGE_V6_TO		0xffffc000
+/* PFN alias flushing, for VIPT caches */
+#define FLUSH_ALIAS_START	0xffff4000
+
+static inline void set_top_pte(unsigned long va, pte_t pte)
+{
+	pte_t *ptep = pte_offset_kernel(top_pmd, va);
+	set_pte_ext(ptep, pte, 0);
+	local_flush_tlb_kernel_page(va);
+}
+
+static inline pte_t get_top_pte(unsigned long va)
+{
+	pte_t *ptep = pte_offset_kernel(top_pmd, va);
+	return *ptep;
+}
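+
+/*
+ * A sketch of how the helpers above are typically used (illustrative only,
+ * details vary between the copypage variants): a copy-page implementation
+ * installs temporary kernel mappings for the source and destination pages
+ * at the COPYPAGE_* slots and copies through them, e.g.
+ *
+ *	set_top_pte(COPYPAGE_V6_FROM, mk_pte(from, PAGE_KERNEL));
+ *	set_top_pte(COPYPAGE_V6_TO, mk_pte(to, PAGE_KERNEL));
+ *	copy_page((void *)COPYPAGE_V6_TO, (void *)COPYPAGE_V6_FROM);
+ */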
+
+static inline pmd_t *pmd_off_k(unsigned long virt)
+{
+	return pmd_offset(pud_offset(pgd_offset_k(virt), virt), virt);
+}
+
+struct mem_type {
+	pteval_t prot_pte;
+	pteval_t prot_pte_s2;
+	pmdval_t prot_l1;
+	pmdval_t prot_sect;
+	unsigned int domain;
+};
+
+const struct mem_type *get_mem_type(unsigned int type);
+
+extern void __flush_dcache_page(struct address_space *mapping, struct page *page);
+
+/*
+ * ARM specific vm_struct->flags bits.
+ */
+
+/* (super)section-mapped I/O regions used by ioremap()/iounmap() */
+#define VM_ARM_SECTION_MAPPING	0x80000000
+
+/* permanent static mappings from iotable_init() */
+#define VM_ARM_STATIC_MAPPING	0x40000000
+
+/* empty mapping */
+#define VM_ARM_EMPTY_MAPPING	0x20000000
+
+/* mapping type (attributes) for permanent static mappings */
+#define VM_ARM_MTYPE(mt)		((mt) << 20)
+#define VM_ARM_MTYPE_MASK	(0x1f << 20)
+
+/* consistent regions used by dma_alloc_attrs() */
+#define VM_ARM_DMA_CONSISTENT	0x20000000
+
+
+struct static_vm {
+	struct vm_struct vm;
+	struct list_head list;
+};
+
+extern struct list_head static_vmlist;
+extern struct static_vm *find_static_vm_vaddr(void *vaddr);
+extern __init void add_static_vm_early(struct static_vm *svm);
+
+#endif
+
+#ifdef CONFIG_ZONE_DMA
+extern phys_addr_t arm_dma_limit;
+extern unsigned long arm_dma_pfn_limit;
+#else
+#define arm_dma_limit ((phys_addr_t)~0)
+#define arm_dma_pfn_limit (~0ul >> PAGE_SHIFT)
+#endif
+
+extern phys_addr_t arm_lowmem_limit;
+
+void __init bootmem_init(void);
+void arm_mm_memblock_reserve(void);
+void dma_contiguous_remap(void);
+
+unsigned long __clear_cr(unsigned long mask);
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
new file mode 100644
index 0000000..f866870
--- /dev/null
+++ b/arch/arm/mm/mmap.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  linux/arch/arm/mm/mmap.c
+ */
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/shm.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
+#include <linux/io.h>
+#include <linux/personality.h>
+#include <linux/random.h>
+#include <asm/cachetype.h>
+
+#define COLOUR_ALIGN(addr,pgoff)		\
+	((((addr)+SHMLBA-1)&~(SHMLBA-1)) +	\
+	 (((pgoff)<<PAGE_SHIFT) & (SHMLBA-1)))
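+
+/*
+ * Worked example (assuming 4K pages and SHMLBA = 16K): for addr = 0x5000 and
+ * pgoff = 3, the address is first rounded up to the 16K boundary 0x8000 and
+ * then offset by (3 << PAGE_SHIFT) & 0x3fff = 0x3000, giving 0xb000, which
+ * has the same cache colour as the requested page offset.
+ */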
+
+/* gap between mmap and stack */
+#define MIN_GAP (128*1024*1024UL)
+#define MAX_GAP ((TASK_SIZE)/6*5)
+
+static int mmap_is_legacy(struct rlimit *rlim_stack)
+{
+	if (current->personality & ADDR_COMPAT_LAYOUT)
+		return 1;
+
+	if (rlim_stack->rlim_cur == RLIM_INFINITY)
+		return 1;
+
+	return sysctl_legacy_va_layout;
+}
+
+static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
+{
+	unsigned long gap = rlim_stack->rlim_cur;
+
+	if (gap < MIN_GAP)
+		gap = MIN_GAP;
+	else if (gap > MAX_GAP)
+		gap = MAX_GAP;
+
+	return PAGE_ALIGN(TASK_SIZE - gap - rnd);
+}
+
+/*
+ * We need to ensure that shared mappings are correctly aligned to
+ * avoid aliasing issues with VIPT caches.  We need to ensure that
+ * a specific page of an object is always mapped at a multiple of
+ * SHMLBA bytes.
+ *
+ * We unconditionally provide this function for all cases; however,
+ * in the VIVT case, we optimise out the alignment rules.
+ */
+unsigned long
+arch_get_unmapped_area(struct file *filp, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	int do_align = 0;
+	int aliasing = cache_is_vipt_aliasing();
+	struct vm_unmapped_area_info info;
+
+	/*
+	 * We only need to do colour alignment if either the I or D
+	 * caches alias.
+	 */
+	if (aliasing)
+		do_align = filp || (flags & MAP_SHARED);
+
+	/*
+	 * We enforce the MAP_FIXED case.
+	 */
+	if (flags & MAP_FIXED) {
+		if (aliasing && flags & MAP_SHARED &&
+		    (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))
+			return -EINVAL;
+		return addr;
+	}
+
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	if (addr) {
+		if (do_align)
+			addr = COLOUR_ALIGN(addr, pgoff);
+		else
+			addr = PAGE_ALIGN(addr);
+
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr &&
+		    (!vma || addr + len <= vm_start_gap(vma)))
+			return addr;
+	}
+
+	info.flags = 0;
+	info.length = len;
+	info.low_limit = mm->mmap_base;
+	info.high_limit = TASK_SIZE;
+	info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
+	info.align_offset = pgoff << PAGE_SHIFT;
+	return vm_unmapped_area(&info);
+}
+
+unsigned long
+arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+			const unsigned long len, const unsigned long pgoff,
+			const unsigned long flags)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = current->mm;
+	unsigned long addr = addr0;
+	int do_align = 0;
+	int aliasing = cache_is_vipt_aliasing();
+	struct vm_unmapped_area_info info;
+
+	/*
+	 * We only need to do colour alignment if either the I or D
+	 * caches alias.
+	 */
+	if (aliasing)
+		do_align = filp || (flags & MAP_SHARED);
+
+	/* requested length too big for entire address space */
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED) {
+		if (aliasing && flags & MAP_SHARED &&
+		    (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1))
+			return -EINVAL;
+		return addr;
+	}
+
+	/* requesting a specific address */
+	if (addr) {
+		if (do_align)
+			addr = COLOUR_ALIGN(addr, pgoff);
+		else
+			addr = PAGE_ALIGN(addr);
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr &&
+				(!vma || addr + len <= vm_start_gap(vma)))
+			return addr;
+	}
+
+	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+	info.length = len;
+	info.low_limit = FIRST_USER_ADDRESS;
+	info.high_limit = mm->mmap_base;
+	info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0;
+	info.align_offset = pgoff << PAGE_SHIFT;
+	addr = vm_unmapped_area(&info);
+
+	/*
+	 * A failed mmap() very likely causes application failure,
+	 * so fall back to the bottom-up function here. This scenario
+	 * can happen with large stack limits and large mmap()
+	 * allocations.
+	 */
+	if (addr & ~PAGE_MASK) {
+		VM_BUG_ON(addr != -ENOMEM);
+		info.flags = 0;
+		info.low_limit = mm->mmap_base;
+		info.high_limit = TASK_SIZE;
+		addr = vm_unmapped_area(&info);
+	}
+
+	return addr;
+}
+
+unsigned long arch_mmap_rnd(void)
+{
+	unsigned long rnd;
+
+	rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
+
+	return rnd << PAGE_SHIFT;
+}
+
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
+{
+	unsigned long random_factor = 0UL;
+
+	if (current->flags & PF_RANDOMIZE)
+		random_factor = arch_mmap_rnd();
+
+	if (mmap_is_legacy(rlim_stack)) {
+		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
+		mm->get_unmapped_area = arch_get_unmapped_area;
+	} else {
+		mm->mmap_base = mmap_base(random_factor, rlim_stack);
+		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+	}
+}
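+
+/*
+ * Illustrative effect of the choice above (assuming a 3GB TASK_SIZE and no
+ * randomization): the legacy layout starts mmap_base at TASK_UNMAPPED_BASE
+ * (roughly 1GB) and grows mappings upwards, while the default layout places
+ * mmap_base just below the stack gap and grows downwards.
+ */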
+
+/*
+ * You really shouldn't be using read() or write() on /dev/mem.  This
+ * might go away in the future.
+ */
+int valid_phys_addr_range(phys_addr_t addr, size_t size)
+{
+	if (addr < PHYS_OFFSET)
+		return 0;
+	if (addr + size > __pa(high_memory - 1) + 1)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Do not allow /dev/mem mappings beyond the supported physical range.
+ */
+int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
+{
+	return (pfn + (size >> PAGE_SHIFT)) <= (1 + (PHYS_MASK >> PAGE_SHIFT));
+}
+
+#ifdef CONFIG_STRICT_DEVMEM
+
+#include <linux/ioport.h>
+
+/*
+ * devmem_is_allowed() checks to see if /dev/mem access to a certain
+ * address is valid. The argument is a physical page number.
+ * We mimic x86 here by disallowing access to system RAM as well as
+ * device-exclusive MMIO regions. This effectively disables read()/write()
+ * on /dev/mem.
+ */
+int devmem_is_allowed(unsigned long pfn)
+{
+	if (iomem_is_exclusive(pfn << PAGE_SHIFT))
+		return 0;
+	if (!page_is_ram(pfn))
+		return 1;
+	return 0;
+}
+
+#endif
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
new file mode 100644
index 0000000..e46a6a4
--- /dev/null
+++ b/arch/arm/mm/mmu.c
@@ -0,0 +1,1653 @@
+/*
+ *  linux/arch/arm/mm/mmu.c
+ *
+ *  Copyright (C) 1995-2005 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/mman.h>
+#include <linux/nodemask.h>
+#include <linux/memblock.h>
+#include <linux/fs.h>
+#include <linux/vmalloc.h>
+#include <linux/sizes.h>
+
+#include <asm/cp15.h>
+#include <asm/cputype.h>
+#include <asm/sections.h>
+#include <asm/cachetype.h>
+#include <asm/fixmap.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/smp_plat.h>
+#include <asm/tlb.h>
+#include <asm/highmem.h>
+#include <asm/system_info.h>
+#include <asm/traps.h>
+#include <asm/procinfo.h>
+#include <asm/memory.h>
+
+#include <asm/mach/arch.h>
+#include <asm/mach/map.h>
+#include <asm/mach/pci.h>
+#include <asm/fixmap.h>
+
+#include "fault.h"
+#include "mm.h"
+#include "tcm.h"
+
+/*
+ * empty_zero_page is a special page that is used for
+ * zero-initialized data and COW.
+ */
+struct page *empty_zero_page;
+EXPORT_SYMBOL(empty_zero_page);
+
+/*
+ * The pmd table for the upper-most set of pages.
+ */
+pmd_t *top_pmd;
+
+pmdval_t user_pmd_table = _PAGE_USER_TABLE;
+
+#define CPOLICY_UNCACHED	0
+#define CPOLICY_BUFFERED	1
+#define CPOLICY_WRITETHROUGH	2
+#define CPOLICY_WRITEBACK	3
+#define CPOLICY_WRITEALLOC	4
+
+static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
+static unsigned int ecc_mask __initdata = 0;
+pgprot_t pgprot_user;
+pgprot_t pgprot_kernel;
+pgprot_t pgprot_hyp_device;
+pgprot_t pgprot_s2;
+pgprot_t pgprot_s2_device;
+
+EXPORT_SYMBOL(pgprot_user);
+EXPORT_SYMBOL(pgprot_kernel);
+
+struct cachepolicy {
+	const char	policy[16];
+	unsigned int	cr_mask;
+	pmdval_t	pmd;
+	pteval_t	pte;
+	pteval_t	pte_s2;
+};
+
+#ifdef CONFIG_ARM_LPAE
+#define s2_policy(policy)	policy
+#else
+#define s2_policy(policy)	0
+#endif
+
+unsigned long kimage_voffset __ro_after_init;
+
+static struct cachepolicy cache_policies[] __initdata = {
+	{
+		.policy		= "uncached",
+		.cr_mask	= CR_W|CR_C,
+		.pmd		= PMD_SECT_UNCACHED,
+		.pte		= L_PTE_MT_UNCACHED,
+		.pte_s2		= s2_policy(L_PTE_S2_MT_UNCACHED),
+	}, {
+		.policy		= "buffered",
+		.cr_mask	= CR_C,
+		.pmd		= PMD_SECT_BUFFERED,
+		.pte		= L_PTE_MT_BUFFERABLE,
+		.pte_s2		= s2_policy(L_PTE_S2_MT_UNCACHED),
+	}, {
+		.policy		= "writethrough",
+		.cr_mask	= 0,
+		.pmd		= PMD_SECT_WT,
+		.pte		= L_PTE_MT_WRITETHROUGH,
+		.pte_s2		= s2_policy(L_PTE_S2_MT_WRITETHROUGH),
+	}, {
+		.policy		= "writeback",
+		.cr_mask	= 0,
+		.pmd		= PMD_SECT_WB,
+		.pte		= L_PTE_MT_WRITEBACK,
+		.pte_s2		= s2_policy(L_PTE_S2_MT_WRITEBACK),
+	}, {
+		.policy		= "writealloc",
+		.cr_mask	= 0,
+		.pmd		= PMD_SECT_WBWA,
+		.pte		= L_PTE_MT_WRITEALLOC,
+		.pte_s2		= s2_policy(L_PTE_S2_MT_WRITEBACK),
+	}
+};
+
+#ifdef CONFIG_CPU_CP15
+static unsigned long initial_pmd_value __initdata = 0;
+
+/*
+ * Initialise the cachepolicy variable with the initial state specified
+ * via the "pmd" value.  This is used to ensure that on ARMv6 and later,
+ * the C code sets the page tables up with the same policy as the head
+ * assembly code, which avoids an illegal state where the TLBs can get
+ * confused.  See comments in early_cachepolicy() for more information.
+ */
+void __init init_default_cache_policy(unsigned long pmd)
+{
+	int i;
+
+	initial_pmd_value = pmd;
+
+	pmd &= PMD_SECT_CACHE_MASK;
+
+	for (i = 0; i < ARRAY_SIZE(cache_policies); i++)
+		if (cache_policies[i].pmd == pmd) {
+			cachepolicy = i;
+			break;
+		}
+
+	if (i == ARRAY_SIZE(cache_policies))
+		pr_err("ERROR: could not find cache policy\n");
+}
+
+/*
+ * These are useful for identifying cache coherency problems by allowing
+ * the cache or the cache and writebuffer to be turned off.  (Note: the
+ * write buffer should not be on and the cache off).
+ */
+static int __init early_cachepolicy(char *p)
+{
+	int i, selected = -1;
+
+	for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
+		int len = strlen(cache_policies[i].policy);
+
+		if (memcmp(p, cache_policies[i].policy, len) == 0) {
+			selected = i;
+			break;
+		}
+	}
+
+	if (selected == -1)
+		pr_err("ERROR: unknown or unsupported cache policy\n");
+
+	/*
+	 * This restriction is partly to do with the way we boot; it is
+	 * unpredictable to have memory mapped using two different sets of
+	 * memory attributes (shared, type, and cache attribs).  We cannot
+	 * change these attributes once the initial assembly has set up the
+	 * page tables.
+	 */
+	if (cpu_architecture() >= CPU_ARCH_ARMv6 && selected != cachepolicy) {
+		pr_warn("Only cachepolicy=%s supported on ARMv6 and later\n",
+			cache_policies[cachepolicy].policy);
+		return 0;
+	}
+
+	if (selected != cachepolicy) {
+		unsigned long cr = __clear_cr(cache_policies[selected].cr_mask);
+		cachepolicy = selected;
+		flush_cache_all();
+		set_cr(cr);
+	}
+	return 0;
+}
+early_param("cachepolicy", early_cachepolicy);
+
+static int __init early_nocache(char *__unused)
+{
+	char *p = "buffered";
+	pr_warn("nocache is deprecated; use cachepolicy=%s\n", p);
+	early_cachepolicy(p);
+	return 0;
+}
+early_param("nocache", early_nocache);
+
+static int __init early_nowrite(char *__unused)
+{
+	char *p = "uncached";
+	pr_warn("nowb is deprecated; use cachepolicy=%s\n", p);
+	early_cachepolicy(p);
+	return 0;
+}
+early_param("nowb", early_nowrite);
+
+#ifndef CONFIG_ARM_LPAE
+static int __init early_ecc(char *p)
+{
+	if (memcmp(p, "on", 2) == 0)
+		ecc_mask = PMD_PROTECTION;
+	else if (memcmp(p, "off", 3) == 0)
+		ecc_mask = 0;
+	return 0;
+}
+early_param("ecc", early_ecc);
+#endif
+
+#else /* ifdef CONFIG_CPU_CP15 */
+
+static int __init early_cachepolicy(char *p)
+{
+	pr_warn("cachepolicy kernel parameter not supported without cp15\n");
+	return 0;
+}
+early_param("cachepolicy", early_cachepolicy);
+
+static int __init noalign_setup(char *__unused)
+{
+	pr_warn("noalign kernel parameter not supported without cp15\n");
+	return 1;
+}
+__setup("noalign", noalign_setup);
+
+#endif /* ifdef CONFIG_CPU_CP15 / else */
+
+#define PROT_PTE_DEVICE		L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_XN
+#define PROT_PTE_S2_DEVICE	PROT_PTE_DEVICE
+#define PROT_SECT_DEVICE	PMD_TYPE_SECT|PMD_SECT_AP_WRITE
+
+static struct mem_type mem_types[] __ro_after_init = {
+	[MT_DEVICE] = {		  /* Strongly ordered / ARMv6 shared device */
+		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
+				  L_PTE_SHARED,
+		.prot_pte_s2	= s2_policy(PROT_PTE_S2_DEVICE) |
+				  s2_policy(L_PTE_S2_MT_DEV_SHARED) |
+				  L_PTE_SHARED,
+		.prot_l1	= PMD_TYPE_TABLE,
+		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_S,
+		.domain		= DOMAIN_IO,
+	},
+	[MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
+		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
+		.prot_l1	= PMD_TYPE_TABLE,
+		.prot_sect	= PROT_SECT_DEVICE,
+		.domain		= DOMAIN_IO,
+	},
+	[MT_DEVICE_CACHED] = {	  /* ioremap_cached */
+		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
+		.prot_l1	= PMD_TYPE_TABLE,
+		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_WB,
+		.domain		= DOMAIN_IO,
+	},
+	[MT_DEVICE_WC] = {	/* ioremap_wc */
+		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
+		.prot_l1	= PMD_TYPE_TABLE,
+		.prot_sect	= PROT_SECT_DEVICE,
+		.domain		= DOMAIN_IO,
+	},
+	[MT_UNCACHED] = {
+		.prot_pte	= PROT_PTE_DEVICE,
+		.prot_l1	= PMD_TYPE_TABLE,
+		.prot_sect	= PMD_TYPE_SECT | PMD_SECT_XN,
+		.domain		= DOMAIN_IO,
+	},
+	[MT_CACHECLEAN] = {
+		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
+		.domain    = DOMAIN_KERNEL,
+	},
+#ifndef CONFIG_ARM_LPAE
+	[MT_MINICLEAN] = {
+		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
+		.domain    = DOMAIN_KERNEL,
+	},
+#endif
+	[MT_LOW_VECTORS] = {
+		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
+				L_PTE_RDONLY,
+		.prot_l1   = PMD_TYPE_TABLE,
+		.domain    = DOMAIN_VECTORS,
+	},
+	[MT_HIGH_VECTORS] = {
+		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
+				L_PTE_USER | L_PTE_RDONLY,
+		.prot_l1   = PMD_TYPE_TABLE,
+		.domain    = DOMAIN_VECTORS,
+	},
+	[MT_MEMORY_RWX] = {
+		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
+		.prot_l1   = PMD_TYPE_TABLE,
+		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
+		.domain    = DOMAIN_KERNEL,
+	},
+	[MT_MEMORY_RW] = {
+		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
+			     L_PTE_XN,
+		.prot_l1   = PMD_TYPE_TABLE,
+		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
+		.domain    = DOMAIN_KERNEL,
+	},
+	[MT_ROM] = {
+		.prot_sect = PMD_TYPE_SECT,
+		.domain    = DOMAIN_KERNEL,
+	},
+	[MT_MEMORY_RWX_NONCACHED] = {
+		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
+				L_PTE_MT_BUFFERABLE,
+		.prot_l1   = PMD_TYPE_TABLE,
+		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
+		.domain    = DOMAIN_KERNEL,
+	},
+	[MT_MEMORY_RW_DTCM] = {
+		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
+				L_PTE_XN,
+		.prot_l1   = PMD_TYPE_TABLE,
+		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
+		.domain    = DOMAIN_KERNEL,
+	},
+	[MT_MEMORY_RWX_ITCM] = {
+		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY,
+		.prot_l1   = PMD_TYPE_TABLE,
+		.domain    = DOMAIN_KERNEL,
+	},
+	[MT_MEMORY_RW_SO] = {
+		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
+				L_PTE_MT_UNCACHED | L_PTE_XN,
+		.prot_l1   = PMD_TYPE_TABLE,
+		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_S |
+				PMD_SECT_UNCACHED | PMD_SECT_XN,
+		.domain    = DOMAIN_KERNEL,
+	},
+	[MT_MEMORY_DMA_READY] = {
+		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
+				L_PTE_XN,
+		.prot_l1   = PMD_TYPE_TABLE,
+		.domain    = DOMAIN_KERNEL,
+	},
+};
+
+const struct mem_type *get_mem_type(unsigned int type)
+{
+	return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
+}
+EXPORT_SYMBOL(get_mem_type);
+
+static pte_t *(*pte_offset_fixmap)(pmd_t *dir, unsigned long addr);
+
+static pte_t bm_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS]
+	__aligned(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE) __initdata;
+
+static pte_t * __init pte_offset_early_fixmap(pmd_t *dir, unsigned long addr)
+{
+	return &bm_pte[pte_index(addr)];
+}
+
+static pte_t *pte_offset_late_fixmap(pmd_t *dir, unsigned long addr)
+{
+	return pte_offset_kernel(dir, addr);
+}
+
+static inline pmd_t * __init fixmap_pmd(unsigned long addr)
+{
+	pgd_t *pgd = pgd_offset_k(addr);
+	pud_t *pud = pud_offset(pgd, addr);
+	pmd_t *pmd = pmd_offset(pud, addr);
+
+	return pmd;
+}
+
+void __init early_fixmap_init(void)
+{
+	pmd_t *pmd;
+
+	/*
+	 * The early fixmap range spans multiple pmds, for which
+	 * we are not prepared:
+	 */
+	BUILD_BUG_ON((__fix_to_virt(__end_of_early_ioremap_region) >> PMD_SHIFT)
+		     != FIXADDR_TOP >> PMD_SHIFT);
+
+	pmd = fixmap_pmd(FIXADDR_TOP);
+	pmd_populate_kernel(&init_mm, pmd, bm_pte);
+
+	pte_offset_fixmap = pte_offset_early_fixmap;
+}
+
+/*
+ * To avoid TLB flush broadcasts, this uses local_flush_tlb_kernel_range().
+ * As a result, this can only be called with preemption disabled, as under
+ * stop_machine().
+ */
+void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
+{
+	unsigned long vaddr = __fix_to_virt(idx);
+	pte_t *pte = pte_offset_fixmap(pmd_off_k(vaddr), vaddr);
+
+	/* Make sure fixmap region does not exceed available allocation. */
+	BUILD_BUG_ON(FIXADDR_START + (__end_of_fixed_addresses * PAGE_SIZE) >
+		     FIXADDR_END);
+	BUG_ON(idx >= __end_of_fixed_addresses);
+
+	/* we only support device mappings until pgprot_kernel has been set */
+	if (WARN_ON(pgprot_val(prot) != pgprot_val(FIXMAP_PAGE_IO) &&
+		    pgprot_val(pgprot_kernel) == 0))
+		return;
+
+	if (pgprot_val(prot))
+		set_pte_at(NULL, vaddr, pte,
+			pfn_pte(phys >> PAGE_SHIFT, prot));
+	else
+		pte_clear(NULL, vaddr, pte);
+	local_flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE);
+}
+
+/*
+ * Adjust the PMD section entries according to the CPU in use.
+ */
+static void __init build_mem_type_table(void)
+{
+	struct cachepolicy *cp;
+	unsigned int cr = get_cr();
+	pteval_t user_pgprot, kern_pgprot, vecs_pgprot;
+	pteval_t hyp_device_pgprot, s2_pgprot, s2_device_pgprot;
+	int cpu_arch = cpu_architecture();
+	int i;
+
+	if (cpu_arch < CPU_ARCH_ARMv6) {
+#if defined(CONFIG_CPU_DCACHE_DISABLE)
+		if (cachepolicy > CPOLICY_BUFFERED)
+			cachepolicy = CPOLICY_BUFFERED;
+#elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
+		if (cachepolicy > CPOLICY_WRITETHROUGH)
+			cachepolicy = CPOLICY_WRITETHROUGH;
+#endif
+	}
+	if (cpu_arch < CPU_ARCH_ARMv5) {
+		if (cachepolicy >= CPOLICY_WRITEALLOC)
+			cachepolicy = CPOLICY_WRITEBACK;
+		ecc_mask = 0;
+	}
+
+	if (is_smp()) {
+		if (cachepolicy != CPOLICY_WRITEALLOC) {
+			pr_warn("Forcing write-allocate cache policy for SMP\n");
+			cachepolicy = CPOLICY_WRITEALLOC;
+		}
+		if (!(initial_pmd_value & PMD_SECT_S)) {
+			pr_warn("Forcing shared mappings for SMP\n");
+			initial_pmd_value |= PMD_SECT_S;
+		}
+	}
+
+	/*
+	 * Strip out features not present on earlier architectures.
+	 * Pre-ARMv5 CPUs don't have TEX bits.  Pre-ARMv6 CPUs or those
+	 * without extended page tables don't have the 'Shared' bit.
+	 */
+	if (cpu_arch < CPU_ARCH_ARMv5)
+		for (i = 0; i < ARRAY_SIZE(mem_types); i++)
+			mem_types[i].prot_sect &= ~PMD_SECT_TEX(7);
+	if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3())
+		for (i = 0; i < ARRAY_SIZE(mem_types); i++)
+			mem_types[i].prot_sect &= ~PMD_SECT_S;
+
+	/*
+	 * On ARMv5 and lower, bit 4 must be set for page tables (was: cache
+	 * "update-able on write" bit on ARM610).  However, Xscale and
+	 * Xscale3 require this bit to be cleared.
+	 */
+	if (cpu_is_xscale_family()) {
+		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
+			mem_types[i].prot_sect &= ~PMD_BIT4;
+			mem_types[i].prot_l1 &= ~PMD_BIT4;
+		}
+	} else if (cpu_arch < CPU_ARCH_ARMv6) {
+		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
+			if (mem_types[i].prot_l1)
+				mem_types[i].prot_l1 |= PMD_BIT4;
+			if (mem_types[i].prot_sect)
+				mem_types[i].prot_sect |= PMD_BIT4;
+		}
+	}
+
+	/*
+	 * Mark the device areas according to the CPU/architecture.
+	 */
+	if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) {
+		if (!cpu_is_xsc3()) {
+			/*
+			 * Mark device regions on ARMv6+ as execute-never
+			 * to prevent speculative instruction fetches.
+			 */
+			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN;
+			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN;
+			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN;
+			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN;
+
+			/* Also setup NX memory mapping */
+			mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN;
+		}
+		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
+			/*
+			 * For ARMv7 with TEX remapping,
+			 * - shared device is SXCB=1100
+			 * - nonshared device is SXCB=0100
+			 * - write combine device mem is SXCB=0001
+			 * (Uncached Normal memory)
+			 */
+			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1);
+			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1);
+			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
+		} else if (cpu_is_xsc3()) {
+			/*
+			 * For Xscale3,
+			 * - shared device is TEXCB=00101
+			 * - nonshared device is TEXCB=01000
+			 * - write combine device mem is TEXCB=00100
+			 * (Inner/Outer Uncacheable in xsc3 parlance)
+			 */
+			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED;
+			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
+			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
+		} else {
+			/*
+			 * For ARMv6 and ARMv7 without TEX remapping,
+			 * - shared device is TEXCB=00001
+			 * - nonshared device is TEXCB=01000
+			 * - write combine device mem is TEXCB=00100
+			 * (Uncached Normal in ARMv6 parlance).
+			 */
+			mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
+			mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
+			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
+		}
+	} else {
+		/*
+		 * On others, write combining is "Uncached/Buffered"
+		 */
+		mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
+	}
+
+	/*
+	 * Now deal with the memory-type mappings
+	 */
+	cp = &cache_policies[cachepolicy];
+	vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;
+	s2_pgprot = cp->pte_s2;
+	hyp_device_pgprot = mem_types[MT_DEVICE].prot_pte;
+	s2_device_pgprot = mem_types[MT_DEVICE].prot_pte_s2;
+
+#ifndef CONFIG_ARM_LPAE
+	/*
+	 * We don't use domains on ARMv6 (since this causes problems with
+	 * v6/v7 kernels), so we must use a separate memory type for user
+	 * r/o, kernel r/w to map the vectors page.
+	 */
+	if (cpu_arch == CPU_ARCH_ARMv6)
+		vecs_pgprot |= L_PTE_MT_VECTORS;
+
+	/*
+	 * Check whether the CPU supports the PXN bit in the
+	 * Short-descriptor translation table format descriptors.
+	 */
+	if (cpu_arch == CPU_ARCH_ARMv7 &&
+		(read_cpuid_ext(CPUID_EXT_MMFR0) & 0xF) >= 4) {
+		user_pmd_table |= PMD_PXNTABLE;
+	}
+#endif
+
+	/*
+	 * ARMv6 and above have extended page tables.
+	 */
+	if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
+#ifndef CONFIG_ARM_LPAE
+		/*
+		 * Mark cache clean areas and XIP ROM read only
+		 * from SVC mode and no access from userspace.
+		 */
+		mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
+		mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
+		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
+#endif
+
+		/*
+		 * If the initial page tables were created with the S bit
+		 * set, then we need to do the same here for the same
+		 * reasons given in early_cachepolicy().
+		 */
+		if (initial_pmd_value & PMD_SECT_S) {
+			user_pgprot |= L_PTE_SHARED;
+			kern_pgprot |= L_PTE_SHARED;
+			vecs_pgprot |= L_PTE_SHARED;
+			s2_pgprot |= L_PTE_SHARED;
+			mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S;
+			mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED;
+			mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S;
+			mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED;
+			mem_types[MT_MEMORY_RWX].prot_sect |= PMD_SECT_S;
+			mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED;
+			mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S;
+			mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED;
+			mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED;
+			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S;
+			mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED;
+		}
+	}
+
+	/*
+	 * Non-cacheable Normal - intended for memory areas that must
+	 * not cause dirty cache line writebacks when used
+	 */
+	if (cpu_arch >= CPU_ARCH_ARMv6) {
+		if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
+			/* Non-cacheable Normal is XCB = 001 */
+			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |=
+				PMD_SECT_BUFFERED;
+		} else {
+			/* For both ARMv6 and non-TEX-remapping ARMv7 */
+			mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |=
+				PMD_SECT_TEX(1);
+		}
+	} else {
+		mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
+	}
+
+#ifdef CONFIG_ARM_LPAE
+	/*
+	 * Do not generate access flag faults for the kernel mappings.
+	 */
+	for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
+		mem_types[i].prot_pte |= PTE_EXT_AF;
+		if (mem_types[i].prot_sect)
+			mem_types[i].prot_sect |= PMD_SECT_AF;
+	}
+	kern_pgprot |= PTE_EXT_AF;
+	vecs_pgprot |= PTE_EXT_AF;
+
+	/*
+	 * Set PXN for user mappings
+	 */
+	user_pgprot |= PTE_EXT_PXN;
+#endif
+
+	for (i = 0; i < 16; i++) {
+		pteval_t v = pgprot_val(protection_map[i]);
+		protection_map[i] = __pgprot(v | user_pgprot);
+	}
+
+	mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot;
+	mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot;
+
+	pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
+	pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
+				 L_PTE_DIRTY | kern_pgprot);
+	pgprot_s2  = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | s2_pgprot);
+	pgprot_s2_device  = __pgprot(s2_device_pgprot);
+	pgprot_hyp_device  = __pgprot(hyp_device_pgprot);
+
+	mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
+	mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
+	mem_types[MT_MEMORY_RWX].prot_sect |= ecc_mask | cp->pmd;
+	mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot;
+	mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd;
+	mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot;
+	mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot;
+	mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask;
+	mem_types[MT_ROM].prot_sect |= cp->pmd;
+
+	switch (cp->pmd) {
+	case PMD_SECT_WT:
+		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
+		break;
+	case PMD_SECT_WB:
+	case PMD_SECT_WBWA:
+		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
+		break;
+	}
+	pr_info("Memory policy: %sData cache %s\n",
+		ecc_mask ? "ECC enabled, " : "", cp->policy);
+
+	for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
+		struct mem_type *t = &mem_types[i];
+		if (t->prot_l1)
+			t->prot_l1 |= PMD_DOMAIN(t->domain);
+		if (t->prot_sect)
+			t->prot_sect |= PMD_DOMAIN(t->domain);
+	}
+}
+
+#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE
+pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+			      unsigned long size, pgprot_t vma_prot)
+{
+	if (!pfn_valid(pfn))
+		return pgprot_noncached(vma_prot);
+	else if (file->f_flags & O_SYNC)
+		return pgprot_writecombine(vma_prot);
+	return vma_prot;
+}
+EXPORT_SYMBOL(phys_mem_access_prot);
+#endif
+
+#define vectors_base()	(vectors_high() ? 0xffff0000 : 0)
+
+static void __init *early_alloc_aligned(unsigned long sz, unsigned long align)
+{
+	void *ptr = __va(memblock_alloc(sz, align));
+	memset(ptr, 0, sz);
+	return ptr;
+}
+
+static void __init *early_alloc(unsigned long sz)
+{
+	return early_alloc_aligned(sz, sz);
+}
+
+static void *__init late_alloc(unsigned long sz)
+{
+	void *ptr = (void *)__get_free_pages(PGALLOC_GFP, get_order(sz));
+
+	if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
+		BUG();
+	return ptr;
+}
+
+static pte_t * __init arm_pte_alloc(pmd_t *pmd, unsigned long addr,
+				unsigned long prot,
+				void *(*alloc)(unsigned long sz))
+{
+	if (pmd_none(*pmd)) {
+		pte_t *pte = alloc(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE);
+		__pmd_populate(pmd, __pa(pte), prot);
+	}
+	BUG_ON(pmd_bad(*pmd));
+	return pte_offset_kernel(pmd, addr);
+}
+
+static pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr,
+				      unsigned long prot)
+{
+	return arm_pte_alloc(pmd, addr, prot, early_alloc);
+}
+
+static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
+				  unsigned long end, unsigned long pfn,
+				  const struct mem_type *type,
+				  void *(*alloc)(unsigned long sz),
+				  bool ng)
+{
+	pte_t *pte = arm_pte_alloc(pmd, addr, type->prot_l1, alloc);
+	do {
+		set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)),
+			    ng ? PTE_EXT_NG : 0);
+		pfn++;
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+}
+
+static void __init __map_init_section(pmd_t *pmd, unsigned long addr,
+			unsigned long end, phys_addr_t phys,
+			const struct mem_type *type, bool ng)
+{
+	pmd_t *p = pmd;
+
+#ifndef CONFIG_ARM_LPAE
+	/*
+	 * In classic MMU format, puds and pmds are folded in to
+	 * the pgds. pmd_offset gives the PGD entry. PGDs refer to a
+	 * group of L1 entries making up one logical pointer to
+	 * an L2 table (2MB), whereas PMDs refer to the individual
+	 * L1 entries (1MB). Hence increment to get the correct
+	 * offset for odd 1MB sections.
+	 * (See arch/arm/include/asm/pgtable-2level.h)
+	 */
+	if (addr & SECTION_SIZE)
+		pmd++;
+#endif
+	do {
+		*pmd = __pmd(phys | type->prot_sect | (ng ? PMD_SECT_nG : 0));
+		phys += SECTION_SIZE;
+	} while (pmd++, addr += SECTION_SIZE, addr != end);
+
+	flush_pmd_entry(p);
+}
+
+static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
+				      unsigned long end, phys_addr_t phys,
+				      const struct mem_type *type,
+				      void *(*alloc)(unsigned long sz), bool ng)
+{
+	pmd_t *pmd = pmd_offset(pud, addr);
+	unsigned long next;
+
+	do {
+		/*
+		 * With LPAE, we must loop over to map
+		 * all the pmds for the given range.
+		 */
+		next = pmd_addr_end(addr, end);
+
+		/*
+		 * Try a section mapping - addr, next and phys must all be
+		 * aligned to a section boundary.
+		 */
+		if (type->prot_sect &&
+				((addr | next | phys) & ~SECTION_MASK) == 0) {
+			__map_init_section(pmd, addr, next, phys, type, ng);
+		} else {
+			alloc_init_pte(pmd, addr, next,
+				       __phys_to_pfn(phys), type, alloc, ng);
+		}
+
+		phys += next - addr;
+
+	} while (pmd++, addr = next, addr != end);
+}
+
+static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
+				  unsigned long end, phys_addr_t phys,
+				  const struct mem_type *type,
+				  void *(*alloc)(unsigned long sz), bool ng)
+{
+	pud_t *pud = pud_offset(pgd, addr);
+	unsigned long next;
+
+	do {
+		next = pud_addr_end(addr, end);
+		alloc_init_pmd(pud, addr, next, phys, type, alloc, ng);
+		phys += next - addr;
+	} while (pud++, addr = next, addr != end);
+}
+
+#ifndef CONFIG_ARM_LPAE
+static void __init create_36bit_mapping(struct mm_struct *mm,
+					struct map_desc *md,
+					const struct mem_type *type,
+					bool ng)
+{
+	unsigned long addr, length, end;
+	phys_addr_t phys;
+	pgd_t *pgd;
+
+	addr = md->virtual;
+	phys = __pfn_to_phys(md->pfn);
+	length = PAGE_ALIGN(md->length);
+
+	if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) {
+		pr_err("MM: CPU does not support supersection mapping for 0x%08llx at 0x%08lx\n",
+		       (long long)__pfn_to_phys((u64)md->pfn), addr);
+		return;
+	}
+
+	/* N.B.	ARMv6 supersections are only defined to work with domain 0.
+	 *	Since domain assignments can in fact be arbitrary, the
+	 *	'domain == 0' check below is required to ensure that ARMv6
+	 *	supersections are only allocated for domain 0 regardless
+	 *	of the actual domain assignments in use.
+	 */
+	if (type->domain) {
+		pr_err("MM: invalid domain in supersection mapping for 0x%08llx at 0x%08lx\n",
+		       (long long)__pfn_to_phys((u64)md->pfn), addr);
+		return;
+	}
+
+	if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) {
+		pr_err("MM: cannot create mapping for 0x%08llx at 0x%08lx invalid alignment\n",
+		       (long long)__pfn_to_phys((u64)md->pfn), addr);
+		return;
+	}
+
+	/*
+	 * Shift bits [35:32] of address into bits [23:20] of PMD
+	 * (See ARMv6 spec).
+	 */
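+	/*
+	 * Worked example (4K pages): for md->pfn = 0x110000, i.e. physical
+	 * address 0x1_1000_0000, pfn >> 20 is 1, so bits [23:20] of the
+	 * section entry become 0001.
+	 */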
+	phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20);
+
+	pgd = pgd_offset(mm, addr);
+	end = addr + length;
+	do {
+		pud_t *pud = pud_offset(pgd, addr);
+		pmd_t *pmd = pmd_offset(pud, addr);
+		int i;
+
+		for (i = 0; i < 16; i++)
+			*pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER |
+				       (ng ? PMD_SECT_nG : 0));
+
+		addr += SUPERSECTION_SIZE;
+		phys += SUPERSECTION_SIZE;
+		pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT;
+	} while (addr != end);
+}
+#endif	/* !CONFIG_ARM_LPAE */
+
+static void __init __create_mapping(struct mm_struct *mm, struct map_desc *md,
+				    void *(*alloc)(unsigned long sz),
+				    bool ng)
+{
+	unsigned long addr, length, end;
+	phys_addr_t phys;
+	const struct mem_type *type;
+	pgd_t *pgd;
+
+	type = &mem_types[md->type];
+
+#ifndef CONFIG_ARM_LPAE
+	/*
+	 * Catch 36-bit addresses
+	 */
+	if (md->pfn >= 0x100000) {
+		create_36bit_mapping(mm, md, type, ng);
+		return;
+	}
+#endif
+
+	addr = md->virtual & PAGE_MASK;
+	phys = __pfn_to_phys(md->pfn);
+	length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
+
+	if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
+		pr_warn("BUG: map for 0x%08llx at 0x%08lx can not be mapped using pages, ignoring.\n",
+			(long long)__pfn_to_phys(md->pfn), addr);
+		return;
+	}
+
+	pgd = pgd_offset(mm, addr);
+	end = addr + length;
+	do {
+		unsigned long next = pgd_addr_end(addr, end);
+
+		alloc_init_pud(pgd, addr, next, phys, type, alloc, ng);
+
+		phys += next - addr;
+		addr = next;
+	} while (pgd++, addr != end);
+}
+
+/*
+ * Create the page directory entries and any necessary
+ * page tables for the mapping specified by `md'.  We
+ * are able to cope here with varying sizes and address
+ * offsets, and we take full advantage of sections and
+ * supersections.
+ */
+static void __init create_mapping(struct map_desc *md)
+{
+	if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
+		pr_warn("BUG: not creating mapping for 0x%08llx at 0x%08lx in user region\n",
+			(long long)__pfn_to_phys((u64)md->pfn), md->virtual);
+		return;
+	}
+
+	if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
+	    md->virtual >= PAGE_OFFSET && md->virtual < FIXADDR_START &&
+	    (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) {
+		pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n",
+			(long long)__pfn_to_phys((u64)md->pfn), md->virtual);
+	}
+
+	__create_mapping(&init_mm, md, early_alloc, false);
+}
+
+void __init create_mapping_late(struct mm_struct *mm, struct map_desc *md,
+				bool ng)
+{
+#ifdef CONFIG_ARM_LPAE
+	pud_t *pud = pud_alloc(mm, pgd_offset(mm, md->virtual), md->virtual);
+	if (WARN_ON(!pud))
+		return;
+	pmd_alloc(mm, pud, 0);
+#endif
+	__create_mapping(mm, md, late_alloc, ng);
+}
+
+/*
+ * Create the architecture specific mappings
+ */
+void __init iotable_init(struct map_desc *io_desc, int nr)
+{
+	struct map_desc *md;
+	struct vm_struct *vm;
+	struct static_vm *svm;
+
+	if (!nr)
+		return;
+
+	svm = early_alloc_aligned(sizeof(*svm) * nr, __alignof__(*svm));
+
+	for (md = io_desc; nr; md++, nr--) {
+		create_mapping(md);
+
+		vm = &svm->vm;
+		vm->addr = (void *)(md->virtual & PAGE_MASK);
+		vm->size = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
+		vm->phys_addr = __pfn_to_phys(md->pfn);
+		vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING;
+		vm->flags |= VM_ARM_MTYPE(md->type);
+		vm->caller = iotable_init;
+		add_static_vm_early(svm++);
+	}
+}
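+
+/*
+ * A minimal sketch of the expected caller (hypothetical machine support code
+ * with made-up addresses, illustration only): describe the static device
+ * windows in a map_desc array and hand it to iotable_init() from the
+ * machine's map_io hook.
+ */
+#if 0
+static struct map_desc example_io_desc[] __initdata = {
+	{
+		.virtual	= 0xf8000000,		/* assumed virtual window */
+		.pfn		= __phys_to_pfn(0x10000000),	/* assumed phys base */
+		.length		= SZ_1M,
+		.type		= MT_DEVICE,
+	},
+};
+
+static void __init example_map_io(void)
+{
+	iotable_init(example_io_desc, ARRAY_SIZE(example_io_desc));
+}
+#endif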
+
+void __init vm_reserve_area_early(unsigned long addr, unsigned long size,
+				  void *caller)
+{
+	struct vm_struct *vm;
+	struct static_vm *svm;
+
+	svm = early_alloc_aligned(sizeof(*svm), __alignof__(*svm));
+
+	vm = &svm->vm;
+	vm->addr = (void *)addr;
+	vm->size = size;
+	vm->flags = VM_IOREMAP | VM_ARM_EMPTY_MAPPING;
+	vm->caller = caller;
+	add_static_vm_early(svm);
+}
+
+#ifndef CONFIG_ARM_LPAE
+
+/*
+ * The Linux PMD is made of two consecutive section entries covering 2MB
+ * (see definition in include/asm/pgtable-2level.h).  However a call to
+ * create_mapping() may optimize static mappings by using individual
+ * 1MB section mappings.  This leaves the actual PMD potentially half
+ * initialized if the top or bottom section entry isn't used, leaving it
+ * open to problems if a subsequent ioremap() or vmalloc() tries to use
+ * the virtual space left free by that unused section entry.
+ *
+ * Let's avoid the issue by inserting dummy vm entries covering the unused
+ * PMD halves once the static mappings are in place.
+ */
+
+static void __init pmd_empty_section_gap(unsigned long addr)
+{
+	vm_reserve_area_early(addr, SECTION_SIZE, pmd_empty_section_gap);
+}
+
+static void __init fill_pmd_gaps(void)
+{
+	struct static_vm *svm;
+	struct vm_struct *vm;
+	unsigned long addr, next = 0;
+	pmd_t *pmd;
+
+	list_for_each_entry(svm, &static_vmlist, list) {
+		vm = &svm->vm;
+		addr = (unsigned long)vm->addr;
+		if (addr < next)
+			continue;
+
+		/*
+		 * Check if this vm starts on an odd section boundary.
+		 * If so and the first section entry for this PMD is free
+		 * then we block the corresponding virtual address.
+		 */
+		if ((addr & ~PMD_MASK) == SECTION_SIZE) {
+			pmd = pmd_off_k(addr);
+			if (pmd_none(*pmd))
+				pmd_empty_section_gap(addr & PMD_MASK);
+		}
+
+		/*
+		 * Then check if this vm ends on an odd section boundary.
+		 * If so and the second section entry for this PMD is empty
+		 * then we block the corresponding virtual address.
+		 */
+		addr += vm->size;
+		if ((addr & ~PMD_MASK) == SECTION_SIZE) {
+			pmd = pmd_off_k(addr) + 1;
+			if (pmd_none(*pmd))
+				pmd_empty_section_gap(addr);
+		}
+
+		/* no need to look at any vm entry until we hit the next PMD */
+		next = (addr + PMD_SIZE - 1) & PMD_MASK;
+	}
+}
+
+#else
+#define fill_pmd_gaps() do { } while (0)
+#endif
+
+#if defined(CONFIG_PCI) && !defined(CONFIG_NEED_MACH_IO_H)
+static void __init pci_reserve_io(void)
+{
+	struct static_vm *svm;
+
+	svm = find_static_vm_vaddr((void *)PCI_IO_VIRT_BASE);
+	if (svm)
+		return;
+
+	vm_reserve_area_early(PCI_IO_VIRT_BASE, SZ_2M, pci_reserve_io);
+}
+#else
+#define pci_reserve_io() do { } while (0)
+#endif
+
+#ifdef CONFIG_DEBUG_LL
+void __init debug_ll_io_init(void)
+{
+	struct map_desc map;
+
+	debug_ll_addr(&map.pfn, &map.virtual);
+	if (!map.pfn || !map.virtual)
+		return;
+	map.pfn = __phys_to_pfn(map.pfn);
+	map.virtual &= PAGE_MASK;
+	map.length = PAGE_SIZE;
+	map.type = MT_DEVICE;
+	iotable_init(&map, 1);
+}
+#endif
+
+static void * __initdata vmalloc_min =
+	(void *)(VMALLOC_END - (240 << 20) - VMALLOC_OFFSET);
+
+/*
+ * vmalloc=size forces the vmalloc area to be exactly 'size'
+ * bytes. This can be used to increase (or decrease) the vmalloc
+ * area - the default is 240m.
+ */
+static int __init early_vmalloc(char *arg)
+{
+	unsigned long vmalloc_reserve = memparse(arg, NULL);
+
+	if (vmalloc_reserve < SZ_16M) {
+		vmalloc_reserve = SZ_16M;
+		pr_warn("vmalloc area too small, limiting to %luMB\n",
+			vmalloc_reserve >> 20);
+	}
+
+	if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) {
+		vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M);
+		pr_warn("vmalloc area is too big, limiting to %luMB\n",
+			vmalloc_reserve >> 20);
+	}
+
+	vmalloc_min = (void *)(VMALLOC_END - vmalloc_reserve);
+	return 0;
+}
+early_param("vmalloc", early_vmalloc);
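+
+/*
+ * Example (illustrative): booting with "vmalloc=512M" moves vmalloc_min
+ * down so that the vmalloc area spans 512MB.  Requests below 16MB, or
+ * ones that would leave less than 32MB of lowmem, are clamped by
+ * early_vmalloc() above.
+ */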
+
+phys_addr_t arm_lowmem_limit __initdata = 0;
+
+void __init adjust_lowmem_bounds(void)
+{
+	phys_addr_t memblock_limit = 0;
+	u64 vmalloc_limit;
+	struct memblock_region *reg;
+	phys_addr_t lowmem_limit = 0;
+
+	/*
+	 * Let's use our own (unoptimized) equivalent of __pa() that is
+	 * not affected by wrap-arounds when sizeof(phys_addr_t) == 4.
+	 * The result is used as the upper bound on physical memory address
+	 * and may itself be outside the valid range for which phys_addr_t
+	 * and therefore __pa() is defined.
+	 */
+	vmalloc_limit = (u64)(uintptr_t)vmalloc_min - PAGE_OFFSET + PHYS_OFFSET;
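+	/*
+	 * e.g. with PAGE_OFFSET 0xc0000000, PHYS_OFFSET 0x80000000 and a
+	 * vmalloc_min of 0xe0000000 (illustrative values), this works out
+	 * to 0xa0000000.
+	 */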
+
+	for_each_memblock(memory, reg) {
+		phys_addr_t block_start = reg->base;
+		phys_addr_t block_end = reg->base + reg->size;
+
+		if (reg->base < vmalloc_limit) {
+			if (block_end > lowmem_limit)
+				/*
+				 * Compare as u64 to ensure vmalloc_limit does
+				 * not get truncated. block_end should always
+				 * fit in phys_addr_t so there should be no
+				 * issue with assignment.
+				 */
+				lowmem_limit = min_t(u64,
+							 vmalloc_limit,
+							 block_end);
+
+			/*
+			 * Find the first non-pmd-aligned page, and point
+			 * memblock_limit at it. This relies on rounding the
+			 * limit down to be pmd-aligned, which happens at the
+			 * end of this function.
+			 *
+			 * With this algorithm, the start or end of almost any
+			 * bank can be non-pmd-aligned. The only exception is
+			 * that the start of bank 0 must be section-
+			 * aligned, since otherwise memory would need to be
+			 * allocated when mapping the start of bank 0, which
+			 * occurs before any free memory is mapped.
+			 */
+			if (!memblock_limit) {
+				if (!IS_ALIGNED(block_start, PMD_SIZE))
+					memblock_limit = block_start;
+				else if (!IS_ALIGNED(block_end, PMD_SIZE))
+					memblock_limit = lowmem_limit;
+			}
+
+		}
+	}
+
+	arm_lowmem_limit = lowmem_limit;
+
+	high_memory = __va(arm_lowmem_limit - 1) + 1;
+
+	if (!memblock_limit)
+		memblock_limit = arm_lowmem_limit;
+
+	/*
+	 * Round the memblock limit down to a pmd size.  This
+	 * helps to ensure that we will allocate memory from the
+	 * last full pmd, which should be mapped.
+	 */
+	memblock_limit = round_down(memblock_limit, PMD_SIZE);
+
+	if (!IS_ENABLED(CONFIG_HIGHMEM) || cache_is_vipt_aliasing()) {
+		if (memblock_end_of_DRAM() > arm_lowmem_limit) {
+			phys_addr_t end = memblock_end_of_DRAM();
+
+			pr_notice("Ignoring RAM at %pa-%pa\n",
+				  &memblock_limit, &end);
+			pr_notice("Consider using a HIGHMEM enabled kernel.\n");
+
+			memblock_remove(memblock_limit, end - memblock_limit);
+		}
+	}
+
+	memblock_set_current_limit(memblock_limit);
+}
+
+static inline void prepare_page_table(void)
+{
+	unsigned long addr;
+	phys_addr_t end;
+
+	/*
+	 * Clear out all the mappings below the kernel image.
+	 */
+	for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE)
+		pmd_clear(pmd_off_k(addr));
+
+#ifdef CONFIG_XIP_KERNEL
+	/* The XIP kernel is mapped in the module area -- skip over it */
+	addr = ((unsigned long)_exiprom + PMD_SIZE - 1) & PMD_MASK;
+#endif
+	for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE)
+		pmd_clear(pmd_off_k(addr));
+
+	/*
+	 * Find the end of the first block of lowmem.
+	 */
+	end = memblock.memory.regions[0].base + memblock.memory.regions[0].size;
+	if (end >= arm_lowmem_limit)
+		end = arm_lowmem_limit;
+
+	/*
+	 * Clear out all the kernel space mappings, except for the first
+	 * memory bank, up to the vmalloc region.
+	 */
+	for (addr = __phys_to_virt(end);
+	     addr < VMALLOC_START; addr += PMD_SIZE)
+		pmd_clear(pmd_off_k(addr));
+}
+
+#ifdef CONFIG_ARM_LPAE
+/* the first page is reserved for pgd */
+#define SWAPPER_PG_DIR_SIZE	(PAGE_SIZE + \
+				 PTRS_PER_PGD * PTRS_PER_PMD * sizeof(pmd_t))
+#else
+#define SWAPPER_PG_DIR_SIZE	(PTRS_PER_PGD * sizeof(pgd_t))
+#endif
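+
+/*
+ * (For reference, assuming the usual ARM values: non-LPAE reserves
+ * 2048 x 8 bytes = 16KB; LPAE reserves one 4KB pgd page plus
+ * 4 x 512 x 8 bytes = 16KB of PMD tables.)
+ */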
+
+/*
+ * Reserve the special regions of memory
+ */
+void __init arm_mm_memblock_reserve(void)
+{
+	/*
+	 * Reserve the page tables.  These are already in use,
+	 * and can only be in node 0.
+	 */
+	memblock_reserve(__pa(swapper_pg_dir), SWAPPER_PG_DIR_SIZE);
+
+#ifdef CONFIG_SA1111
+	/*
+	 * Because of the SA1111 DMA bug, we want to preserve our
+	 * precious DMA-able memory...
+	 */
+	memblock_reserve(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET);
+#endif
+}
+
+/*
+ * Set up the device mappings.  Since we clear out the page tables for all
+ * mappings above VMALLOC_START, except early fixmap, we might remove debug
+ * device mappings.  This means earlycon can be used to debug this function.
+ * Any other function or debugging method which may touch any device _will_
+ * crash the kernel.
+ */
+static void __init devicemaps_init(const struct machine_desc *mdesc)
+{
+	struct map_desc map;
+	unsigned long addr;
+	void *vectors;
+
+	/*
+	 * Allocate the vector page early.
+	 */
+	vectors = early_alloc(PAGE_SIZE * 2);
+
+	early_trap_init(vectors);
+
+	/*
+	 * Clear page table except top pmd used by early fixmaps
+	 */
+	for (addr = VMALLOC_START; addr < (FIXADDR_TOP & PMD_MASK); addr += PMD_SIZE)
+		pmd_clear(pmd_off_k(addr));
+
+	/*
+	 * Map the kernel if it is XIP.
+	 * It is always first in the module area.
+	 */
+#ifdef CONFIG_XIP_KERNEL
+	map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
+	map.virtual = MODULES_VADDR;
+	map.length = ((unsigned long)_exiprom - map.virtual + ~SECTION_MASK) & SECTION_MASK;
+	map.type = MT_ROM;
+	create_mapping(&map);
+#endif
+
+	/*
+	 * Map the cache flushing regions.
+	 */
+#ifdef FLUSH_BASE
+	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
+	map.virtual = FLUSH_BASE;
+	map.length = SZ_1M;
+	map.type = MT_CACHECLEAN;
+	create_mapping(&map);
+#endif
+#ifdef FLUSH_BASE_MINICACHE
+	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M);
+	map.virtual = FLUSH_BASE_MINICACHE;
+	map.length = SZ_1M;
+	map.type = MT_MINICLEAN;
+	create_mapping(&map);
+#endif
+
+	/*
+	 * Create a mapping for the machine vectors at the high-vectors
+	 * location (0xffff0000).  If we aren't using high-vectors, also
+	 * create a mapping at the low-vectors virtual address.
+	 */
+	map.pfn = __phys_to_pfn(virt_to_phys(vectors));
+	map.virtual = 0xffff0000;
+	map.length = PAGE_SIZE;
+#ifdef CONFIG_KUSER_HELPERS
+	map.type = MT_HIGH_VECTORS;
+#else
+	map.type = MT_LOW_VECTORS;
+#endif
+	create_mapping(&map);
+
+	if (!vectors_high()) {
+		map.virtual = 0;
+		map.length = PAGE_SIZE * 2;
+		map.type = MT_LOW_VECTORS;
+		create_mapping(&map);
+	}
+
+	/* Now create a kernel read-only mapping */
+	map.pfn += 1;
+	map.virtual = 0xffff0000 + PAGE_SIZE;
+	map.length = PAGE_SIZE;
+	map.type = MT_LOW_VECTORS;
+	create_mapping(&map);
+
+	/*
+	 * Ask the machine support to map in the statically mapped devices.
+	 */
+	if (mdesc->map_io)
+		mdesc->map_io();
+	else
+		debug_ll_io_init();
+	fill_pmd_gaps();
+
+	/* Reserve fixed i/o space in VMALLOC region */
+	pci_reserve_io();
+
+	/*
+	 * Finally flush the caches and tlb to ensure that we're in a
+	 * consistent state wrt the writebuffer.  This also ensures that
+	 * any write-allocated cache lines in the vector page are written
+	 * back.  After this point, we can start to touch devices again.
+	 */
+	local_flush_tlb_all();
+	flush_cache_all();
+
+	/* Enable asynchronous aborts */
+	early_abt_enable();
+}
+
+static void __init kmap_init(void)
+{
+#ifdef CONFIG_HIGHMEM
+	pkmap_page_table = early_pte_alloc(pmd_off_k(PKMAP_BASE),
+		PKMAP_BASE, _PAGE_KERNEL_TABLE);
+#endif
+
+	early_pte_alloc(pmd_off_k(FIXADDR_START), FIXADDR_START,
+			_PAGE_KERNEL_TABLE);
+}
+
+static void __init map_lowmem(void)
+{
+	struct memblock_region *reg;
+	phys_addr_t kernel_x_start = round_down(__pa(KERNEL_START), SECTION_SIZE);
+	phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
+
+	/* Map all the lowmem memory banks. */
+	for_each_memblock(memory, reg) {
+		phys_addr_t start = reg->base;
+		phys_addr_t end = start + reg->size;
+		struct map_desc map;
+
+		if (memblock_is_nomap(reg))
+			continue;
+
+		if (end > arm_lowmem_limit)
+			end = arm_lowmem_limit;
+		if (start >= end)
+			break;
+
+		if (end < kernel_x_start) {
+			map.pfn = __phys_to_pfn(start);
+			map.virtual = __phys_to_virt(start);
+			map.length = end - start;
+			map.type = MT_MEMORY_RWX;
+
+			create_mapping(&map);
+		} else if (start >= kernel_x_end) {
+			map.pfn = __phys_to_pfn(start);
+			map.virtual = __phys_to_virt(start);
+			map.length = end - start;
+			map.type = MT_MEMORY_RW;
+
+			create_mapping(&map);
+		} else {
+			/* This better cover the entire kernel */
+			if (start < kernel_x_start) {
+				map.pfn = __phys_to_pfn(start);
+				map.virtual = __phys_to_virt(start);
+				map.length = kernel_x_start - start;
+				map.type = MT_MEMORY_RW;
+
+				create_mapping(&map);
+			}
+
+			map.pfn = __phys_to_pfn(kernel_x_start);
+			map.virtual = __phys_to_virt(kernel_x_start);
+			map.length = kernel_x_end - kernel_x_start;
+			map.type = MT_MEMORY_RWX;
+
+			create_mapping(&map);
+
+			if (kernel_x_end < end) {
+				map.pfn = __phys_to_pfn(kernel_x_end);
+				map.virtual = __phys_to_virt(kernel_x_end);
+				map.length = end - kernel_x_end;
+				map.type = MT_MEMORY_RW;
+
+				create_mapping(&map);
+			}
+		}
+	}
+}
+
+#ifdef CONFIG_ARM_PV_FIXUP
+extern unsigned long __atags_pointer;
+typedef void pgtables_remap(long long offset, unsigned long pgd, void *bdata);
+pgtables_remap lpae_pgtables_remap_asm;
+
+/*
+ * early_paging_init() recreates boot time page table setup, allowing machines
+ * to switch over to a high (>4G) address space on LPAE systems
+ */
+static void __init early_paging_init(const struct machine_desc *mdesc)
+{
+	pgtables_remap *lpae_pgtables_remap;
+	unsigned long pa_pgd;
+	unsigned int cr, ttbcr;
+	long long offset;
+	void *boot_data;
+
+	if (!mdesc->pv_fixup)
+		return;
+
+	offset = mdesc->pv_fixup();
+	if (offset == 0)
+		return;
+
+	/*
+	 * Get the address of the remap function in the 1:1 identity
+	 * mapping setup by the early page table assembly code.  We
+	 * must get this prior to the pv update.  The following barrier
+	 * ensures that this is complete before we fixup any P:V offsets.
+	 */
+	lpae_pgtables_remap = (pgtables_remap *)(unsigned long)__pa(lpae_pgtables_remap_asm);
+	pa_pgd = __pa(swapper_pg_dir);
+	boot_data = __va(__atags_pointer);
+	barrier();
+
+	pr_info("Switching physical address space to 0x%08llx\n",
+		(u64)PHYS_OFFSET + offset);
+
+	/* Re-set the phys pfn offset, and the pv offset */
+	__pv_offset += offset;
+	__pv_phys_pfn_offset += PFN_DOWN(offset);
+
+	/* Run the patch stub to update the constants */
+	fixup_pv_table(&__pv_table_begin,
+		(&__pv_table_end - &__pv_table_begin) << 2);
+
+	/*
+	 * We are changing not only the virtual to physical mapping, but also
+	 * the physical addresses used to access memory.  We need to flush
+	 * all levels of cache in the system with caching disabled to
+	 * ensure that all data is written back, and nothing is prefetched
+	 * into the caches.  We also need to prevent the TLB walkers
+	 * allocating into the caches too.  Note that this is ARMv7 LPAE
+	 * specific.
+	 */
+	cr = get_cr();
+	set_cr(cr & ~(CR_I | CR_C));
+	asm("mrc p15, 0, %0, c2, c0, 2" : "=r" (ttbcr));
+	asm volatile("mcr p15, 0, %0, c2, c0, 2"
+		: : "r" (ttbcr & ~(3 << 8 | 3 << 10)));
+	flush_cache_all();
+
+	/*
+	 * Fixup the page tables - this must be in the idmap region as
+	 * we need to disable the MMU to do this safely, and hence it
+	 * needs to be assembly.  It's fairly simple, as we're using the
+	 * temporary tables setup by the initial assembly code.
+	 */
+	lpae_pgtables_remap(offset, pa_pgd, boot_data);
+
+	/* Re-enable the caches and cacheable TLB walks */
+	asm volatile("mcr p15, 0, %0, c2, c0, 2" : : "r" (ttbcr));
+	set_cr(cr);
+}
+
+#else
+
+static void __init early_paging_init(const struct machine_desc *mdesc)
+{
+	long long offset;
+
+	if (!mdesc->pv_fixup)
+		return;
+
+	offset = mdesc->pv_fixup();
+	if (offset == 0)
+		return;
+
+	pr_crit("Physical address space modification is only to support Keystone2.\n");
+	pr_crit("Please enable ARM_LPAE and ARM_PATCH_PHYS_VIRT support to use this\n");
+	pr_crit("feature. Your kernel may crash now, have a good day.\n");
+	add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
+}
+
+#endif
+
+static void __init early_fixmap_shutdown(void)
+{
+	int i;
+	unsigned long va = fix_to_virt(__end_of_permanent_fixed_addresses - 1);
+
+	pte_offset_fixmap = pte_offset_late_fixmap;
+	pmd_clear(fixmap_pmd(va));
+	local_flush_tlb_kernel_page(va);
+
+	for (i = 0; i < __end_of_permanent_fixed_addresses; i++) {
+		pte_t *pte;
+		struct map_desc map;
+
+		map.virtual = fix_to_virt(i);
+		pte = pte_offset_early_fixmap(pmd_off_k(map.virtual), map.virtual);
+
+		/* Only i/o device mappings are supported ATM */
+		if (pte_none(*pte) ||
+		    (pte_val(*pte) & L_PTE_MT_MASK) != L_PTE_MT_DEV_SHARED)
+			continue;
+
+		map.pfn = pte_pfn(*pte);
+		map.type = MT_DEVICE;
+		map.length = PAGE_SIZE;
+
+		create_mapping(&map);
+	}
+}
+
+/*
+ * paging_init() sets up the page tables, initialises the zone memory
+ * maps, and sets up the zero page, bad page and bad page tables.
+ */
+void __init paging_init(const struct machine_desc *mdesc)
+{
+	void *zero_page;
+
+	prepare_page_table();
+	map_lowmem();
+	memblock_set_current_limit(arm_lowmem_limit);
+	dma_contiguous_remap();
+	early_fixmap_shutdown();
+	devicemaps_init(mdesc);
+	kmap_init();
+	tcm_init();
+
+	top_pmd = pmd_off_k(0xffff0000);
+
+	/* allocate the zero page. */
+	zero_page = early_alloc(PAGE_SIZE);
+
+	bootmem_init();
+
+	empty_zero_page = virt_to_page(zero_page);
+	__flush_dcache_page(NULL, empty_zero_page);
+
+	/* Compute the virt/idmap offset, mostly for the sake of KVM */
+	kimage_voffset = (unsigned long)&kimage_voffset - virt_to_idmap(&kimage_voffset);
+}
+
+void __init early_mm_init(const struct machine_desc *mdesc)
+{
+	build_mem_type_table();
+	early_paging_init(mdesc);
+}
diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c
new file mode 100644
index 0000000..7d67c70
--- /dev/null
+++ b/arch/arm/mm/nommu.c
@@ -0,0 +1,252 @@
+/*
+ *  linux/arch/arm/mm/nommu.c
+ *
+ * ARM uCLinux supporting functions.
+ */
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/io.h>
+#include <linux/memblock.h>
+#include <linux/kernel.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cp15.h>
+#include <asm/sections.h>
+#include <asm/page.h>
+#include <asm/setup.h>
+#include <asm/traps.h>
+#include <asm/mach/arch.h>
+#include <asm/cputype.h>
+#include <asm/mpu.h>
+#include <asm/procinfo.h>
+
+#include "mm.h"
+
+unsigned long vectors_base;
+
+#ifdef CONFIG_ARM_MPU
+struct mpu_rgn_info mpu_rgn_info;
+#endif
+
+#ifdef CONFIG_CPU_CP15
+#ifdef CONFIG_CPU_HIGH_VECTOR
+unsigned long setup_vectors_base(void)
+{
+	unsigned long reg = get_cr();
+
+	set_cr(reg | CR_V);
+	return 0xffff0000;
+}
+#else /* CONFIG_CPU_HIGH_VECTOR */
+/* Write exception base address to VBAR */
+static inline void set_vbar(unsigned long val)
+{
+	asm("mcr p15, 0, %0, c12, c0, 0" : : "r" (val) : "cc");
+}
+
+/*
+ * Security extensions, bits[7:4], permitted values,
+ * 0b0000 - not implemented, 0b0001/0b0010 - implemented
+ */
+static inline bool security_extensions_enabled(void)
+{
+	/* Check CPUID Identification Scheme before ID_PFR1 read */
+	if ((read_cpuid_id() & 0x000f0000) == 0x000f0000)
+		return cpuid_feature_extract(CPUID_EXT_PFR1, 4) ||
+			cpuid_feature_extract(CPUID_EXT_PFR1, 20);
+	return 0;
+}
+
+unsigned long setup_vectors_base(void)
+{
+	unsigned long base = 0, reg = get_cr();
+
+	set_cr(reg & ~CR_V);
+	if (security_extensions_enabled()) {
+		if (IS_ENABLED(CONFIG_REMAP_VECTORS_TO_RAM))
+			base = CONFIG_DRAM_BASE;
+		set_vbar(base);
+	} else if (IS_ENABLED(CONFIG_REMAP_VECTORS_TO_RAM)) {
+		if (CONFIG_DRAM_BASE != 0)
+			pr_err("Security extensions not enabled, vectors cannot be remapped to RAM, vectors base will be 0x00000000\n");
+	}
+
+	return base;
+}
+#endif /* CONFIG_CPU_HIGH_VECTOR */
+#endif /* CONFIG_CPU_CP15 */
+
+void __init arm_mm_memblock_reserve(void)
+{
+#ifndef CONFIG_CPU_V7M
+	vectors_base = IS_ENABLED(CONFIG_CPU_CP15) ? setup_vectors_base() : 0;
+	/*
+	 * Reserve the exception vector page.  On some platforms the
+	 * vectors live at the start of DRAM; if that page were left to
+	 * the page allocator, alloc_page() could return address 0, which
+	 * callers would mistake for an allocation failure.
+	 */
+	memblock_reserve(vectors_base, 2 * PAGE_SIZE);
+#else /* ifndef CONFIG_CPU_V7M */
+	/*
+	 * There is no dedicated vector page on V7-M. So nothing needs to be
+	 * reserved here.
+	 */
+#endif
+	/*
+	 * In any case, always ensure address 0 is never used as many things
+	 * get very confused if 0 is returned as a legitimate address.
+	 */
+	memblock_reserve(0, 1);
+}
+
+static void __init adjust_lowmem_bounds_mpu(void)
+{
+	unsigned long pmsa = read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA;
+
+	switch (pmsa) {
+	case MMFR0_PMSAv7:
+		pmsav7_adjust_lowmem_bounds();
+		break;
+	case MMFR0_PMSAv8:
+		pmsav8_adjust_lowmem_bounds();
+		break;
+	default:
+		break;
+	}
+}
+
+static void __init mpu_setup(void)
+{
+	unsigned long pmsa = read_cpuid_ext(CPUID_EXT_MMFR0) & MMFR0_PMSA;
+
+	switch (pmsa) {
+	case MMFR0_PMSAv7:
+		pmsav7_setup();
+		break;
+	case MMFR0_PMSAv8:
+		pmsav8_setup();
+		break;
+	default:
+		break;
+	}
+}
+
+void __init adjust_lowmem_bounds(void)
+{
+	phys_addr_t end;
+	adjust_lowmem_bounds_mpu();
+	end = memblock_end_of_DRAM();
+	high_memory = __va(end - 1) + 1;
+	memblock_set_current_limit(end);
+}
+
+/*
+ * paging_init() sets up the page tables, initialises the zone memory
+ * maps, and sets up the zero page, bad page and bad page tables.
+ */
+void __init paging_init(const struct machine_desc *mdesc)
+{
+	early_trap_init((void *)vectors_base);
+	mpu_setup();
+	bootmem_init();
+}
+
+/*
+ * We don't need to do anything here for nommu machines.
+ */
+void setup_mm_for_reboot(void)
+{
+}
+
+void flush_dcache_page(struct page *page)
+{
+	__cpuc_flush_dcache_area(page_address(page), PAGE_SIZE);
+}
+EXPORT_SYMBOL(flush_dcache_page);
+
+void flush_kernel_dcache_page(struct page *page)
+{
+	__cpuc_flush_dcache_area(page_address(page), PAGE_SIZE);
+}
+EXPORT_SYMBOL(flush_kernel_dcache_page);
+
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+		       unsigned long uaddr, void *dst, const void *src,
+		       unsigned long len)
+{
+	memcpy(dst, src, len);
+	if (vma->vm_flags & VM_EXEC)
+		__cpuc_coherent_user_range(uaddr, uaddr + len);
+}
+
+void __iomem *__arm_ioremap_pfn(unsigned long pfn, unsigned long offset,
+				size_t size, unsigned int mtype)
+{
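+	/* 1:1 mapping: reject pfns whose physical address does not fit in 32 bits */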
+	if (pfn >= (0x100000000ULL >> PAGE_SHIFT))
+		return NULL;
+	return (void __iomem *) (offset + (pfn << PAGE_SHIFT));
+}
+EXPORT_SYMBOL(__arm_ioremap_pfn);
+
+void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size,
+				   unsigned int mtype, void *caller)
+{
+	return (void __iomem *)phys_addr;
+}
+
+void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *);
+
+void __iomem *ioremap(resource_size_t res_cookie, size_t size)
+{
+	return __arm_ioremap_caller(res_cookie, size, MT_DEVICE,
+				    __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap);
+
+void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size)
+	__alias(ioremap_cached);
+
+void __iomem *ioremap_cached(resource_size_t res_cookie, size_t size)
+{
+	return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED,
+				    __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_cache);
+EXPORT_SYMBOL(ioremap_cached);
+
+void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size)
+{
+	return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_WC,
+				    __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_wc);
+
+#ifdef CONFIG_PCI
+
+#include <asm/mach/map.h>
+
+void __iomem *pci_remap_cfgspace(resource_size_t res_cookie, size_t size)
+{
+	return arch_ioremap_caller(res_cookie, size, MT_UNCACHED,
+				   __builtin_return_address(0));
+}
+EXPORT_SYMBOL_GPL(pci_remap_cfgspace);
+#endif
+
+void *arch_memremap_wb(phys_addr_t phys_addr, size_t size)
+{
+	return (void *)phys_addr;
+}
+
+void __iounmap(volatile void __iomem *addr)
+{
+}
+EXPORT_SYMBOL(__iounmap);
+
+void (*arch_iounmap)(volatile void __iomem *);
+
+void iounmap(volatile void __iomem *addr)
+{
+}
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/arm/mm/pabort-legacy.S b/arch/arm/mm/pabort-legacy.S
new file mode 100644
index 0000000..b2ffce4
--- /dev/null
+++ b/arch/arm/mm/pabort-legacy.S
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * Function: legacy_pabort
+ *
+ * Params  : r2 = pt_regs
+ *	   : r4 = address of aborted instruction
+ *	   : r5 = psr for parent context
+ *
+ * Returns : r4 - r11, r13 preserved
+ *
+ * Purpose : obtain information about current prefetch abort.
+ */
+
+	.align	5
+ENTRY(legacy_pabort)
+	mov	r0, r4
+	mov	r1, #5
+	b	do_PrefetchAbort
+ENDPROC(legacy_pabort)
diff --git a/arch/arm/mm/pabort-v6.S b/arch/arm/mm/pabort-v6.S
new file mode 100644
index 0000000..8686265
--- /dev/null
+++ b/arch/arm/mm/pabort-v6.S
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * Function: v6_pabort
+ *
+ * Params  : r2 = pt_regs
+ *	   : r4 = address of aborted instruction
+ *	   : r5 = psr for parent context
+ *
+ * Returns : r4 - r11, r13 preserved
+ *
+ * Purpose : obtain information about current prefetch abort.
+ */
+
+	.align	5
+ENTRY(v6_pabort)
+	mov	r0, r4
+	mrc	p15, 0, r1, c5, c0, 1		@ get IFSR
+	b	do_PrefetchAbort
+ENDPROC(v6_pabort)
diff --git a/arch/arm/mm/pabort-v7.S b/arch/arm/mm/pabort-v7.S
new file mode 100644
index 0000000..9c70b1a
--- /dev/null
+++ b/arch/arm/mm/pabort-v7.S
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * Function: v7_pabort
+ *
+ * Params  : r2 = pt_regs
+ *	   : r4 = address of aborted instruction
+ *	   : r5 = psr for parent context
+ *
+ * Returns : r4 - r11, r13 preserved
+ *
+ * Purpose : obtain information about current prefetch abort.
+ */
+
+	.align	5
+ENTRY(v7_pabort)
+	mrc	p15, 0, r0, c6, c0, 2		@ get IFAR
+	mrc	p15, 0, r1, c5, c0, 1		@ get IFSR
+	b	do_PrefetchAbort
+ENDPROC(v7_pabort)
diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c
new file mode 100644
index 0000000..1403cb4
--- /dev/null
+++ b/arch/arm/mm/pageattr.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2014, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/mm.h>
+#include <linux/module.h>
+
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/set_memory.h>
+
+struct page_change_data {
+	pgprot_t set_mask;
+	pgprot_t clear_mask;
+};
+
+static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
+			void *data)
+{
+	struct page_change_data *cdata = data;
+	pte_t pte = *ptep;
+
+	pte = clear_pte_bit(pte, cdata->clear_mask);
+	pte = set_pte_bit(pte, cdata->set_mask);
+
+	set_pte_ext(ptep, pte, 0);
+	return 0;
+}
+
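+/*
+ * Note: testing "size <= range_end - start" instead of
+ * "start + size <= range_end" keeps the check overflow-safe even when
+ * start + size would wrap past the top of the address space.
+ */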
+static bool in_range(unsigned long start, unsigned long size,
+	unsigned long range_start, unsigned long range_end)
+{
+	return start >= range_start && start < range_end &&
+		size <= range_end - start;
+}
+
+static int change_memory_common(unsigned long addr, int numpages,
+				pgprot_t set_mask, pgprot_t clear_mask)
+{
+	unsigned long start = addr & PAGE_MASK;
+	unsigned long end = PAGE_ALIGN(addr) + numpages * PAGE_SIZE;
+	unsigned long size = end - start;
+	int ret;
+	struct page_change_data data;
+
+	WARN_ON_ONCE(start != addr);
+
+	if (!size)
+		return 0;
+
+	if (!in_range(start, size, MODULES_VADDR, MODULES_END) &&
+	    !in_range(start, size, VMALLOC_START, VMALLOC_END))
+		return -EINVAL;
+
+	data.set_mask = set_mask;
+	data.clear_mask = clear_mask;
+
+	ret = apply_to_page_range(&init_mm, start, size, change_page_range,
+					&data);
+
+	flush_tlb_kernel_range(start, end);
+	return ret;
+}
+
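+/*
+ * Illustrative usage (not part of the original file): callers such as
+ * the module loader drop write protection only around patching, e.g.:
+ *
+ *	set_memory_rw(addr, npages);
+ *	... patch kernel text ...
+ *	set_memory_ro(addr, npages);
+ */
+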
+int set_memory_ro(unsigned long addr, int numpages)
+{
+	return change_memory_common(addr, numpages,
+					__pgprot(L_PTE_RDONLY),
+					__pgprot(0));
+}
+
+int set_memory_rw(unsigned long addr, int numpages)
+{
+	return change_memory_common(addr, numpages,
+					__pgprot(0),
+					__pgprot(L_PTE_RDONLY));
+}
+
+int set_memory_nx(unsigned long addr, int numpages)
+{
+	return change_memory_common(addr, numpages,
+					__pgprot(L_PTE_XN),
+					__pgprot(0));
+}
+
+int set_memory_x(unsigned long addr, int numpages)
+{
+	return change_memory_common(addr, numpages,
+					__pgprot(0),
+					__pgprot(L_PTE_XN));
+}
diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c
new file mode 100644
index 0000000..a1606d9
--- /dev/null
+++ b/arch/arm/mm/pgd.c
@@ -0,0 +1,174 @@
+/*
+ *  linux/arch/arm/mm/pgd.c
+ *
+ *  Copyright (C) 1998-2005 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/mm.h>
+#include <linux/gfp.h>
+#include <linux/highmem.h>
+#include <linux/slab.h>
+
+#include <asm/cp15.h>
+#include <asm/pgalloc.h>
+#include <asm/page.h>
+#include <asm/tlbflush.h>
+
+#include "mm.h"
+
+#ifdef CONFIG_ARM_LPAE
+#define __pgd_alloc()	kmalloc_array(PTRS_PER_PGD, sizeof(pgd_t), GFP_KERNEL)
+#define __pgd_free(pgd)	kfree(pgd)
+#else
+#define __pgd_alloc()	(pgd_t *)__get_free_pages(GFP_KERNEL, 2)
+#define __pgd_free(pgd)	free_pages((unsigned long)pgd, 2)
+#endif
+
+/*
+ * Need to get a 16k block for the level 1 table on classic ARM (4096
+ * 32-bit entries); with LPAE the top level holds only PTRS_PER_PGD
+ * entries and a kmalloc suffices.
+ */
+pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *new_pgd, *init_pgd;
+	pud_t *new_pud, *init_pud;
+	pmd_t *new_pmd, *init_pmd;
+	pte_t *new_pte, *init_pte;
+
+	new_pgd = __pgd_alloc();
+	if (!new_pgd)
+		goto no_pgd;
+
+	memset(new_pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
+
+	/*
+	 * Copy over the kernel and IO PGD entries
+	 */
+	init_pgd = pgd_offset_k(0);
+	memcpy(new_pgd + USER_PTRS_PER_PGD, init_pgd + USER_PTRS_PER_PGD,
+		       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+
+	clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t));
+
+#ifdef CONFIG_ARM_LPAE
+	/*
+	 * Allocate PMD table for modules and pkmap mappings.
+	 */
+	new_pud = pud_alloc(mm, new_pgd + pgd_index(MODULES_VADDR),
+			    MODULES_VADDR);
+	if (!new_pud)
+		goto no_pud;
+
+	new_pmd = pmd_alloc(mm, new_pud, 0);
+	if (!new_pmd)
+		goto no_pmd;
+#endif
+
+	if (!vectors_high()) {
+		/*
+		 * On ARM, first page must always be allocated since it
+		 * contains the machine vectors. The vectors are always high
+		 * with LPAE.
+		 */
+		new_pud = pud_alloc(mm, new_pgd, 0);
+		if (!new_pud)
+			goto no_pud;
+
+		new_pmd = pmd_alloc(mm, new_pud, 0);
+		if (!new_pmd)
+			goto no_pmd;
+
+		new_pte = pte_alloc_map(mm, new_pmd, 0);
+		if (!new_pte)
+			goto no_pte;
+
+#ifndef CONFIG_ARM_LPAE
+		/*
+		 * Modify the PTE pointer to have the correct domain.  This
+		 * needs to be the vectors domain to avoid the low vectors
+		 * being unmapped.
+		 */
+		pmd_val(*new_pmd) &= ~PMD_DOMAIN_MASK;
+		pmd_val(*new_pmd) |= PMD_DOMAIN(DOMAIN_VECTORS);
+#endif
+
+		init_pud = pud_offset(init_pgd, 0);
+		init_pmd = pmd_offset(init_pud, 0);
+		init_pte = pte_offset_map(init_pmd, 0);
+		set_pte_ext(new_pte + 0, init_pte[0], 0);
+		set_pte_ext(new_pte + 1, init_pte[1], 0);
+		pte_unmap(init_pte);
+		pte_unmap(new_pte);
+	}
+
+	return new_pgd;
+
+no_pte:
+	pmd_free(mm, new_pmd);
+	mm_dec_nr_pmds(mm);
+no_pmd:
+	pud_free(mm, new_pud);
+no_pud:
+	__pgd_free(new_pgd);
+no_pgd:
+	return NULL;
+}
+
+void pgd_free(struct mm_struct *mm, pgd_t *pgd_base)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pgtable_t pte;
+
+	if (!pgd_base)
+		return;
+
+	pgd = pgd_base + pgd_index(0);
+	if (pgd_none_or_clear_bad(pgd))
+		goto no_pgd;
+
+	pud = pud_offset(pgd, 0);
+	if (pud_none_or_clear_bad(pud))
+		goto no_pud;
+
+	pmd = pmd_offset(pud, 0);
+	if (pmd_none_or_clear_bad(pmd))
+		goto no_pmd;
+
+	pte = pmd_pgtable(*pmd);
+	pmd_clear(pmd);
+	pte_free(mm, pte);
+	mm_dec_nr_ptes(mm);
+no_pmd:
+	pud_clear(pud);
+	pmd_free(mm, pmd);
+	mm_dec_nr_pmds(mm);
+no_pud:
+	pgd_clear(pgd);
+	pud_free(mm, pud);
+no_pgd:
+#ifdef CONFIG_ARM_LPAE
+	/*
+	 * Free modules/pkmap or identity pmd tables.
+	 */
+	for (pgd = pgd_base; pgd < pgd_base + PTRS_PER_PGD; pgd++) {
+		if (pgd_none_or_clear_bad(pgd))
+			continue;
+		if (pgd_val(*pgd) & L_PGD_SWAPPER)
+			continue;
+		pud = pud_offset(pgd, 0);
+		if (pud_none_or_clear_bad(pud))
+			continue;
+		pmd = pmd_offset(pud, 0);
+		pud_clear(pud);
+		pmd_free(mm, pmd);
+		mm_dec_nr_pmds(mm);
+		pgd_clear(pgd);
+		pud_free(mm, pud);
+	}
+#endif
+	__pgd_free(pgd_base);
+}
diff --git a/arch/arm/mm/physaddr.c b/arch/arm/mm/physaddr.c
new file mode 100644
index 0000000..cf75819
--- /dev/null
+++ b/arch/arm/mm/physaddr.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bug.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/mmdebug.h>
+#include <linux/mm.h>
+
+#include <asm/sections.h>
+#include <asm/memory.h>
+#include <asm/fixmap.h>
+#include <asm/dma.h>
+
+#include "mm.h"
+
+static inline bool __virt_addr_valid(unsigned long x)
+{
+	/*
+	 * high_memory does not get immediately defined, and there
+	 * are early callers of __pa() against PAGE_OFFSET
+	 */
+	if (!high_memory && x >= PAGE_OFFSET)
+		return true;
+
+	if (high_memory && x >= PAGE_OFFSET && x < (unsigned long)high_memory)
+		return true;
+
+	/*
+	 * MAX_DMA_ADDRESS is a virtual address that may not correspond to an
+	 * actual physical address. Enough code relies on __pa(MAX_DMA_ADDRESS)
+	 * that we just need to work around it and always return true.
+	 */
+	if (x == MAX_DMA_ADDRESS)
+		return true;
+
+	return false;
+}
+
+phys_addr_t __virt_to_phys(unsigned long x)
+{
+	WARN(!__virt_addr_valid(x),
+	     "virt_to_phys used for non-linear address: %pK (%pS)\n",
+	     (void *)x, (void *)x);
+
+	return __virt_to_phys_nodebug(x);
+}
+EXPORT_SYMBOL(__virt_to_phys);
+
+phys_addr_t __phys_addr_symbol(unsigned long x)
+{
+	/* This is bounds checking against the kernel image only.
+	 * __pa_symbol should only be used on kernel symbol addresses.
+	 */
+	VIRTUAL_BUG_ON(x < (unsigned long)KERNEL_START ||
+		       x > (unsigned long)KERNEL_END);
+
+	return __pa_symbol_nodebug(x);
+}
+EXPORT_SYMBOL(__phys_addr_symbol);
diff --git a/arch/arm/mm/pmsa-v7.c b/arch/arm/mm/pmsa-v7.c
new file mode 100644
index 0000000..699fa2e
--- /dev/null
+++ b/arch/arm/mm/pmsa-v7.c
@@ -0,0 +1,475 @@
+/*
+ * Based on linux/arch/arm/mm/nommu.c
+ *
+ * ARM PMSAv7 supporting functions.
+ */
+
+#include <linux/bitops.h>
+#include <linux/memblock.h>
+#include <linux/string.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cp15.h>
+#include <asm/cputype.h>
+#include <asm/mpu.h>
+#include <asm/sections.h>
+
+#include "mm.h"
+
+struct region {
+	phys_addr_t base;
+	phys_addr_t size;
+	unsigned long subreg;
+};
+
+static struct region __initdata mem[MPU_MAX_REGIONS];
+#ifdef CONFIG_XIP_KERNEL
+static struct region __initdata xip[MPU_MAX_REGIONS];
+#endif
+
+static unsigned int __initdata mpu_min_region_order;
+static unsigned int __initdata mpu_max_regions;
+
+static int __init __mpu_min_region_order(void);
+static int __init __mpu_max_regions(void);
+
+#ifndef CONFIG_CPU_V7M
+
+#define DRBAR	__ACCESS_CP15(c6, 0, c1, 0)
+#define IRBAR	__ACCESS_CP15(c6, 0, c1, 1)
+#define DRSR	__ACCESS_CP15(c6, 0, c1, 2)
+#define IRSR	__ACCESS_CP15(c6, 0, c1, 3)
+#define DRACR	__ACCESS_CP15(c6, 0, c1, 4)
+#define IRACR	__ACCESS_CP15(c6, 0, c1, 5)
+#define RNGNR	__ACCESS_CP15(c6, 0, c2, 0)
+
+/* Region number */
+static inline void rgnr_write(u32 v)
+{
+	write_sysreg(v, RNGNR);
+}
+
+/* Data-side / unified region attributes */
+
+/* Region access control register */
+static inline void dracr_write(u32 v)
+{
+	write_sysreg(v, DRACR);
+}
+
+/* Region size register */
+static inline void drsr_write(u32 v)
+{
+	write_sysreg(v, DRSR);
+}
+
+/* Region base address register */
+static inline void drbar_write(u32 v)
+{
+	write_sysreg(v, DRBAR);
+}
+
+static inline u32 drbar_read(void)
+{
+	return read_sysreg(DRBAR);
+}
+/* Optional instruction-side region attributes */
+
+/* I-side Region access control register */
+static inline void iracr_write(u32 v)
+{
+	write_sysreg(v, IRACR);
+}
+
+/* I-side Region size register */
+static inline void irsr_write(u32 v)
+{
+	write_sysreg(v, IRSR);
+}
+
+/* I-side Region base address register */
+static inline void irbar_write(u32 v)
+{
+	write_sysreg(v, IRBAR);
+}
+
+static inline u32 irbar_read(void)
+{
+	return read_sysreg(IRBAR);
+}
+
+#else
+
+static inline void rgnr_write(u32 v)
+{
+	writel_relaxed(v, BASEADDR_V7M_SCB + PMSAv7_RNR);
+}
+
+/* Data-side / unified region attributes */
+
+/* Region access control register */
+static inline void dracr_write(u32 v)
+{
+	u32 rsr = readl_relaxed(BASEADDR_V7M_SCB + PMSAv7_RASR) & GENMASK(15, 0);
+
+	writel_relaxed((v << 16) | rsr, BASEADDR_V7M_SCB + PMSAv7_RASR);
+}
+
+/* Region size register */
+static inline void drsr_write(u32 v)
+{
+	u32 racr = readl_relaxed(BASEADDR_V7M_SCB + PMSAv7_RASR) & GENMASK(31, 16);
+
+	writel_relaxed(v | racr, BASEADDR_V7M_SCB + PMSAv7_RASR);
+}
+
+/* Region base address register */
+static inline void drbar_write(u32 v)
+{
+	writel_relaxed(v, BASEADDR_V7M_SCB + PMSAv7_RBAR);
+}
+
+static inline u32 drbar_read(void)
+{
+	return readl_relaxed(BASEADDR_V7M_SCB + PMSAv7_RBAR);
+}
+
+/* ARMv7-M only supports a unified MPU, so I-side operations are no-ops */
+
+static inline void iracr_write(u32 v) {}
+static inline void irsr_write(u32 v) {}
+static inline void irbar_write(u32 v) {}
+static inline unsigned long irbar_read(void) { return 0; }
+
+#endif
+
+static bool __init try_split_region(phys_addr_t base, phys_addr_t size, struct region *region)
+{
+	unsigned long  subreg, bslots, sslots;
+	phys_addr_t abase = base & ~(size - 1);
+	phys_addr_t asize = base + size - abase;
+	phys_addr_t p2size = 1 << __fls(asize);
+	phys_addr_t bdiff, sdiff;
+
+	if (p2size != asize)
+		p2size *= 2;
+
+	bdiff = base - abase;
+	sdiff = p2size - asize;
+	subreg = p2size / PMSAv7_NR_SUBREGS;
+
+	if ((bdiff % subreg) || (sdiff % subreg))
+		return false;
+
+	bslots = bdiff / subreg;
+	sslots = sdiff / subreg;
+
+	if (bslots || sslots) {
+		int i;
+
+		if (subreg < PMSAv7_MIN_SUBREG_SIZE)
+			return false;
+
+		if (bslots + sslots > PMSAv7_NR_SUBREGS)
+			return false;
+
+		for (i = 0; i < bslots; i++)
+			_set_bit(i, &region->subreg);
+
+		for (i = 1; i <= sslots; i++)
+			_set_bit(PMSAv7_NR_SUBREGS - i, &region->subreg);
+	}
+
+	region->base = abase;
+	region->size = p2size;
+
+	return true;
+}
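+
+/*
+ * Worked example (illustrative): a 96MB bank at 0x20000000 is not a
+ * power-of-two size, so it is rounded up to a 128MB region based at
+ * 0x20000000 with the top two 16MB subregions (128MB / 8) disabled,
+ * leaving exactly the requested 96MB accessible.
+ */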
+
+static int __init allocate_region(phys_addr_t base, phys_addr_t size,
+				  unsigned int limit, struct region *regions)
+{
+	int count = 0;
+	phys_addr_t diff = size;
+	int attempts = MPU_MAX_REGIONS;
+
+	while (diff) {
+		/* Try to cover the region as is (maybe with the help of subregions) */
+		if (try_split_region(base, size, &regions[count])) {
+			count++;
+			base += size;
+			diff -= size;
+			size = diff;
+		} else {
+			/*
+			 * Maximum aligned region might overflow phys_addr_t
+			 * if "base" is 0. Hence we keep everything below 4G
+			 * until we take the smaller of the aligned region
+			 * size ("asize") and rounded region size ("p2size"),
+			 * one of which is guaranteed to be smaller than the
+			 * maximum physical address.
+			 */
+			phys_addr_t asize = (base - 1) ^ base;
+			phys_addr_t p2size = (1 <<  __fls(diff)) - 1;
+
+			size = asize < p2size ? asize + 1 : p2size + 1;
+		}
+
+		if (count > limit)
+			break;
+
+		if (!attempts)
+			break;
+
+		attempts--;
+	}
+
+	return count;
+}
+
+/* MPU initialisation functions */
+void __init pmsav7_adjust_lowmem_bounds(void)
+{
+	phys_addr_t  specified_mem_size = 0, total_mem_size = 0;
+	struct memblock_region *reg;
+	bool first = true;
+	phys_addr_t mem_start;
+	phys_addr_t mem_end;
+	unsigned int mem_max_regions;
+	int num, i;
+
+	/* Determine the minimum region order (this uses, then frees, PMSAv7_PROBE_REGION) */
+	mpu_min_region_order = __mpu_min_region_order();
+
+	/* How many regions are supported */
+	mpu_max_regions = __mpu_max_regions();
+
+	mem_max_regions = min((unsigned int)MPU_MAX_REGIONS, mpu_max_regions);
+
+	/* We need to keep one slot for background region */
+	mem_max_regions--;
+
+#ifndef CONFIG_CPU_V7M
+	/* ... and one for vectors */
+	mem_max_regions--;
+#endif
+
+#ifdef CONFIG_XIP_KERNEL
+	/* plus some regions to cover XIP ROM */
+	num = allocate_region(CONFIG_XIP_PHYS_ADDR, __pa(_exiprom) - CONFIG_XIP_PHYS_ADDR,
+			      mem_max_regions, xip);
+
+	mem_max_regions -= num;
+#endif
+
+	for_each_memblock(memory, reg) {
+		if (first) {
+			phys_addr_t phys_offset = PHYS_OFFSET;
+
+			/*
+			 * Initially only use memory contiguous from
+			 * PHYS_OFFSET
+			 */
+			if (reg->base != phys_offset)
+				panic("First memory bank must be contiguous from PHYS_OFFSET");
+
+			mem_start = reg->base;
+			mem_end = reg->base + reg->size;
+			specified_mem_size = reg->size;
+			first = false;
+		} else {
+			/*
+			 * memblock auto merges contiguous blocks, remove
+			 * all blocks afterwards in one go (we can't remove
+			 * blocks separately while iterating)
+			 */
+			pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n",
+				  &mem_end, &reg->base);
+			memblock_remove(reg->base, 0 - reg->base);
+			break;
+		}
+	}
+
+	memset(mem, 0, sizeof(mem));
+	num = allocate_region(mem_start, specified_mem_size, mem_max_regions, mem);
+
+	for (i = 0; i < num; i++) {
+		unsigned long  subreg = mem[i].size / PMSAv7_NR_SUBREGS;
+
+		total_mem_size += mem[i].size - subreg * hweight_long(mem[i].subreg);
+
+		pr_debug("MPU: base %pa size %pa disable subregions: %*pbl\n",
+			 &mem[i].base, &mem[i].size, PMSAv7_NR_SUBREGS, &mem[i].subreg);
+	}
+
+	if (total_mem_size != specified_mem_size) {
+		pr_warn("Truncating memory from %pa to %pa (MPU region constraints)\n",
+				&specified_mem_size, &total_mem_size);
+		memblock_remove(mem_start + total_mem_size,
+				specified_mem_size - total_mem_size);
+	}
+}
+
+static int __init __mpu_max_regions(void)
+{
+	/*
+	 * We don't support a different number of I/D side regions so if we
+	 * have separate instruction and data memory maps then return
+	 * whichever side has a smaller number of supported regions.
+	 */
+	u32 dregions, iregions, mpuir;
+
+	mpuir = read_cpuid_mputype();
+
+	dregions = iregions = (mpuir & MPUIR_DREGION_SZMASK) >> MPUIR_DREGION;
+
+	/* Check for separate d-side and i-side memory maps */
+	if (mpuir & MPUIR_nU)
+		iregions = (mpuir & MPUIR_IREGION_SZMASK) >> MPUIR_IREGION;
+
+	/* Use the smallest of the two maxima */
+	return min(dregions, iregions);
+}
+
+static int __init mpu_iside_independent(void)
+{
+	/* MPUIR.nU specifies whether there is *not* a unified memory map */
+	return read_cpuid_mputype() & MPUIR_nU;
+}
+
+static int __init __mpu_min_region_order(void)
+{
+	u32 drbar_result, irbar_result;
+
+	/* We've kept a region free for this probing */
+	rgnr_write(PMSAv7_PROBE_REGION);
+	isb();
+	/*
+	 * As per ARM ARM, write 0xFFFFFFFC to DRBAR to find the minimum
+	 * region order
+	 */
+	drbar_write(0xFFFFFFFC);
+	drbar_result = irbar_result = drbar_read();
+	drbar_write(0x0);
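+	/*
+	 * e.g. if the probe write reads back as 0xFFFFFFE0 (illustrative),
+	 * bit 5 is the lowest writable bit and the minimum region size is
+	 * 2^5 = 32 bytes.
+	 */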
+	/* If the MPU is non-unified, we use the larger of the two minima */
+	if (mpu_iside_independent()) {
+		irbar_write(0xFFFFFFFC);
+		irbar_result = irbar_read();
+		irbar_write(0x0);
+	}
+	isb(); /* Ensure that MPU region operations have completed */
+	/* Return whichever result is larger */
+
+	return __ffs(max(drbar_result, irbar_result));
+}
+
+static int __init mpu_setup_region(unsigned int number, phys_addr_t start,
+				   unsigned int size_order, unsigned int properties,
+				   unsigned int subregions, bool need_flush)
+{
+	u32 size_data;
+
+	/* We kept a region free for probing the resolution of MPU regions */
+	if (number > mpu_max_regions
+	    || number >= MPU_MAX_REGIONS)
+		return -ENOENT;
+
+	if (size_order > 32)
+		return -ENOMEM;
+
+	if (size_order < mpu_min_region_order)
+		return -ENOMEM;
+
+	/* Writing N to bits 5:1 (RSR_SZ) specifies a region size of 2^(N+1) */
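+	/* e.g. a 1MB region has size_order == 20, so 19 goes into the SZ field */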
+	size_data = ((size_order - 1) << PMSAv7_RSR_SZ) | 1 << PMSAv7_RSR_EN;
+	size_data |= subregions << PMSAv7_RSR_SD;
+
+	if (need_flush)
+		flush_cache_all();
+
+	dsb(); /* Ensure all previous data accesses occur with old mappings */
+	rgnr_write(number);
+	isb();
+	drbar_write(start);
+	dracr_write(properties);
+	isb(); /* Propagate properties before enabling region */
+	drsr_write(size_data);
+
+	/* Check for independent I-side registers */
+	if (mpu_iside_independent()) {
+		irbar_write(start);
+		iracr_write(properties);
+		isb();
+		irsr_write(size_data);
+	}
+	isb();
+
+	/* Store region info (we treat i/d side the same, so only store d) */
+	mpu_rgn_info.rgns[number].dracr = properties;
+	mpu_rgn_info.rgns[number].drbar = start;
+	mpu_rgn_info.rgns[number].drsr = size_data;
+
+	mpu_rgn_info.used++;
+
+	return 0;
+}
+
+/*
+ * Set up default MPU regions, doing nothing if there is no MPU
+ */
+void __init pmsav7_setup(void)
+{
+	int i, region = 0, err = 0;
+
+	/* Setup MPU (order is important) */
+
+	/* Background */
+	err |= mpu_setup_region(region++, 0, 32,
+				PMSAv7_ACR_XN | PMSAv7_RGN_STRONGLY_ORDERED | PMSAv7_AP_PL1RW_PL0RW,
+				0, false);
+
+#ifdef CONFIG_XIP_KERNEL
+	/* ROM */
+	for (i = 0; i < ARRAY_SIZE(xip); i++) {
+		/*
+		 * If we overwrite the RAM region set up earlier in
+		 * head-nommu.S (which is cacheable), all subsequent data
+		 * accesses until we set up RAM below would go through the
+		 * background region (which is uncacheable), so we need to
+		 * clean and invalidate the cache first.
+		 */
+		bool need_flush = region == PMSAv7_RAM_REGION;
+
+		if (!xip[i].size)
+			continue;
+
+		err |= mpu_setup_region(region++, xip[i].base, ilog2(xip[i].size),
+					PMSAv7_AP_PL1RO_PL0NA | PMSAv7_RGN_NORMAL,
+					xip[i].subreg, need_flush);
+	}
+#endif
+
+	/* RAM */
+	for (i = 0; i < ARRAY_SIZE(mem); i++) {
+		if (!mem[i].size)
+			continue;
+
+		err |= mpu_setup_region(region++, mem[i].base, ilog2(mem[i].size),
+					PMSAv7_AP_PL1RW_PL0RW | PMSAv7_RGN_NORMAL,
+					mem[i].subreg, false);
+	}
+
+	/* Vectors */
+#ifndef CONFIG_CPU_V7M
+	err |= mpu_setup_region(region++, vectors_base, ilog2(2 * PAGE_SIZE),
+				PMSAv7_AP_PL1RW_PL0NA | PMSAv7_RGN_NORMAL,
+				0, false);
+#endif
+	if (err) {
+		panic("MPU region initialization failure! %d", err);
+	} else {
+		pr_info("Using ARMv7 PMSA Compliant MPU. "
+			 "Region independence: %s, Used %d of %d regions\n",
+			mpu_iside_independent() ? "Yes" : "No",
+			mpu_rgn_info.used, mpu_max_regions);
+	}
+}
diff --git a/arch/arm/mm/pmsa-v8.c b/arch/arm/mm/pmsa-v8.c
new file mode 100644
index 0000000..617a83d
--- /dev/null
+++ b/arch/arm/mm/pmsa-v8.c
@@ -0,0 +1,307 @@
+/*
+ * Based on linux/arch/arm/pmsa-v7.c
+ *
+ * ARM PMSAv8 supporting functions.
+ */
+
+#include <linux/memblock.h>
+#include <linux/range.h>
+
+#include <asm/cp15.h>
+#include <asm/cputype.h>
+#include <asm/mpu.h>
+
+#include <asm/memory.h>
+#include <asm/sections.h>
+
+#include "mm.h"
+
+#ifndef CONFIG_CPU_V7M
+
+#define PRSEL	__ACCESS_CP15(c6, 0, c2, 1)
+#define PRBAR	__ACCESS_CP15(c6, 0, c3, 0)
+#define PRLAR	__ACCESS_CP15(c6, 0, c3, 1)
+
+static inline u32 prlar_read(void)
+{
+	return read_sysreg(PRLAR);
+}
+
+static inline u32 prbar_read(void)
+{
+	return read_sysreg(PRBAR);
+}
+
+static inline void prsel_write(u32 v)
+{
+	write_sysreg(v, PRSEL);
+}
+
+static inline void prbar_write(u32 v)
+{
+	write_sysreg(v, PRBAR);
+}
+
+static inline void prlar_write(u32 v)
+{
+	write_sysreg(v, PRLAR);
+}
+#else
+
+static inline u32 prlar_read(void)
+{
+	return readl_relaxed(BASEADDR_V7M_SCB + PMSAv8_RLAR);
+}
+
+static inline u32 prbar_read(void)
+{
+	return readl_relaxed(BASEADDR_V7M_SCB + PMSAv8_RBAR);
+}
+
+static inline void prsel_write(u32 v)
+{
+	writel_relaxed(v, BASEADDR_V7M_SCB + PMSAv8_RNR);
+}
+
+static inline void prbar_write(u32 v)
+{
+	writel_relaxed(v, BASEADDR_V7M_SCB + PMSAv8_RBAR);
+}
+
+static inline void prlar_write(u32 v)
+{
+	writel_relaxed(v, BASEADDR_V7M_SCB + PMSAv8_RLAR);
+}
+
+#endif
+
+static struct range __initdata io[MPU_MAX_REGIONS];
+static struct range __initdata mem[MPU_MAX_REGIONS];
+
+static unsigned int __initdata mpu_max_regions;
+
+static __init bool is_region_fixed(int number)
+{
+	switch (number) {
+	case PMSAv8_XIP_REGION:
+	case PMSAv8_KERNEL_REGION:
+		return true;
+	default:
+		return false;
+	}
+}
+
+void __init pmsav8_adjust_lowmem_bounds(void)
+{
+	phys_addr_t mem_end;
+	struct memblock_region *reg;
+	bool first = true;
+
+	for_each_memblock(memory, reg) {
+		if (first) {
+			phys_addr_t phys_offset = PHYS_OFFSET;
+
+			/*
+			 * Initially only use memory continuous from
+			 * PHYS_OFFSET */
+			if (reg->base != phys_offset)
+				panic("First memory bank must be contiguous from PHYS_OFFSET");
+			mem_end = reg->base + reg->size;
+			first = false;
+		} else {
+			/*
+			 * memblock auto merges contiguous blocks, remove
+			 * all blocks afterwards in one go (we can't remove
+			 * blocks separately while iterating)
+			 */
+			pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n",
+				  &mem_end, &reg->base);
+			memblock_remove(reg->base, 0 - reg->base);
+			break;
+		}
+	}
+}
+
+static int __init __mpu_max_regions(void)
+{
+	static int max_regions;
+	u32 mpuir;
+
+	if (max_regions)
+		return max_regions;
+
+	mpuir = read_cpuid_mputype();
+
+	max_regions  = (mpuir & MPUIR_DREGION_SZMASK) >> MPUIR_DREGION;
+
+	return max_regions;
+}
+
+static int __init __pmsav8_setup_region(unsigned int number, u32 bar, u32 lar)
+{
+	if (number > mpu_max_regions
+	    || number >= MPU_MAX_REGIONS)
+		return -ENOENT;
+
+	dsb();
+	prsel_write(number);
+	isb();
+	prbar_write(bar);
+	prlar_write(lar);
+
+	mpu_rgn_info.rgns[number].prbar = bar;
+	mpu_rgn_info.rgns[number].prlar = lar;
+
+	mpu_rgn_info.used++;
+
+	return 0;
+}
+
+static int __init pmsav8_setup_ram(unsigned int number, phys_addr_t start, phys_addr_t end)
+{
+	u32 bar, lar;
+
+	if (is_region_fixed(number))
+		return -EINVAL;
+
+	bar = start;
+	lar = (end - 1) & ~(PMSAv8_MINALIGN - 1);
+
+	bar |= PMSAv8_AP_PL1RW_PL0RW | PMSAv8_RGN_SHARED;
+	lar |= PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN;
+
+	return __pmsav8_setup_region(number, bar, lar);
+}
+
+static int __init pmsav8_setup_io(unsigned int number, phys_addr_t start, phys_addr_t end)
+{
+	u32 bar, lar;
+
+	if (is_region_fixed(number))
+		return -EINVAL;
+
+	bar = start;
+	lar = (end - 1) & ~(PMSAv8_MINALIGN - 1);
+
+	bar |= PMSAv8_AP_PL1RW_PL0RW | PMSAv8_RGN_SHARED | PMSAv8_BAR_XN;
+	lar |= PMSAv8_LAR_IDX(PMSAv8_RGN_DEVICE_nGnRnE) | PMSAv8_LAR_EN;
+
+	return __pmsav8_setup_region(number, bar, lar);
+}
+
+static int __init pmsav8_setup_fixed(unsigned int number, phys_addr_t start, phys_addr_t end)
+{
+	u32 bar, lar;
+
+	if (!is_region_fixed(number))
+		return -EINVAL;
+
+	bar = start;
+	lar = (end - 1) & ~(PMSAv8_MINALIGN - 1);
+
+	bar |= PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED;
+	lar |= PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN;
+
+	prsel_write(number);
+	isb();
+
+	if (prbar_read() != bar || prlar_read() != lar)
+		return -EINVAL;
+
+	/* Reserved region was set up early, we just need a record for secondaries */
+	mpu_rgn_info.rgns[number].prbar = bar;
+	mpu_rgn_info.rgns[number].prlar = lar;
+
+	mpu_rgn_info.used++;
+
+	return 0;
+}
+
+#ifndef CONFIG_CPU_V7M
+static int __init pmsav8_setup_vector(unsigned int number, phys_addr_t start, phys_addr_t end)
+{
+	u32 bar, lar;
+
+	if (number == PMSAv8_KERNEL_REGION)
+		return -EINVAL;
+
+	bar = start;
+	lar = (end - 1) & ~(PMSAv8_MINALIGN - 1);
+
+	bar |= PMSAv8_AP_PL1RW_PL0NA | PMSAv8_RGN_SHARED;
+	lar |= PMSAv8_LAR_IDX(PMSAv8_RGN_NORMAL) | PMSAv8_LAR_EN;
+
+	return __pmsav8_setup_region(number, bar, lar);
+}
+#endif
+
+void __init pmsav8_setup(void)
+{
+	int i, err = 0;
+	int region = PMSAv8_KERNEL_REGION;
+
+	/* How many regions are supported ? */
+	mpu_max_regions = __mpu_max_regions();
+
+	/* RAM: single chunk of memory */
+	add_range(mem,  ARRAY_SIZE(mem), 0,  memblock.memory.regions[0].base,
+		  memblock.memory.regions[0].base + memblock.memory.regions[0].size);
+
+	/* IO: cover full 4G range */
+	add_range(io, ARRAY_SIZE(io), 0, 0, 0xffffffff);
+
+	/* RAM and IO: exclude kernel */
+	subtract_range(mem, ARRAY_SIZE(mem), __pa(KERNEL_START), __pa(KERNEL_END));
+	subtract_range(io, ARRAY_SIZE(io),  __pa(KERNEL_START), __pa(KERNEL_END));
+
+#ifdef CONFIG_XIP_KERNEL
+	/* RAM and IO: exclude xip */
+	subtract_range(mem, ARRAY_SIZE(mem), CONFIG_XIP_PHYS_ADDR, __pa(_exiprom));
+	subtract_range(io, ARRAY_SIZE(io), CONFIG_XIP_PHYS_ADDR, __pa(_exiprom));
+#endif
+
+#ifndef CONFIG_CPU_V7M
+	/* RAM and IO: exclude vectors */
+	subtract_range(mem, ARRAY_SIZE(mem),  vectors_base, vectors_base + 2 * PAGE_SIZE);
+	subtract_range(io, ARRAY_SIZE(io),  vectors_base, vectors_base + 2 * PAGE_SIZE);
+#endif
+	/* IO: exclude RAM */
+	for (i = 0; i < ARRAY_SIZE(mem); i++)
+		subtract_range(io, ARRAY_SIZE(io), mem[i].start, mem[i].end);
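+
+	/*
+	 * Illustrative result: with a single RAM bank containing the
+	 * kernel, "mem" now holds the RAM on either side of the kernel
+	 * image and "io" holds everything else below 4GB.
+	 */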
+
+	/* Now program MPU */
+
+#ifdef CONFIG_XIP_KERNEL
+	/* ROM */
+	err |= pmsav8_setup_fixed(PMSAv8_XIP_REGION, CONFIG_XIP_PHYS_ADDR, __pa(_exiprom));
+#endif
+	/* Kernel */
+	err |= pmsav8_setup_fixed(region++, __pa(KERNEL_START), __pa(KERNEL_END));
+
+	/* IO */
+	for (i = 0; i < ARRAY_SIZE(io); i++) {
+		if (!io[i].end)
+			continue;
+
+		err |= pmsav8_setup_io(region++, io[i].start, io[i].end);
+	}
+
+	/* RAM */
+	for (i = 0; i < ARRAY_SIZE(mem); i++) {
+		if (!mem[i].end)
+			continue;
+
+		err |= pmsav8_setup_ram(region++, mem[i].start, mem[i].end);
+	}
+
+	/* Vectors */
+#ifndef CONFIG_CPU_V7M
+	err |= pmsav8_setup_vector(region++, vectors_base, vectors_base + 2 * PAGE_SIZE);
+#endif
+	if (err)
+		pr_warn("MPU region initialization failure! %d\n", err);
+	else
+		pr_info("Using ARM PMSAv8 Compliant MPU. Used %d of %d regions\n",
+			mpu_rgn_info.used, mpu_max_regions);
+}
diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S
new file mode 100644
index 0000000..774ef13
--- /dev/null
+++ b/arch/arm/mm/proc-arm1020.S
@@ -0,0 +1,529 @@
+/*
+ *  linux/arch/arm/mm/proc-arm1020.S: MMU functions for ARM1020
+ *
+ *  Copyright (C) 2000 ARM Limited
+ *  Copyright (C) 2000 Deep Blue Solutions Ltd.
+ *  hacked for non-paged-MM by Hyok S. Choi, 2003.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ *
+ * These are the low level assembler for performing cache and TLB
+ * functions on the arm1020.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/ptrace.h>
+
+#include "proc-macros.S"
+
+/*
+ * This is the maximum size of an area which will be invalidated
+ * using the single invalidate entry instructions.  Anything larger
+ * than this, and we go for the whole cache.
+ *
+ * This value should be chosen such that we choose the cheapest
+ * alternative.
+ */
+#define MAX_AREA_SIZE	32768
+
+/*
+ * The size of one data cache line.
+ */
+#define CACHE_DLINESIZE	32
+
+/*
+ * The number of data cache segments.
+ */
+#define CACHE_DSEGMENTS	16
+
+/*
+ * The number of lines in a cache segment.
+ */
+#define CACHE_DENTRIES	64
+
+/*
+ * This is the size at which it becomes more efficient to
+ * clean the whole cache, rather than using the individual
+ * cache line maintenance instructions.
+ */
+#define CACHE_DLIMIT	32768
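+
+/*
+ * (16 segments x 64 lines x 32 bytes per line = 32768 bytes, i.e. the
+ * whole data cache, which is why CACHE_DLIMIT matches the cache size.)
+ */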
+
+	.text
+/*
+ * cpu_arm1020_proc_init()
+ */
+ENTRY(cpu_arm1020_proc_init)
+	ret	lr
+
+/*
+ * cpu_arm1020_proc_fin()
+ */
+ENTRY(cpu_arm1020_proc_fin)
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1000 		@ ...i............
+	bic	r0, r0, #0x000e 		@ ............wca.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ret	lr
+
+/*
+ * cpu_arm1020_reset(loc)
+ *
+ * Perform a soft reset of the system.	Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ */
+	.align	5
+	.pushsection	.idmap.text, "ax"
+ENTRY(cpu_arm1020_reset)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I,D caches
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+#ifdef CONFIG_MMU
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+#endif
+	mrc	p15, 0, ip, c1, c0, 0		@ ctrl register
+	bic	ip, ip, #0x000f 		@ ............wcam
+	bic	ip, ip, #0x1100 		@ ...i...s........
+	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
+	ret	r0
+ENDPROC(cpu_arm1020_reset)
+	.popsection
+
+/*
+ * cpu_arm1020_do_idle()
+ */
+	.align	5
+ENTRY(cpu_arm1020_do_idle)
+	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
+	ret	lr
+
+/* ================================= CACHE ================================ */
+
+	.align	5
+
+/*
+ *	flush_icache_all()
+ *
+ *	Unconditionally clean and invalidate the entire icache.
+ */
+ENTRY(arm1020_flush_icache_all)
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+#endif
+	ret	lr
+ENDPROC(arm1020_flush_icache_all)
+
+/*
+ *	flush_user_cache_all()
+ *
+ *	Invalidate all cache entries in a particular address
+ *	space.
+ */
+ENTRY(arm1020_flush_user_cache_all)
+	/* FALLTHROUGH */
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(arm1020_flush_kern_cache_all)
+	mov	r2, #VM_EXEC
+	mov	ip, #0
+__flush_whole_cache:
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mov	r1, #(CACHE_DSEGMENTS - 1) << 5	@ 16 segments
+1:	orr	r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
+2:	mcr	p15, 0, r3, c7, c14, 2		@ clean+invalidate D index
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	subs	r3, r3, #1 << 26
+	bcs	2b				@ entries 63 to 0
+	subs	r1, r1, #1 << 5
+	bcs	1b				@ segments 15 to 0
+#endif
+	tst	r2, #VM_EXEC
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+#endif
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	flush_user_cache_range(start, end, flags)
+ *
+ *	Invalidate a range of cache entries in the specified
+ *	address space.
+ *
+ *	- start	- start address (inclusive)
+ *	- end	- end address (exclusive)
+ *	- flags	- vm_flags for this space
+ */
+ENTRY(arm1020_flush_user_cache_range)
+	mov	ip, #0
+	sub	r3, r1, r0			@ calculate total size
+	cmp	r3, #CACHE_DLIMIT
+	bhs	__flush_whole_cache
+
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mcr	p15, 0, ip, c7, c10, 4
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	tst	r2, #VM_EXEC
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+#endif
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm1020_coherent_kern_range)
+	/* FALLTHROUGH */
+
+/*
+ *	coherent_user_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm1020_coherent_user_range)
+	mov	ip, #0
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+	mcr	p15, 0, ip, c7, c10, 4
+1:
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+#endif
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mcr	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+#endif
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mov	r0, #0
+	ret	lr
+
+/*
+ *	flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure no D cache aliasing occurs, either with itself or
+ *	the I cache
+ *
+ *	- addr	- kernel address
+ *	- size	- region size
+ */
+ENTRY(arm1020_flush_kern_dcache_area)
+	mov	ip, #0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	add	r1, r0, r1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_inv_range(start, end)
+ *
+ *	Invalidate (discard) the specified virtual address range.
+ *	May not write back any entries.  If 'start' or 'end'
+ *	are not cache line aligned, those lines must be written
+ *	back.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+arm1020_dma_inv_range:
+	mov	ip, #0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	tst	r0, #CACHE_DLINESIZE - 1
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, ip, c7, c10, 4
+	mcrne	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	tst	r1, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, ip, c7, c10, 4
+	mcrne	p15, 0, r1, c7, c10, 1		@ clean D entry
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_clean_range(start, end)
+ *
+ *	Clean the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+arm1020_dma_clean_range:
+	mov	ip, #0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_flush_range(start, end)
+ *
+ *	Clean and invalidate the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm1020_dma_flush_range)
+	mov	ip, #0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+	mcr	p15, 0, ip, c7, c10, 4
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(arm1020_dma_map_area)
+	add	r1, r1, r0
+	cmp	r2, #DMA_TO_DEVICE
+	beq	arm1020_dma_clean_range
+	bcs	arm1020_dma_inv_range
+	b	arm1020_dma_flush_range
+ENDPROC(arm1020_dma_map_area)
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(arm1020_dma_unmap_area)
+	ret	lr
+ENDPROC(arm1020_dma_unmap_area)
+
+	.globl	arm1020_flush_kern_cache_louis
+	.equ	arm1020_flush_kern_cache_louis, arm1020_flush_kern_cache_all
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions arm1020
+
+	.align	5
+ENTRY(cpu_arm1020_dcache_clean_area)
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mov	ip, #0
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	add	r0, r0, #CACHE_DLINESIZE
+	subs	r1, r1, #CACHE_DLINESIZE
+	bhi	1b
+#endif
+	ret	lr
+
+/* =============================== PageTable ============================== */
+
+/*
+ * cpu_arm1020_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+	.align	5
+ENTRY(cpu_arm1020_switch_mm)
+#ifdef CONFIG_MMU
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mcr	p15, 0, r3, c7, c10, 4
+	mov	r1, #0xF			@ 16 segments
+1:	mov	r3, #0x3F			@ 64 entries
+2:	mov	ip, r3, LSL #26 		@ shift up entry
+	orr	ip, ip, r1, LSL #5		@ shift in/up index
+	mcr	p15, 0, ip, c7, c14, 2		@ Clean & Inval DCache entry
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c10, 4
+	subs	r3, r3, #1
+	cmp	r3, #0
+	bge	2b				@ entries 3F to 0
+	subs	r1, r1, #1
+	cmp	r1, #0
+	bge	1b				@ segments 15 to 0
+
+#endif
+	mov	r1, #0
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mcr	p15, 0, r1, c7, c5, 0		@ invalidate I cache
+#endif
+	mcr	p15, 0, r1, c7, c10, 4		@ drain WB
+	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
+	mcr	p15, 0, r1, c8, c7, 0		@ invalidate I & D TLBs
+#endif /* CONFIG_MMU */
+	ret	lr
+        
+/*
+ * cpu_arm1020_set_pte(ptep, pte)
+ *
+ * Set a PTE and flush it out
+ */
+	.align	5
+ENTRY(cpu_arm1020_set_pte_ext)
+#ifdef CONFIG_MMU
+	armv3_set_pte_ext
+	mov	r0, r0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mcr	p15, 0, r0, c7, c10, 4
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+#endif
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+#endif /* CONFIG_MMU */
+	ret	lr
+
+	.type	__arm1020_setup, #function
+__arm1020_setup:
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c7		@ invalidate I,D caches on v4
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer on v4
+#ifdef CONFIG_MMU
+	mcr	p15, 0, r0, c8, c7		@ invalidate I,D TLBs on v4
+#endif
+
+	adr	r5, arm1020_crval
+	ldmia	r5, {r5, r6}
+	mrc	p15, 0, r0, c1, c0		@ get control register v4
+	bic	r0, r0, r5
+	orr	r0, r0, r6
+#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
+	orr	r0, r0, #0x4000 		@ .R.. .... .... ....
+#endif
+	ret	lr
+	.size	__arm1020_setup, . - __arm1020_setup
+
+	/*
+	 *  R
+	 * .RVI ZFRS BLDP WCAM
+	 * .011 1001 ..11 0101
+	 */
+	.type	arm1020_crval, #object
+arm1020_crval:
+	crval	clear=0x0000593f, mmuset=0x00003935, ucset=0x00001930
+
+	__INITDATA
+	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+	define_processor_functions arm1020, dabort=v4t_early_abort, pabort=legacy_pabort
+
+
+	.section ".rodata"
+
+	string	cpu_arch_name, "armv5t"
+	string	cpu_elf_name, "v5"
+
+	.type	cpu_arm1020_name, #object
+cpu_arm1020_name:
+	.ascii	"ARM1020"
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	.ascii	"i"
+#endif
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	.ascii	"d"
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	.ascii	"(wt)"
+#else
+	.ascii	"(wb)"
+#endif
+#endif
+#ifndef CONFIG_CPU_BPREDICT_DISABLE
+	.ascii	"B"
+#endif
+#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
+	.ascii	"RR"
+#endif
+	.ascii	"\0"
+	.size	cpu_arm1020_name, . - cpu_arm1020_name
+
+	.align
+
+	.section ".proc.info.init", #alloc
+
+	.type	__arm1020_proc_info,#object
+__arm1020_proc_info:
+	.long	0x4104a200			@ ARM 1020T (Architecture v5T)
+	.long	0xff0ffff0
+	.long   PMD_TYPE_SECT | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	.long   PMD_TYPE_SECT | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	initfn	__arm1020_setup, __arm1020_proc_info
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
+	.long	cpu_arm1020_name
+	.long	arm1020_processor_functions
+	.long	v4wbi_tlb_fns
+	.long	v4wb_user_fns
+	.long	arm1020_cache_fns
+	.size	__arm1020_proc_info, . - __arm1020_proc_info
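Note on the loops above: arm1020_flush_kern_cache_all and the D-cache walk in cpu_arm1020_switch_mm maintain the cache by set/way rather than by address. The operand passed to the c7, c14, 2 "clean+invalidate D index" operation packs the entry number into bits [31:26] and the segment number into bits [9:5]; this ARM1020 variant also drains the write buffer after each line (the ARM1020E file below omits those extra drains). The following is a minimal, illustrative C rendering of the same walk, not kernel code; clean_inval_dcache_index() is a hypothetical stand-in for the MCR.

/* Sketch of the segment/entry walk above; illustrative only. */
#define DSEGMENTS       16      /* CACHE_DSEGMENTS */
#define DENTRIES        64      /* CACHE_DENTRIES  */

static void clean_inval_dcache_index(unsigned long val)
{
        (void)val;              /* would issue: mcr p15, 0, val, c7, c14, 2 */
}

static void flush_dcache_by_index(void)
{
        int seg, entry;

        for (seg = DSEGMENTS - 1; seg >= 0; seg--)
                for (entry = DENTRIES - 1; entry >= 0; entry--)
                        /* entry number in bits [31:26], segment in bits [9:5] */
                        clean_inval_dcache_index(((unsigned long)entry << 26) | (seg << 5));
}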
diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S
new file mode 100644
index 0000000..ae3c27b
--- /dev/null
+++ b/arch/arm/mm/proc-arm1020e.S
@@ -0,0 +1,489 @@
+/*
+ *  linux/arch/arm/mm/proc-arm1020e.S: MMU functions for ARM1020E
+ *
+ *  Copyright (C) 2000 ARM Limited
+ *  Copyright (C) 2000 Deep Blue Solutions Ltd.
+ *  hacked for non-paged-MM by Hyok S. Choi, 2003.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ *
+ * These are the low level assembler for performing cache and TLB
+ * functions on the arm1020e.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/ptrace.h>
+
+#include "proc-macros.S"
+
+/*
+ * This is the maximum size of an area which will be invalidated
+ * using the single invalidate entry instructions.  Anything larger
+ * than this, and we go for the whole cache.
+ *
+ * This value should be chosen such that we choose the cheapest
+ * alternative.
+ */
+#define MAX_AREA_SIZE	32768
+
+/*
+ * The size of one data cache line.
+ */
+#define CACHE_DLINESIZE	32
+
+/*
+ * The number of data cache segments.
+ */
+#define CACHE_DSEGMENTS	16
+
+/*
+ * The number of lines in a cache segment.
+ */
+#define CACHE_DENTRIES	64
+
+/*
+ * This is the size at which it becomes more efficient to
+ * clean the whole cache, rather than using the individual
+ * cache line maintenance instructions.
+ */
+#define CACHE_DLIMIT	32768
+
+	.text
+/*
+ * cpu_arm1020e_proc_init()
+ */
+ENTRY(cpu_arm1020e_proc_init)
+	ret	lr
+
+/*
+ * cpu_arm1020e_proc_fin()
+ */
+ENTRY(cpu_arm1020e_proc_fin)
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1000 		@ ...i............
+	bic	r0, r0, #0x000e 		@ ............wca.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ret	lr
+
+/*
+ * cpu_arm1020e_reset(loc)
+ *
+ * Perform a soft reset of the system.	Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ */
+	.align	5
+	.pushsection	.idmap.text, "ax"
+ENTRY(cpu_arm1020e_reset)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I,D caches
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+#ifdef CONFIG_MMU
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+#endif
+	mrc	p15, 0, ip, c1, c0, 0		@ ctrl register
+	bic	ip, ip, #0x000f 		@ ............wcam
+	bic	ip, ip, #0x1100 		@ ...i...s........
+	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
+	ret	r0
+ENDPROC(cpu_arm1020e_reset)
+	.popsection
+
+/*
+ * cpu_arm1020e_do_idle()
+ */
+	.align	5
+ENTRY(cpu_arm1020e_do_idle)
+	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
+	ret	lr
+
+/* ================================= CACHE ================================ */
+
+	.align	5
+
+/*
+ *	flush_icache_all()
+ *
+ *	Unconditionally clean and invalidate the entire icache.
+ */
+ENTRY(arm1020e_flush_icache_all)
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+#endif
+	ret	lr
+ENDPROC(arm1020e_flush_icache_all)
+
+/*
+ *	flush_user_cache_all()
+ *
+ *	Invalidate all cache entries in a particular address
+ *	space.
+ */
+ENTRY(arm1020e_flush_user_cache_all)
+	/* FALLTHROUGH */
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(arm1020e_flush_kern_cache_all)
+	mov	r2, #VM_EXEC
+	mov	ip, #0
+__flush_whole_cache:
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mov	r1, #(CACHE_DSEGMENTS - 1) << 5	@ 16 segments
+1:	orr	r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
+2:	mcr	p15, 0, r3, c7, c14, 2		@ clean+invalidate D index
+	subs	r3, r3, #1 << 26
+	bcs	2b				@ entries 63 to 0
+	subs	r1, r1, #1 << 5
+	bcs	1b				@ segments 15 to 0
+#endif
+	tst	r2, #VM_EXEC
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+#endif
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	flush_user_cache_range(start, end, flags)
+ *
+ *	Invalidate a range of cache entries in the specified
+ *	address space.
+ *
+ *	- start	- start address (inclusive)
+ *	- end	- end address (exclusive)
+ *	- flags	- vm_flags for this space
+ */
+ENTRY(arm1020e_flush_user_cache_range)
+	mov	ip, #0
+	sub	r3, r1, r0			@ calculate total size
+	cmp	r3, #CACHE_DLIMIT
+	bhs	__flush_whole_cache
+
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	tst	r2, #VM_EXEC
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+#endif
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm1020e_coherent_kern_range)
+	/* FALLTHROUGH */
+/*
+ *	coherent_user_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm1020e_coherent_user_range)
+	mov	ip, #0
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+#endif
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mcr	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+#endif
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mov	r0, #0
+	ret	lr
+
+/*
+ *	flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure no D cache aliasing occurs, either with itself or
+ *	the I cache
+ *
+ *	- addr	- kernel address
+ *	- size	- region size
+ */
+ENTRY(arm1020e_flush_kern_dcache_area)
+	mov	ip, #0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	add	r1, r0, r1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_inv_range(start, end)
+ *
+ *	Invalidate (discard) the specified virtual address range.
+ *	May not write back any entries.  If 'start' or 'end'
+ *	are not cache line aligned, those lines must be written
+ *	back.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+arm1020e_dma_inv_range:
+	mov	ip, #0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	tst	r0, #CACHE_DLINESIZE - 1
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r0, c7, c10, 1		@ clean D entry
+	tst	r1, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r1, c7, c10, 1		@ clean D entry
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_clean_range(start, end)
+ *
+ *	Clean the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+arm1020e_dma_clean_range:
+	mov	ip, #0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_flush_range(start, end)
+ *
+ *	Clean and invalidate the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm1020e_dma_flush_range)
+	mov	ip, #0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(arm1020e_dma_map_area)
+	add	r1, r1, r0
+	cmp	r2, #DMA_TO_DEVICE
+	beq	arm1020e_dma_clean_range
+	bcs	arm1020e_dma_inv_range
+	b	arm1020e_dma_flush_range
+ENDPROC(arm1020e_dma_map_area)
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(arm1020e_dma_unmap_area)
+	ret	lr
+ENDPROC(arm1020e_dma_unmap_area)
+
+	.globl	arm1020e_flush_kern_cache_louis
+	.equ	arm1020e_flush_kern_cache_louis, arm1020e_flush_kern_cache_all
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions arm1020e
+
+	.align	5
+ENTRY(cpu_arm1020e_dcache_clean_area)
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mov	ip, #0
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	subs	r1, r1, #CACHE_DLINESIZE
+	bhi	1b
+#endif
+	ret	lr
+
+/* =============================== PageTable ============================== */
+
+/*
+ * cpu_arm1020e_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+	.align	5
+ENTRY(cpu_arm1020e_switch_mm)
+#ifdef CONFIG_MMU
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mcr	p15, 0, r3, c7, c10, 4
+	mov	r1, #0xF			@ 16 segments
+1:	mov	r3, #0x3F			@ 64 entries
+2:	mov	ip, r3, LSL #26 		@ shift up entry
+	orr	ip, ip, r1, LSL #5		@ shift in/up index
+	mcr	p15, 0, ip, c7, c14, 2		@ Clean & Inval DCache entry
+	mov	ip, #0
+	subs	r3, r3, #1
+	cmp	r3, #0
+	bge	2b				@ entries 3F to 0
+	subs	r1, r1, #1
+	cmp	r1, #0
+	bge	1b				@ segments 15 to 0
+
+#endif
+	mov	r1, #0
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mcr	p15, 0, r1, c7, c5, 0		@ invalidate I cache
+#endif
+	mcr	p15, 0, r1, c7, c10, 4		@ drain WB
+	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
+	mcr	p15, 0, r1, c8, c7, 0		@ invalidate I & D TLBs
+#endif
+	ret	lr
+        
+/*
+ * cpu_arm1020e_set_pte(ptep, pte)
+ *
+ * Set a PTE and flush it out
+ */
+	.align	5
+ENTRY(cpu_arm1020e_set_pte_ext)
+#ifdef CONFIG_MMU
+	armv3_set_pte_ext
+	mov	r0, r0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+#endif
+#endif /* CONFIG_MMU */
+	ret	lr
+
+	.type	__arm1020e_setup, #function
+__arm1020e_setup:
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c7		@ invalidate I,D caches on v4
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer on v4
+#ifdef CONFIG_MMU
+	mcr	p15, 0, r0, c8, c7		@ invalidate I,D TLBs on v4
+#endif
+	adr	r5, arm1020e_crval
+	ldmia	r5, {r5, r6}
+	mrc	p15, 0, r0, c1, c0		@ get control register v4
+	bic	r0, r0, r5
+	orr	r0, r0, r6
+#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
+	orr	r0, r0, #0x4000 		@ .R.. .... .... ....
+#endif
+	ret	lr
+	.size	__arm1020e_setup, . - __arm1020e_setup
+
+	/*
+	 *  R
+	 * .RVI ZFRS BLDP WCAM
+	 * .011 1001 ..11 0101
+	 */
+	.type	arm1020e_crval, #object
+arm1020e_crval:
+	crval	clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001930
+
+	__INITDATA
+	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+	define_processor_functions arm1020e, dabort=v4t_early_abort, pabort=legacy_pabort
+
+	.section ".rodata"
+
+	string	cpu_arch_name, "armv5te"
+	string	cpu_elf_name, "v5"
+	string	cpu_arm1020e_name, "ARM1020E"
+
+	.align
+
+	.section ".proc.info.init", #alloc
+
+	.type	__arm1020e_proc_info,#object
+__arm1020e_proc_info:
+	.long	0x4105a200			@ ARM 1020TE (Architecture v5TE)
+	.long	0xff0ffff0
+	.long   PMD_TYPE_SECT | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	.long   PMD_TYPE_SECT | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	initfn	__arm1020e_setup, __arm1020e_proc_info
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP
+	.long	cpu_arm1020e_name
+	.long	arm1020e_processor_functions
+	.long	v4wbi_tlb_fns
+	.long	v4wb_user_fns
+	.long	arm1020e_cache_fns
+	.size	__arm1020e_proc_info, . - __arm1020e_proc_info
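The dma_map_area entry above converts (start, size) into (start, end) and then dispatches on the DMA direction: equal to DMA_TO_DEVICE takes the clean path, higher (DMA_FROM_DEVICE) takes the invalidate path, and lower (DMA_BIDIRECTIONAL) falls through to the full clean+invalidate. Below is an illustrative C sketch of that dispatch; the enum values follow <linux/dma-direction.h>, and the *_range helpers are empty stand-ins for the assembly routines in this file.

/* Sketch of the arm1020e_dma_map_area dispatch above; illustrative only. */
enum dma_dir { DMA_BIDIRECTIONAL = 0, DMA_TO_DEVICE = 1, DMA_FROM_DEVICE = 2 };

static void dma_clean_range(unsigned long s, unsigned long e) { (void)s; (void)e; }
static void dma_inv_range(unsigned long s, unsigned long e)   { (void)s; (void)e; }
static void dma_flush_range(unsigned long s, unsigned long e) { (void)s; (void)e; }

static void dma_map_area(unsigned long start, unsigned long size, enum dma_dir dir)
{
        unsigned long end = start + size;       /* "add r1, r1, r0" */

        if (dir == DMA_TO_DEVICE)
                dma_clean_range(start, end);    /* write back dirty lines */
        else if (dir == DMA_FROM_DEVICE)
                dma_inv_range(start, end);      /* discard stale lines */
        else
                dma_flush_range(start, end);    /* clean + invalidate */
}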
diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S
new file mode 100644
index 0000000..dbb2413
--- /dev/null
+++ b/arch/arm/mm/proc-arm1022.S
@@ -0,0 +1,474 @@
+/*
+ *  linux/arch/arm/mm/proc-arm1022.S: MMU functions for ARM1022E
+ *
+ *  Copyright (C) 2000 ARM Limited
+ *  Copyright (C) 2000 Deep Blue Solutions Ltd.
+ *  hacked for non-paged-MM by Hyok S. Choi, 2003.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ *
+ * These are the low level assembler for performing cache and TLB
+ * functions on the ARM1022E.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/ptrace.h>
+
+#include "proc-macros.S"
+
+/*
+ * This is the maximum size of an area which will be invalidated
+ * using the single invalidate entry instructions.  Anything larger
+ * than this, and we go for the whole cache.
+ *
+ * This value should be chosen such that we choose the cheapest
+ * alternative.
+ */
+#define MAX_AREA_SIZE	32768
+
+/*
+ * The size of one data cache line.
+ */
+#define CACHE_DLINESIZE	32
+
+/*
+ * The number of data cache segments.
+ */
+#define CACHE_DSEGMENTS	16
+
+/*
+ * The number of lines in a cache segment.
+ */
+#define CACHE_DENTRIES	64
+
+/*
+ * This is the size at which it becomes more efficient to
+ * clean the whole cache, rather than using the individual
+ * cache line maintenance instructions.
+ */
+#define CACHE_DLIMIT	32768
+
+	.text
+/*
+ * cpu_arm1022_proc_init()
+ */
+ENTRY(cpu_arm1022_proc_init)
+	ret	lr
+
+/*
+ * cpu_arm1022_proc_fin()
+ */
+ENTRY(cpu_arm1022_proc_fin)
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1000 		@ ...i............
+	bic	r0, r0, #0x000e 		@ ............wca.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ret	lr
+
+/*
+ * cpu_arm1022_reset(loc)
+ *
+ * Perform a soft reset of the system.	Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ */
+	.align	5
+	.pushsection	.idmap.text, "ax"
+ENTRY(cpu_arm1022_reset)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I,D caches
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+#ifdef CONFIG_MMU
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+#endif
+	mrc	p15, 0, ip, c1, c0, 0		@ ctrl register
+	bic	ip, ip, #0x000f 		@ ............wcam
+	bic	ip, ip, #0x1100 		@ ...i...s........
+	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
+	ret	r0
+ENDPROC(cpu_arm1022_reset)
+	.popsection
+
+/*
+ * cpu_arm1022_do_idle()
+ */
+	.align	5
+ENTRY(cpu_arm1022_do_idle)
+	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
+	ret	lr
+
+/* ================================= CACHE ================================ */
+
+	.align	5
+
+/*
+ *	flush_icache_all()
+ *
+ *	Unconditionally clean and invalidate the entire icache.
+ */
+ENTRY(arm1022_flush_icache_all)
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+#endif
+	ret	lr
+ENDPROC(arm1022_flush_icache_all)
+
+/*
+ *	flush_user_cache_all()
+ *
+ *	Invalidate all cache entries in a particular address
+ *	space.
+ */
+ENTRY(arm1022_flush_user_cache_all)
+	/* FALLTHROUGH */
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(arm1022_flush_kern_cache_all)
+	mov	r2, #VM_EXEC
+	mov	ip, #0
+__flush_whole_cache:
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mov	r1, #(CACHE_DSEGMENTS - 1) << 5	@ 16 segments
+1:	orr	r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
+2:	mcr	p15, 0, r3, c7, c14, 2		@ clean+invalidate D index
+	subs	r3, r3, #1 << 26
+	bcs	2b				@ entries 63 to 0
+	subs	r1, r1, #1 << 5
+	bcs	1b				@ segments 15 to 0
+#endif
+	tst	r2, #VM_EXEC
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+#endif
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	flush_user_cache_range(start, end, flags)
+ *
+ *	Invalidate a range of cache entries in the specified
+ *	address space.
+ *
+ *	- start	- start address (inclusive)
+ *	- end	- end address (exclusive)
+ *	- flags	- vm_flags for this space
+ */
+ENTRY(arm1022_flush_user_cache_range)
+	mov	ip, #0
+	sub	r3, r1, r0			@ calculate total size
+	cmp	r3, #CACHE_DLIMIT
+	bhs	__flush_whole_cache
+
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	tst	r2, #VM_EXEC
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+#endif
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm1022_coherent_kern_range)
+	/* FALLTHROUGH */
+
+/*
+ *	coherent_user_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm1022_coherent_user_range)
+	mov	ip, #0
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+#endif
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mcr	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+#endif
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mov	r0, #0
+	ret	lr
+
+/*
+ *	flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure no D cache aliasing occurs, either with itself or
+ *	the I cache
+ *
+ *	- addr	- kernel address
+ *	- size	- region size
+ */
+ENTRY(arm1022_flush_kern_dcache_area)
+	mov	ip, #0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	add	r1, r0, r1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_inv_range(start, end)
+ *
+ *	Invalidate (discard) the specified virtual address range.
+ *	May not write back any entries.  If 'start' or 'end'
+ *	are not cache line aligned, those lines must be written
+ *	back.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+arm1022_dma_inv_range:
+	mov	ip, #0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	tst	r0, #CACHE_DLINESIZE - 1
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r0, c7, c10, 1		@ clean D entry
+	tst	r1, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r1, c7, c10, 1		@ clean D entry
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_clean_range(start, end)
+ *
+ *	Clean the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+arm1022_dma_clean_range:
+	mov	ip, #0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_flush_range(start, end)
+ *
+ *	Clean and invalidate the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm1022_dma_flush_range)
+	mov	ip, #0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(arm1022_dma_map_area)
+	add	r1, r1, r0
+	cmp	r2, #DMA_TO_DEVICE
+	beq	arm1022_dma_clean_range
+	bcs	arm1022_dma_inv_range
+	b	arm1022_dma_flush_range
+ENDPROC(arm1022_dma_map_area)
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(arm1022_dma_unmap_area)
+	ret	lr
+ENDPROC(arm1022_dma_unmap_area)
+
+	.globl	arm1022_flush_kern_cache_louis
+	.equ	arm1022_flush_kern_cache_louis, arm1022_flush_kern_cache_all
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions arm1022
+
+	.align	5
+ENTRY(cpu_arm1022_dcache_clean_area)
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mov	ip, #0
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	subs	r1, r1, #CACHE_DLINESIZE
+	bhi	1b
+#endif
+	ret	lr
+
+/* =============================== PageTable ============================== */
+
+/*
+ * cpu_arm1022_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+	.align	5
+ENTRY(cpu_arm1022_switch_mm)
+#ifdef CONFIG_MMU
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mov	r1, #(CACHE_DSEGMENTS - 1) << 5	@ 16 segments
+1:	orr	r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
+2:	mcr	p15, 0, r3, c7, c14, 2		@ clean+invalidate D index
+	subs	r3, r3, #1 << 26
+	bcs	2b				@ entries 63 to 0
+	subs	r1, r1, #1 << 5
+	bcs	1b				@ segments 15 to 0
+#endif
+	mov	r1, #0
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mcr	p15, 0, r1, c7, c5, 0		@ invalidate I cache
+#endif
+	mcr	p15, 0, r1, c7, c10, 4		@ drain WB
+	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
+	mcr	p15, 0, r1, c8, c7, 0		@ invalidate I & D TLBs
+#endif
+	ret	lr
+        
+/*
+ * cpu_arm1022_set_pte_ext(ptep, pte, ext)
+ *
+ * Set a PTE and flush it out
+ */
+	.align	5
+ENTRY(cpu_arm1022_set_pte_ext)
+#ifdef CONFIG_MMU
+	armv3_set_pte_ext
+	mov	r0, r0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+#endif
+#endif /* CONFIG_MMU */
+	ret	lr
+
+	.type	__arm1022_setup, #function
+__arm1022_setup:
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c7		@ invalidate I,D caches on v4
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer on v4
+#ifdef CONFIG_MMU
+	mcr	p15, 0, r0, c8, c7		@ invalidate I,D TLBs on v4
+#endif
+	adr	r5, arm1022_crval
+	ldmia	r5, {r5, r6}
+	mrc	p15, 0, r0, c1, c0		@ get control register v4
+	bic	r0, r0, r5
+	orr	r0, r0, r6
+#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
+	orr	r0, r0, #0x4000 		@ .R.. .... .... ....
+#endif
+	ret	lr
+	.size	__arm1022_setup, . - __arm1022_setup
+
+	/*
+	 *  R
+	 * .RVI ZFRS BLDP WCAM
+	 * .011 1001 ..11 0101
+	 * 
+	 */
+	.type	arm1022_crval, #object
+arm1022_crval:
+	crval	clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001930
+
+	__INITDATA
+	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+	define_processor_functions arm1022, dabort=v4t_early_abort, pabort=legacy_pabort
+
+	.section ".rodata"
+
+	string	cpu_arch_name, "armv5te"
+	string	cpu_elf_name, "v5"
+	string	cpu_arm1022_name, "ARM1022"
+
+	.align
+
+	.section ".proc.info.init", #alloc
+
+	.type	__arm1022_proc_info,#object
+__arm1022_proc_info:
+	.long	0x4105a220			@ ARM 1022E (v5TE)
+	.long	0xff0ffff0
+	.long   PMD_TYPE_SECT | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	.long   PMD_TYPE_SECT | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	initfn	__arm1022_setup, __arm1022_proc_info
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_EDSP
+	.long	cpu_arm1022_name
+	.long	arm1022_processor_functions
+	.long	v4wbi_tlb_fns
+	.long	v4wb_user_fns
+	.long	arm1022_cache_fns
+	.size	__arm1022_proc_info, . - __arm1022_proc_info
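The MAX_AREA_SIZE/CACHE_DLIMIT constants at the top of each of these files encode a simple cost cut-off, used by arm1022_flush_user_cache_range above: below 32KB the range is maintained line by line, while anything at or above that size is cheaper to handle by cleaning the whole cache. A short illustrative C sketch of that decision follows; the helpers are stand-ins for __flush_whole_cache and the c7, c14, 1 line operation.

/* Sketch of the size cut-off in arm1022_flush_user_cache_range; illustrative only. */
#define CACHE_DLINESIZE 32
#define CACHE_DLIMIT    32768

static void flush_whole_dcache(void) { }
static void clean_inval_dcache_line(unsigned long addr) { (void)addr; }

static void flush_user_range(unsigned long start, unsigned long end)
{
        unsigned long addr;

        if (end - start >= CACHE_DLIMIT) {      /* "cmp r3, #CACHE_DLIMIT; bhs" */
                flush_whole_dcache();
                return;
        }
        for (addr = start; addr < end; addr += CACHE_DLINESIZE)
                clean_inval_dcache_line(addr);  /* clean+invalidate one D line */
}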
diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S
new file mode 100644
index 0000000..0b37b2c
--- /dev/null
+++ b/arch/arm/mm/proc-arm1026.S
@@ -0,0 +1,468 @@
+/*
+ *  linux/arch/arm/mm/proc-arm1026.S: MMU functions for ARM1026EJ-S
+ *
+ *  Copyright (C) 2000 ARM Limited
+ *  Copyright (C) 2000 Deep Blue Solutions Ltd.
+ *  hacked for non-paged-MM by Hyok S. Choi, 2003.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ *
+ * These are the low level assembler for performing cache and TLB
+ * functions on the ARM1026EJ-S.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/ptrace.h>
+
+#include "proc-macros.S"
+
+/*
+ * This is the maximum size of an area which will be invalidated
+ * using the single invalidate entry instructions.  Anything larger
+ * than this, and we go for the whole cache.
+ *
+ * This value should be chosen such that we choose the cheapest
+ * alternative.
+ */
+#define MAX_AREA_SIZE	32768
+
+/*
+ * The size of one data cache line.
+ */
+#define CACHE_DLINESIZE	32
+
+/*
+ * The number of data cache segments.
+ */
+#define CACHE_DSEGMENTS	16
+
+/*
+ * The number of lines in a cache segment.
+ */
+#define CACHE_DENTRIES	64
+
+/*
+ * This is the size at which it becomes more efficient to
+ * clean the whole cache, rather than using the individual
+ * cache line maintenance instructions.
+ */
+#define CACHE_DLIMIT	32768
+
+	.text
+/*
+ * cpu_arm1026_proc_init()
+ */
+ENTRY(cpu_arm1026_proc_init)
+	ret	lr
+
+/*
+ * cpu_arm1026_proc_fin()
+ */
+ENTRY(cpu_arm1026_proc_fin)
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1000 		@ ...i............
+	bic	r0, r0, #0x000e 		@ ............wca.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ret	lr
+
+/*
+ * cpu_arm1026_reset(loc)
+ *
+ * Perform a soft reset of the system.	Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ */
+	.align	5
+	.pushsection	.idmap.text, "ax"
+ENTRY(cpu_arm1026_reset)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I,D caches
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+#ifdef CONFIG_MMU
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+#endif
+	mrc	p15, 0, ip, c1, c0, 0		@ ctrl register
+	bic	ip, ip, #0x000f 		@ ............wcam
+	bic	ip, ip, #0x1100 		@ ...i...s........
+	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
+	ret	r0
+ENDPROC(cpu_arm1026_reset)
+	.popsection
+
+/*
+ * cpu_arm1026_do_idle()
+ */
+	.align	5
+ENTRY(cpu_arm1026_do_idle)
+	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
+	ret	lr
+
+/* ================================= CACHE ================================ */
+
+	.align	5
+
+/*
+ *	flush_icache_all()
+ *
+ *	Unconditionally clean and invalidate the entire icache.
+ */
+ENTRY(arm1026_flush_icache_all)
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+#endif
+	ret	lr
+ENDPROC(arm1026_flush_icache_all)
+
+/*
+ *	flush_user_cache_all()
+ *
+ *	Invalidate all cache entries in a particular address
+ *	space.
+ */
+ENTRY(arm1026_flush_user_cache_all)
+	/* FALLTHROUGH */
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(arm1026_flush_kern_cache_all)
+	mov	r2, #VM_EXEC
+	mov	ip, #0
+__flush_whole_cache:
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+1:	mrc	p15, 0, r15, c7, c14, 3		@ test, clean, invalidate
+	bne	1b
+#endif
+	tst	r2, #VM_EXEC
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+#endif
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	flush_user_cache_range(start, end, flags)
+ *
+ *	Invalidate a range of cache entries in the specified
+ *	address space.
+ *
+ *	- start	- start address (inclusive)
+ *	- end	- end address (exclusive)
+ *	- flags	- vm_flags for this space
+ */
+ENTRY(arm1026_flush_user_cache_range)
+	mov	ip, #0
+	sub	r3, r1, r0			@ calculate total size
+	cmp	r3, #CACHE_DLIMIT
+	bhs	__flush_whole_cache
+
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	tst	r2, #VM_EXEC
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+#endif
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm1026_coherent_kern_range)
+	/* FALLTHROUGH */
+/*
+ *	coherent_user_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm1026_coherent_user_range)
+	mov	ip, #0
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+#endif
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mcr	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+#endif
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mov	r0, #0
+	ret	lr
+
+/*
+ *	flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure no D cache aliasing occurs, either with itself or
+ *	the I cache
+ *
+ *	- addr	- kernel address
+ *	- size	- region size
+ */
+ENTRY(arm1026_flush_kern_dcache_area)
+	mov	ip, #0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	add	r1, r0, r1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_inv_range(start, end)
+ *
+ *	Invalidate (discard) the specified virtual address range.
+ *	May not write back any entries.  If 'start' or 'end'
+ *	are not cache line aligned, those lines must be written
+ *	back.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+arm1026_dma_inv_range:
+	mov	ip, #0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	tst	r0, #CACHE_DLINESIZE - 1
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r0, c7, c10, 1		@ clean D entry
+	tst	r1, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r1, c7, c10, 1		@ clean D entry
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_clean_range(start, end)
+ *
+ *	Clean the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+arm1026_dma_clean_range:
+	mov	ip, #0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_flush_range(start, end)
+ *
+ *	Clean and invalidate the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm1026_dma_flush_range)
+	mov	ip, #0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(arm1026_dma_map_area)
+	add	r1, r1, r0
+	cmp	r2, #DMA_TO_DEVICE
+	beq	arm1026_dma_clean_range
+	bcs	arm1026_dma_inv_range
+	b	arm1026_dma_flush_range
+ENDPROC(arm1026_dma_map_area)
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(arm1026_dma_unmap_area)
+	ret	lr
+ENDPROC(arm1026_dma_unmap_area)
+
+	.globl	arm1026_flush_kern_cache_louis
+	.equ	arm1026_flush_kern_cache_louis, arm1026_flush_kern_cache_all
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions arm1026
+
+	.align	5
+ENTRY(cpu_arm1026_dcache_clean_area)
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mov	ip, #0
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	subs	r1, r1, #CACHE_DLINESIZE
+	bhi	1b
+#endif
+	ret	lr
+
+/* =============================== PageTable ============================== */
+
+/*
+ * cpu_arm1026_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+	.align	5
+ENTRY(cpu_arm1026_switch_mm)
+#ifdef CONFIG_MMU
+	mov	r1, #0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+1:	mrc	p15, 0, r15, c7, c14, 3		@ test, clean, invalidate
+	bne	1b
+#endif
+#ifndef CONFIG_CPU_ICACHE_DISABLE
+	mcr	p15, 0, r1, c7, c5, 0		@ invalidate I cache
+#endif
+	mcr	p15, 0, r1, c7, c10, 4		@ drain WB
+	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
+	mcr	p15, 0, r1, c8, c7, 0		@ invalidate I & D TLBs
+#endif
+	ret	lr
+        
+/*
+ * cpu_arm1026_set_pte_ext(ptep, pte, ext)
+ *
+ * Set a PTE and flush it out
+ */
+	.align	5
+ENTRY(cpu_arm1026_set_pte_ext)
+#ifdef CONFIG_MMU
+	armv3_set_pte_ext
+	mov	r0, r0
+#ifndef CONFIG_CPU_DCACHE_DISABLE
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+#endif
+#endif /* CONFIG_MMU */
+	ret	lr
+
+	.type	__arm1026_setup, #function
+__arm1026_setup:
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c7		@ invalidate I,D caches on v4
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer on v4
+#ifdef CONFIG_MMU
+	mcr	p15, 0, r0, c8, c7		@ invalidate I,D TLBs on v4
+	mcr	p15, 0, r4, c2, c0		@ load page table pointer
+#endif
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mov	r0, #4				@ explicitly disable writeback
+	mcr	p15, 7, r0, c15, c0, 0
+#endif
+	adr	r5, arm1026_crval
+	ldmia	r5, {r5, r6}
+	mrc	p15, 0, r0, c1, c0		@ get control register v4
+	bic	r0, r0, r5
+	orr	r0, r0, r6
+#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
+	orr	r0, r0, #0x4000 		@ .R.. .... .... ....
+#endif
+	ret	lr
+	.size	__arm1026_setup, . - __arm1026_setup
+
+	/*
+	 *  R
+	 * .RVI ZFRS BLDP WCAM
+	 * .011 1001 ..11 0101
+	 * 
+	 */
+	.type	arm1026_crval, #object
+arm1026_crval:
+	crval	clear=0x00007f3f, mmuset=0x00003935, ucset=0x00001934
+
+	__INITDATA
+	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+	define_processor_functions arm1026, dabort=v5t_early_abort, pabort=legacy_pabort
+
+	.section .rodata
+
+	string	cpu_arch_name, "armv5tej"
+	string	cpu_elf_name, "v5"
+	.align
+	string	cpu_arm1026_name, "ARM1026EJ-S"
+	.align
+
+	.section ".proc.info.init", #alloc
+
+	.type	__arm1026_proc_info,#object
+__arm1026_proc_info:
+	.long	0x4106a260			@ ARM 1026EJ-S (v5TEJ)
+	.long	0xff0ffff0
+	.long   PMD_TYPE_SECT | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	.long   PMD_TYPE_SECT | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	initfn	__arm1026_setup, __arm1026_proc_info
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA
+	.long	cpu_arm1026_name
+	.long	arm1026_processor_functions
+	.long	v4wbi_tlb_fns
+	.long	v4wb_user_fns
+	.long	arm1026_cache_fns
+	.size	__arm1026_proc_info, . - __arm1026_proc_info
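Every __arm10xx_setup routine above ends the same way: it loads the clear/mmuset pair emitted by the crval macro, clears those control-register bits, ORs in the MMU defaults, and optionally sets bit 14 for round-robin cache replacement. A minimal C sketch of that composition follows; the constants shown are the arm1026_crval values above, and the function is illustrative only.

/* Sketch of the control-register composition at the end of __arm1026_setup. */
static unsigned long compose_ctrl(unsigned long ctrl, int round_robin)
{
        const unsigned long clear  = 0x00007f3f;        /* arm1026_crval clear  */
        const unsigned long mmuset = 0x00003935;        /* arm1026_crval mmuset */

        ctrl &= ~clear;                 /* "bic r0, r0, r5" */
        ctrl |= mmuset;                 /* "orr r0, r0, r6" */
        if (round_robin)
                ctrl |= 0x4000;         /* CONFIG_CPU_CACHE_ROUND_ROBIN, the .R.. bit */
        return ctrl;
}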
diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S
new file mode 100644
index 0000000..3651cd7
--- /dev/null
+++ b/arch/arm/mm/proc-arm720.S
@@ -0,0 +1,219 @@
+/*
+ *  linux/arch/arm/mm/proc-arm720.S: MMU functions for ARM720
+ *
+ *  Copyright (C) 2000 Steve Hill (sjhill@cotw.com)
+ *                     Rob Scott (rscott@mtrob.fdns.net)
+ *  Copyright (C) 2000 ARM Limited, Deep Blue Solutions Ltd.
+ *  hacked for non-paged-MM by Hyok S. Choi, 2004.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ *
+ * These are the low level assembler for performing cache and TLB
+ * functions on the ARM720T.  The ARM720T has a writethrough IDC
+ * cache, so we don't need to clean it.
+ *
+ *  Changelog:
+ *   05-09-2000 SJH	Created by moving 720 specific functions
+ *			out of 'proc-arm6,7.S' per RMK discussion
+ *   07-25-2000 SJH	Added idle function.
+ *   08-25-2000	DBS	Updated for integration of ARM Ltd version.
+ *   04-20-2004 HSC	modified for non-paged memory management mode.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/ptrace.h>
+
+#include "proc-macros.S"
+
+/*
+ * Function: arm720_proc_init (void)
+ *	   : arm720_proc_fin (void)
+ *
+ * Notes   : This processor does not require these
+ */
+ENTRY(cpu_arm720_dcache_clean_area)
+ENTRY(cpu_arm720_proc_init)
+		ret	lr
+
+ENTRY(cpu_arm720_proc_fin)
+		mrc	p15, 0, r0, c1, c0, 0
+		bic	r0, r0, #0x1000			@ ...i............
+		bic	r0, r0, #0x000e			@ ............wca.
+		mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+		ret	lr
+
+/*
+ * Function: arm720_proc_do_idle(void)
+ * Params  : r0 = unused
+ * Purpose : put the processor in proper idle mode
+ */
+ENTRY(cpu_arm720_do_idle)
+		ret	lr
+
+/*
+ * Function: arm720_switch_mm(unsigned long pgd_phys)
+ * Params  : pgd_phys	Physical address of page table
+ * Purpose : Perform a task switch, saving the old process' state and restoring
+ *	     the new.
+ */
+ENTRY(cpu_arm720_switch_mm)
+#ifdef CONFIG_MMU
+		mov	r1, #0
+		mcr	p15, 0, r1, c7, c7, 0		@ invalidate cache
+		mcr	p15, 0, r0, c2, c0, 0		@ update page table ptr
+		mcr	p15, 0, r1, c8, c7, 0		@ flush TLB (v4)
+#endif
+		ret	lr
+
+/*
+ * Function: arm720_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext)
+ * Params  : r0 = Address to set
+ *	   : r1 = value to set
+ * Purpose : Set a PTE and flush it out of any WB cache
+ */
+	.align	5
+ENTRY(cpu_arm720_set_pte_ext)
+#ifdef CONFIG_MMU
+	armv3_set_pte_ext wc_disable=0
+#endif
+	ret	lr
+
+/*
+ * Function: arm720_reset
+ * Params  : r0 = address to jump to
+ * Notes   : This sets up everything for a reset
+ */
+		.pushsection	.idmap.text, "ax"
+ENTRY(cpu_arm720_reset)
+		mov	ip, #0
+		mcr	p15, 0, ip, c7, c7, 0		@ invalidate cache
+#ifdef CONFIG_MMU
+		mcr	p15, 0, ip, c8, c7, 0		@ flush TLB (v4)
+#endif
+		mrc	p15, 0, ip, c1, c0, 0		@ get ctrl register
+		bic	ip, ip, #0x000f			@ ............wcam
+		bic	ip, ip, #0x2100			@ ..v....s........
+		mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
+		ret	r0
+ENDPROC(cpu_arm720_reset)
+		.popsection
+
+	.type	__arm710_setup, #function
+__arm710_setup:
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c7, 0		@ invalidate caches
+#ifdef CONFIG_MMU
+	mcr	p15, 0, r0, c8, c7, 0		@ flush TLB (v4)
+#endif
+	mrc	p15, 0, r0, c1, c0		@ get control register
+	ldr	r5, arm710_cr1_clear
+	bic	r0, r0, r5
+	ldr	r5, arm710_cr1_set
+	orr	r0, r0, r5
+	ret	lr				@ __ret (head.S)
+	.size	__arm710_setup, . - __arm710_setup
+
+	/*
+	 *  R
+	 * .RVI ZFRS BLDP WCAM
+	 * .... 0001 ..11 1101
+	 * 
+	 */
+	.type	arm710_cr1_clear, #object
+	.type	arm710_cr1_set, #object
+arm710_cr1_clear:
+	.word	0x0f3f
+arm710_cr1_set:
+	.word	0x013d
+
+	.type	__arm720_setup, #function
+__arm720_setup:
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c7, 0		@ invalidate caches
+#ifdef CONFIG_MMU
+	mcr	p15, 0, r0, c8, c7, 0		@ flush TLB (v4)
+#endif
+	adr	r5, arm720_crval
+	ldmia	r5, {r5, r6}
+	mrc	p15, 0, r0, c1, c0		@ get control register
+	bic	r0, r0, r5
+	orr	r0, r0, r6
+	ret	lr				@ __ret (head.S)
+	.size	__arm720_setup, . - __arm720_setup
+
+	/*
+	 *  R
+	 * .RVI ZFRS BLDP WCAM
+	 * ..1. 1001 ..11 1101
+	 * 
+	 */
+	.type	arm720_crval, #object
+arm720_crval:
+	crval	clear=0x00002f3f, mmuset=0x0000213d, ucset=0x00000130
+
+		__INITDATA
+	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+	define_processor_functions arm720, dabort=v4t_late_abort, pabort=legacy_pabort
+
+		.section ".rodata"
+
+	string	cpu_arch_name, "armv4t"
+	string	cpu_elf_name, "v4"
+	string	cpu_arm710_name, "ARM710T"
+	string	cpu_arm720_name, "ARM720T"
+
+		.align
+
+/*
+ * See <asm/procinfo.h> for a definition of this structure.
+ */
+	
+		.section ".proc.info.init", #alloc
+
+.macro arm720_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cpu_flush:req
+		.type	__\name\()_proc_info,#object
+__\name\()_proc_info:
+		.long	\cpu_val
+		.long	\cpu_mask
+		.long   PMD_TYPE_SECT | \
+			PMD_SECT_BUFFERABLE | \
+			PMD_SECT_CACHEABLE | \
+			PMD_BIT4 | \
+			PMD_SECT_AP_WRITE | \
+			PMD_SECT_AP_READ
+		.long   PMD_TYPE_SECT | \
+			PMD_BIT4 | \
+			PMD_SECT_AP_WRITE | \
+			PMD_SECT_AP_READ
+		initfn	\cpu_flush, __\name\()_proc_info	@ cpu_flush
+		.long	cpu_arch_name				@ arch_name
+		.long	cpu_elf_name				@ elf_name
+		.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB	@ elf_hwcap
+		.long	\cpu_name
+		.long	arm720_processor_functions
+		.long	v4_tlb_fns
+		.long	v4wt_user_fns
+		.long	v4_cache_fns
+		.size	__\name\()_proc_info, . - __\name\()_proc_info
+.endm
+
+	arm720_proc_info arm710, 0x41807100, 0xffffff00, cpu_arm710_name, __arm710_setup
+	arm720_proc_info arm720, 0x41807200, 0xffffff00, cpu_arm720_name, __arm720_setup
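The arm720_proc_info macro above emits one .proc.info.init record per supported core; at boot the kernel masks the CPU ID register and compares it against each record's cpu_val/cpu_mask pair to pick the matching entry (ARM710T vs ARM720T here). The sketch below is illustrative only: the struct is abbreviated (the real struct proc_info_list in <asm/procinfo.h> carries the flags, setup hook, names and function tables as well), and only the matching logic is shown.

/* Illustrative lookup over records like those built by arm720_proc_info. */
struct proc_info_sketch {
        unsigned int cpu_val;   /* e.g. 0x41807200 for ARM720T */
        unsigned int cpu_mask;  /* e.g. 0xffffff00 */
        /* ... mmu flags, setup hook, names, proc/tlb/user/cache tables ... */
};

static const struct proc_info_sketch *
lookup_proc_info(const struct proc_info_sketch *tbl, int n, unsigned int cpuid)
{
        int i;

        for (i = 0; i < n; i++)
                if ((cpuid & tbl[i].cpu_mask) == tbl[i].cpu_val)
                        return &tbl[i];
        return 0;       /* no entry claims this CPU ID */
}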
diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S
new file mode 100644
index 0000000..024fb77
--- /dev/null
+++ b/arch/arm/mm/proc-arm740.S
@@ -0,0 +1,151 @@
+/*
+ *  linux/arch/arm/mm/proc-arm740.S: utility functions for ARM740
+ *
+ *  Copyright (C) 2004-2006 Hyok S. Choi (hyok.choi@samsung.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/ptrace.h>
+
+#include "proc-macros.S"
+
+	.text
+/*
+ * cpu_arm740_proc_init()
+ * cpu_arm740_do_idle()
+ * cpu_arm740_dcache_clean_area()
+ * cpu_arm740_switch_mm()
+ *
+ * These are not required.
+ */
+ENTRY(cpu_arm740_proc_init)
+ENTRY(cpu_arm740_do_idle)
+ENTRY(cpu_arm740_dcache_clean_area)
+ENTRY(cpu_arm740_switch_mm)
+	ret	lr
+
+/*
+ * cpu_arm740_proc_fin()
+ */
+ENTRY(cpu_arm740_proc_fin)
+	mrc	p15, 0, r0, c1, c0, 0
+	bic	r0, r0, #0x3f000000		@ bank/f/lock/s
+	bic	r0, r0, #0x0000000c		@ w-buffer/cache
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ret	lr
+
+/*
+ * cpu_arm740_reset(loc)
+ * Params  : r0 = address to jump to
+ * Notes   : This sets up everything for a reset
+ */
+	.pushsection	.idmap.text, "ax"
+ENTRY(cpu_arm740_reset)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c0, 0		@ invalidate cache
+	mrc	p15, 0, ip, c1, c0, 0		@ get ctrl register
+	bic	ip, ip, #0x0000000c		@ ............wc..
+	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
+	ret	r0
+ENDPROC(cpu_arm740_reset)
+	.popsection
+
+	.type	__arm740_setup, #function
+__arm740_setup:
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c0, 0		@ invalidate caches
+
+	mcr	p15, 0, r0, c6, c3		@ disable area 3~7
+	mcr	p15, 0, r0, c6, c4
+	mcr	p15, 0, r0, c6, c5
+	mcr	p15, 0, r0, c6, c6
+	mcr	p15, 0, r0, c6, c7
+
+	mov	r0, #0x0000003F			@ base = 0, size = 4GB
+	mcr	p15, 0, r0, c6,	c0		@ set area 0, default
+
+	ldr	r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM
+	ldr	r3, =(CONFIG_DRAM_SIZE >> 12)	@ size of RAM (must be >= 4KB)
+	mov	r4, #10				@ 11 is the minimum (4KB)
+1:	add	r4, r4, #1			@ area size *= 2
+	movs	r3, r3, lsr #1
+	bne	1b				@ count not zero r-shift
+	orr	r0, r0, r4, lsl #1		@ the area register value
+	orr	r0, r0, #1			@ set enable bit
+	mcr	p15, 0, r0, c6,	c1		@ set area 1, RAM
+
+	ldr	r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH
+	ldr	r3, =(CONFIG_FLASH_SIZE >> 12)	@ size of FLASH (must be >= 4KB)
+	cmp	r3, #0
+	moveq	r0, #0
+	beq	2f
+	mov	r4, #10				@ 11 is the minimum (4KB)
+1:	add	r4, r4, #1			@ area size *= 2
+	movs	r3, r3, lsr #1
+	bne	1b				@ count not zero r-shift
+	orr	r0, r0, r4, lsl #1		@ the area register value
+	orr	r0, r0, #1			@ set enable bit
+2:	mcr	p15, 0, r0, c6,	c2		@ set area 2, ROM/FLASH
+
+	mov	r0, #0x06
+	mcr	p15, 0, r0, c2, c0		@ Region 1&2 cacheable
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mov	r0, #0x00			@ disable whole write buffer
+#else
+	mov	r0, #0x02			@ Region 1 write buffered
+#endif
+	mcr	p15, 0, r0, c3, c0
+
+	mov	r0, #0x10000
+	sub	r0, r0, #1			@ r0 = 0xffff
+	mcr	p15, 0, r0, c5, c0		@ all read/write access
+
+	mrc	p15, 0, r0, c1, c0		@ get control register
+	bic	r0, r0, #0x3F000000		@ set to standard caching mode
+						@ need some benchmark
+	orr	r0, r0, #0x0000000d		@ MPU/Cache/WB
+
+	ret	lr
+
+	.size	__arm740_setup, . - __arm740_setup
+
+	__INITDATA
+
+	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+	define_processor_functions arm740, dabort=v4t_late_abort, pabort=legacy_pabort, nommu=1
+
+	.section ".rodata"
+
+	string	cpu_arch_name, "armv4"
+	string	cpu_elf_name, "v4"
+	string	cpu_arm740_name, "ARM740T"
+
+	.align
+
+	.section ".proc.info.init", #alloc
+	.type	__arm740_proc_info,#object
+__arm740_proc_info:
+	.long	0x41807400
+	.long	0xfffffff0
+	.long	0
+	.long	0
+	initfn	__arm740_setup, __arm740_proc_info
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_26BIT
+	.long	cpu_arm740_name
+	.long	arm740_processor_functions
+	.long	0
+	.long	0
+	.long	v4_cache_fns			@ cache model
+	.size	__arm740_proc_info, . - __arm740_proc_info
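__arm740_setup above programs the ARM740T protection unit rather than an MMU: each region register takes the 4KB-aligned base in bits [31:12], a size field N (taken here to mean a region of 2^(N+1) bytes, matching the "11 is the minimum (4KB)" comment above) in bits [5:1], and an enable bit. The shift-and-count loop derives N from CONFIG_DRAM_SIZE or CONFIG_FLASH_SIZE. A C sketch of the same computation follows; it is illustrative only and, like the assembly, is exact for power-of-two sizes.

/* Sketch of the protection-region value computed in __arm740_setup. */
static unsigned long arm740_region_value(unsigned long base, unsigned long size)
{
        unsigned long pages = size >> 12;       /* size in 4KB units, >= 1 */
        unsigned int n = 10;

        do {
                n++;                    /* "add r4, r4, #1": area size *= 2 */
                pages >>= 1;            /* "movs r3, r3, lsr #1" */
        } while (pages);

        return (base & 0xFFFFF000UL)    /* base[31:12] */
             | (n << 1)                 /* size field */
             | 1UL;                     /* enable bit */
}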
diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S
new file mode 100644
index 0000000..25472d9
--- /dev/null
+++ b/arch/arm/mm/proc-arm7tdmi.S
@@ -0,0 +1,114 @@
+/*
+ *  linux/arch/arm/mm/proc-arm7tdmi.S: utility functions for ARM7TDMI
+ *
+ *  Copyright (C) 2003-2006 Hyok S. Choi <hyok.choi@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/ptrace.h>
+
+#include "proc-macros.S"
+
+	.text
+/*
+ * cpu_arm7tdmi_proc_init()
+ * cpu_arm7tdmi_do_idle()
+ * cpu_arm7tdmi_dcache_clean_area()
+ * cpu_arm7tdmi_switch_mm()
+ *
+ * These are not required.
+ */
+ENTRY(cpu_arm7tdmi_proc_init)
+ENTRY(cpu_arm7tdmi_do_idle)
+ENTRY(cpu_arm7tdmi_dcache_clean_area)
+ENTRY(cpu_arm7tdmi_switch_mm)
+		ret	lr
+
+/*
+ * cpu_arm7tdmi_proc_fin()
+ */
+ENTRY(cpu_arm7tdmi_proc_fin)
+		ret	lr
+
+/*
+ * Function: cpu_arm7tdmi_reset(loc)
+ * Params  : loc(r0)	address to jump to
+ * Purpose : Sets up everything for a reset and jumps to the location for soft reset.
+ */
+		.pushsection	.idmap.text, "ax"
+ENTRY(cpu_arm7tdmi_reset)
+		ret	r0
+ENDPROC(cpu_arm7tdmi_reset)
+		.popsection
+
+		.type	__arm7tdmi_setup, #function
+__arm7tdmi_setup:
+		ret	lr
+		.size	__arm7tdmi_setup, . - __arm7tdmi_setup
+
+		__INITDATA
+
+		@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+		define_processor_functions arm7tdmi, dabort=v4t_late_abort, pabort=legacy_pabort, nommu=1
+
+		.section ".rodata"
+
+		string	cpu_arch_name, "armv4t"
+		string	cpu_elf_name, "v4"
+		string	cpu_arm7tdmi_name, "ARM7TDMI"
+		string	cpu_triscenda7_name, "Triscend-A7x"
+		string	cpu_at91_name, "Atmel-AT91M40xxx"
+		string	cpu_s3c3410_name, "Samsung-S3C3410"
+		string	cpu_s3c44b0x_name, "Samsung-S3C44B0x"
+		string	cpu_s3c4510b_name, "Samsung-S3C4510B"
+		string	cpu_s3c4530_name, "Samsung-S3C4530"
+		string	cpu_netarm_name, "NETARM"
+
+		.align
+
+		.section ".proc.info.init", #alloc
+
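+@ Each entry emitted below mirrors struct proc_info_list (<asm/procinfo.h>):
+@ CPU id value/mask, MMU flags for memory and I/O section mappings (zero
+@ here, no MMU), the setup-function reference emitted by initfn, the
+@ architecture and ELF names, HWCAP flags, the human-readable CPU name,
+@ and the processor, TLB, user-copy and cache function tables (TLB and
+@ user-copy are zero on this noMMU core).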
+.macro arm7tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, \
+	extra_hwcaps=0
+		.type	__\name\()_proc_info, #object
+__\name\()_proc_info:
+		.long	\cpu_val
+		.long	\cpu_mask
+		.long	0
+		.long	0
+		initfn	__arm7tdmi_setup, __\name\()_proc_info
+		.long	cpu_arch_name
+		.long	cpu_elf_name
+		.long	HWCAP_SWP | HWCAP_26BIT | ( \extra_hwcaps )
+		.long	\cpu_name
+		.long	arm7tdmi_processor_functions
+		.long	0
+		.long	0
+		.long	v4_cache_fns
+		.size	__\name\()_proc_info, . - __\name\()_proc_info
+.endm
+
+		arm7tdmi_proc_info arm7tdmi, 0x41007700, 0xfff8ff00, \
+			cpu_arm7tdmi_name
+		arm7tdmi_proc_info triscenda7, 0x0001d2ff, 0x0001ffff, \
+			cpu_triscenda7_name, extra_hwcaps=HWCAP_THUMB
+		arm7tdmi_proc_info at91, 0x14000040, 0xfff000e0, \
+			cpu_at91_name, extra_hwcaps=HWCAP_THUMB
+		arm7tdmi_proc_info s3c4510b, 0x36365000, 0xfffff000, \
+			cpu_s3c4510b_name, extra_hwcaps=HWCAP_THUMB
+		arm7tdmi_proc_info s3c4530, 0x4c000000, 0xfff000e0, \
+			cpu_s3c4530_name, extra_hwcaps=HWCAP_THUMB
+		arm7tdmi_proc_info s3c3410, 0x34100000, 0xffff0000, \
+			cpu_s3c3410_name, extra_hwcaps=HWCAP_THUMB
+		arm7tdmi_proc_info s3c44b0x, 0x44b00000, 0xffff0000, \
+			cpu_s3c44b0x_name, extra_hwcaps=HWCAP_THUMB
diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S
new file mode 100644
index 0000000..7a14bd4
--- /dev/null
+++ b/arch/arm/mm/proc-arm920.S
@@ -0,0 +1,480 @@
+/*
+ *  linux/arch/arm/mm/proc-arm920.S: MMU functions for ARM920
+ *
+ *  Copyright (C) 1999,2000 ARM Limited
+ *  Copyright (C) 2000 Deep Blue Solutions Ltd.
+ *  hacked for non-paged-MM by Hyok S. Choi, 2003.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ *
+ * These are the low level assembler for performing cache and TLB
+ * functions on the arm920.
+ *
+ *  CONFIG_CPU_ARM920_CPU_IDLE -> nohlt
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include "proc-macros.S"
+
+/*
+ * The size of one data cache line.
+ */
+#define CACHE_DLINESIZE	32
+
+/*
+ * The number of data cache segments.
+ */
+#define CACHE_DSEGMENTS	8
+
+/*
+ * The number of lines in a cache segment.
+ */
+#define CACHE_DENTRIES	64
+
+/*
+ * This is the size at which it becomes more efficient to
+ * clean the whole cache, rather than using the individual
+ * cache line maintenance instructions.
+ */
+#define CACHE_DLIMIT	65536
+
+
+	.text
+/*
+ * cpu_arm920_proc_init()
+ */
+ENTRY(cpu_arm920_proc_init)
+	ret	lr
+
+/*
+ * cpu_arm920_proc_fin()
+ */
+ENTRY(cpu_arm920_proc_fin)
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1000			@ ...i............
+	bic	r0, r0, #0x000e			@ ............wca.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ret	lr
+
+/*
+ * cpu_arm920_reset(loc)
+ *
+ * Perform a soft reset of the system.  Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ */
+	.align	5
+	.pushsection	.idmap.text, "ax"
+ENTRY(cpu_arm920_reset)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I,D caches
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+#ifdef CONFIG_MMU
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+#endif
+	mrc	p15, 0, ip, c1, c0, 0		@ ctrl register
+	bic	ip, ip, #0x000f			@ ............wcam
+	bic	ip, ip, #0x1100			@ ...i...s........
+	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
+	ret	r0
+ENDPROC(cpu_arm920_reset)
+	.popsection
+
+/*
+ * cpu_arm920_do_idle()
+ */
+	.align	5
+ENTRY(cpu_arm920_do_idle)
+	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
+	ret	lr
+
+
+#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
+
+/*
+ *	flush_icache_all()
+ *
+ *	Unconditionally clean and invalidate the entire icache.
+ */
+ENTRY(arm920_flush_icache_all)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	ret	lr
+ENDPROC(arm920_flush_icache_all)
+
+/*
+ *	flush_user_cache_all()
+ *
+ *	Invalidate all cache entries in a particular address
+ *	space.
+ */
+ENTRY(arm920_flush_user_cache_all)
+	/* FALLTHROUGH */
+
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(arm920_flush_kern_cache_all)
+	mov	r2, #VM_EXEC
+	mov	ip, #0
+__flush_whole_cache:
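+	@ Walk the whole D-cache by index: the c7, c14, 2 operand packs the
+	@ entry index in bits [31:26] and the segment in bits [7:5], so r1
+	@ steps the segment and r3 the index through all 8 x 64 entries.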
+	mov	r1, #(CACHE_DSEGMENTS - 1) << 5	@ 8 segments
+1:	orr	r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
+2:	mcr	p15, 0, r3, c7, c14, 2		@ clean+invalidate D index
+	subs	r3, r3, #1 << 26
+	bcs	2b				@ entries 63 to 0
+	subs	r1, r1, #1 << 5
+	bcs	1b				@ segments 7 to 0
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	flush_user_cache_range(start, end, flags)
+ *
+ *	Invalidate a range of cache entries in the specified
+ *	address space.
+ *
+ *	- start	- start address (inclusive)
+ *	- end	- end address (exclusive)
+ *	- flags	- vm_flags for address space
+ */
+ENTRY(arm920_flush_user_cache_range)
+	mov	ip, #0
+	sub	r3, r1, r0			@ calculate total size
+	cmp	r3, #CACHE_DLIMIT
+	bhs	__flush_whole_cache
+
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start, end.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm920_coherent_kern_range)
+	/* FALLTHROUGH */
+
+/*
+ *	coherent_user_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start, end.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm920_coherent_user_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	r0, #0
+	ret	lr
+
+/*
+ *	flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure no D cache aliasing occurs, either with itself or
+ *	the I cache
+ *
+ *	- addr	- kernel address
+ *	- size	- region size
+ */
+ENTRY(arm920_flush_kern_dcache_area)
+	add	r1, r0, r1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_inv_range(start, end)
+ *
+ *	Invalidate (discard) the specified virtual address range.
+ *	May not write back any entries.  If 'start' or 'end'
+ *	are not cache line aligned, those lines must be written
+ *	back.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+arm920_dma_inv_range:
+	tst	r0, #CACHE_DLINESIZE - 1
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r0, c7, c10, 1		@ clean D entry
+	tst	r1, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r1, c7, c10, 1		@ clean D entry
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_clean_range(start, end)
+ *
+ *	Clean the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+arm920_dma_clean_range:
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_flush_range(start, end)
+ *
+ *	Clean and invalidate the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm920_dma_flush_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(arm920_dma_map_area)
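+	@ Pick the maintenance op from the DMA direction in r2.  With the
+	@ kernel's enum ordering (BIDIRECTIONAL=0, TO_DEVICE=1, FROM_DEVICE=2)
+	@ the cmp/beq/bcs below give: TO_DEVICE -> clean, FROM_DEVICE ->
+	@ invalidate, BIDIRECTIONAL -> clean+invalidate.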
+	add	r1, r1, r0
+	cmp	r2, #DMA_TO_DEVICE
+	beq	arm920_dma_clean_range
+	bcs	arm920_dma_inv_range
+	b	arm920_dma_flush_range
+ENDPROC(arm920_dma_map_area)
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(arm920_dma_unmap_area)
+	ret	lr
+ENDPROC(arm920_dma_unmap_area)
+
+	.globl	arm920_flush_kern_cache_louis
+	.equ	arm920_flush_kern_cache_louis, arm920_flush_kern_cache_all
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions arm920
+#endif
+
+
+ENTRY(cpu_arm920_dcache_clean_area)
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	subs	r1, r1, #CACHE_DLINESIZE
+	bhi	1b
+	ret	lr
+
+/* =============================== PageTable ============================== */
+
+/*
+ * cpu_arm920_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+	.align	5
+ENTRY(cpu_arm920_switch_mm)
+#ifdef CONFIG_MMU
+	mov	ip, #0
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mcr	p15, 0, ip, c7, c6, 0		@ invalidate D cache
+#else
+@ && 'Clean & Invalidate whole DCache'
+@ && Re-written to use Index Ops.
+@ && Uses registers r1, r3 and ip
+
+	mov	r1, #(CACHE_DSEGMENTS - 1) << 5	@ 8 segments
+1:	orr	r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
+2:	mcr	p15, 0, r3, c7, c14, 2		@ clean & invalidate D index
+	subs	r3, r3, #1 << 26
+	bcs	2b				@ entries 63 to 0
+	subs	r1, r1, #1 << 5
+	bcs	1b				@ segments 7 to 0
+#endif
+	mcr	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+#endif
+	ret	lr
+
+/*
+ * cpu_arm920_set_pte(ptep, pte, ext)
+ *
+ * Set a PTE and flush it out
+ */
+	.align	5
+ENTRY(cpu_arm920_set_pte_ext)
+#ifdef CONFIG_MMU
+	armv3_set_pte_ext
+	mov	r0, r0
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+#endif
+	ret	lr
+
+/* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */
+.globl	cpu_arm920_suspend_size
+.equ	cpu_arm920_suspend_size, 4 * 3
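+@ three 32-bit words: the process ID (c13), domain access control (c3)
+@ and control (c1) registers saved by cpu_arm920_do_suspend below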
+#ifdef CONFIG_ARM_CPU_SUSPEND
+ENTRY(cpu_arm920_do_suspend)
+	stmfd	sp!, {r4 - r6, lr}
+	mrc	p15, 0, r4, c13, c0, 0	@ PID
+	mrc	p15, 0, r5, c3, c0, 0	@ Domain ID
+	mrc	p15, 0, r6, c1, c0, 0	@ Control register
+	stmia	r0, {r4 - r6}
+	ldmfd	sp!, {r4 - r6, pc}
+ENDPROC(cpu_arm920_do_suspend)
+
+ENTRY(cpu_arm920_do_resume)
+	mov	ip, #0
+	mcr	p15, 0, ip, c8, c7, 0	@ invalidate I+D TLBs
+	mcr	p15, 0, ip, c7, c7, 0	@ invalidate I+D caches
+	ldmia	r0, {r4 - r6}
+	mcr	p15, 0, r4, c13, c0, 0	@ PID
+	mcr	p15, 0, r5, c3, c0, 0	@ Domain ID
+	mcr	p15, 0, r1, c2, c0, 0	@ TTB address
+	mov	r0, r6			@ control register
+	b	cpu_resume_mmu
+ENDPROC(cpu_arm920_do_resume)
+#endif
+
+	.type	__arm920_setup, #function
+__arm920_setup:
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c7		@ invalidate I,D caches on v4
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer on v4
+#ifdef CONFIG_MMU
+	mcr	p15, 0, r0, c8, c7		@ invalidate I,D TLBs on v4
+#endif
+	adr	r5, arm920_crval
+	ldmia	r5, {r5, r6}
+	mrc	p15, 0, r0, c1, c0		@ get control register v4
+	bic	r0, r0, r5
+	orr	r0, r0, r6
+	ret	lr
+	.size	__arm920_setup, . - __arm920_setup
+
+	/*
+	 *  R
+	 * .RVI ZFRS BLDP WCAM
+	 * ..11 0001 ..11 0101
+	 * 
+	 */
+	.type	arm920_crval, #object
+arm920_crval:
+	crval	clear=0x00003f3f, mmuset=0x00003135, ucset=0x00001130
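+	@ crval (see proc-macros.S) emits the clear mask followed by either
+	@ mmuset (MMU kernels) or ucset (noMMU); __arm920_setup loads the
+	@ pair above and applies them with bic/orr.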
+
+	__INITDATA
+	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+	define_processor_functions arm920, dabort=v4t_early_abort, pabort=legacy_pabort, suspend=1
+
+	.section ".rodata"
+
+	string	cpu_arch_name, "armv4t"
+	string	cpu_elf_name, "v4"
+	string	cpu_arm920_name, "ARM920T"
+
+	.align
+
+	.section ".proc.info.init", #alloc
+
+	.type	__arm920_proc_info,#object
+__arm920_proc_info:
+	.long	0x41009200
+	.long	0xff00fff0
+	.long   PMD_TYPE_SECT | \
+		PMD_SECT_BUFFERABLE | \
+		PMD_SECT_CACHEABLE | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	.long   PMD_TYPE_SECT | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	initfn	__arm920_setup, __arm920_proc_info
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
+	.long	cpu_arm920_name
+	.long	arm920_processor_functions
+	.long	v4wbi_tlb_fns
+	.long	v4wb_user_fns
+#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
+	.long	arm920_cache_fns
+#else
+	.long	v4wt_cache_fns
+#endif
+	.size	__arm920_proc_info, . - __arm920_proc_info
diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S
new file mode 100644
index 0000000..edccfcd
--- /dev/null
+++ b/arch/arm/mm/proc-arm922.S
@@ -0,0 +1,458 @@
+/*
+ *  linux/arch/arm/mm/proc-arm922.S: MMU functions for ARM922
+ *
+ *  Copyright (C) 1999,2000 ARM Limited
+ *  Copyright (C) 2000 Deep Blue Solutions Ltd.
+ *  Copyright (C) 2001 Altera Corporation
+ *  hacked for non-paged-MM by Hyok S. Choi, 2003.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ *
+ * These are the low level assembler for performing cache and TLB
+ * functions on the arm922.
+ *
+ *  CONFIG_CPU_ARM922_CPU_IDLE -> nohlt
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include "proc-macros.S"
+
+/*
+ * The size of one data cache line.
+ */
+#define CACHE_DLINESIZE	32
+
+/*
+ * The number of data cache segments.
+ */
+#define CACHE_DSEGMENTS	4
+
+/*
+ * The number of lines in a cache segment.
+ */
+#define CACHE_DENTRIES	64
+
+/*
+ * This is the size at which it becomes more efficient to
+ * clean the whole cache, rather than using the individual
+ * cache line maintenance instructions.  (I think this should
+ * be 32768).
+ */
+#define CACHE_DLIMIT	8192
+
+
+	.text
+/*
+ * cpu_arm922_proc_init()
+ */
+ENTRY(cpu_arm922_proc_init)
+	ret	lr
+
+/*
+ * cpu_arm922_proc_fin()
+ */
+ENTRY(cpu_arm922_proc_fin)
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1000			@ ...i............
+	bic	r0, r0, #0x000e			@ ............wca.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ret	lr
+
+/*
+ * cpu_arm922_reset(loc)
+ *
+ * Perform a soft reset of the system.  Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ */
+	.align	5
+	.pushsection	.idmap.text, "ax"
+ENTRY(cpu_arm922_reset)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I,D caches
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+#ifdef CONFIG_MMU
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+#endif
+	mrc	p15, 0, ip, c1, c0, 0		@ ctrl register
+	bic	ip, ip, #0x000f			@ ............wcam
+	bic	ip, ip, #0x1100			@ ...i...s........
+	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
+	ret	r0
+ENDPROC(cpu_arm922_reset)
+	.popsection
+
+/*
+ * cpu_arm922_do_idle()
+ */
+	.align	5
+ENTRY(cpu_arm922_do_idle)
+	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
+	ret	lr
+
+
+#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
+
+/*
+ *	flush_icache_all()
+ *
+ *	Unconditionally clean and invalidate the entire icache.
+ */
+ENTRY(arm922_flush_icache_all)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	ret	lr
+ENDPROC(arm922_flush_icache_all)
+
+/*
+ *	flush_user_cache_all()
+ *
+ *	Clean and invalidate all cache entries in a particular
+ *	address space.
+ */
+ENTRY(arm922_flush_user_cache_all)
+	/* FALLTHROUGH */
+
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(arm922_flush_kern_cache_all)
+	mov	r2, #VM_EXEC
+	mov	ip, #0
+__flush_whole_cache:
+	mov	r1, #(CACHE_DSEGMENTS - 1) << 5	@ 4 segments
+1:	orr	r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
+2:	mcr	p15, 0, r3, c7, c14, 2		@ clean+invalidate D index
+	subs	r3, r3, #1 << 26
+	bcs	2b				@ entries 63 to 0
+	subs	r1, r1, #1 << 5
+	bcs	1b				@ segments 3 to 0
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	flush_user_cache_range(start, end, flags)
+ *
+ *	Clean and invalidate a range of cache entries in the
+ *	specified address range.
+ *
+ *	- start	- start address (inclusive)
+ *	- end	- end address (exclusive)
+ *	- flags	- vm_flags describing address space
+ */
+ENTRY(arm922_flush_user_cache_range)
+	mov	ip, #0
+	sub	r3, r1, r0			@ calculate total size
+	cmp	r3, #CACHE_DLIMIT
+	bhs	__flush_whole_cache
+
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start, end.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm922_coherent_kern_range)
+	/* FALLTHROUGH */
+
+/*
+ *	coherent_user_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start, end.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm922_coherent_user_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	r0, #0
+	ret	lr
+
+/*
+ *	flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure no D cache aliasing occurs, either with itself or
+ *	the I cache
+ *
+ *	- addr	- kernel address
+ *	- size	- region size
+ */
+ENTRY(arm922_flush_kern_dcache_area)
+	add	r1, r0, r1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_inv_range(start, end)
+ *
+ *	Invalidate (discard) the specified virtual address range.
+ *	May not write back any entries.  If 'start' or 'end'
+ *	are not cache line aligned, those lines must be written
+ *	back.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+arm922_dma_inv_range:
+	tst	r0, #CACHE_DLINESIZE - 1
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r0, c7, c10, 1		@ clean D entry
+	tst	r1, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r1, c7, c10, 1		@ clean D entry
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_clean_range(start, end)
+ *
+ *	Clean the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+arm922_dma_clean_range:
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_flush_range(start, end)
+ *
+ *	Clean and invalidate the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm922_dma_flush_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(arm922_dma_map_area)
+	add	r1, r1, r0
+	cmp	r2, #DMA_TO_DEVICE
+	beq	arm922_dma_clean_range
+	bcs	arm922_dma_inv_range
+	b	arm922_dma_flush_range
+ENDPROC(arm922_dma_map_area)
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(arm922_dma_unmap_area)
+	ret	lr
+ENDPROC(arm922_dma_unmap_area)
+
+	.globl	arm922_flush_kern_cache_louis
+	.equ	arm922_flush_kern_cache_louis, arm922_flush_kern_cache_all
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions arm922
+#endif
+
+
+ENTRY(cpu_arm922_dcache_clean_area)
+#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	subs	r1, r1, #CACHE_DLINESIZE
+	bhi	1b
+#endif
+	ret	lr
+
+/* =============================== PageTable ============================== */
+
+/*
+ * cpu_arm922_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+	.align	5
+ENTRY(cpu_arm922_switch_mm)
+#ifdef CONFIG_MMU
+	mov	ip, #0
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mcr	p15, 0, ip, c7, c6, 0		@ invalidate D cache
+#else
+@ && 'Clean & Invalidate whole DCache'
+@ && Re-written to use Index Ops.
+@ && Uses registers r1, r3 and ip
+
+	mov	r1, #(CACHE_DSEGMENTS - 1) << 5	@ 4 segments
+1:	orr	r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
+2:	mcr	p15, 0, r3, c7, c14, 2		@ clean & invalidate D index
+	subs	r3, r3, #1 << 26
+	bcs	2b				@ entries 63 to 0
+	subs	r1, r1, #1 << 5
+	bcs	1b				@ segments 3 to 0
+#endif
+	mcr	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+#endif
+	ret	lr
+
+/*
+ * cpu_arm922_set_pte_ext(ptep, pte, ext)
+ *
+ * Set a PTE and flush it out
+ */
+	.align	5
+ENTRY(cpu_arm922_set_pte_ext)
+#ifdef CONFIG_MMU
+	armv3_set_pte_ext
+	mov	r0, r0
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+#endif /* CONFIG_MMU */
+	ret	lr
+
+	.type	__arm922_setup, #function
+__arm922_setup:
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c7		@ invalidate I,D caches on v4
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer on v4
+#ifdef CONFIG_MMU
+	mcr	p15, 0, r0, c8, c7		@ invalidate I,D TLBs on v4
+#endif
+	adr	r5, arm922_crval
+	ldmia	r5, {r5, r6}
+	mrc	p15, 0, r0, c1, c0		@ get control register v4
+	bic	r0, r0, r5
+	orr	r0, r0, r6
+	ret	lr
+	.size	__arm922_setup, . - __arm922_setup
+
+	/*
+	 *  R
+	 * .RVI ZFRS BLDP WCAM
+	 * ..11 0001 ..11 0101
+	 * 
+	 */
+	.type	arm922_crval, #object
+arm922_crval:
+	crval	clear=0x00003f3f, mmuset=0x00003135, ucset=0x00001130
+
+	__INITDATA
+	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+	define_processor_functions arm922, dabort=v4t_early_abort, pabort=legacy_pabort
+
+	.section ".rodata"
+
+	string	cpu_arch_name, "armv4t"
+	string	cpu_elf_name, "v4"
+	string	cpu_arm922_name, "ARM922T"
+
+	.align
+
+	.section ".proc.info.init", #alloc
+
+	.type	__arm922_proc_info,#object
+__arm922_proc_info:
+	.long	0x41009220
+	.long	0xff00fff0
+	.long   PMD_TYPE_SECT | \
+		PMD_SECT_BUFFERABLE | \
+		PMD_SECT_CACHEABLE | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	.long   PMD_TYPE_SECT | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	initfn	__arm922_setup, __arm922_proc_info
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
+	.long	cpu_arm922_name
+	.long	arm922_processor_functions
+	.long	v4wbi_tlb_fns
+	.long	v4wb_user_fns
+#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
+	.long	arm922_cache_fns
+#else
+	.long	v4wt_cache_fns
+#endif
+	.size	__arm922_proc_info, . - __arm922_proc_info
diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S
new file mode 100644
index 0000000..32a47cc
--- /dev/null
+++ b/arch/arm/mm/proc-arm925.S
@@ -0,0 +1,523 @@
+/*
+ *  linux/arch/arm/mm/arm925.S: MMU functions for ARM925
+ *
+ *  Copyright (C) 1999,2000 ARM Limited
+ *  Copyright (C) 2000 Deep Blue Solutions Ltd.
+ *  Copyright (C) 2002 RidgeRun, Inc.
+ *  Copyright (C) 2002-2003 MontaVista Software, Inc.
+ *
+ *  Update for Linux-2.6 and cache flush improvements
+ *  Copyright (C) 2004 Nokia Corporation by Tony Lindgren <tony@atomide.com>
+ *
+ *  hacked for non-paged-MM by Hyok S. Choi, 2004.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ *
+ * These are the low level assembler for performing cache and TLB
+ * functions on the arm925.
+ *
+ *  CONFIG_CPU_ARM925_CPU_IDLE -> nohlt
+ *
+ * Some additional notes based on deciphering the TI TRM on OMAP-5910:
+ *
+ * NOTE1: The TI925T Configuration Register bit "D-cache clean and flush
+ *	  entry mode" must be 0 to flush the entries in both segments
+ *	  at once. This is the default value. See TRM 2-20 and 2-24 for
+ *	  more information.
+ *
+ * NOTE2: Default is the "D-cache clean and flush entry mode". It looks
+ *	  like the "Transparent mode" must be on for partial cache flushes
+ *	  to work in this mode. This mode only works with 16-bit external
+ *	  memory. See TRM 2-24 for more information.
+ *
+ * NOTE3: Write-back cache flushing seems to be flaky with devices using
+ *        direct memory access, such as USB OHCI. The workaround is to use
+ *        write-through cache with CONFIG_CPU_DCACHE_WRITETHROUGH (this is
+ *        the default for OMAP-1510).
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include "proc-macros.S"
+
+/*
+ * The size of one data cache line.
+ */
+#define CACHE_DLINESIZE	16
+
+/*
+ * The number of data cache segments.
+ */
+#define CACHE_DSEGMENTS	2
+
+/*
+ * The number of lines in a cache segment.
+ */
+#define CACHE_DENTRIES	256
+
+/*
+ * This is the size at which it becomes more efficient to
+ * clean the whole cache, rather than using the individual
+ * cache line maintenance instructions.
+ */
+#define CACHE_DLIMIT	8192
+
+	.text
+/*
+ * cpu_arm925_proc_init()
+ */
+ENTRY(cpu_arm925_proc_init)
+	ret	lr
+
+/*
+ * cpu_arm925_proc_fin()
+ */
+ENTRY(cpu_arm925_proc_fin)
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1000			@ ...i............
+	bic	r0, r0, #0x000e			@ ............wca.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ret	lr
+
+/*
+ * cpu_arm925_reset(loc)
+ *
+ * Perform a soft reset of the system.  Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ */
+	.align	5
+	.pushsection	.idmap.text, "ax"
+ENTRY(cpu_arm925_reset)
+	/* Send software reset to MPU and DSP */
+	mov	ip, #0xff000000
+	orr	ip, ip, #0x00fe0000
+	orr	ip, ip, #0x0000ce00
+	mov	r4, #1
+	strh	r4, [ip, #0x10]
+ENDPROC(cpu_arm925_reset)
+	.popsection
+
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I,D caches
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+#ifdef CONFIG_MMU
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+#endif
+	mrc	p15, 0, ip, c1, c0, 0		@ ctrl register
+	bic	ip, ip, #0x000f			@ ............wcam
+	bic	ip, ip, #0x1100			@ ...i...s........
+	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
+	ret	r0
+
+/*
+ * cpu_arm925_do_idle()
+ *
+ * Called with IRQs disabled
+ */
+	.align	10
+ENTRY(cpu_arm925_do_idle)
+	mov	r0, #0
+	mrc	p15, 0, r1, c1, c0, 0		@ Read control register
+	mcr	p15, 0, r0, c7, c10, 4		@ Drain write buffer
+	bic	r2, r1, #1 << 12
+	mcr	p15, 0, r2, c1, c0, 0		@ Disable I cache
+	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
+	mcr	p15, 0, r1, c1, c0, 0		@ Restore ICache enable
+	ret	lr
+
+/*
+ *	flush_icache_all()
+ *
+ *	Unconditionally clean and invalidate the entire icache.
+ */
+ENTRY(arm925_flush_icache_all)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	ret	lr
+ENDPROC(arm925_flush_icache_all)
+
+/*
+ *	flush_user_cache_all()
+ *
+ *	Clean and invalidate all cache entries in a particular
+ *	address space.
+ */
+ENTRY(arm925_flush_user_cache_all)
+	/* FALLTHROUGH */
+
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(arm925_flush_kern_cache_all)
+	mov	r2, #VM_EXEC
+	mov	ip, #0
+__flush_whole_cache:
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mcr	p15, 0, ip, c7, c6, 0		@ invalidate D cache
+#else
+	/* Flush entries in both segments at once, see NOTE1 above */
+	mov	r3, #(CACHE_DENTRIES - 1) << 4	@ 256 entries in segment
+2:	mcr	p15, 0, r3, c7, c14, 2		@ clean+invalidate D index
+	subs	r3, r3, #1 << 4
+	bcs	2b				@ entries 255 to 0
+#endif
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	flush_user_cache_range(start, end, flags)
+ *
+ *	Clean and invalidate a range of cache entries in the
+ *	specified address range.
+ *
+ *	- start	- start address (inclusive)
+ *	- end	- end address (exclusive)
+ *	- flags	- vm_flags describing address space
+ */
+ENTRY(arm925_flush_user_cache_range)
+	mov	ip, #0
+	sub	r3, r1, r0			@ calculate total size
+	cmp	r3, #CACHE_DLIMIT
+	bgt	__flush_whole_cache
+1:	tst	r2, #VM_EXEC
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+#else
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D entry
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D entry
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+#endif
+	cmp	r0, r1
+	blo	1b
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start, end.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm925_coherent_kern_range)
+	/* FALLTHROUGH */
+
+/*
+ *	coherent_user_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start, end.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm925_coherent_user_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	r0, #0
+	ret	lr
+
+/*
+ *	flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure no D cache aliasing occurs, either with itself or
+ *	the I cache
+ *
+ *	- addr	- kernel address
+ *	- size	- region size
+ */
+ENTRY(arm925_flush_kern_dcache_area)
+	add	r1, r0, r1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_inv_range(start, end)
+ *
+ *	Invalidate (discard) the specified virtual address range.
+ *	May not write back any entries.  If 'start' or 'end'
+ *	are not cache line aligned, those lines must be written
+ *	back.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+arm925_dma_inv_range:
+#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
+	tst	r0, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r0, c7, c10, 1		@ clean D entry
+	tst	r1, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r1, c7, c10, 1		@ clean D entry
+#endif
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_clean_range(start, end)
+ *
+ *	Clean the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+arm925_dma_clean_range:
+#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_flush_range(start, end)
+ *
+ *	Clean and invalidate the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm925_dma_flush_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:
+#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+#else
+	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+#endif
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(arm925_dma_map_area)
+	add	r1, r1, r0
+	cmp	r2, #DMA_TO_DEVICE
+	beq	arm925_dma_clean_range
+	bcs	arm925_dma_inv_range
+	b	arm925_dma_flush_range
+ENDPROC(arm925_dma_map_area)
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(arm925_dma_unmap_area)
+	ret	lr
+ENDPROC(arm925_dma_unmap_area)
+
+	.globl	arm925_flush_kern_cache_louis
+	.equ	arm925_flush_kern_cache_louis, arm925_flush_kern_cache_all
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions arm925
+
+ENTRY(cpu_arm925_dcache_clean_area)
+#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	subs	r1, r1, #CACHE_DLINESIZE
+	bhi	1b
+#endif
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/* =============================== PageTable ============================== */
+
+/*
+ * cpu_arm925_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+	.align	5
+ENTRY(cpu_arm925_switch_mm)
+#ifdef CONFIG_MMU
+	mov	ip, #0
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mcr	p15, 0, ip, c7, c6, 0		@ invalidate D cache
+#else
+	/* Flush entries in both segments at once, see NOTE1 above */
+	mov	r3, #(CACHE_DENTRIES - 1) << 4	@ 256 entries in segment
+2:	mcr	p15, 0, r3, c7, c14, 2		@ clean & invalidate D index
+	subs	r3, r3, #1 << 4
+	bcs	2b				@ entries 255 to 0
+#endif
+	mcr	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+#endif
+	ret	lr
+
+/*
+ * cpu_arm925_set_pte_ext(ptep, pte, ext)
+ *
+ * Set a PTE and flush it out
+ */
+	.align	5
+ENTRY(cpu_arm925_set_pte_ext)
+#ifdef CONFIG_MMU
+	armv3_set_pte_ext
+	mov	r0, r0
+#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+#endif
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+#endif /* CONFIG_MMU */
+	ret	lr
+
+	.type	__arm925_setup, #function
+__arm925_setup:
+	mov	r0, #0
+
+	/* Transparent on, D-cache clean & flush mode. See NOTE2 above */
+	orr	r0, r0, #1 << 1			@ transparent mode on
+	mcr	p15, 0, r0, c15, c1, 0		@ write TI config register
+
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c7		@ invalidate I,D caches on v4
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer on v4
+#ifdef CONFIG_MMU
+	mcr	p15, 0, r0, c8, c7		@ invalidate I,D TLBs on v4
+#endif
+
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mov	r0, #4				@ disable write-back on caches explicitly
+	mcr	p15, 7, r0, c15, c0, 0
+#endif
+
+	adr	r5, arm925_crval
+	ldmia	r5, {r5, r6}
+	mrc	p15, 0, r0, c1, c0		@ get control register v4
+	bic	r0, r0, r5
+	orr	r0, r0, r6
+#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
+	orr	r0, r0, #0x4000			@ .1.. .... .... ....
+#endif
+	ret	lr
+	.size	__arm925_setup, . - __arm925_setup
+
+	/*
+	 *  R
+	 * .RVI ZFRS BLDP WCAM
+	 * .011 0001 ..11 1101
+	 * 
+	 */
+	.type	arm925_crval, #object
+arm925_crval:
+	crval	clear=0x00007f3f, mmuset=0x0000313d, ucset=0x00001130
+
+	__INITDATA
+	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+	define_processor_functions arm925, dabort=v4t_early_abort, pabort=legacy_pabort
+
+	.section ".rodata"
+
+	string	cpu_arch_name, "armv4t"
+	string	cpu_elf_name, "v4"
+	string	cpu_arm925_name, "ARM925T"
+
+	.align
+
+	.section ".proc.info.init", #alloc
+
+.macro arm925_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache
+	.type	__\name\()_proc_info,#object
+__\name\()_proc_info:
+	.long	\cpu_val
+	.long	\cpu_mask
+	.long   PMD_TYPE_SECT | \
+		PMD_SECT_CACHEABLE | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	.long   PMD_TYPE_SECT | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	initfn	__arm925_setup, __\name\()_proc_info
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
+	.long	cpu_arm925_name
+	.long	arm925_processor_functions
+	.long	v4wbi_tlb_fns
+	.long	v4wb_user_fns
+	.long	arm925_cache_fns
+	.size	__\name\()_proc_info, . - __\name\()_proc_info
+.endm
+
+	arm925_proc_info arm925, 0x54029250, 0xfffffff0, cpu_arm925_name
+	arm925_proc_info arm915, 0x54029150, 0xfffffff0, cpu_arm925_name
diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S
new file mode 100644
index 0000000..fb827c6
--- /dev/null
+++ b/arch/arm/mm/proc-arm926.S
@@ -0,0 +1,502 @@
+/*
+ *  linux/arch/arm/mm/proc-arm926.S: MMU functions for ARM926EJ-S
+ *
+ *  Copyright (C) 1999-2001 ARM Limited
+ *  Copyright (C) 2000 Deep Blue Solutions Ltd.
+ *  hacked for non-paged-MM by Hyok S. Choi, 2003.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ *
+ * These are the low level assembler for performing cache and TLB
+ * functions on the arm926.
+ *
+ *  CONFIG_CPU_ARM926_CPU_IDLE -> nohlt
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include "proc-macros.S"
+
+/*
+ * This is the maximum size of an area which will be invalidated
+ * using the single invalidate entry instructions.  Anything larger
+ * than this, and we go for the whole cache.
+ *
+ * This value should be chosen such that we choose the cheapest
+ * alternative.
+ */
+#define CACHE_DLIMIT	16384
+
+/*
+ * the cache line size of the I and D cache
+ */
+#define CACHE_DLINESIZE	32
+
+	.text
+/*
+ * cpu_arm926_proc_init()
+ */
+ENTRY(cpu_arm926_proc_init)
+	ret	lr
+
+/*
+ * cpu_arm926_proc_fin()
+ */
+ENTRY(cpu_arm926_proc_fin)
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1000			@ ...i............
+	bic	r0, r0, #0x000e			@ ............wca.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ret	lr
+
+/*
+ * cpu_arm926_reset(loc)
+ *
+ * Perform a soft reset of the system.  Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ */
+	.align	5
+	.pushsection	.idmap.text, "ax"
+ENTRY(cpu_arm926_reset)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I,D caches
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+#ifdef CONFIG_MMU
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+#endif
+	mrc	p15, 0, ip, c1, c0, 0		@ ctrl register
+	bic	ip, ip, #0x000f			@ ............wcam
+	bic	ip, ip, #0x1100			@ ...i...s........
+	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
+	ret	r0
+ENDPROC(cpu_arm926_reset)
+	.popsection
+
+/*
+ * cpu_arm926_do_idle()
+ *
+ * Called with IRQs disabled
+ */
+	.align	10
+ENTRY(cpu_arm926_do_idle)
+	mov	r0, #0
+	mrc	p15, 0, r1, c1, c0, 0		@ Read control register
+	mcr	p15, 0, r0, c7, c10, 4		@ Drain write buffer
+	bic	r2, r1, #1 << 12
+	mrs	r3, cpsr			@ Disable FIQs while Icache
+	orr	ip, r3, #PSR_F_BIT		@ is disabled
+	msr	cpsr_c, ip
+	mcr	p15, 0, r2, c1, c0, 0		@ Disable I cache
+	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
+	mcr	p15, 0, r1, c1, c0, 0		@ Restore ICache enable
+	msr	cpsr_c, r3			@ Restore FIQ state
+	ret	lr
+
+/*
+ *	flush_icache_all()
+ *
+ *	Unconditionally clean and invalidate the entire icache.
+ */
+ENTRY(arm926_flush_icache_all)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	ret	lr
+ENDPROC(arm926_flush_icache_all)
+
+/*
+ *	flush_user_cache_all()
+ *
+ *	Clean and invalidate all cache entries in a particular
+ *	address space.
+ */
+ENTRY(arm926_flush_user_cache_all)
+	/* FALLTHROUGH */
+
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(arm926_flush_kern_cache_all)
+	mov	r2, #VM_EXEC
+	mov	ip, #0
+__flush_whole_cache:
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mcr	p15, 0, ip, c7, c6, 0		@ invalidate D cache
+#else
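+	@ ARMv5 "test, clean and invalidate" op: with r15 as the destination
+	@ the result lands in the CPSR flags, so the loop below repeats until
+	@ the whole D-cache has been cleaned and invalidated.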
+1:	mrc	p15, 0, r15, c7, c14, 3 	@ test,clean,invalidate
+	bne	1b
+#endif
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	flush_user_cache_range(start, end, flags)
+ *
+ *	Clean and invalidate a range of cache entries in the
+ *	specified address range.
+ *
+ *	- start	- start address (inclusive)
+ *	- end	- end address (exclusive)
+ *	- flags	- vm_flags describing address space
+ */
+ENTRY(arm926_flush_user_cache_range)
+	mov	ip, #0
+	sub	r3, r1, r0			@ calculate total size
+	cmp	r3, #CACHE_DLIMIT
+	bgt	__flush_whole_cache
+1:	tst	r2, #VM_EXEC
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+#else
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D entry
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D entry
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+#endif
+	cmp	r0, r1
+	blo	1b
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start, end.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm926_coherent_kern_range)
+	/* FALLTHROUGH */
+
+/*
+ *	coherent_user_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start, end.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm926_coherent_user_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	r0, #0
+	ret	lr
+
+/*
+ *	flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure no D cache aliasing occurs, either with itself or
+ *	the I cache
+ *
+ *	- addr	- kernel address
+ *	- size	- region size
+ */
+ENTRY(arm926_flush_kern_dcache_area)
+	add	r1, r0, r1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_inv_range(start, end)
+ *
+ *	Invalidate (discard) the specified virtual address range.
+ *	May not write back any entries.  If 'start' or 'end'
+ *	are not cache line aligned, those lines must be written
+ *	back.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+arm926_dma_inv_range:
+#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
+	tst	r0, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r0, c7, c10, 1		@ clean D entry
+	tst	r1, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r1, c7, c10, 1		@ clean D entry
+#endif
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_clean_range(start, end)
+ *
+ *	Clean the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+arm926_dma_clean_range:
+#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_flush_range(start, end)
+ *
+ *	Clean and invalidate the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm926_dma_flush_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:
+#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+#else
+	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+#endif
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(arm926_dma_map_area)
+	add	r1, r1, r0
+	cmp	r2, #DMA_TO_DEVICE
+	beq	arm926_dma_clean_range
+	bcs	arm926_dma_inv_range
+	b	arm926_dma_flush_range
+ENDPROC(arm926_dma_map_area)
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(arm926_dma_unmap_area)
+	ret	lr
+ENDPROC(arm926_dma_unmap_area)
+
+	.globl	arm926_flush_kern_cache_louis
+	.equ	arm926_flush_kern_cache_louis, arm926_flush_kern_cache_all
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions arm926
+
+ENTRY(cpu_arm926_dcache_clean_area)
+#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	subs	r1, r1, #CACHE_DLINESIZE
+	bhi	1b
+#endif
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/* =============================== PageTable ============================== */
+
+/*
+ * cpu_arm926_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+	.align	5
+ENTRY(cpu_arm926_switch_mm)
+#ifdef CONFIG_MMU
+	mov	ip, #0
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mcr	p15, 0, ip, c7, c6, 0		@ invalidate D cache
+#else
+@ && 'Clean & Invalidate whole DCache'
+1:	mrc	p15, 0, r15, c7, c14, 3 	@ test,clean,invalidate
+	bne	1b
+#endif
+	mcr	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+#endif
+	ret	lr
+
+/*
+ * cpu_arm926_set_pte_ext(ptep, pte, ext)
+ *
+ * Set a PTE and flush it out
+ */
+	.align	5
+ENTRY(cpu_arm926_set_pte_ext)
+#ifdef CONFIG_MMU
+	armv3_set_pte_ext
+	mov	r0, r0
+#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+#endif
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+#endif
+	ret	lr
+
+/* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */
+.globl	cpu_arm926_suspend_size
+.equ	cpu_arm926_suspend_size, 4 * 3
+#ifdef CONFIG_ARM_CPU_SUSPEND
+ENTRY(cpu_arm926_do_suspend)
+	stmfd	sp!, {r4 - r6, lr}
+	mrc	p15, 0, r4, c13, c0, 0	@ PID
+	mrc	p15, 0, r5, c3, c0, 0	@ Domain ID
+	mrc	p15, 0, r6, c1, c0, 0	@ Control register
+	stmia	r0, {r4 - r6}
+	ldmfd	sp!, {r4 - r6, pc}
+ENDPROC(cpu_arm926_do_suspend)
+
+ENTRY(cpu_arm926_do_resume)
+	mov	ip, #0
+	mcr	p15, 0, ip, c8, c7, 0	@ invalidate I+D TLBs
+	mcr	p15, 0, ip, c7, c7, 0	@ invalidate I+D caches
+	ldmia	r0, {r4 - r6}
+	mcr	p15, 0, r4, c13, c0, 0	@ PID
+	mcr	p15, 0, r5, c3, c0, 0	@ Domain ID
+	mcr	p15, 0, r1, c2, c0, 0	@ TTB address
+	mov	r0, r6			@ control register
+	b	cpu_resume_mmu
+ENDPROC(cpu_arm926_do_resume)
+#endif
+
+	.type	__arm926_setup, #function
+__arm926_setup:
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c7		@ invalidate I,D caches on v4
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer on v4
+#ifdef CONFIG_MMU
+	mcr	p15, 0, r0, c8, c7		@ invalidate I,D TLBs on v4
+#endif
+
+
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mov	r0, #4				@ disable write-back on caches explicitly
+	mcr	p15, 7, r0, c15, c0, 0
+#endif 
+
+	adr	r5, arm926_crval
+	ldmia	r5, {r5, r6}
+	mrc	p15, 0, r0, c1, c0		@ get control register v4
+	bic	r0, r0, r5
+	orr	r0, r0, r6
+#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
+	orr	r0, r0, #0x4000			@ .1.. .... .... ....
+#endif
+	ret	lr
+	.size	__arm926_setup, . - __arm926_setup
+
+	/*
+	 *  R
+	 * .RVI ZFRS BLDP WCAM
+	 * .011 0001 ..11 0101
+	 * 
+	 */
+	.type	arm926_crval, #object
+arm926_crval:
+	crval	clear=0x00007f3f, mmuset=0x00003135, ucset=0x00001134
+
+	__INITDATA
+
+	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+	define_processor_functions arm926, dabort=v5tj_early_abort, pabort=legacy_pabort, suspend=1
+
+	.section ".rodata"
+
+	string	cpu_arch_name, "armv5tej"
+	string	cpu_elf_name, "v5"
+	string	cpu_arm926_name, "ARM926EJ-S"
+
+	.align
+
+	.section ".proc.info.init", #alloc
+
+	.type	__arm926_proc_info,#object
+__arm926_proc_info:
+	.long	0x41069260			@ ARM926EJ-S (v5TEJ)
+	.long	0xff0ffff0
+	.long   PMD_TYPE_SECT | \
+		PMD_SECT_BUFFERABLE | \
+		PMD_SECT_CACHEABLE | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	.long   PMD_TYPE_SECT | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	initfn	__arm926_setup, __arm926_proc_info
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA
+	.long	cpu_arm926_name
+	.long	arm926_processor_functions
+	.long	v4wbi_tlb_fns
+	.long	v4wb_user_fns
+	.long	arm926_cache_fns
+	.size	__arm926_proc_info, . - __arm926_proc_info
diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S
new file mode 100644
index 0000000..ee5b66f
--- /dev/null
+++ b/arch/arm/mm/proc-arm940.S
@@ -0,0 +1,364 @@
+/*
+ *  linux/arch/arm/mm/arm940.S: utility functions for ARM940T
+ *
+ *  Copyright (C) 2004-2006 Hyok S. Choi (hyok.choi@samsung.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/ptrace.h>
+#include "proc-macros.S"
+
+/* ARM940T has a 4KB DCache comprising 256 lines of 4 words */
+#define CACHE_DLINESIZE	16
+#define CACHE_DSEGMENTS	4
+#define CACHE_DENTRIES	64
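+/* 4 segments * 64 lines * 16 bytes per line = the 4KB noted above */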
+
+	.text
+/*
+ * cpu_arm940_proc_init()
+ * cpu_arm940_switch_mm()
+ *
+ * These are not required.
+ */
+ENTRY(cpu_arm940_proc_init)
+ENTRY(cpu_arm940_switch_mm)
+	ret	lr
+
+/*
+ * cpu_arm940_proc_fin()
+ */
+ENTRY(cpu_arm940_proc_fin)
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x00001000		@ i-cache
+	bic	r0, r0, #0x00000004		@ d-cache
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ret	lr
+
+/*
+ * cpu_arm940_reset(loc)
+ * Params  : r0 = address to jump to
+ * Notes   : This sets up everything for a reset
+ */
+	.pushsection	.idmap.text, "ax"
+ENTRY(cpu_arm940_reset)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c5, 0		@ flush I cache
+	mcr	p15, 0, ip, c7, c6, 0		@ flush D cache
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mrc	p15, 0, ip, c1, c0, 0		@ ctrl register
+	bic	ip, ip, #0x00000005		@ .............c.p
+	bic	ip, ip, #0x00001000		@ i-cache
+	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
+	ret	r0
+ENDPROC(cpu_arm940_reset)
+	.popsection
+
+/*
+ * cpu_arm940_do_idle()
+ */
+	.align	5
+ENTRY(cpu_arm940_do_idle)
+	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
+	ret	lr
+
+/*
+ *	flush_icache_all()
+ *
+ *	Unconditionally clean and invalidate the entire icache.
+ */
+ENTRY(arm940_flush_icache_all)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	ret	lr
+ENDPROC(arm940_flush_icache_all)
+
+/*
+ *	flush_user_cache_all()
+ */
+ENTRY(arm940_flush_user_cache_all)
+	/* FALLTHROUGH */
+
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(arm940_flush_kern_cache_all)
+	mov	r2, #VM_EXEC
+	/* FALLTHROUGH */
+
+/*
+ *	flush_user_cache_range(start, end, flags)
+ *
+ *	There is no efficient way to flush only the cache entries in
+ *	the specified address range, so the whole cache is flushed.
+ *
+ *	- start	- start address (inclusive)
+ *	- end	- end address (exclusive)
+ *	- flags	- vm_flags describing address space
+ */
+ENTRY(arm940_flush_user_cache_range)
+	mov	ip, #0
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mcr	p15, 0, ip, c7, c6, 0		@ flush D cache
+#else
+	mov	r1, #(CACHE_DSEGMENTS - 1) << 4	@ 4 segments
+1:	orr	r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
+2:	mcr	p15, 0, r3, c7, c14, 2		@ clean/flush D index
+	subs	r3, r3, #1 << 26
+	bcs	2b				@ entries 63 to 0
+	subs	r1, r1, #1 << 4
+	bcs	1b				@ segments 3 to 0
+#endif
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start, end.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm940_coherent_kern_range)
+	/* FALLTHROUGH */
+
+/*
+ *	coherent_user_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start, end.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm940_coherent_user_range)
+	/* FALLTHROUGH */
+
+/*
+ *	flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure no D cache aliasing occurs, either with itself or
+ *	the I cache
+ *
+ *	- addr	- kernel address
+ *	- size	- region size
+ */
+ENTRY(arm940_flush_kern_dcache_area)
+	mov	r0, #0
+	mov	r1, #(CACHE_DSEGMENTS - 1) << 4	@ 4 segments
+1:	orr	r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
+2:	mcr	p15, 0, r3, c7, c14, 2		@ clean/flush D index
+	subs	r3, r3, #1 << 26
+	bcs	2b				@ entries 63 to 0
+	subs	r1, r1, #1 << 4
+	bcs	1b				@ segments 3 to 0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_inv_range(start, end)
+ *
+ *	There is no efficient way to invalidate a specific virtual
+ *	address range, so the whole cache is invalidated.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+arm940_dma_inv_range:
+	mov	ip, #0
+	mov	r1, #(CACHE_DSEGMENTS - 1) << 4	@ 4 segments
+1:	orr	r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
+2:	mcr	p15, 0, r3, c7, c6, 2		@ flush D entry
+	subs	r3, r3, #1 << 26
+	bcs	2b				@ entries 63 to 0
+	subs	r1, r1, #1 << 4
+	bcs	1b				@ segments 3 to 0
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_clean_range(start, end)
+ *
+ *	There is no efficient way to clean a specific virtual
+ *	address range, so the whole cache is cleaned.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+arm940_dma_clean_range:
+ENTRY(cpu_arm940_dcache_clean_area)
+	mov	ip, #0
+#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mov	r1, #(CACHE_DSEGMENTS - 1) << 4	@ 4 segments
+1:	orr	r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
+2:	mcr	p15, 0, r3, c7, c10, 2		@ clean D entry
+	subs	r3, r3, #1 << 26
+	bcs	2b				@ entries 63 to 0
+	subs	r1, r1, #1 << 4
+	bcs	1b				@ segments 3 to 0
+#endif
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_flush_range(start, end)
+ *
+ *	There is no efficient way to clean and invalidate a specific
+ *	virtual address range, so the whole cache is flushed.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm940_dma_flush_range)
+	mov	ip, #0
+	mov	r1, #(CACHE_DSEGMENTS - 1) << 4	@ 4 segments
+1:	orr	r3, r1, #(CACHE_DENTRIES - 1) << 26 @ 64 entries
+2:
+#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mcr	p15, 0, r3, c7, c14, 2		@ clean/flush D entry
+#else
+	mcr	p15, 0, r3, c7, c6, 2		@ invalidate D entry
+#endif
+	subs	r3, r3, #1 << 26
+	bcs	2b				@ entries 63 to 0
+	subs	r1, r1, #1 << 4
+	bcs	1b				@ segments 3 to 0
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(arm940_dma_map_area)
+	add	r1, r1, r0
+	cmp	r2, #DMA_TO_DEVICE
+	beq	arm940_dma_clean_range
+	bcs	arm940_dma_inv_range
+	b	arm940_dma_flush_range
+ENDPROC(arm940_dma_map_area)
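The three-way branch above dispatches on the DMA direction: an exact match on DMA_TO_DEVICE takes the clean path, any larger value (DMA_FROM_DEVICE) takes the invalidate path, and the remaining case (DMA_BIDIRECTIONAL) falls through to the combined flush. A hedged C rendering of the same dispatch (constant values follow the usual enum dma_data_direction ordering):

/* Illustrative sketch of arm940_dma_map_area's cmp/beq/bcs dispatch. */
enum dir_sketch { DIR_BIDIRECTIONAL = 0, DIR_TO_DEVICE = 1, DIR_FROM_DEVICE = 2 };

static void dma_map_area_sketch(unsigned long start, unsigned long size,
				enum dir_sketch dir,
				void (*clean)(unsigned long, unsigned long),
				void (*inv)(unsigned long, unsigned long),
				void (*flush)(unsigned long, unsigned long))
{
	unsigned long end = start + size;	/* "add r1, r1, r0" */

	if (dir == DIR_TO_DEVICE)
		clean(start, end);	/* CPU wrote the buffer, device reads  */
	else if (dir > DIR_TO_DEVICE)
		inv(start, end);	/* device writes the buffer, CPU reads */
	else
		flush(start, end);	/* bidirectional: clean + invalidate   */
}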
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(arm940_dma_unmap_area)
+	ret	lr
+ENDPROC(arm940_dma_unmap_area)
+
+	.globl	arm940_flush_kern_cache_louis
+	.equ	arm940_flush_kern_cache_louis, arm940_flush_kern_cache_all
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions arm940
+
+	.type	__arm940_setup, #function
+__arm940_setup:
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, r0, c7, c6, 0		@ invalidate D cache
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+
+	mcr	p15, 0, r0, c6, c3, 0		@ disable data area 3~7
+	mcr	p15, 0, r0, c6, c4, 0
+	mcr	p15, 0, r0, c6, c5, 0
+	mcr	p15, 0, r0, c6, c6, 0
+	mcr	p15, 0, r0, c6, c7, 0
+
+	mcr	p15, 0, r0, c6, c3, 1		@ disable instruction area 3~7
+	mcr	p15, 0, r0, c6, c4, 1
+	mcr	p15, 0, r0, c6, c5, 1
+	mcr	p15, 0, r0, c6, c6, 1
+	mcr	p15, 0, r0, c6, c7, 1
+
+	mov	r0, #0x0000003F			@ base = 0, size = 4GB
+	mcr	p15, 0, r0, c6,	c0, 0		@ set area 0, default
+	mcr	p15, 0, r0, c6,	c0, 1
+
+	ldr	r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM
+	ldr	r7, =CONFIG_DRAM_SIZE >> 12	@ size of RAM (must be >= 4KB)
+	pr_val	r3, r0, r7, #1
+	mcr	p15, 0, r3, c6,	c1, 0		@ set area 1, RAM
+	mcr	p15, 0, r3, c6,	c1, 1
+
+	ldr	r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH
+	ldr	r7, =CONFIG_FLASH_SIZE		@ size of FLASH (must be >= 4KB)
+	pr_val	r3, r0, r7, #1
+	mcr	p15, 0, r3, c6,	c2, 0		@ set area 2, ROM/FLASH
+	mcr	p15, 0, r3, c6,	c2, 1
+
+	mov	r0, #0x06
+	mcr	p15, 0, r0, c2, c0, 0		@ Region 1&2 cacheable
+	mcr	p15, 0, r0, c2, c0, 1
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mov	r0, #0x00			@ disable whole write buffer
+#else
+	mov	r0, #0x02			@ Region 1 write buffered
+#endif
+	mcr	p15, 0, r0, c3, c0, 0
+
+	mov	r0, #0x10000
+	sub	r0, r0, #1			@ r0 = 0xffff
+	mcr	p15, 0, r0, c5, c0, 0		@ all read/write access
+	mcr	p15, 0, r0, c5, c0, 1
+
+	mrc	p15, 0, r0, c1, c0		@ get control register
+	orr	r0, r0, #0x00001000		@ I-cache
+	orr	r0, r0, #0x00000005		@ MPU/D-cache
+
+	ret	lr
+
+	.size	__arm940_setup, . - __arm940_setup
+
+	__INITDATA
+
+	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+	define_processor_functions arm940, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1
+
+	.section ".rodata"
+
+	string	cpu_arch_name, "armv4t"
+	string	cpu_elf_name, "v4"
+	string	cpu_arm940_name, "ARM940T"
+
+	.align
+
+	.section ".proc.info.init", #alloc
+
+	.type	__arm940_proc_info,#object
+__arm940_proc_info:
+	.long	0x41009400
+	.long	0xff00fff0
+	.long	0
+	initfn	__arm940_setup, __arm940_proc_info
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
+	.long	cpu_arm940_name
+	.long	arm940_processor_functions
+	.long	0
+	.long	0
+	.long	arm940_cache_fns
+	.size	__arm940_proc_info, . - __arm940_proc_info
+
diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S
new file mode 100644
index 0000000..7361837
--- /dev/null
+++ b/arch/arm/mm/proc-arm946.S
@@ -0,0 +1,419 @@
+/*
+ *  linux/arch/arm/mm/arm946.S: utility functions for ARM946E-S
+ *
+ *  Copyright (C) 2004-2006 Hyok S. Choi (hyok.choi@samsung.com)
+ *
+ *  (Much of the cache code is taken from proc-arm926.S)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/ptrace.h>
+#include "proc-macros.S"
+
+/*
+ * The ARM946E-S can be synthesized with a D-cache of 0KB to 1MB; the
+ * typical 8KB configuration comprises 256 lines of 32 bytes (8 words).
+ */
+#define CACHE_DSIZE	(CONFIG_CPU_DCACHE_SIZE) /* typically 8KB. */
+#define CACHE_DLINESIZE	32			/* fixed */
+#define CACHE_DSEGMENTS	4			/* fixed */
+#define CACHE_DENTRIES	(CACHE_DSIZE / CACHE_DSEGMENTS / CACHE_DLINESIZE)
+#define CACHE_DLIMIT	(CACHE_DSIZE * 4)	/* benchmark needed */
+
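For the typical 8KB configuration mentioned above, the derived constants and the set/way index words the whole-cache loops below generate work out as in this illustrative C sketch (the 8KB size and the bit positions are taken from the defines and shifts in this file, not asserted as the only valid configuration):

#include <stdint.h>

#define DSIZE_SKETCH	0x2000				/* assumed CONFIG_CPU_DCACHE_SIZE */
#define DENTRIES_SKETCH	(DSIZE_SKETCH / 4 / 32)		/* = 64, as CACHE_DENTRIES above  */

/* Illustrative sketch: index words fed to "mcr p15, 0, r3, c7, c14, 2"
 * by the loops in arm946_flush_kern_cache_all below. */
static void arm946_walk_dcache_sketch(void (*op)(uint32_t index))
{
	for (int seg = 3; seg >= 0; seg--)
		for (int ent = DENTRIES_SKETCH - 1; ent >= 0; ent--)
			op(((uint32_t)seg << 29) | ((uint32_t)ent << 4));
}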
+	.text
+/*
+ * cpu_arm946_proc_init()
+ * cpu_arm946_switch_mm()
+ *
+ * These are not required.
+ */
+ENTRY(cpu_arm946_proc_init)
+ENTRY(cpu_arm946_switch_mm)
+	ret	lr
+
+/*
+ * cpu_arm946_proc_fin()
+ */
+ENTRY(cpu_arm946_proc_fin)
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x00001000		@ i-cache
+	bic	r0, r0, #0x00000004		@ d-cache
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ret	lr
+
+/*
+ * cpu_arm946_reset(loc)
+ * Params  : r0 = address to jump to
+ * Notes   : This sets up everything for a reset
+ */
+	.pushsection	.idmap.text, "ax"
+ENTRY(cpu_arm946_reset)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c5, 0		@ flush I cache
+	mcr	p15, 0, ip, c7, c6, 0		@ flush D cache
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mrc	p15, 0, ip, c1, c0, 0		@ ctrl register
+	bic	ip, ip, #0x00000005		@ .............c.p
+	bic	ip, ip, #0x00001000		@ i-cache
+	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
+	ret	r0
+ENDPROC(cpu_arm946_reset)
+	.popsection
+
+/*
+ * cpu_arm946_do_idle()
+ */
+	.align	5
+ENTRY(cpu_arm946_do_idle)
+	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
+	ret	lr
+
+/*
+ *	flush_icache_all()
+ *
+ *	Unconditionally clean and invalidate the entire icache.
+ */
+ENTRY(arm946_flush_icache_all)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	ret	lr
+ENDPROC(arm946_flush_icache_all)
+
+/*
+ *	flush_user_cache_all()
+ */
+ENTRY(arm946_flush_user_cache_all)
+	/* FALLTHROUGH */
+
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(arm946_flush_kern_cache_all)
+	mov	r2, #VM_EXEC
+	mov	ip, #0
+__flush_whole_cache:
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mcr	p15, 0, ip, c7, c6, 0		@ flush D cache
+#else
+	mov	r1, #(CACHE_DSEGMENTS - 1) << 29 @ 4 segments
+1:	orr	r3, r1, #(CACHE_DENTRIES - 1) << 4 @ n entries
+2:	mcr	p15, 0, r3, c7, c14, 2		@ clean/flush D index
+	subs	r3, r3, #1 << 4
+	bcs	2b				@ entries n to 0
+	subs	r1, r1, #1 << 29
+	bcs	1b				@ segments 3 to 0
+#endif
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c5, 0		@ flush I cache
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	flush_user_cache_range(start, end, flags)
+ *
+ *	Clean and invalidate a range of cache entries in the
+ *	specified address range.
+ *
+ *	- start	- start address (inclusive)
+ *	- end	- end address (exclusive)
+ *	- flags	- vm_flags describing address space
+ * (same as arm926)
+ */
+ENTRY(arm946_flush_user_cache_range)
+	mov	ip, #0
+	sub	r3, r1, r0			@ calculate total size
+	cmp	r3, #CACHE_DLIMIT
+	bhs	__flush_whole_cache
+
+1:	tst	r2, #VM_EXEC
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+#else
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D entry
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D entry
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+#endif
+	cmp	r0, r1
+	blo	1b
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start, end.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(arm946_coherent_kern_range)
+	/* FALLTHROUGH */
+
+/*
+ *	coherent_user_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start, end.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ * (same as arm926)
+ */
+ENTRY(arm946_coherent_user_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	r0, #0
+	ret	lr
+
+/*
+ *	flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure no D cache aliasing occurs, either with itself or
+ *	the I cache
+ *
+ *	- addr	- kernel address
+ *	- size	- region size
+ * (same as arm926)
+ */
+ENTRY(arm946_flush_kern_dcache_area)
+	add	r1, r0, r1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_inv_range(start, end)
+ *
+ *	Invalidate (discard) the specified virtual address range.
+ *	May not write back any entries.  If 'start' or 'end'
+ *	are not cache line aligned, those lines must be written
+ *	back.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ * (same as arm926)
+ */
+arm946_dma_inv_range:
+#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
+	tst	r0, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r0, c7, c10, 1		@ clean D entry
+	tst	r1, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r1, c7, c10, 1		@ clean D entry
+#endif
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_clean_range(start, end)
+ *
+ *	Clean the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as arm926)
+ */
+arm946_dma_clean_range:
+#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+#endif
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_flush_range(start, end)
+ *
+ *	Clean and invalidate the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as arm926)
+ */
+ENTRY(arm946_dma_flush_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:
+#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+#else
+	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+#endif
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(arm946_dma_map_area)
+	add	r1, r1, r0
+	cmp	r2, #DMA_TO_DEVICE
+	beq	arm946_dma_clean_range
+	bcs	arm946_dma_inv_range
+	b	arm946_dma_flush_range
+ENDPROC(arm946_dma_map_area)
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(arm946_dma_unmap_area)
+	ret	lr
+ENDPROC(arm946_dma_unmap_area)
+
+	.globl	arm946_flush_kern_cache_louis
+	.equ	arm946_flush_kern_cache_louis, arm946_flush_kern_cache_all
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions arm946
+
+ENTRY(cpu_arm946_dcache_clean_area)
+#ifndef CONFIG_CPU_DCACHE_WRITETHROUGH
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	subs	r1, r1, #CACHE_DLINESIZE
+	bhi	1b
+#endif
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+	.type	__arm946_setup, #function
+__arm946_setup:
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, r0, c7, c6, 0		@ invalidate D cache
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+
+	mcr	p15, 0, r0, c6, c3, 0		@ disable memory region 3~7
+	mcr	p15, 0, r0, c6, c4, 0
+	mcr	p15, 0, r0, c6, c5, 0
+	mcr	p15, 0, r0, c6, c6, 0
+	mcr	p15, 0, r0, c6, c7, 0
+
+	mov	r0, #0x0000003F			@ base = 0, size = 4GB
+	mcr	p15, 0, r0, c6,	c0, 0		@ set region 0, default
+
+	ldr	r0, =(CONFIG_DRAM_BASE & 0xFFFFF000) @ base[31:12] of RAM
+	ldr	r7, =CONFIG_DRAM_SIZE		@ size of RAM (must be >= 4KB)
+	pr_val	r3, r0, r7, #1
+	mcr	p15, 0, r3, c6, c1, 0
+
+	ldr	r0, =(CONFIG_FLASH_MEM_BASE & 0xFFFFF000) @ base[31:12] of FLASH
+	ldr	r7, =CONFIG_FLASH_SIZE		@ size of FLASH (must be >= 4KB)
+	pr_val	r3, r0, r7, #1
+	mcr	p15, 0, r3, c6, c2, 0
+
+	mov	r0, #0x06
+	mcr	p15, 0, r0, c2, c0, 0		@ region 1,2 d-cacheable
+	mcr	p15, 0, r0, c2, c0, 1		@ region 1,2 i-cacheable
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mov	r0, #0x00			@ disable whole write buffer
+#else
+	mov	r0, #0x02			@ region 1 write buffered
+#endif
+	mcr	p15, 0, r0, c3, c0, 0
+
+/*
+ *  Access Permission Settings for future permission control by PU.
+ *
+ *				priv.	user
+ * 	region 0 (whole)	rw	--	: b0001
+ * 	region 1 (RAM)		rw	rw	: b0011
+ * 	region 2 (FLASH)	rw	r-	: b0010
+ *	region 3~7 (none)	--	--	: b0000
+ */
+	mov	r0, #0x00000031
+	orr	r0, r0, #0x00000200
+	mcr	p15, 0, r0, c5, c0, 2		@ set data access permission
+	mcr	p15, 0, r0, c5, c0, 3		@ set inst. access permission
+
+	mrc	p15, 0, r0, c1, c0		@ get control register
+	orr	r0, r0, #0x00001000		@ I-cache
+	orr	r0, r0, #0x00000005		@ MPU/D-cache
+#ifdef CONFIG_CPU_CACHE_ROUND_ROBIN
+	orr	r0, r0, #0x00004000		@ .1.. .... .... ....
+#endif
+	ret	lr
+
+	.size	__arm946_setup, . - __arm946_setup
+
+	__INITDATA
+
+	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+	define_processor_functions arm946, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1
+
+	.section ".rodata"
+
+	string	cpu_arch_name, "armv5te"
+	string	cpu_elf_name, "v5t"
+	string	cpu_arm946_name, "ARM946E-S"
+
+	.align
+
+	.section ".proc.info.init", #alloc
+	.type	__arm946_proc_info,#object
+__arm946_proc_info:
+	.long	0x41009460
+	.long	0xff00fff0
+	.long	0
+	.long	0
+	initfn	__arm946_setup, __arm946_proc_info
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB
+	.long	cpu_arm946_name
+	.long	arm946_processor_functions
+	.long	0
+	.long	0
+	.long	arm946_cache_fns
+	.size	__arm946_proc_info, . - __arm946_proc_info
+
diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S
new file mode 100644
index 0000000..7fac8c6
--- /dev/null
+++ b/arch/arm/mm/proc-arm9tdmi.S
@@ -0,0 +1,95 @@
+/*
+ *  linux/arch/arm/mm/proc-arm9tdmi.S: utility functions for ARM9TDMI
+ *
+ *  Copyright (C) 2003-2006 Hyok S. Choi <hyok.choi@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/ptrace.h>
+
+#include "proc-macros.S"
+
+	.text
+/*
+ * cpu_arm9tdmi_proc_init()
+ * cpu_arm9tdmi_do_idle()
+ * cpu_arm9tdmi_dcache_clean_area()
+ * cpu_arm9tdmi_switch_mm()
+ *
+ * These are not required.
+ */
+ENTRY(cpu_arm9tdmi_proc_init)
+ENTRY(cpu_arm9tdmi_do_idle)
+ENTRY(cpu_arm9tdmi_dcache_clean_area)
+ENTRY(cpu_arm9tdmi_switch_mm)
+		ret	lr
+
+/*
+ * cpu_arm9tdmi_proc_fin()
+ */
+ENTRY(cpu_arm9tdmi_proc_fin)
+		ret	lr
+
+/*
+ * Function: cpu_arm9tdmi_reset(loc)
+ * Params  : loc(r0)	address to jump to
+ * Purpose : Sets up everything for a reset and jumps to the given location for a soft reset.
+ */
+		.pushsection	.idmap.text, "ax"
+ENTRY(cpu_arm9tdmi_reset)
+		ret	r0
+ENDPROC(cpu_arm9tdmi_reset)
+		.popsection
+
+		.type	__arm9tdmi_setup, #function
+__arm9tdmi_setup:
+		ret	lr
+		.size	__arm9tdmi_setup, . - __arm9tdmi_setup
+
+		__INITDATA
+
+		@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+		define_processor_functions arm9tdmi, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1
+
+		.section ".rodata"
+
+		string	cpu_arch_name, "armv4t"
+		string	cpu_elf_name, "v4"
+		string	cpu_arm9tdmi_name, "ARM9TDMI"
+		string	cpu_p2001_name, "P2001"
+
+		.align
+
+		.section ".proc.info.init", #alloc
+
+.macro arm9tdmi_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req
+		.type	__\name\()_proc_info, #object
+__\name\()_proc_info:
+		.long	\cpu_val
+		.long	\cpu_mask
+		.long	0
+		.long	0
+		initfn	__arm9tdmi_setup, __\name\()_proc_info
+		.long	cpu_arch_name
+		.long	cpu_elf_name
+		.long	HWCAP_SWP | HWCAP_THUMB | HWCAP_26BIT
+		.long	\cpu_name
+		.long	arm9tdmi_processor_functions
+		.long	0
+		.long	0
+		.long	v4_cache_fns
+		.size	__\name\()_proc_info, . - __\name\()_proc_info
+.endm
+
+	arm9tdmi_proc_info arm9tdmi, 0x41009900, 0xfff8ff00, cpu_arm9tdmi_name
+	arm9tdmi_proc_info p2001, 0x41029000, 0xffffffff, cpu_p2001_name
diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S
new file mode 100644
index 0000000..4001b73
--- /dev/null
+++ b/arch/arm/mm/proc-fa526.S
@@ -0,0 +1,218 @@
+/*
+ *  linux/arch/arm/mm/proc-fa526.S: MMU functions for FA526
+ *
+ *  Written by : Luke Lee
+ *  Copyright (C) 2005 Faraday Corp.
+ *  Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ *
+ * These are the low level assembler for performing cache and TLB
+ * functions on the fa526.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+
+#include "proc-macros.S"
+
+#define CACHE_DLINESIZE	16
+
+	.text
+/*
+ * cpu_fa526_proc_init()
+ */
+ENTRY(cpu_fa526_proc_init)
+	ret	lr
+
+/*
+ * cpu_fa526_proc_fin()
+ */
+ENTRY(cpu_fa526_proc_fin)
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1000			@ ...i............
+	bic	r0, r0, #0x000e			@ ............wca.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	nop
+	nop
+	ret	lr
+
+/*
+ * cpu_fa526_reset(loc)
+ *
+ * Perform a soft reset of the system.  Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ */
+	.align	4
+	.pushsection	.idmap.text, "ax"
+ENTRY(cpu_fa526_reset)
+/* TODO: Use CP8 if possible... */
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I,D caches
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+#ifdef CONFIG_MMU
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+#endif
+	mrc	p15, 0, ip, c1, c0, 0		@ ctrl register
+	bic	ip, ip, #0x000f			@ ............wcam
+	bic	ip, ip, #0x1100			@ ...i...s........
+	bic	ip, ip, #0x0800			@ BTB off
+	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
+	nop
+	nop
+	ret	r0
+ENDPROC(cpu_fa526_reset)
+	.popsection
+
+/*
+ * cpu_fa526_do_idle()
+ */
+	.align	4
+ENTRY(cpu_fa526_do_idle)
+	ret	lr
+
+
+ENTRY(cpu_fa526_dcache_clean_area)
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	subs	r1, r1, #CACHE_DLINESIZE
+	bhi	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/* =============================== PageTable ============================== */
+
+/*
+ * cpu_fa526_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+	.align	4
+ENTRY(cpu_fa526_switch_mm)
+#ifdef CONFIG_MMU
+	mov	ip, #0
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	mcr	p15, 0, ip, c7, c6, 0		@ invalidate D cache
+#else
+	mcr	p15, 0, ip, c7, c14, 0		@ clean and invalidate whole D cache
+#endif
+	mcr	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, ip, c7, c5, 6		@ invalidate BTB since mm changed
+	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
+	mcr	p15, 0, ip, c7, c5, 4		@ prefetch flush
+	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate UTLB
+#endif
+	ret	lr
+
+/*
+ * cpu_fa526_set_pte_ext(ptep, pte, ext)
+ *
+ * Set a PTE and flush it out
+ */
+	.align	4
+ENTRY(cpu_fa526_set_pte_ext)
+#ifdef CONFIG_MMU
+	armv3_set_pte_ext
+	mov	r0, r0
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+#endif
+	ret	lr
+
+	.type	__fa526_setup, #function
+__fa526_setup:
+	/* On return from this routine, r0 must carry the correct flags for the CFG register */
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c7		@ invalidate I,D caches on v4
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer on v4
+#ifdef CONFIG_MMU
+	mcr	p15, 0, r0, c8, c7		@ invalidate I,D TLBs on v4
+#endif
+	mcr	p15, 0, r0, c7, c5, 5		@ invalidate IScratchpad RAM
+
+	mov	r0, #1
+	mcr	p15, 0, r0, c1, c1, 0		@ turn-on ECR
+
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 6		@ invalidate BTB All
+	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
+	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
+
+	mov	r0, #0x1f			@ Domains 0, 1 = manager, 2 = client
+	mcr	p15, 0, r0, c3, c0		@ load domain access register
+
+	mrc	p15, 0, r0, c1, c0		@ get control register v4
+	ldr	r5, fa526_cr1_clear
+	bic	r0, r0, r5
+	ldr	r5, fa526_cr1_set
+	orr	r0, r0, r5
+	ret	lr
+	.size	__fa526_setup, . - __fa526_setup
+
+	/*
+	 * .RVI ZFRS BLDP WCAM
+	 * ..11 1001 .111 1101
+	 *
+	 */
+	.type	fa526_cr1_clear, #object
+	.type	fa526_cr1_set, #object
+fa526_cr1_clear:
+	.word	0x3f3f
+fa526_cr1_set:
+	.word	0x397D
+
+	__INITDATA
+
+	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+	define_processor_functions fa526, dabort=v4_early_abort, pabort=legacy_pabort
+
+	.section ".rodata"
+
+	string	cpu_arch_name, "armv4"
+	string	cpu_elf_name, "v4"
+	string	cpu_fa526_name, "FA526"
+
+	.align
+
+	.section ".proc.info.init", #alloc
+
+	.type	__fa526_proc_info,#object
+__fa526_proc_info:
+	.long	0x66015261
+	.long	0xff01fff1
+	.long   PMD_TYPE_SECT | \
+		PMD_SECT_BUFFERABLE | \
+		PMD_SECT_CACHEABLE | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	.long   PMD_TYPE_SECT | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	initfn	__fa526_setup, __fa526_proc_info
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP | HWCAP_HALF
+	.long	cpu_fa526_name
+	.long	fa526_processor_functions
+	.long	fa_tlb_fns
+	.long	fa_user_fns
+	.long	fa_cache_fns
+	.size	__fa526_proc_info, . - __fa526_proc_info
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
new file mode 100644
index 0000000..92e08bf
--- /dev/null
+++ b/arch/arm/mm/proc-feroceon.S
@@ -0,0 +1,626 @@
+/*
+ *  linux/arch/arm/mm/proc-feroceon.S: MMU functions for Feroceon
+ *
+ *  Heavily based on proc-arm926.S
+ *  Maintainer: Assaf Hoffman <hoffman@marvell.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include "proc-macros.S"
+
+/*
+ * This is the maximum size of an area which will be invalidated
+ * using the single invalidate entry instructions.  Anything larger
+ * than this, and we go for the whole cache.
+ *
+ * This value should be chosen so that whichever path is taken is
+ * the cheaper one.
+ */
+#define CACHE_DLIMIT	16384
+
+/*
+ * the cache line size of the I and D cache
+ */
+#define CACHE_DLINESIZE	32
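CACHE_DLIMIT is the crossover point used by feroceon_flush_user_cache_range below: ranges larger than it are cheaper to handle by cleaning the whole cache than line by line. A hedged C sketch of that decision:

/* Illustrative: the size check at the top of feroceon_flush_user_cache_range. */
#define DLIMIT_SKETCH 16384	/* CACHE_DLIMIT above */

static int should_flush_whole_cache_sketch(unsigned long start, unsigned long end)
{
	return (end - start) > DLIMIT_SKETCH;
}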
+
+	.bss
+	.align 3
+__cache_params_loc:
+	.space	8
+
+	.text
+__cache_params:
+	.word	__cache_params_loc
+
+/*
+ * cpu_feroceon_proc_init()
+ */
+ENTRY(cpu_feroceon_proc_init)
+	mrc	p15, 0, r0, c0, c0, 1		@ read cache type register
+	ldr	r1, __cache_params
+	mov	r2, #(16 << 5)
+	tst	r0, #(1 << 16)			@ get way
+	mov	r0, r0, lsr #18			@ get cache size order
+	movne	r3, #((4 - 1) << 30)		@ 4-way
+	and	r0, r0, #0xf
+	moveq	r3, #0				@ 1-way
+	mov	r2, r2, lsl r0			@ actual cache size
+	movne	r2, r2, lsr #2			@ turned into # of sets
+	sub	r2, r2, #(1 << 5)
+	stmia	r1, {r2, r3}
+	ret	lr
+
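cpu_feroceon_proc_init precomputes the set/way loop bounds for the whole-cache operations from the cache type register: bit 16 distinguishes the 4-way from the direct-mapped configuration, and the field at bits [21:18] gives the D-cache size as 512 << n bytes. A hedged C sketch of that decode (names are illustrative):

#include <stdint.h>

/* Illustrative sketch of the values stored in __cache_params_loc:
 * the highest set offset (32-byte sets) and the way field in bits [31:30]. */
static void feroceon_cache_params_sketch(uint32_t cachetype,
					 uint32_t *max_set, uint32_t *max_way)
{
	uint32_t size = (16u << 5) << ((cachetype >> 18) & 0xf);
	int four_way = cachetype & (1u << 16);

	if (four_way)
		size >>= 2;			/* bytes per way */

	*max_set = size - (1u << 5);
	*max_way = four_way ? 3u << 30 : 0;
}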
+/*
+ * cpu_feroceon_proc_fin()
+ */
+ENTRY(cpu_feroceon_proc_fin)
+#if defined(CONFIG_CACHE_FEROCEON_L2) && \
+	!defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH)
+	mov	r0, #0
+	mcr	p15, 1, r0, c15, c9, 0		@ clean L2
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+#endif
+
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1000			@ ...i............
+	bic	r0, r0, #0x000e			@ ............wca.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ret	lr
+
+/*
+ * cpu_feroceon_reset(loc)
+ *
+ * Perform a soft reset of the system.  Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ */
+	.align	5
+	.pushsection	.idmap.text, "ax"
+ENTRY(cpu_feroceon_reset)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I,D caches
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+#ifdef CONFIG_MMU
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+#endif
+	mrc	p15, 0, ip, c1, c0, 0		@ ctrl register
+	bic	ip, ip, #0x000f			@ ............wcam
+	bic	ip, ip, #0x1100			@ ...i...s........
+	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
+	ret	r0
+ENDPROC(cpu_feroceon_reset)
+	.popsection
+
+/*
+ * cpu_feroceon_do_idle()
+ *
+ * Called with IRQs disabled
+ */
+	.align	5
+ENTRY(cpu_feroceon_do_idle)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ Drain write buffer
+	mcr	p15, 0, r0, c7, c0, 4		@ Wait for interrupt
+	ret	lr
+
+/*
+ *	flush_icache_all()
+ *
+ *	Unconditionally clean and invalidate the entire icache.
+ */
+ENTRY(feroceon_flush_icache_all)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	ret	lr
+ENDPROC(feroceon_flush_icache_all)
+
+/*
+ *	flush_user_cache_all()
+ *
+ *	Clean and invalidate all cache entries in a particular
+ *	address space.
+ */
+	.align	5
+ENTRY(feroceon_flush_user_cache_all)
+	/* FALLTHROUGH */
+
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(feroceon_flush_kern_cache_all)
+	mov	r2, #VM_EXEC
+
+__flush_whole_cache:
+	ldr	r1, __cache_params
+	ldmia	r1, {r1, r3}
+1:	orr	ip, r1, r3
+2:	mcr	p15, 0, ip, c7, c14, 2		@ clean + invalidate D set/way
+	subs	ip, ip, #(1 << 30)		@ next way
+	bcs	2b
+	subs	r1, r1, #(1 << 5)		@ next set
+	bcs	1b
+
+	tst	r2, #VM_EXEC
+	mov	ip, #0
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	flush_user_cache_range(start, end, flags)
+ *
+ *	Clean and invalidate a range of cache entries in the
+ *	specified address range.
+ *
+ *	- start	- start address (inclusive)
+ *	- end	- end address (exclusive)
+ *	- flags	- vm_flags describing address space
+ */
+	.align	5
+ENTRY(feroceon_flush_user_cache_range)
+	sub	r3, r1, r0			@ calculate total size
+	cmp	r3, #CACHE_DLIMIT
+	bgt	__flush_whole_cache
+1:	tst	r2, #VM_EXEC
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D entry
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D entry
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	tst	r2, #VM_EXEC
+	mov	ip, #0
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start, end.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+	.align	5
+ENTRY(feroceon_coherent_kern_range)
+	/* FALLTHROUGH */
+
+/*
+ *	coherent_user_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start, end.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(feroceon_coherent_user_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	r0, #0
+	ret	lr
+
+/*
+ *	flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure no D cache aliasing occurs, either with itself or
+ *	the I cache
+ *
+ *	- addr	- kernel address
+ *	- size	- region size
+ */
+	.align	5
+ENTRY(feroceon_flush_kern_dcache_area)
+	add	r1, r0, r1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+	.align	5
+ENTRY(feroceon_range_flush_kern_dcache_area)
+	mrs	r2, cpsr
+	add	r1, r0, #PAGE_SZ - CACHE_DLINESIZE	@ top addr is inclusive
+	orr	r3, r2, #PSR_I_BIT
+	msr	cpsr_c, r3			@ disable interrupts
+	mcr	p15, 5, r0, c15, c15, 0		@ D clean/inv range start
+	mcr	p15, 5, r1, c15, c15, 1		@ D clean/inv range top
+	msr	cpsr_c, r2			@ restore interrupts
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_inv_range(start, end)
+ *
+ *	Invalidate (discard) the specified virtual address range.
+ *	May not write back any entries.  If 'start' or 'end'
+ *	are not cache line aligned, those lines must be written
+ *	back.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+	.align	5
+feroceon_dma_inv_range:
+	tst	r0, #CACHE_DLINESIZE - 1
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r0, c7, c10, 1		@ clean D entry
+	tst	r1, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r1, c7, c10, 1		@ clean D entry
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+	.align	5
+feroceon_range_dma_inv_range:
+	mrs	r2, cpsr
+	tst	r0, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r0, c7, c10, 1		@ clean D entry
+	tst	r1, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r1, c7, c10, 1		@ clean D entry
+	cmp	r1, r0
+	subne	r1, r1, #1			@ top address is inclusive
+	orr	r3, r2, #PSR_I_BIT
+	msr	cpsr_c, r3			@ disable interrupts
+	mcr	p15, 5, r0, c15, c14, 0		@ D inv range start
+	mcr	p15, 5, r1, c15, c14, 1		@ D inv range top
+	msr	cpsr_c, r2			@ restore interrupts
+	ret	lr
+
+/*
+ *	dma_clean_range(start, end)
+ *
+ *	Clean the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+	.align	5
+feroceon_dma_clean_range:
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+	.align	5
+feroceon_range_dma_clean_range:
+	mrs	r2, cpsr
+	cmp	r1, r0
+	subne	r1, r1, #1			@ top address is inclusive
+	orr	r3, r2, #PSR_I_BIT
+	msr	cpsr_c, r3			@ disable interrupts
+	mcr	p15, 5, r0, c15, c13, 0		@ D clean range start
+	mcr	p15, 5, r1, c15, c13, 1		@ D clean range top
+	msr	cpsr_c, r2			@ restore interrupts
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_flush_range(start, end)
+ *
+ *	Clean and invalidate the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+	.align	5
+ENTRY(feroceon_dma_flush_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+	.align	5
+ENTRY(feroceon_range_dma_flush_range)
+	mrs	r2, cpsr
+	cmp	r1, r0
+	subne	r1, r1, #1			@ top address is inclusive
+	orr	r3, r2, #PSR_I_BIT
+	msr	cpsr_c, r3			@ disable interrupts
+	mcr	p15, 5, r0, c15, c15, 0		@ D clean/inv range start
+	mcr	p15, 5, r1, c15, c15, 1		@ D clean/inv range top
+	msr	cpsr_c, r2			@ restore interrupts
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(feroceon_dma_map_area)
+	add	r1, r1, r0
+	cmp	r2, #DMA_TO_DEVICE
+	beq	feroceon_dma_clean_range
+	bcs	feroceon_dma_inv_range
+	b	feroceon_dma_flush_range
+ENDPROC(feroceon_dma_map_area)
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(feroceon_range_dma_map_area)
+	add	r1, r1, r0
+	cmp	r2, #DMA_TO_DEVICE
+	beq	feroceon_range_dma_clean_range
+	bcs	feroceon_range_dma_inv_range
+	b	feroceon_range_dma_flush_range
+ENDPROC(feroceon_range_dma_map_area)
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(feroceon_dma_unmap_area)
+	ret	lr
+ENDPROC(feroceon_dma_unmap_area)
+
+	.globl	feroceon_flush_kern_cache_louis
+	.equ	feroceon_flush_kern_cache_louis, feroceon_flush_kern_cache_all
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions feroceon
+
+.macro range_alias basename
+	.globl feroceon_range_\basename
+	.type feroceon_range_\basename , %function
+	.equ feroceon_range_\basename , feroceon_\basename
+.endm
+
+/*
+ * Most of the cache functions are unchanged for this case.
+ * Export suitable alias symbols for the unchanged functions:
+ */
+	range_alias flush_icache_all
+	range_alias flush_user_cache_all
+	range_alias flush_kern_cache_all
+	range_alias flush_kern_cache_louis
+	range_alias flush_user_cache_range
+	range_alias coherent_kern_range
+	range_alias coherent_user_range
+	range_alias dma_unmap_area
+
+	define_cache_functions feroceon_range
+
+	.align	5
+ENTRY(cpu_feroceon_dcache_clean_area)
+#if defined(CONFIG_CACHE_FEROCEON_L2) && \
+	!defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH)
+	mov	r2, r0
+	mov	r3, r1
+#endif
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	subs	r1, r1, #CACHE_DLINESIZE
+	bhi	1b
+#if defined(CONFIG_CACHE_FEROCEON_L2) && \
+	!defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH)
+1:	mcr	p15, 1, r2, c15, c9, 1		@ clean L2 entry
+	add	r2, r2, #CACHE_DLINESIZE
+	subs	r3, r3, #CACHE_DLINESIZE
+	bhi	1b
+#endif
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/* =============================== PageTable ============================== */
+
+/*
+ * cpu_feroceon_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+	.align	5
+ENTRY(cpu_feroceon_switch_mm)
+#ifdef CONFIG_MMU
+	/*
+	 * Note: we wish to call __flush_whole_cache but we need to preserve
+	 * lr to do so.  The only way without touching main memory is to
+	 * use r2 which is normally used to test the VM_EXEC flag, and
+	 * compensate locally for the skipped ops if it is not set.
+	 */
+	mov	r2, lr				@ abuse r2 to preserve lr
+	bl	__flush_whole_cache
+	@ if r2 contains the VM_EXEC bit then the next 2 ops are done already
+	tst	r2, #VM_EXEC
+	mcreq	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcreq	p15, 0, ip, c7, c10, 4		@ drain WB
+
+	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+	ret	r2
+#else
+	ret	lr
+#endif
+
+/*
+ * cpu_feroceon_set_pte_ext(ptep, pte, ext)
+ *
+ * Set a PTE and flush it out
+ */
+	.align	5
+ENTRY(cpu_feroceon_set_pte_ext)
+#ifdef CONFIG_MMU
+	armv3_set_pte_ext wc_disable=0
+	mov	r0, r0
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+#if defined(CONFIG_CACHE_FEROCEON_L2) && \
+	!defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH)
+	mcr	p15, 1, r0, c15, c9, 1		@ clean L2 entry
+#endif
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+#endif
+	ret	lr
+
+/* Suspend/resume support: taken from arch/arm/mm/proc-arm926.S */
+.globl	cpu_feroceon_suspend_size
+.equ	cpu_feroceon_suspend_size, 4 * 3
+#ifdef CONFIG_ARM_CPU_SUSPEND
+ENTRY(cpu_feroceon_do_suspend)
+	stmfd	sp!, {r4 - r6, lr}
+	mrc	p15, 0, r4, c13, c0, 0	@ PID
+	mrc	p15, 0, r5, c3, c0, 0	@ Domain ID
+	mrc	p15, 0, r6, c1, c0, 0	@ Control register
+	stmia	r0, {r4 - r6}
+	ldmfd	sp!, {r4 - r6, pc}
+ENDPROC(cpu_feroceon_do_suspend)
+
+ENTRY(cpu_feroceon_do_resume)
+	mov	ip, #0
+	mcr	p15, 0, ip, c8, c7, 0	@ invalidate I+D TLBs
+	mcr	p15, 0, ip, c7, c7, 0	@ invalidate I+D caches
+	ldmia	r0, {r4 - r6}
+	mcr	p15, 0, r4, c13, c0, 0	@ PID
+	mcr	p15, 0, r5, c3, c0, 0	@ Domain ID
+	mcr	p15, 0, r1, c2, c0, 0	@ TTB address
+	mov	r0, r6			@ control register
+	b	cpu_resume_mmu
+ENDPROC(cpu_feroceon_do_resume)
+#endif
+
+	.type	__feroceon_setup, #function
+__feroceon_setup:
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c7		@ invalidate I,D caches on v4
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer on v4
+#ifdef CONFIG_MMU
+	mcr	p15, 0, r0, c8, c7		@ invalidate I,D TLBs on v4
+#endif
+
+	adr	r5, feroceon_crval
+	ldmia	r5, {r5, r6}
+	mrc	p15, 0, r0, c1, c0		@ get control register v4
+	bic	r0, r0, r5
+	orr	r0, r0, r6
+	ret	lr
+	.size	__feroceon_setup, . - __feroceon_setup
+
+	/*
+	 *      B
+	 *  R   P
+	 * .RVI UFRS BLDP WCAM
+	 * .011 .001 ..11 0101
+	 *
+	 */
+	.type	feroceon_crval, #object
+feroceon_crval:
+	crval	clear=0x0000773f, mmuset=0x00003135, ucset=0x00001134
+
+	__INITDATA
+
+	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+	define_processor_functions feroceon, dabort=v5t_early_abort, pabort=legacy_pabort
+
+	.section ".rodata"
+
+	string	cpu_arch_name, "armv5te"
+	string	cpu_elf_name, "v5"
+	string	cpu_feroceon_name, "Feroceon"
+	string	cpu_88fr531_name, "Feroceon 88FR531-vd"
+	string	cpu_88fr571_name, "Feroceon 88FR571-vd"
+	string	cpu_88fr131_name, "Feroceon 88FR131"
+
+	.align
+
+	.section ".proc.info.init", #alloc
+
+.macro feroceon_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache:req
+	.type	__\name\()_proc_info,#object
+__\name\()_proc_info:
+	.long	\cpu_val
+	.long	\cpu_mask
+	.long	PMD_TYPE_SECT | \
+		PMD_SECT_BUFFERABLE | \
+		PMD_SECT_CACHEABLE | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	.long	PMD_TYPE_SECT | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	initfn	__feroceon_setup, __\name\()_proc_info
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
+	.long	\cpu_name
+	.long	feroceon_processor_functions
+	.long	v4wbi_tlb_fns
+	.long	feroceon_user_fns
+	.long	\cache
+	 .size	__\name\()_proc_info, . - __\name\()_proc_info
+.endm
+
+#ifdef CONFIG_CPU_FEROCEON_OLD_ID
+	feroceon_proc_info feroceon_old_id, 0x41009260, 0xff00fff0, \
+		cpu_name=cpu_feroceon_name, cache=feroceon_cache_fns
+#endif
+
+	feroceon_proc_info 88fr531, 0x56055310, 0xfffffff0, cpu_88fr531_name, \
+		cache=feroceon_cache_fns
+	feroceon_proc_info 88fr571, 0x56155710, 0xfffffff0, cpu_88fr571_name, \
+		cache=feroceon_range_cache_fns
+	feroceon_proc_info 88fr131, 0x56251310, 0xfffffff0, cpu_88fr131_name, \
+		cache=feroceon_range_cache_fns
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
new file mode 100644
index 0000000..81d0efb
--- /dev/null
+++ b/arch/arm/mm/proc-macros.S
@@ -0,0 +1,377 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * We need asm-offsets.h for:
+ *  VMA_VM_MM
+ *  VMA_VM_FLAGS
+ *  VM_EXEC
+ */
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+
+#ifdef CONFIG_CPU_V7M
+#include <asm/v7m.h>
+#endif
+
+/*
+ * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
+ */
+	.macro	vma_vm_mm, rd, rn
+	ldr	\rd, [\rn, #VMA_VM_MM]
+	.endm
+
+/*
+ * vma_vm_flags - get vma->vm_flags
+ */
+	.macro	vma_vm_flags, rd, rn
+	ldr	\rd, [\rn, #VMA_VM_FLAGS]
+	.endm
+
+/*
+ * act_mm - get current->active_mm
+ */
+	.macro	act_mm, rd
+	bic	\rd, sp, #8128
+	bic	\rd, \rd, #63
+	ldr	\rd, [\rd, #TI_TASK]
+	.if (TSK_ACTIVE_MM > IMM12_MASK)
+	add	\rd, \rd, #TSK_ACTIVE_MM & ~IMM12_MASK
+	.endif
+	ldr	\rd, [\rd, #TSK_ACTIVE_MM & IMM12_MASK]
+	.endm
+
+/*
+ * mmid - get context id from mm pointer (mm->context.id)
+ * Note: this field is 64-bit, so on big-endian the two words are swapped.
+ */
+	.macro	mmid, rd, rn
+#ifdef __ARMEB__
+	ldr	\rd, [\rn, #MM_CONTEXT_ID + 4 ]
+#else
+	ldr	\rd, [\rn, #MM_CONTEXT_ID]
+#endif
+	.endm
+
+/*
+ * mask_asid - mask the ASID from the context ID
+ */
+	.macro	asid, rd, rn
+	and	\rd, \rn, #255
+	.endm
+
+	.macro	crval, clear, mmuset, ucset
+#ifdef CONFIG_MMU
+	.word	\clear
+	.word	\mmuset
+#else
+	.word	\clear
+	.word	\ucset
+#endif
+	.endm
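The two words crval emits are consumed by the __*_setup routines as a clear mask and a set mask for the control register ("bic r0, r0, r5; orr r0, r0, r6"), i.e. cr = (cr & ~clear) | set. A short C sketch:

#include <stdint.h>

/* Illustrative: how a crval clear/set pair is applied to the control register. */
static uint32_t apply_crval_sketch(uint32_t cr, uint32_t clear, uint32_t set)
{
	return (cr & ~clear) | set;
}

/* e.g. the Feroceon MMU case: apply_crval_sketch(cr, 0x0000773f, 0x00003135) */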
+
+/*
+ * dcache_line_size - get the minimum D-cache line size from the CTR register
+ * on ARMv7.
+ */
+	.macro	dcache_line_size, reg, tmp
+#ifdef CONFIG_CPU_V7M
+	movw	\tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
+	movt	\tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
+	ldr     \tmp, [\tmp]
+#else
+	mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
+#endif
+	lsr	\tmp, \tmp, #16
+	and	\tmp, \tmp, #0xf		@ cache line size encoding
+	mov	\reg, #4			@ bytes per word
+	mov	\reg, \reg, lsl \tmp		@ actual cache line size
+	.endm
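The decode above reads the D-cache line field from the ARMv7 CTR: bits [19:16] hold log2 of the line length in words, so the byte size is 4 shifted left by that field (icache_line_size below does the same with bits [3:0]). A C sketch:

#include <stdint.h>

/* Illustrative: dcache_line_size's CTR decode. */
static unsigned int dcache_line_size_sketch(uint32_t ctr)
{
	return 4u << ((ctr >> 16) & 0xf);
}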
+
+/*
+ * icache_line_size - get the minimum I-cache line size from the CTR register
+ * on ARMv7.
+ */
+	.macro	icache_line_size, reg, tmp
+#ifdef CONFIG_CPU_V7M
+	movw	\tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
+	movt	\tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
+	ldr     \tmp, [\tmp]
+#else
+	mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
+#endif
+	and	\tmp, \tmp, #0xf		@ cache line size encoding
+	mov	\reg, #4			@ bytes per word
+	mov	\reg, \reg, lsl \tmp		@ actual cache line size
+	.endm
+
+/*
+ * Sanity check the PTE configuration for the code below - which makes
+ * certain assumptions about how these bits are laid out.
+ */
+#ifdef CONFIG_MMU
+#if L_PTE_SHARED != PTE_EXT_SHARED
+#error PTE shared bit mismatch
+#endif
+#if !defined (CONFIG_ARM_LPAE) && \
+	(L_PTE_XN+L_PTE_USER+L_PTE_RDONLY+L_PTE_DIRTY+L_PTE_YOUNG+\
+	 L_PTE_PRESENT) > L_PTE_SHARED
+#error Invalid Linux PTE bit settings
+#endif
+#endif	/* CONFIG_MMU */
+
+/*
+ * The ARMv6 and ARMv7 set_pte_ext translation function.
+ *
+ * Permission translation:
+ *  YUWD  APX AP1 AP0	SVC	User
+ *  0xxx   0   0   0	no acc	no acc
+ *  100x   1   0   1	r/o	no acc
+ *  10x0   1   0   1	r/o	no acc
+ *  1011   0   0   1	r/w	no acc
+ *  110x   1   1   1	r/o	r/o
+ *  11x0   1   1   1	r/o	r/o
+ *  1111   0   1   1	r/w	r/w
+ */
+	.macro	armv6_mt_table pfx
+\pfx\()_mt_table:
+	.long	0x00						@ L_PTE_MT_UNCACHED
+	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_BUFFERABLE
+	.long	PTE_CACHEABLE					@ L_PTE_MT_WRITETHROUGH
+	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_WRITEBACK
+	.long	PTE_BUFFERABLE					@ L_PTE_MT_DEV_SHARED
+	.long	0x00						@ unused
+	.long	0x00						@ L_PTE_MT_MINICACHE (not present)
+	.long	PTE_EXT_TEX(1) | PTE_CACHEABLE | PTE_BUFFERABLE	@ L_PTE_MT_WRITEALLOC
+	.long	0x00						@ unused
+	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_DEV_WC
+	.long	0x00						@ unused
+	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_DEV_CACHED
+	.long	PTE_EXT_TEX(2)					@ L_PTE_MT_DEV_NONSHARED
+	.long	0x00						@ unused
+	.long	0x00						@ unused
+	.long	PTE_CACHEABLE | PTE_BUFFERABLE | PTE_EXT_APX	@ L_PTE_MT_VECTORS
+	.endm
+
+	.macro	armv6_set_pte_ext pfx
+	str	r1, [r0], #2048			@ linux version
+
+	bic	r3, r1, #0x000003fc
+	bic	r3, r3, #PTE_TYPE_MASK
+	orr	r3, r3, r2
+	orr	r3, r3, #PTE_EXT_AP0 | 2
+
+	adr	ip, \pfx\()_mt_table
+	and	r2, r1, #L_PTE_MT_MASK
+	ldr	r2, [ip, r2]
+
+	eor	r1, r1, #L_PTE_DIRTY
+	tst	r1, #L_PTE_DIRTY|L_PTE_RDONLY
+	orrne	r3, r3, #PTE_EXT_APX
+
+	tst	r1, #L_PTE_USER
+	orrne	r3, r3, #PTE_EXT_AP1
+	tstne	r3, #PTE_EXT_APX
+
+	@ user read-only -> kernel read-only
+	bicne	r3, r3, #PTE_EXT_AP0
+
+	tst	r1, #L_PTE_XN
+	orrne	r3, r3, #PTE_EXT_XN
+
+	eor	r3, r3, r2
+
+	tst	r1, #L_PTE_YOUNG
+	tstne	r1, #L_PTE_PRESENT
+	moveq	r3, #0
+	tstne	r1, #L_PTE_NONE
+	movne	r3, #0
+
+	str	r3, [r0]
+	mcr	p15, 0, r0, c7, c10, 1		@ flush_pte
+	.endm
+
+
+/*
+ * The ARMv3, ARMv4 and ARMv5 set_pte_ext translation function,
+ * covering most CPUs except Xscale and Xscale 3.
+ *
+ * Permission translation:
+ *  YUWD   AP	SVC	User
+ *  0xxx  0x00	no acc	no acc
+ *  100x  0x00	r/o	no acc
+ *  10x0  0x00	r/o	no acc
+ *  1011  0x55	r/w	no acc
+ *  110x  0xaa	r/w	r/o
+ *  11x0  0xaa	r/w	r/o
+ *  1111  0xff	r/w	r/w
+ */
+	.macro	armv3_set_pte_ext wc_disable=1
+	str	r1, [r0], #2048			@ linux version
+
+	eor	r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY
+
+	bic	r2, r1, #PTE_SMALL_AP_MASK	@ keep C, B bits
+	bic	r2, r2, #PTE_TYPE_MASK
+	orr	r2, r2, #PTE_TYPE_SMALL
+
+	tst	r3, #L_PTE_USER			@ user?
+	orrne	r2, r2, #PTE_SMALL_AP_URO_SRW
+
+	tst	r3, #L_PTE_RDONLY | L_PTE_DIRTY	@ write and dirty?
+	orreq	r2, r2, #PTE_SMALL_AP_UNO_SRW
+
+	tst	r3, #L_PTE_PRESENT | L_PTE_YOUNG	@ present and young?
+	movne	r2, #0
+
+	.if	\wc_disable
+#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
+	tst	r2, #PTE_CACHEABLE
+	bicne	r2, r2, #PTE_BUFFERABLE
+#endif
+	.endif
+	str	r2, [r0]		@ hardware version
+	.endm
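armv3_set_pte_ext relies on the classic ARM layout where each Linux page table is shadowed by the hardware table 2048 bytes later: the macro stores the Linux view first ("str r1, [r0], #2048"), derives the hardware small-page descriptor using the permission table above, and stores it at the advanced pointer. A minimal C sketch of just the two stores (the permission translation itself is omitted):

#include <stdint.h>

/* Illustrative: Linux PTE at offset 0, hardware PTE 2048 bytes later. */
static void set_pte_pair_sketch(uint32_t *ptep, uint32_t linux_pte, uint32_t hw_pte)
{
	ptep[0] = linux_pte;
	ptep[2048 / sizeof(*ptep)] = hw_pte;
}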
+
+
+/*
+ * Xscale set_pte_ext translation, split into two halves to cope
+ * with work-arounds.  r3 must be preserved by code between these
+ * two macros.
+ *
+ * Permission translation:
+ *  YUWD  AP	SVC	User
+ *  0xxx  00	no acc	no acc
+ *  100x  00	r/o	no acc
+ *  10x0  00	r/o	no acc
+ *  1011  01	r/w	no acc
+ *  110x  10	r/w	r/o
+ *  11x0  10	r/w	r/o
+ *  1111  11	r/w	r/w
+ */
+	.macro	xscale_set_pte_ext_prologue
+	str	r1, [r0]			@ linux version
+
+	eor	r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY
+
+	bic	r2, r1, #PTE_SMALL_AP_MASK	@ keep C, B bits
+	orr	r2, r2, #PTE_TYPE_EXT		@ extended page
+
+	tst	r3, #L_PTE_USER			@ user?
+	orrne	r2, r2, #PTE_EXT_AP_URO_SRW	@ yes -> user r/o, system r/w
+
+	tst	r3, #L_PTE_RDONLY | L_PTE_DIRTY	@ write and dirty?
+	orreq	r2, r2, #PTE_EXT_AP_UNO_SRW	@ yes -> user n/a, system r/w
+						@ combined with user -> user r/w
+	.endm
+
+	.macro	xscale_set_pte_ext_epilogue
+	tst	r3, #L_PTE_PRESENT | L_PTE_YOUNG	@ present and young?
+	movne	r2, #0				@ no -> fault
+
+	str	r2, [r0, #2048]!		@ hardware version
+	mov	ip, #0
+	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
+	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
+	.endm
+
+.macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0, bugs=0
+	.type	\name\()_processor_functions, #object
+	.align 2
+ENTRY(\name\()_processor_functions)
+	.word	\dabort
+	.word	\pabort
+	.word	cpu_\name\()_proc_init
+	.word	\bugs
+	.word	cpu_\name\()_proc_fin
+	.word	cpu_\name\()_reset
+	.word	cpu_\name\()_do_idle
+	.word	cpu_\name\()_dcache_clean_area
+	.word	cpu_\name\()_switch_mm
+
+	.if \nommu
+	.word	0
+	.else
+	.word	cpu_\name\()_set_pte_ext
+	.endif
+
+	.if \suspend
+	.word	cpu_\name\()_suspend_size
+#ifdef CONFIG_ARM_CPU_SUSPEND
+	.word	cpu_\name\()_do_suspend
+	.word	cpu_\name\()_do_resume
+#else
+	.word	0
+	.word	0
+#endif
+	.else
+	.word	0
+	.word	0
+	.word	0
+	.endif
+
+	.size	\name\()_processor_functions, . - \name\()_processor_functions
+.endm
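Each .word emitted by define_processor_functions fills one slot of struct processor from <asm/proc-fns.h>. A hedged C sketch of the slot order (member names are paraphrased rather than copied from the header; every entry is one 32-bit word in the assembled table):

/* Illustrative sketch of the table layout the macro above produces. */
struct processor_sketch {
	void *data_abort;		/* \dabort                    */
	void *prefetch_abort;		/* \pabort                    */
	void *proc_init;
	void *check_bugs;		/* \bugs, or 0                */
	void *proc_fin;
	void *reset;
	void *do_idle;
	void *dcache_clean_area;
	void *switch_mm;
	void *set_pte_ext;		/* 0 when nommu=1             */
	unsigned long suspend_size;	/* 0 unless suspend=1         */
	void *do_suspend;		/* 0 without CPU suspend      */
	void *do_resume;
};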
+
+.macro define_cache_functions name:req
+	.align 2
+	.type	\name\()_cache_fns, #object
+ENTRY(\name\()_cache_fns)
+	.long	\name\()_flush_icache_all
+	.long	\name\()_flush_kern_cache_all
+	.long   \name\()_flush_kern_cache_louis
+	.long	\name\()_flush_user_cache_all
+	.long	\name\()_flush_user_cache_range
+	.long	\name\()_coherent_kern_range
+	.long	\name\()_coherent_user_range
+	.long	\name\()_flush_kern_dcache_area
+	.long	\name\()_dma_map_area
+	.long	\name\()_dma_unmap_area
+	.long	\name\()_dma_flush_range
+	.size	\name\()_cache_fns, . - \name\()_cache_fns
+.endm
+
+.macro define_tlb_functions name:req, flags_up:req, flags_smp
+	.type	\name\()_tlb_fns, #object
+ENTRY(\name\()_tlb_fns)
+	.long	\name\()_flush_user_tlb_range
+	.long	\name\()_flush_kern_tlb_range
+	.ifnb \flags_smp
+		ALT_SMP(.long	\flags_smp )
+		ALT_UP(.long	\flags_up )
+	.else
+		.long	\flags_up
+	.endif
+	.size	\name\()_tlb_fns, . - \name\()_tlb_fns
+.endm
+
+.macro globl_equ x, y
+	.globl	\x
+	.equ	\x, \y
+.endm
+
+.macro	initfn, func, base
+	.long	\func - \base
+.endm
+
+	/*
+	 * Macro to calculate the log2 size for the protection region
+	 * registers. This calculates rd = log2(size) - 1.  tmp must
+	 * not be the same register as rd.
+	 */
+.macro	pr_sz, rd, size, tmp
+	mov	\tmp, \size, lsr #12
+	mov	\rd, #11
+1:	movs	\tmp, \tmp, lsr #1
+	addne	\rd, \rd, #1
+	bne	1b
+.endm
+
+	/*
+	 * Macro to generate a protection region register value
+	 * given a pre-masked address, size, and enable bit.
+	 * Corrupts size.
+	 */
+.macro	pr_val, dest, addr, size, enable
+	pr_sz	\dest, \size, \size		@ calculate log2(size) - 1
+	orr	\dest, \addr, \dest, lsl #1	@ mask in the region size
+	orr	\dest, \dest, \enable
+.endm
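pr_sz reduces a power-of-two region size to the log2(size) - 1 field the MPU region registers expect, and pr_val packs it with the pre-masked base address and the enable bit. A small C sketch of the same computation (illustrative only; it assumes size is a power of two of at least 4KB, as the callers require):

#include <stdint.h>

/* Illustrative sketch of pr_sz/pr_val:
 * region value = base | ((log2(size) - 1) << 1) | enable. */
static uint32_t pr_val_sketch(uint32_t base, uint32_t size, int enable)
{
	uint32_t sz = 11;		/* log2(4KB) - 1, the minimum */
	uint32_t tmp = size >> 12;

	while (tmp >>= 1)
		sz++;

	return base | (sz << 1) | (enable ? 1u : 0u);
}

/* e.g. a 64MB RAM region at address 0: pr_val_sketch(0, 64 << 20, 1) == 0x33 */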
diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S
new file mode 100644
index 0000000..6f07d2e
--- /dev/null
+++ b/arch/arm/mm/proc-mohawk.S
@@ -0,0 +1,457 @@
+/*
+ *  linux/arch/arm/mm/proc-mohawk.S: MMU functions for Marvell PJ1 core
+ *
+ *  PJ1 (codename Mohawk) is a hybrid of the XScale3 and Marvell's own core.
+ *
+ *  Heavily based on proc-arm926.S and proc-xsc3.S
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include "proc-macros.S"
+
+/*
+ * This is the maximum size of an area which will be flushed.  If the
+ * area is larger than this, then we flush the whole cache.
+ */
+#define CACHE_DLIMIT	32768
+
+/*
+ * The cache line size of the L1 D cache.
+ */
+#define CACHE_DLINESIZE	32
+
+/*
+ * cpu_mohawk_proc_init()
+ */
+ENTRY(cpu_mohawk_proc_init)
+	ret	lr
+
+/*
+ * cpu_mohawk_proc_fin()
+ */
+ENTRY(cpu_mohawk_proc_fin)
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1800			@ ...iz...........
+	bic	r0, r0, #0x0006			@ .............ca.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ret	lr
+
+/*
+ * cpu_mohawk_reset(loc)
+ *
+ * Perform a soft reset of the system.  Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ *
+ * (same as arm926)
+ */
+	.align	5
+	.pushsection	.idmap.text, "ax"
+ENTRY(cpu_mohawk_reset)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I,D caches
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+	mrc	p15, 0, ip, c1, c0, 0		@ ctrl register
+	bic	ip, ip, #0x0007			@ .............cam
+	bic	ip, ip, #0x1100			@ ...i...s........
+	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
+	ret	r0
+ENDPROC(cpu_mohawk_reset)
+	.popsection
+
+/*
+ * cpu_mohawk_do_idle()
+ *
+ * Called with IRQs disabled
+ */
+	.align	5
+ENTRY(cpu_mohawk_do_idle)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	mcr	p15, 0, r0, c7, c0, 4		@ wait for interrupt
+	ret	lr
+
+/*
+ *	flush_icache_all()
+ *
+ *	Unconditionally clean and invalidate the entire icache.
+ */
+ENTRY(mohawk_flush_icache_all)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	ret	lr
+ENDPROC(mohawk_flush_icache_all)
+
+/*
+ *	flush_user_cache_all()
+ *
+ *	Clean and invalidate all cache entries in a particular
+ *	address space.
+ */
+ENTRY(mohawk_flush_user_cache_all)
+	/* FALLTHROUGH */
+
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(mohawk_flush_kern_cache_all)
+	mov	r2, #VM_EXEC
+	mov	ip, #0
+__flush_whole_cache:
+	mcr	p15, 0, ip, c7, c14, 0		@ clean & invalidate all D cache
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcrne	p15, 0, ip, c7, c10, 0		@ drain write buffer
+	ret	lr
+
+/*
+ *	flush_user_cache_range(start, end, flags)
+ *
+ *	Clean and invalidate a range of cache entries in the
+ *	specified address range.
+ *
+ *	- start	- start address (inclusive)
+ *	- end	- end address (exclusive)
+ *	- flags	- vm_flags describing address space
+ *
+ * (same as arm926)
+ */
+ENTRY(mohawk_flush_user_cache_range)
+	mov	ip, #0
+	sub	r3, r1, r0			@ calculate total size
+	cmp	r3, #CACHE_DLIMIT
+	bgt	__flush_whole_cache
+1:	tst	r2, #VM_EXEC
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D entry
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	mcr	p15, 0, r0, c7, c14, 1		@ clean and invalidate D entry
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start, end.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(mohawk_coherent_kern_range)
+	/* FALLTHROUGH */
+
+/*
+ *	coherent_user_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start, end.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as arm926)
+ */
+ENTRY(mohawk_coherent_user_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, r0, c7, c5, 1		@ invalidate I entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	mov	r0, #0
+	ret	lr
+
+/*
+ *	flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure no D cache aliasing occurs, either with itself or
+ *	the I cache
+ *
+ *	- addr	- kernel address
+ *	- size	- region size
+ */
+ENTRY(mohawk_flush_kern_dcache_area)
+	add	r1, r0, r1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_inv_range(start, end)
+ *
+ *	Invalidate (discard) the specified virtual address range.
+ *	May not write back any entries.  If 'start' or 'end'
+ *	are not cache line aligned, those lines must be written
+ *	back.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+mohawk_dma_inv_range:
+	tst	r0, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r0, c7, c10, 1		@ clean D entry
+	tst	r1, #CACHE_DLINESIZE - 1
+	mcrne	p15, 0, r1, c7, c10, 1		@ clean D entry
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_clean_range(start, end)
+ *
+ *	Clean the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ *
+ * (same as v4wb)
+ */
+mohawk_dma_clean_range:
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_flush_range(start, end)
+ *
+ *	Clean and invalidate the specified virtual address range.
+ *
+ *	- start	- virtual start address
+ *	- end	- virtual end address
+ */
+ENTRY(mohawk_dma_flush_range)
+	bic	r0, r0, #CACHE_DLINESIZE - 1
+1:
+	mcr	p15, 0, r0, c7, c14, 1		@ clean+invalidate D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
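+/*
+ * The dispatch below relies on the generic dma_data_direction values
+ * (DMA_BIDIRECTIONAL=0, DMA_TO_DEVICE=1, DMA_FROM_DEVICE=2): clean for
+ * TO_DEVICE, invalidate for FROM_DEVICE, and clean+invalidate for
+ * BIDIRECTIONAL, after r1 has been converted from a size to an end
+ * address.
+ */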
+ENTRY(mohawk_dma_map_area)
+	add	r1, r1, r0
+	cmp	r2, #DMA_TO_DEVICE
+	beq	mohawk_dma_clean_range
+	bcs	mohawk_dma_inv_range
+	b	mohawk_dma_flush_range
+ENDPROC(mohawk_dma_map_area)
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(mohawk_dma_unmap_area)
+	ret	lr
+ENDPROC(mohawk_dma_unmap_area)
+
+	.globl	mohawk_flush_kern_cache_louis
+	.equ	mohawk_flush_kern_cache_louis, mohawk_flush_kern_cache_all
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions mohawk
+
+ENTRY(cpu_mohawk_dcache_clean_area)
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHE_DLINESIZE
+	subs	r1, r1, #CACHE_DLINESIZE
+	bhi	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+
+/*
+ * cpu_mohawk_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+	.align	5
+ENTRY(cpu_mohawk_switch_mm)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c14, 0		@ clean & invalidate all D cache
+	mcr	p15, 0, ip, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+	orr	r0, r0, #0x18			@ cache the page table in L2
+	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+	ret	lr
+
+/*
+ * cpu_mohawk_set_pte_ext(ptep, pte, ext)
+ *
+ * Set a PTE and flush it out
+ */
+	.align	5
+ENTRY(cpu_mohawk_set_pte_ext)
+#ifdef CONFIG_MMU
+	armv3_set_pte_ext
+	mov	r0, r0
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+	ret	lr
+#endif
+
+.globl	cpu_mohawk_suspend_size
+.equ	cpu_mohawk_suspend_size, 4 * 6
+#ifdef CONFIG_ARM_CPU_SUSPEND
+ENTRY(cpu_mohawk_do_suspend)
+	stmfd	sp!, {r4 - r9, lr}
+	mrc	p14, 0, r4, c6, c0, 0	@ clock configuration, for turbo mode
+	mrc	p15, 0, r5, c15, c1, 0	@ CP access reg
+	mrc	p15, 0, r6, c13, c0, 0	@ PID
+	mrc 	p15, 0, r7, c3, c0, 0	@ domain ID
+	mrc	p15, 0, r8, c1, c0, 1	@ auxiliary control reg
+	mrc 	p15, 0, r9, c1, c0, 0	@ control reg
+	bic	r4, r4, #2		@ clear frequency change bit
+	stmia	r0, {r4 - r9}		@ store cp regs
+	ldmia	sp!, {r4 - r9, pc}
+ENDPROC(cpu_mohawk_do_suspend)
+
+ENTRY(cpu_mohawk_do_resume)
+	ldmia	r0, {r4 - r9}		@ load cp regs
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c7, 0	@ invalidate I & D caches, BTB
+	mcr	p15, 0, ip, c7, c10, 4	@ drain write (&fill) buffer
+	mcr	p15, 0, ip, c7, c5, 4	@ flush prefetch buffer
+	mcr	p15, 0, ip, c8, c7, 0	@ invalidate I & D TLBs
+	mcr	p14, 0, r4, c6, c0, 0	@ clock configuration, turbo mode.
+	mcr	p15, 0, r5, c15, c1, 0	@ CP access reg
+	mcr	p15, 0, r6, c13, c0, 0	@ PID
+	mcr	p15, 0, r7, c3, c0, 0	@ domain ID
+	orr	r1, r1, #0x18		@ cache the page table in L2
+	mcr	p15, 0, r1, c2, c0, 0	@ translation table base addr
+	mcr	p15, 0, r8, c1, c0, 1	@ auxiliary control reg
+	mov	r0, r9			@ control register
+	b	cpu_resume_mmu
+ENDPROC(cpu_mohawk_do_resume)
+#endif
+
+	.type	__mohawk_setup, #function
+__mohawk_setup:
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c7		@ invalidate I,D caches
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer
+	mcr	p15, 0, r0, c8, c7		@ invalidate I,D TLBs
+	orr	r4, r4, #0x18			@ cache the page table in L2
+	mcr	p15, 0, r4, c2, c0, 0		@ load page table pointer
+
+	mov	r0, #0				@ don't allow CP access
+	mcr	p15, 0, r0, c15, c1, 0		@ write CP access register
+
+	adr	r5, mohawk_crval
+	ldmia	r5, {r5, r6}
+	mrc	p15, 0, r0, c1, c0		@ get control register
+	bic	r0, r0, r5
+	orr	r0, r0, r6
+	ret	lr
+
+	.size	__mohawk_setup, . - __mohawk_setup
+
+	/*
+	 *  R
+	 * .RVI ZFRS BLDP WCAM
+	 * .011 1001 ..00 0101
+	 *
+	 */
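+	/*
+	 * crval emits two words: the control register bits to clear and the
+	 * bits to set (mmuset with an MMU, ucset without); __mohawk_setup
+	 * loads the pair with ldmia and applies them with bic/orr above.
+	 */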
+	.type	mohawk_crval, #object
+mohawk_crval:
+	crval	clear=0x00007f3f, mmuset=0x00003905, ucset=0x00001134
+
+	__INITDATA
+
+	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+	define_processor_functions mohawk, dabort=v5t_early_abort, pabort=legacy_pabort
+
+	.section ".rodata"
+
+	string	cpu_arch_name, "armv5te"
+	string	cpu_elf_name, "v5"
+	string	cpu_mohawk_name, "Marvell 88SV331x"
+
+	.align
+
+	.section ".proc.info.init", #alloc
+
+	.type	__88sv331x_proc_info,#object
+__88sv331x_proc_info:
+	.long	0x56158000			@ Marvell 88SV331x (MOHAWK)
+	.long	0xfffff000
+	.long   PMD_TYPE_SECT | \
+		PMD_SECT_BUFFERABLE | \
+		PMD_SECT_CACHEABLE | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	.long   PMD_TYPE_SECT | \
+		PMD_BIT4 | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	initfn	__mohawk_setup, __88sv331x_proc_info
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
+	.long	cpu_mohawk_name
+	.long	mohawk_processor_functions
+	.long	v4wbi_tlb_fns
+	.long	v4wb_user_fns
+	.long	mohawk_cache_fns
+	.size	__88sv331x_proc_info, . - __88sv331x_proc_info
diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S
new file mode 100644
index 0000000..ee2ce49
--- /dev/null
+++ b/arch/arm/mm/proc-sa110.S
@@ -0,0 +1,225 @@
+/*
+ *  linux/arch/arm/mm/proc-sa110.S
+ *
+ *  Copyright (C) 1997-2002 Russell King
+ *  hacked for non-paged-MM by Hyok S. Choi, 2003.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  MMU functions for SA110
+ *
+ *  These are the low level assembler for performing cache and TLB
+ *  functions on the StrongARM-110.
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/hwcap.h>
+#include <mach/hardware.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/ptrace.h>
+
+#include "proc-macros.S"
+
+/*
+ * the cache line size of the I and D cache
+ */
+#define DCACHELINESIZE	32
+
+	.text
+
+/*
+ * cpu_sa110_proc_init()
+ */
+ENTRY(cpu_sa110_proc_init)
+	mov	r0, #0
+	mcr	p15, 0, r0, c15, c1, 2		@ Enable clock switching
+	ret	lr
+
+/*
+ * cpu_sa110_proc_fin()
+ */
+ENTRY(cpu_sa110_proc_fin)
+	mov	r0, #0
+	mcr	p15, 0, r0, c15, c2, 2		@ Disable clock switching
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1000			@ ...i............
+	bic	r0, r0, #0x000e			@ ............wca.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ret	lr
+
+/*
+ * cpu_sa110_reset(loc)
+ *
+ * Perform a soft reset of the system.  Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ */
+	.align	5
+	.pushsection	.idmap.text, "ax"
+ENTRY(cpu_sa110_reset)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I,D caches
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+#ifdef CONFIG_MMU
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+#endif
+	mrc	p15, 0, ip, c1, c0, 0		@ ctrl register
+	bic	ip, ip, #0x000f			@ ............wcam
+	bic	ip, ip, #0x1100			@ ...i...s........
+	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
+	ret	r0
+ENDPROC(cpu_sa110_reset)
+	.popsection
+
+/*
+ * cpu_sa110_do_idle(type)
+ *
+ * Cause the processor to idle
+ *
+ * type: call type:
+ *   0 = slow idle
+ *   1 = fast idle
+ *   2 = switch to slow processor clock
+ *   3 = switch to fast processor clock
+ */
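+/*
+ * The load from UNCACHEABLE_ADDR forces the core onto the memory clock
+ * before the wait-for-interrupt is issued, and the surrounding "safety"
+ * nops together with the .align 5 keep the critical sequence within a
+ * single cache line.
+ */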
+	.align	5
+
+ENTRY(cpu_sa110_do_idle)
+	mcr	p15, 0, ip, c15, c2, 2		@ disable clock switching
+	ldr	r1, =UNCACHEABLE_ADDR		@ load from uncacheable loc
+	ldr	r1, [r1, #0]			@ force switch to MCLK
+	mov	r0, r0				@ safety
+	mov	r0, r0				@ safety
+	mov	r0, r0				@ safety
+	mcr	p15, 0, r0, c15, c8, 2		@ Wait for interrupt, cache aligned
+	mov	r0, r0				@ safety
+	mov	r0, r0				@ safety
+	mov	r0, r0				@ safety
+	mcr	p15, 0, r0, c15, c1, 2		@ enable clock switching
+	ret	lr
+
+/* ================================= CACHE ================================ */
+
+/*
+ * cpu_sa110_dcache_clean_area(addr,sz)
+ *
+ * Clean the specified entry of any caches such that the MMU
+ * translation fetches will obtain correct data.
+ *
+ * addr: cache-unaligned virtual address
+ */
+	.align	5
+ENTRY(cpu_sa110_dcache_clean_area)
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #DCACHELINESIZE
+	subs	r1, r1, #DCACHELINESIZE
+	bhi	1b
+	ret	lr
+
+/* =============================== PageTable ============================== */
+
+/*
+ * cpu_sa110_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+	.align	5
+ENTRY(cpu_sa110_switch_mm)
+#ifdef CONFIG_MMU
+	str	lr, [sp, #-4]!
+	bl	v4wb_flush_kern_cache_all	@ clears IP
+	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+	ldr	pc, [sp], #4
+#else
+	ret	lr
+#endif
+
+/*
+ * cpu_sa110_set_pte_ext(ptep, pte, ext)
+ *
+ * Set a PTE and flush it out
+ */
+	.align	5
+ENTRY(cpu_sa110_set_pte_ext)
+#ifdef CONFIG_MMU
+	armv3_set_pte_ext wc_disable=0
+	mov	r0, r0
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+#endif
+	ret	lr
+
+	.type	__sa110_setup, #function
+__sa110_setup:
+	mov	r10, #0
+	mcr	p15, 0, r10, c7, c7		@ invalidate I,D caches on v4
+	mcr	p15, 0, r10, c7, c10, 4		@ drain write buffer on v4
+#ifdef CONFIG_MMU
+	mcr	p15, 0, r10, c8, c7		@ invalidate I,D TLBs on v4
+#endif
+
+	adr	r5, sa110_crval
+	ldmia	r5, {r5, r6}
+	mrc	p15, 0, r0, c1, c0		@ get control register v4
+	bic	r0, r0, r5
+	orr	r0, r0, r6
+	ret	lr
+	.size	__sa110_setup, . - __sa110_setup
+
+	/*
+	 *  R
+	 * .RVI ZFRS BLDP WCAM
+	 * ..01 0001 ..11 1101
+	 * 
+	 */
+	.type	sa110_crval, #object
+sa110_crval:
+	crval	clear=0x00003f3f, mmuset=0x0000113d, ucset=0x00001130
+
+	__INITDATA
+
+	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+	define_processor_functions sa110, dabort=v4_early_abort, pabort=legacy_pabort
+
+	.section ".rodata"
+
+	string	cpu_arch_name, "armv4"
+	string	cpu_elf_name, "v4"
+	string	cpu_sa110_name, "StrongARM-110"
+
+	.align
+
+	.section ".proc.info.init", #alloc
+
+	.type	__sa110_proc_info,#object
+__sa110_proc_info:
+	.long	0x4401a100
+	.long	0xfffffff0
+	.long   PMD_TYPE_SECT | \
+		PMD_SECT_BUFFERABLE | \
+		PMD_SECT_CACHEABLE | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	.long   PMD_TYPE_SECT | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	initfn	__sa110_setup, __sa110_proc_info
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT
+	.long	cpu_sa110_name
+	.long	sa110_processor_functions
+	.long	v4wb_tlb_fns
+	.long	v4wb_user_fns
+	.long	v4wb_cache_fns
+	.size	__sa110_proc_info, . - __sa110_proc_info
diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S
new file mode 100644
index 0000000..222d583
--- /dev/null
+++ b/arch/arm/mm/proc-sa1100.S
@@ -0,0 +1,273 @@
+/*
+ *  linux/arch/arm/mm/proc-sa1100.S
+ *
+ *  Copyright (C) 1997-2002 Russell King
+ *  hacked for non-paged-MM by Hyok S. Choi, 2003.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  MMU functions for SA1100 and SA1110
+ *
+ *  These are the low level assembler for performing cache and TLB
+ *  functions on the StrongARM-1100 and StrongARM-1110.
+ *
+ *  Note that SA1100 and SA1110 share everything but their name and CPU ID.
+ *
+ *  12-jun-2000, Erik Mouw (J.A.K.Mouw@its.tudelft.nl):
+ *    Flush the read buffer at context switches
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/hwcap.h>
+#include <mach/hardware.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+
+#include "proc-macros.S"
+
+/*
+ * the cache line size of the I and D cache
+ */
+#define DCACHELINESIZE	32
+
+	.section .text
+
+/*
+ * cpu_sa1100_proc_init()
+ */
+ENTRY(cpu_sa1100_proc_init)
+	mov	r0, #0
+	mcr	p15, 0, r0, c15, c1, 2		@ Enable clock switching
+	mcr	p15, 0, r0, c9, c0, 5		@ Allow read-buffer operations from userland
+	ret	lr
+
+/*
+ * cpu_sa1100_proc_fin()
+ *
+ * Prepare the CPU for reset:
+ *  - Disable interrupts
+ *  - Clean and turn off caches.
+ */
+ENTRY(cpu_sa1100_proc_fin)
+	mcr	p15, 0, ip, c15, c2, 2		@ Disable clock switching
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1000			@ ...i............
+	bic	r0, r0, #0x000e			@ ............wca.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ret	lr
+
+/*
+ * cpu_sa1100_reset(loc)
+ *
+ * Perform a soft reset of the system.  Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ */
+	.align	5
+	.pushsection	.idmap.text, "ax"
+ENTRY(cpu_sa1100_reset)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I,D caches
+	mcr	p15, 0, ip, c7, c10, 4		@ drain WB
+#ifdef CONFIG_MMU
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+#endif
+	mrc	p15, 0, ip, c1, c0, 0		@ ctrl register
+	bic	ip, ip, #0x000f			@ ............wcam
+	bic	ip, ip, #0x1100			@ ...i...s........
+	mcr	p15, 0, ip, c1, c0, 0		@ ctrl register
+	ret	r0
+ENDPROC(cpu_sa1100_reset)
+	.popsection
+
+/*
+ * cpu_sa1100_do_idle(type)
+ *
+ * Cause the processor to idle
+ *
+ * type: call type:
+ *   0 = slow idle
+ *   1 = fast idle
+ *   2 = switch to slow processor clock
+ *   3 = switch to fast processor clock
+ */
+	.align	5
+ENTRY(cpu_sa1100_do_idle)
+	mov	r0, r0				@ 4 nop padding
+	mov	r0, r0
+	mov	r0, r0
+	mov	r0, r0				@ 4 nop padding
+	mov	r0, r0
+	mov	r0, r0
+	mov	r0, #0
+	ldr	r1, =UNCACHEABLE_ADDR		@ ptr to uncacheable address
+	@ --- aligned to a cache line
+	mcr	p15, 0, r0, c15, c2, 2		@ disable clock switching
+	ldr	r1, [r1, #0]			@ force switch to MCLK
+	mcr	p15, 0, r0, c15, c8, 2		@ wait for interrupt
+	mov	r0, r0				@ safety
+	mcr	p15, 0, r0, c15, c1, 2		@ enable clock switching
+	ret	lr
+
+/* ================================= CACHE ================================ */
+
+/*
+ * cpu_sa1100_dcache_clean_area(addr,sz)
+ *
+ * Clean the specified entry of any caches such that the MMU
+ * translation fetches will obtain correct data.
+ *
+ * addr: cache-unaligned virtual address
+ */
+	.align	5
+ENTRY(cpu_sa1100_dcache_clean_area)
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #DCACHELINESIZE
+	subs	r1, r1, #DCACHELINESIZE
+	bhi	1b
+	ret	lr
+
+/* =============================== PageTable ============================== */
+
+/*
+ * cpu_sa1100_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+	.align	5
+ENTRY(cpu_sa1100_switch_mm)
+#ifdef CONFIG_MMU
+	str	lr, [sp, #-4]!
+	bl	v4wb_flush_kern_cache_all	@ clears IP
+	mcr	p15, 0, ip, c9, c0, 0		@ invalidate RB
+	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+	ldr	pc, [sp], #4
+#else
+	ret	lr
+#endif
+
+/*
+ * cpu_sa1100_set_pte_ext(ptep, pte, ext)
+ *
+ * Set a PTE and flush it out
+ */
+	.align	5
+ENTRY(cpu_sa1100_set_pte_ext)
+#ifdef CONFIG_MMU
+	armv3_set_pte_ext wc_disable=0
+	mov	r0, r0
+	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, r0, c7, c10, 4		@ drain WB
+#endif
+	ret	lr
+
+.globl	cpu_sa1100_suspend_size
+.equ	cpu_sa1100_suspend_size, 4 * 3
+#ifdef CONFIG_ARM_CPU_SUSPEND
+ENTRY(cpu_sa1100_do_suspend)
+	stmfd	sp!, {r4 - r6, lr}
+	mrc	p15, 0, r4, c3, c0, 0		@ domain ID
+	mrc	p15, 0, r5, c13, c0, 0		@ PID
+	mrc	p15, 0, r6, c1, c0, 0		@ control reg
+	stmia	r0, {r4 - r6}			@ store cp regs
+	ldmfd	sp!, {r4 - r6, pc}
+ENDPROC(cpu_sa1100_do_suspend)
+
+ENTRY(cpu_sa1100_do_resume)
+	ldmia	r0, {r4 - r6}			@ load cp regs
+	mov	ip, #0
+	mcr	p15, 0, ip, c8, c7, 0		@ flush I+D TLBs
+	mcr	p15, 0, ip, c7, c7, 0		@ flush I&D cache
+	mcr	p15, 0, ip, c9, c0, 0		@ invalidate RB
+	mcr	p15, 0, ip, c9, c0, 5		@ allow user space to use RB
+
+	mcr	p15, 0, r4, c3, c0, 0		@ domain ID
+	mcr	p15, 0, r1, c2, c0, 0		@ translation table base addr
+	mcr	p15, 0, r5, c13, c0, 0		@ PID
+	mov	r0, r6				@ control register
+	b	cpu_resume_mmu
+ENDPROC(cpu_sa1100_do_resume)
+#endif
+
+	.type	__sa1100_setup, #function
+__sa1100_setup:
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c7		@ invalidate I,D caches on v4
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer on v4
+#ifdef CONFIG_MMU
+	mcr	p15, 0, r0, c8, c7		@ invalidate I,D TLBs on v4
+#endif
+	adr	r5, sa1100_crval
+	ldmia	r5, {r5, r6}
+	mrc	p15, 0, r0, c1, c0		@ get control register v4
+	bic	r0, r0, r5
+	orr	r0, r0, r6
+	ret	lr
+	.size	__sa1100_setup, . - __sa1100_setup
+
+	/*
+	 *  R
+	 * .RVI ZFRS BLDP WCAM
+	 * ..11 0001 ..11 1101
+	 * 
+	 */
+	.type	sa1100_crval, #object
+sa1100_crval:
+	crval	clear=0x00003f3f, mmuset=0x0000313d, ucset=0x00001130
+
+	__INITDATA
+
+/*
+ * SA1100 and SA1110 share the same function calls
+ */
+
+	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+	define_processor_functions sa1100, dabort=v4_early_abort, pabort=legacy_pabort, suspend=1
+
+	.section ".rodata"
+
+	string	cpu_arch_name, "armv4"
+	string	cpu_elf_name, "v4"
+	string	cpu_sa1100_name, "StrongARM-1100"
+	string	cpu_sa1110_name, "StrongARM-1110"
+
+	.align
+
+	.section ".proc.info.init", #alloc
+
+.macro sa1100_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req
+	.type	__\name\()_proc_info,#object
+__\name\()_proc_info:
+	.long	\cpu_val
+	.long	\cpu_mask
+	.long   PMD_TYPE_SECT | \
+		PMD_SECT_BUFFERABLE | \
+		PMD_SECT_CACHEABLE | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	.long   PMD_TYPE_SECT | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	initfn	__sa1100_setup, __\name\()_proc_info
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_26BIT | HWCAP_FAST_MULT
+	.long	\cpu_name
+	.long	sa1100_processor_functions
+	.long	v4wb_tlb_fns
+	.long	v4_mc_user_fns
+	.long	v4wb_cache_fns
+	.size	__\name\()_proc_info, . - __\name\()_proc_info
+.endm
+
+	sa1100_proc_info sa1100, 0x4401a110, 0xfffffff0, cpu_sa1100_name
+	sa1100_proc_info sa1110, 0x6901b110, 0xfffffff0, cpu_sa1110_name
diff --git a/arch/arm/mm/proc-syms.c b/arch/arm/mm/proc-syms.c
new file mode 100644
index 0000000..054b491
--- /dev/null
+++ b/arch/arm/mm/proc-syms.c
@@ -0,0 +1,53 @@
+/*
+ *  linux/arch/arm/mm/proc-syms.c
+ *
+ *  Copyright (C) 2000-2002 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/mm.h>
+
+#include <asm/cacheflush.h>
+#include <asm/proc-fns.h>
+#include <asm/tlbflush.h>
+#include <asm/page.h>
+
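+/*
+ * Single-implementation kernels export the CPU, cache and user-page
+ * helpers as real functions; multi-implementation kernels (MULTI_CPU,
+ * MULTI_CACHE, MULTI_USER) instead export the method tables that the
+ * generic wrappers indirect through.
+ */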
+#ifndef MULTI_CPU
+EXPORT_SYMBOL(cpu_dcache_clean_area);
+#ifdef CONFIG_MMU
+EXPORT_SYMBOL(cpu_set_pte_ext);
+#endif
+#else
+EXPORT_SYMBOL(processor);
+#endif
+
+#ifndef MULTI_CACHE
+EXPORT_SYMBOL(__cpuc_flush_kern_all);
+EXPORT_SYMBOL(__cpuc_flush_user_all);
+EXPORT_SYMBOL(__cpuc_flush_user_range);
+EXPORT_SYMBOL(__cpuc_coherent_kern_range);
+EXPORT_SYMBOL(__cpuc_flush_dcache_area);
+#else
+EXPORT_SYMBOL(cpu_cache);
+#endif
+
+#ifdef CONFIG_MMU
+#ifndef MULTI_USER
+EXPORT_SYMBOL(__cpu_clear_user_highpage);
+EXPORT_SYMBOL(__cpu_copy_user_highpage);
+#else
+EXPORT_SYMBOL(cpu_user);
+#endif
+#endif
+
+/*
+ * No module should need to touch the TLB (and currently
+ * no modules do).  We export this for "loadkernel" support
+ * (booting a new kernel from within a running kernel).
+ */
+#ifdef MULTI_TLB
+EXPORT_SYMBOL(cpu_tlb);
+#endif
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
new file mode 100644
index 0000000..06d890a
--- /dev/null
+++ b/arch/arm/mm/proc-v6.S
@@ -0,0 +1,300 @@
+/*
+ *  linux/arch/arm/mm/proc-v6.S
+ *
+ *  Copyright (C) 2001 Deep Blue Solutions Ltd.
+ *  Modified by Catalin Marinas for noMMU support
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  This is the "shell" of the ARMv6 processor support.
+ */
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+
+#include "proc-macros.S"
+
+#define D_CACHE_LINE_SIZE	32
+
+#define TTB_C		(1 << 0)
+#define TTB_S		(1 << 1)
+#define TTB_IMP		(1 << 2)
+#define TTB_RGN_NC	(0 << 3)
+#define TTB_RGN_WBWA	(1 << 3)
+#define TTB_RGN_WT	(2 << 3)
+#define TTB_RGN_WB	(3 << 3)
+
+#define TTB_FLAGS_UP	TTB_RGN_WBWA
+#define PMD_FLAGS_UP	PMD_SECT_WB
+#define TTB_FLAGS_SMP	TTB_RGN_WBWA|TTB_S
+#define PMD_FLAGS_SMP	PMD_SECT_WBWA|PMD_SECT_S
+
+ENTRY(cpu_v6_proc_init)
+	ret	lr
+
+ENTRY(cpu_v6_proc_fin)
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1000			@ ...i............
+	bic	r0, r0, #0x0006			@ .............ca.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ret	lr
+
+/*
+ *	cpu_v6_reset(loc)
+ *
+ *	Perform a soft reset of the system.  Put the CPU into the
+ *	same state as it would be if it had been reset, and branch
+ *	to what would be the reset vector.
+ *
+ *	- loc   - location to jump to for soft reset
+ */
+	.align	5
+	.pushsection	.idmap.text, "ax"
+ENTRY(cpu_v6_reset)
+	mrc	p15, 0, r1, c1, c0, 0		@ ctrl register
+	bic	r1, r1, #0x1			@ ...............m
+	mcr	p15, 0, r1, c1, c0, 0		@ disable MMU
+	mov	r1, #0
+	mcr	p15, 0, r1, c7, c5, 4		@ ISB
+	ret	r0
+ENDPROC(cpu_v6_reset)
+	.popsection
+
+/*
+ *	cpu_v6_do_idle()
+ *
+ *	Idle the processor (eg, wait for interrupt).
+ *
+ *	IRQs are already disabled.
+ */
+ENTRY(cpu_v6_do_idle)
+	mov	r1, #0
+	mcr	p15, 0, r1, c7, c10, 4		@ DWB - WFI may enter a low-power mode
+	mcr	p15, 0, r1, c7, c0, 4		@ wait for interrupt
+	ret	lr
+
+ENTRY(cpu_v6_dcache_clean_area)
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #D_CACHE_LINE_SIZE
+	subs	r1, r1, #D_CACHE_LINE_SIZE
+	bhi	1b
+	ret	lr
+
+/*
+ *	cpu_v6_switch_mm(pgd_phys, tsk)
+ *
+ *	Set the translation table base pointer to be pgd_phys
+ *
+ *	- pgd_phys - physical address of new TTB
+ *
+ *	It is assumed that:
+ *	- we are not using split page tables
+ */
+ENTRY(cpu_v6_switch_mm)
+#ifdef CONFIG_MMU
+	mov	r2, #0
+	mmid	r1, r1				@ get mm->context.id
+	ALT_SMP(orr	r0, r0, #TTB_FLAGS_SMP)
+	ALT_UP(orr	r0, r0, #TTB_FLAGS_UP)
+	mcr	p15, 0, r2, c7, c5, 6		@ flush BTAC/BTB
+	mcr	p15, 0, r2, c7, c10, 4		@ drain write buffer
+	mcr	p15, 0, r0, c2, c0, 0		@ set TTB 0
+#ifdef CONFIG_PID_IN_CONTEXTIDR
+	mrc	p15, 0, r2, c13, c0, 1		@ read current context ID
+	bic	r2, r2, #0xff			@ extract the PID
+	and	r1, r1, #0xff
+	orr	r1, r1, r2			@ insert into new context ID
+#endif
+	mcr	p15, 0, r1, c13, c0, 1		@ set context ID
+#endif
+	ret	lr
+
+/*
+ *	cpu_v6_set_pte_ext(ptep, pte, ext)
+ *
+ *	Set a level 2 translation table entry.
+ *
+ *	- ptep  - pointer to level 2 translation table entry
+ *		  (hardware version is stored at +2048 bytes)
+ *	- pte   - PTE value to store
+ *	- ext	- value for extended PTE bits
+ */
+	armv6_mt_table cpu_v6
+
+ENTRY(cpu_v6_set_pte_ext)
+#ifdef CONFIG_MMU
+	armv6_set_pte_ext cpu_v6
+#endif
+	ret	lr
+
+/* Suspend/resume support: taken from arch/arm/mach-s3c64xx/sleep.S */
+.globl	cpu_v6_suspend_size
+.equ	cpu_v6_suspend_size, 4 * 6
+#ifdef CONFIG_ARM_CPU_SUSPEND
+ENTRY(cpu_v6_do_suspend)
+	stmfd	sp!, {r4 - r9, lr}
+	mrc	p15, 0, r4, c13, c0, 0	@ FCSE/PID
+#ifdef CONFIG_MMU
+	mrc	p15, 0, r5, c3, c0, 0	@ Domain ID
+	mrc	p15, 0, r6, c2, c0, 1	@ Translation table base 1
+#endif
+	mrc	p15, 0, r7, c1, c0, 1	@ auxiliary control register
+	mrc	p15, 0, r8, c1, c0, 2	@ co-processor access control
+	mrc	p15, 0, r9, c1, c0, 0	@ control register
+	stmia	r0, {r4 - r9}
+	ldmfd	sp!, {r4- r9, pc}
+ENDPROC(cpu_v6_do_suspend)
+
+ENTRY(cpu_v6_do_resume)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c14, 0	@ clean+invalidate D cache
+	mcr	p15, 0, ip, c7, c5, 0	@ invalidate I cache
+	mcr	p15, 0, ip, c7, c15, 0	@ clean+invalidate cache
+	mcr	p15, 0, ip, c7, c10, 4	@ drain write buffer
+	mcr	p15, 0, ip, c13, c0, 1	@ set reserved context ID
+	ldmia	r0, {r4 - r9}
+	mcr	p15, 0, r4, c13, c0, 0	@ FCSE/PID
+#ifdef CONFIG_MMU
+	mcr	p15, 0, r5, c3, c0, 0	@ Domain ID
+	ALT_SMP(orr	r1, r1, #TTB_FLAGS_SMP)
+	ALT_UP(orr	r1, r1, #TTB_FLAGS_UP)
+	mcr	p15, 0, r1, c2, c0, 0	@ Translation table base 0
+	mcr	p15, 0, r6, c2, c0, 1	@ Translation table base 1
+	mcr	p15, 0, ip, c2, c0, 2	@ TTB control register
+#endif
+	mcr	p15, 0, r7, c1, c0, 1	@ auxiliary control register
+	mcr	p15, 0, r8, c1, c0, 2	@ co-processor access control
+	mcr	p15, 0, ip, c7, c5, 4	@ ISB
+	mov	r0, r9			@ control register
+	b	cpu_resume_mmu
+ENDPROC(cpu_v6_do_resume)
+#endif
+
+	string	cpu_v6_name, "ARMv6-compatible processor"
+
+	.align
+
+/*
+ *	__v6_setup
+ *
+ *	Initialise TLB, Caches, and MMU state ready to switch the MMU
+ *	on.  Return in r0 the new CP15 C1 control register setting.
+ *
+ *	We automatically detect if we have a Harvard cache, and use the
+ *	Harvard cache control instructions instead of the unified cache
+ *	control instructions.
+ *
+ *	This should be able to cover all ARMv6 cores.
+ *
+ *	It is assumed that:
+ *	- cache type register is implemented
+ */
+__v6_setup:
+#ifdef CONFIG_SMP
+	ALT_SMP(mrc	p15, 0, r0, c1, c0, 1)	@ Enable SMP/nAMP mode
+	ALT_UP(nop)
+	orr	r0, r0, #0x20
+	ALT_SMP(mcr	p15, 0, r0, c1, c0, 1)
+	ALT_UP(nop)
+#endif
+
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c14, 0		@ clean+invalidate D cache
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	mcr	p15, 0, r0, c7, c15, 0		@ clean+invalidate cache
+#ifdef CONFIG_MMU
+	mcr	p15, 0, r0, c8, c7, 0		@ invalidate I + D TLBs
+	mcr	p15, 0, r0, c2, c0, 2		@ TTB control register
+	ALT_SMP(orr	r4, r4, #TTB_FLAGS_SMP)
+	ALT_UP(orr	r4, r4, #TTB_FLAGS_UP)
+	ALT_SMP(orr	r8, r8, #TTB_FLAGS_SMP)
+	ALT_UP(orr	r8, r8, #TTB_FLAGS_UP)
+	mcr	p15, 0, r8, c2, c0, 1		@ load TTB1
+#endif /* CONFIG_MMU */
+	mcr	p15, 0, r0, c7, c10, 4		@ drain write buffer and
+						@ complete invalidations
+	adr	r5, v6_crval
+	ldmia	r5, {r5, r6}
+ ARM_BE8(orr	r6, r6, #1 << 25)		@ big-endian page tables
+	mrc	p15, 0, r0, c1, c0, 0		@ read control register
+	bic	r0, r0, r5			@ clear bits we don't want
+	orr	r0, r0, r6			@ set bits we do want
+#ifdef CONFIG_ARM_ERRATA_364296
+	/*
+	 * Workaround for the 364296 ARM1136 r0p2 erratum (possible cache data
+	 * corruption with hit-under-miss enabled). The conditional code below
+	 * (setting the undocumented bit 31 in the auxiliary control register
+	 * and the FI bit in the control register) disables hit-under-miss
+	 * without putting the processor into full low interrupt latency mode.
+	 */
+	ldr	r6, =0x4107b362			@ id for ARM1136 r0p2
+	mrc	p15, 0, r5, c0, c0, 0		@ get processor id
+	teq	r5, r6				@ check for the faulty core
+	mrceq	p15, 0, r5, c1, c0, 1		@ load aux control reg
+	orreq	r5, r5, #(1 << 31)		@ set the undocumented bit 31
+	mcreq	p15, 0, r5, c1, c0, 1		@ write aux control reg
+	orreq	r0, r0, #(1 << 21)		@ low interrupt latency configuration
+#endif
+	ret	lr				@ return to head.S:__ret
+
+	/*
+	 *         V X F   I D LR
+	 * .... ...E PUI. .T.T 4RVI ZFRS BLDP WCAM
+	 * rrrr rrrx xxx0 0101 xxxx xxxx x111 xxxx < forced
+	 *         0 110       0011 1.00 .111 1101 < we want
+	 */
+	.type	v6_crval, #object
+v6_crval:
+	crval	clear=0x01e0fb7f, mmuset=0x00c0387d, ucset=0x00c0187c
+
+	__INITDATA
+
+	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+	define_processor_functions v6, dabort=v6_early_abort, pabort=v6_pabort, suspend=1
+
+	.section ".rodata"
+
+	string	cpu_arch_name, "armv6"
+	string	cpu_elf_name, "v6"
+	.align
+
+	.section ".proc.info.init", #alloc
+
+	/*
+	 * Match any ARMv6 processor core.
+	 */
+	.type	__v6_proc_info, #object
+__v6_proc_info:
+	.long	0x0007b000
+	.long	0x0007f000
+	ALT_SMP(.long \
+		PMD_TYPE_SECT | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ | \
+		PMD_FLAGS_SMP)
+	ALT_UP(.long \
+		PMD_TYPE_SECT | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ | \
+		PMD_FLAGS_UP)
+	.long   PMD_TYPE_SECT | \
+		PMD_SECT_XN | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	initfn	__v6_setup, __v6_proc_info
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	/* See also feat_v6_fixup() for HWCAP_TLS */
+	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP|HWCAP_JAVA|HWCAP_TLS
+	.long	cpu_v6_name
+	.long	v6_processor_functions
+	.long	v6wbi_tlb_fns
+	.long	v6_user_fns
+	.long	v6_cache_fns
+	.size	__v6_proc_info, . - __v6_proc_info
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
new file mode 100644
index 0000000..f8d45ad
--- /dev/null
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -0,0 +1,165 @@
+/*
+ * arch/arm/mm/proc-v7-2level.S
+ *
+ * Copyright (C) 2001 Deep Blue Solutions Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define TTB_S		(1 << 1)
+#define TTB_RGN_NC	(0 << 3)
+#define TTB_RGN_OC_WBWA	(1 << 3)
+#define TTB_RGN_OC_WT	(2 << 3)
+#define TTB_RGN_OC_WB	(3 << 3)
+#define TTB_NOS		(1 << 5)
+#define TTB_IRGN_NC	((0 << 0) | (0 << 6))
+#define TTB_IRGN_WBWA	((0 << 0) | (1 << 6))
+#define TTB_IRGN_WT	((1 << 0) | (0 << 6))
+#define TTB_IRGN_WB	((1 << 0) | (1 << 6))
+
+/* PTWs cacheable, inner WB not shareable, outer WB not shareable */
+#define TTB_FLAGS_UP	TTB_IRGN_WB|TTB_RGN_OC_WB
+#define PMD_FLAGS_UP	PMD_SECT_WB
+
+/* PTWs cacheable, inner WBWA shareable, outer WBWA not shareable */
+#define TTB_FLAGS_SMP	TTB_IRGN_WBWA|TTB_S|TTB_NOS|TTB_RGN_OC_WBWA
+#define PMD_FLAGS_SMP	PMD_SECT_WBWA|PMD_SECT_S
+
+/*
+ *	cpu_v7_switch_mm(pgd_phys, tsk)
+ *
+ *	Set the translation table base pointer to be pgd_phys
+ *
+ *	- pgd_phys - physical address of new TTB
+ *
+ *	It is assumed that:
+ *	- we are not using split page tables
+ *
+ *	Note that we always need to flush BTAC/BTB if IBE is set
+ *	even on Cortex-A8 revisions not affected by 430973.
+ *	If IBE is not set, the flush BTAC/BTB won't do anything.
+ */
+ENTRY(cpu_v7_switch_mm)
+#ifdef CONFIG_MMU
+	mmid	r1, r1				@ get mm->context.id
+	ALT_SMP(orr	r0, r0, #TTB_FLAGS_SMP)
+	ALT_UP(orr	r0, r0, #TTB_FLAGS_UP)
+#ifdef CONFIG_PID_IN_CONTEXTIDR
+	mrc	p15, 0, r2, c13, c0, 1		@ read current context ID
+	lsr	r2, r2, #8			@ extract the PID
+	bfi	r1, r2, #8, #24			@ insert into new context ID
+#endif
+#ifdef CONFIG_ARM_ERRATA_754322
+	dsb
+#endif
+	mcr	p15, 0, r1, c13, c0, 1		@ set context ID
+	isb
+	mcr	p15, 0, r0, c2, c0, 0		@ set TTB 0
+	isb
+#endif
+	bx	lr
+ENDPROC(cpu_v7_switch_mm)
+
+/*
+ *	cpu_v7_set_pte_ext(ptep, pte)
+ *
+ *	Set a level 2 translation table entry.
+ *
+ *	- ptep  - pointer to level 2 translation table entry
+ *		  (hardware version is stored at +2048 bytes)
+ *	- pte   - PTE value to store
+ *	- ext	- value for extended PTE bits
+ */
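+/*
+ * The Linux pte is stored first, then translated into the hardware
+ * descriptor: software-only bits are masked off, the ext bits and small
+ * page type are merged in, APX is set for clean or read-only pages (so
+ * the first write faults and the page can be marked dirty), and a pte
+ * that is not young, not valid or marked PROT_NONE is written out as a
+ * zero descriptor.  The hardware copy lives at +2048 bytes and is
+ * cleaned to memory on cores without the MP extensions.
+ */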
+ENTRY(cpu_v7_set_pte_ext)
+#ifdef CONFIG_MMU
+	str	r1, [r0]			@ linux version
+
+	bic	r3, r1, #0x000003f0
+	bic	r3, r3, #PTE_TYPE_MASK
+	orr	r3, r3, r2
+	orr	r3, r3, #PTE_EXT_AP0 | 2
+
+	tst	r1, #1 << 4
+	orrne	r3, r3, #PTE_EXT_TEX(1)
+
+	eor	r1, r1, #L_PTE_DIRTY
+	tst	r1, #L_PTE_RDONLY | L_PTE_DIRTY
+	orrne	r3, r3, #PTE_EXT_APX
+
+	tst	r1, #L_PTE_USER
+	orrne	r3, r3, #PTE_EXT_AP1
+
+	tst	r1, #L_PTE_XN
+	orrne	r3, r3, #PTE_EXT_XN
+
+	tst	r1, #L_PTE_YOUNG
+	tstne	r1, #L_PTE_VALID
+	eorne	r1, r1, #L_PTE_NONE
+	tstne	r1, #L_PTE_NONE
+	moveq	r3, #0
+
+ ARM(	str	r3, [r0, #2048]! )
+ THUMB(	add	r0, r0, #2048 )
+ THUMB(	str	r3, [r0] )
+	ALT_SMP(W(nop))
+	ALT_UP (mcr	p15, 0, r0, c7, c10, 1)		@ flush_pte
+#endif
+	bx	lr
+ENDPROC(cpu_v7_set_pte_ext)
+
+	/*
+	 * Memory region attributes with SCTLR.TRE=1
+	 *
+	 *   n = TEX[0],C,B
+	 *   TR = PRRR[2n+1:2n]		- memory type
+	 *   IR = NMRR[2n+1:2n]		- inner cacheable property
+	 *   OR = NMRR[2n+17:2n+16]	- outer cacheable property
+	 *
+	 *			n	TR	IR	OR
+	 *   UNCACHED		000	00
+	 *   BUFFERABLE		001	10	00	00
+	 *   WRITETHROUGH	010	10	10	10
+	 *   WRITEBACK		011	10	11	11
+	 *   reserved		110
+	 *   WRITEALLOC		111	10	01	01
+	 *   DEV_SHARED		100	01
+	 *   DEV_NONSHARED	100	01
+	 *   DEV_WC		001	10
+	 *   DEV_CACHED		011	10
+	 *
+	 * Other attributes:
+	 *
+	 *   DS0 = PRRR[16] = 0		- device shareable property
+	 *   DS1 = PRRR[17] = 1		- device shareable property
+	 *   NS0 = PRRR[18] = 0		- normal shareable property
+	 *   NS1 = PRRR[19] = 1		- normal shareable property
+	 *   NOS = PRRR[24+n] = 1	- not outer shareable
+	 */
+.equ	PRRR,	0xff0a81a8
+.equ	NMRR,	0x40e040e0
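+	/*
+	 * Worked example from the table above: for WRITEBACK, n = 0b011, so
+	 * TR = PRRR[7:6] = 0b10 (normal memory), IR = NMRR[7:6] = 0b11 and
+	 * OR = NMRR[23:22] = 0b11 (inner/outer write-back).
+	 */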
+
+	/*
+	 * Macro for setting up the TTBRx and TTBCR registers.
+	 * - \ttb0 and \ttb1 updated with the corresponding flags.
+	 */
+	.macro	v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp
+	mcr	p15, 0, \zero, c2, c0, 2	@ TTB control register
+	ALT_SMP(orr	\ttbr0l, \ttbr0l, #TTB_FLAGS_SMP)
+	ALT_UP(orr	\ttbr0l, \ttbr0l, #TTB_FLAGS_UP)
+	ALT_SMP(orr	\ttbr1, \ttbr1, #TTB_FLAGS_SMP)
+	ALT_UP(orr	\ttbr1, \ttbr1, #TTB_FLAGS_UP)
+	mcr	p15, 0, \ttbr1, c2, c0, 1	@ load TTB1
+	.endm
+
+	/*   AT
+	 *  TFR   EV X F   I D LR    S
+	 * .EEE ..EE PUI. .T.T 4RVI ZWRS BLDP WCAM
+	 * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced
+	 *   01    0 110       0011 1100 .111 1101 < we want
+	 */
+	.align	2
+	.type	v7_crval, #object
+v7_crval:
+	crval	clear=0x2120c302, mmuset=0x10c03c7d, ucset=0x00c01c7c
diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
new file mode 100644
index 0000000..7d16bbc
--- /dev/null
+++ b/arch/arm/mm/proc-v7-3level.S
@@ -0,0 +1,160 @@
+/*
+ * arch/arm/mm/proc-v7-3level.S
+ *
+ * Copyright (C) 2001 Deep Blue Solutions Ltd.
+ * Copyright (C) 2011 ARM Ltd.
+ * Author: Catalin Marinas <catalin.marinas@arm.com>
+ *   based on arch/arm/mm/proc-v7-2level.S
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <asm/assembler.h>
+
+#define TTB_IRGN_NC	(0 << 8)
+#define TTB_IRGN_WBWA	(1 << 8)
+#define TTB_IRGN_WT	(2 << 8)
+#define TTB_IRGN_WB	(3 << 8)
+#define TTB_RGN_NC	(0 << 10)
+#define TTB_RGN_OC_WBWA	(1 << 10)
+#define TTB_RGN_OC_WT	(2 << 10)
+#define TTB_RGN_OC_WB	(3 << 10)
+#define TTB_S		(3 << 12)
+#define TTB_EAE		(1 << 31)
+
+/* PTWs cacheable, inner WB not shareable, outer WB not shareable */
+#define TTB_FLAGS_UP	(TTB_IRGN_WB|TTB_RGN_OC_WB)
+#define PMD_FLAGS_UP	(PMD_SECT_WB)
+
+/* PTWs cacheable, inner WBWA shareable, outer WBWA not shareable */
+#define TTB_FLAGS_SMP	(TTB_IRGN_WBWA|TTB_S|TTB_RGN_OC_WBWA)
+#define PMD_FLAGS_SMP	(PMD_SECT_WBWA|PMD_SECT_S)
+
+#ifndef __ARMEB__
+#  define rpgdl	r0
+#  define rpgdh	r1
+#else
+#  define rpgdl	r1
+#  define rpgdh	r0
+#endif
+
+/*
+ * cpu_v7_switch_mm(pgd_phys, tsk)
+ *
+ * Set the translation table base pointer to be pgd_phys (physical address of
+ * the new TTB).
+ */
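+/*
+ * With LPAE the ASID lives in TTBR0[55:48], so the pgd and the ASID are
+ * installed atomically with a single 64-bit mcrr write.
+ */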
+ENTRY(cpu_v7_switch_mm)
+#ifdef CONFIG_MMU
+	mmid	r2, r2
+	asid	r2, r2
+	orr	rpgdh, rpgdh, r2, lsl #(48 - 32)	@ upper 32-bits of pgd
+	mcrr	p15, 0, rpgdl, rpgdh, c2		@ set TTB 0
+	isb
+#endif
+	ret	lr
+ENDPROC(cpu_v7_switch_mm)
+
+#ifdef __ARMEB__
+#define rl r3
+#define rh r2
+#else
+#define rl r2
+#define rh r3
+#endif
+
+/*
+ * cpu_v7_set_pte_ext(ptep, pte)
+ *
+ * Set a level 3 translation table entry.
+ * - ptep - pointer to level 3 translation table entry
+ * - pte - PTE value to store (64-bit in r2 and r3)
+ */
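+/*
+ * AP[2] (read-only) is set whenever the pte is clean or read-only so that
+ * the first write to a writable page faults and the page can be marked
+ * dirty; L_PTE_NONE ptes have their valid bit cleared before the 64-bit
+ * descriptor is stored with strd.
+ */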
+ENTRY(cpu_v7_set_pte_ext)
+#ifdef CONFIG_MMU
+	tst	rl, #L_PTE_VALID
+	beq	1f
+	tst	rh, #1 << (57 - 32)		@ L_PTE_NONE
+	bicne	rl, #L_PTE_VALID
+	bne	1f
+
+	eor	ip, rh, #1 << (55 - 32)	@ toggle L_PTE_DIRTY in temp reg to
+					@ test for !L_PTE_DIRTY || L_PTE_RDONLY
+	tst	ip, #1 << (55 - 32) | 1 << (58 - 32)
+	orrne	rl, #PTE_AP2
+	biceq	rl, #PTE_AP2
+
+1:	strd	r2, r3, [r0]
+	ALT_SMP(W(nop))
+	ALT_UP (mcr	p15, 0, r0, c7, c10, 1)		@ flush_pte
+#endif
+	ret	lr
+ENDPROC(cpu_v7_set_pte_ext)
+
+	/*
+	 * Memory region attributes for LPAE (defined in pgtable-3level.h):
+	 *
+	 *   n = AttrIndx[2:0]
+	 *
+	 *			n	MAIR
+	 *   UNCACHED		000	00000000
+	 *   BUFFERABLE		001	01000100
+	 *   DEV_WC		001	01000100
+	 *   WRITETHROUGH	010	10101010
+	 *   WRITEBACK		011	11101110
+	 *   DEV_CACHED		011	11101110
+	 *   DEV_SHARED		100	00000100
+	 *   DEV_NONSHARED	100	00000100
+	 *   unused		101
+	 *   unused		110
+	 *   WRITEALLOC		111	11111111
+	 */
+.equ	PRRR,	0xeeaa4400			@ MAIR0
+.equ	NMRR,	0xff000004			@ MAIR1
+
+	/*
+	 * Macro for setting up the TTBRx and TTBCR registers.
+	 * - \ttbr1 updated.
+	 */
+	.macro	v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp
+	ldr	\tmp, =swapper_pg_dir		@ swapper_pg_dir virtual address
+	cmp	\ttbr1, \tmp, lsr #12		@ PHYS_OFFSET > PAGE_OFFSET?
+	mov	\tmp, #TTB_EAE			@ for TTB control register
+	ALT_SMP(orr	\tmp, \tmp, #TTB_FLAGS_SMP)
+	ALT_UP(orr	\tmp, \tmp, #TTB_FLAGS_UP)
+	ALT_SMP(orr	\tmp, \tmp, #TTB_FLAGS_SMP << 16)
+	ALT_UP(orr	\tmp, \tmp, #TTB_FLAGS_UP << 16)
+	/*
+	 * Only use split TTBRs if PHYS_OFFSET <= PAGE_OFFSET (cmp above),
+	 * otherwise booting secondary CPUs would end up using TTBR1 for the
+	 * identity mapping set up in TTBR0.
+	 */
+	orrls	\tmp, \tmp, #TTBR1_SIZE				@ TTBCR.T1SZ
+	mcr	p15, 0, \tmp, c2, c0, 2				@ TTBCR
+	mov	\tmp, \ttbr1, lsr #20
+	mov	\ttbr1, \ttbr1, lsl #12
+	addls	\ttbr1, \ttbr1, #TTBR1_OFFSET
+	mcrr	p15, 1, \ttbr1, \tmp, c2			@ load TTBR1
+	.endm
+
+	/*
+	 *   AT
+	 *  TFR   EV X F   IHD LR    S
+	 * .EEE ..EE PUI. .TAT 4RVI ZWRS BLDP WCAM
+	 * rxxx rrxx xxx0 0101 xxxx xxxx x111 xxxx < forced
+	 *   11    0 110    0  0011 1100 .111 1101 < we want
+	 */
+	.align	2
+	.type	v7_crval, #object
+v7_crval:
+	crval	clear=0x0122c302, mmuset=0x30c03c7d, ucset=0x00c01c7c
diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c
new file mode 100644
index 0000000..5544b82
--- /dev/null
+++ b/arch/arm/mm/proc-v7-bugs.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/arm-smccc.h>
+#include <linux/kernel.h>
+#include <linux/psci.h>
+#include <linux/smp.h>
+
+#include <asm/cp15.h>
+#include <asm/cputype.h>
+#include <asm/proc-fns.h>
+#include <asm/system_misc.h>
+
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+DEFINE_PER_CPU(harden_branch_predictor_fn_t, harden_branch_predictor_fn);
+
+extern void cpu_v7_iciallu_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+extern void cpu_v7_bpiall_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+extern void cpu_v7_smc_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+extern void cpu_v7_hvc_switch_mm(phys_addr_t pgd_phys, struct mm_struct *mm);
+
+static void harden_branch_predictor_bpiall(void)
+{
+	write_sysreg(0, BPIALL);
+}
+
+static void harden_branch_predictor_iciallu(void)
+{
+	write_sysreg(0, ICIALLU);
+}
+
+static void __maybe_unused call_smc_arch_workaround_1(void)
+{
+	arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
+}
+
+static void __maybe_unused call_hvc_arch_workaround_1(void)
+{
+	arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL);
+}
+
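+/*
+ * Select a per-CPU Spectre v2 mitigation: cores whose branch predictor
+ * can be invalidated directly use BPIALL or ICIALLU (paired with the
+ * matching switch_mm variant installed at boot), while Cortex-A57/A72
+ * use the SMCCC ARCH_WORKAROUND_1 firmware call when the PSCI conduit
+ * advertises it.
+ */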
+static void cpu_v7_spectre_init(void)
+{
+	const char *spectre_v2_method = NULL;
+	int cpu = smp_processor_id();
+
+	if (per_cpu(harden_branch_predictor_fn, cpu))
+		return;
+
+	switch (read_cpuid_part()) {
+	case ARM_CPU_PART_CORTEX_A8:
+	case ARM_CPU_PART_CORTEX_A9:
+	case ARM_CPU_PART_CORTEX_A12:
+	case ARM_CPU_PART_CORTEX_A17:
+	case ARM_CPU_PART_CORTEX_A73:
+	case ARM_CPU_PART_CORTEX_A75:
+		if (processor.switch_mm != cpu_v7_bpiall_switch_mm)
+			goto bl_error;
+		per_cpu(harden_branch_predictor_fn, cpu) =
+			harden_branch_predictor_bpiall;
+		spectre_v2_method = "BPIALL";
+		break;
+
+	case ARM_CPU_PART_CORTEX_A15:
+	case ARM_CPU_PART_BRAHMA_B15:
+		if (processor.switch_mm != cpu_v7_iciallu_switch_mm)
+			goto bl_error;
+		per_cpu(harden_branch_predictor_fn, cpu) =
+			harden_branch_predictor_iciallu;
+		spectre_v2_method = "ICIALLU";
+		break;
+
+#ifdef CONFIG_ARM_PSCI
+	default:
+		/* Other ARM CPUs require no workaround */
+		if (read_cpuid_implementor() == ARM_CPU_IMP_ARM)
+			break;
+		/* fallthrough */
+		/* Cortex A57/A72 require firmware workaround */
+	case ARM_CPU_PART_CORTEX_A57:
+	case ARM_CPU_PART_CORTEX_A72: {
+		struct arm_smccc_res res;
+
+		if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
+			break;
+
+		switch (psci_ops.conduit) {
+		case PSCI_CONDUIT_HVC:
+			arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+					  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+			if ((int)res.a0 != 0)
+				break;
+			if (processor.switch_mm != cpu_v7_hvc_switch_mm && cpu)
+				goto bl_error;
+			per_cpu(harden_branch_predictor_fn, cpu) =
+				call_hvc_arch_workaround_1;
+			processor.switch_mm = cpu_v7_hvc_switch_mm;
+			spectre_v2_method = "hypervisor";
+			break;
+
+		case PSCI_CONDUIT_SMC:
+			arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+					  ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+			if ((int)res.a0 != 0)
+				break;
+			if (processor.switch_mm != cpu_v7_smc_switch_mm && cpu)
+				goto bl_error;
+			per_cpu(harden_branch_predictor_fn, cpu) =
+				call_smc_arch_workaround_1;
+			processor.switch_mm = cpu_v7_smc_switch_mm;
+			spectre_v2_method = "firmware";
+			break;
+
+		default:
+			break;
+		}
+	}
+#endif
+	}
+
+	if (spectre_v2_method)
+		pr_info("CPU%u: Spectre v2: using %s workaround\n",
+			smp_processor_id(), spectre_v2_method);
+	return;
+
+bl_error:
+	pr_err("CPU%u: Spectre v2: incorrect context switching function, system vulnerable\n",
+		cpu);
+}
+#else
+static void cpu_v7_spectre_init(void)
+{
+}
+#endif
+
+static __maybe_unused bool cpu_v7_check_auxcr_set(bool *warned,
+						  u32 mask, const char *msg)
+{
+	u32 aux_cr;
+
+	asm("mrc p15, 0, %0, c1, c0, 1" : "=r" (aux_cr));
+
+	if ((aux_cr & mask) != mask) {
+		if (!*warned)
+			pr_err("CPU%u: %s", smp_processor_id(), msg);
+		*warned = true;
+		return false;
+	}
+	return true;
+}
+
+static DEFINE_PER_CPU(bool, spectre_warned);
+
+static bool check_spectre_auxcr(bool *warned, u32 bit)
+{
+	return IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR) &&
+		cpu_v7_check_auxcr_set(warned, bit,
+				       "Spectre v2: firmware did not set auxiliary control register IBE bit, system vulnerable\n");
+}
+
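+/*
+ * BPIALL/ICIALLU only affect the branch predictor when firmware has set
+ * the IBE enable in the auxiliary control register (bit 6 on Cortex-A8,
+ * bit 0 on Cortex-A15), so it is checked before the mitigation is
+ * claimed to be in place.
+ */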
+void cpu_v7_ca8_ibe(void)
+{
+	if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(6)))
+		cpu_v7_spectre_init();
+}
+
+void cpu_v7_ca15_ibe(void)
+{
+	if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0)))
+		cpu_v7_spectre_init();
+}
+
+void cpu_v7_bugs_init(void)
+{
+	cpu_v7_spectre_init();
+}
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
new file mode 100644
index 0000000..339eb17
--- /dev/null
+++ b/arch/arm/mm/proc-v7.S
@@ -0,0 +1,819 @@
+/*
+ *  linux/arch/arm/mm/proc-v7.S
+ *
+ *  Copyright (C) 2001 Deep Blue Solutions Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  This is the "shell" of the ARMv7 processor support.
+ */
+#include <linux/arm-smccc.h>
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/pgtable.h>
+#include <asm/memory.h>
+
+#include "proc-macros.S"
+
+#ifdef CONFIG_ARM_LPAE
+#include "proc-v7-3level.S"
+#else
+#include "proc-v7-2level.S"
+#endif
+
+ENTRY(cpu_v7_proc_init)
+	ret	lr
+ENDPROC(cpu_v7_proc_init)
+
+ENTRY(cpu_v7_proc_fin)
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1000			@ ...i............
+	bic	r0, r0, #0x0006			@ .............ca.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ret	lr
+ENDPROC(cpu_v7_proc_fin)
+
+/*
+ *	cpu_v7_reset(loc, hyp)
+ *
+ *	Perform a soft reset of the system.  Put the CPU into the
+ *	same state as it would be if it had been reset, and branch
+ *	to what would be the reset vector.
+ *
+ *	- loc   - location to jump to for soft reset
+ *	- hyp   - indicate if restart occurs in HYP mode
+ *
+ *	This code must be executed using a flat identity mapping with
+ *      caches disabled.
+ */
+	.align	5
+	.pushsection	.idmap.text, "ax"
+ENTRY(cpu_v7_reset)
+	mrc	p15, 0, r2, c1, c0, 0		@ ctrl register
+	bic	r2, r2, #0x1			@ ...............m
+ THUMB(	bic	r2, r2, #1 << 30 )		@ SCTLR.TE (Thumb exceptions)
+	mcr	p15, 0, r2, c1, c0, 0		@ disable MMU
+	isb
+#ifdef CONFIG_ARM_VIRT_EXT
+	teq	r1, #0
+	bne	__hyp_soft_restart
+#endif
+	bx	r0
+ENDPROC(cpu_v7_reset)
+	.popsection
+
+/*
+ *	cpu_v7_do_idle()
+ *
+ *	Idle the processor (eg, wait for interrupt).
+ *
+ *	IRQs are already disabled.
+ */
+ENTRY(cpu_v7_do_idle)
+	dsb					@ WFI may enter a low-power mode
+	wfi
+	ret	lr
+ENDPROC(cpu_v7_do_idle)
+
+ENTRY(cpu_v7_dcache_clean_area)
+	ALT_SMP(W(nop))			@ MP extensions imply L1 PTW
+	ALT_UP_B(1f)
+	ret	lr
+1:	dcache_line_size r2, r3
+2:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, r2
+	subs	r1, r1, r2
+	bhi	2b
+	dsb	ishst
+	ret	lr
+ENDPROC(cpu_v7_dcache_clean_area)
+
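+/*
+ * Hardened switch_mm variants for Spectre v2: each entry point scrubs
+ * the branch predictor (via the SMCCC ARCH_WORKAROUND_1 call, ICIALLU
+ * or BPIALL) and then falls through to the ordinary cpu_v7_switch_mm.
+ */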
+#ifdef CONFIG_ARM_PSCI
+	.arch_extension sec
+ENTRY(cpu_v7_smc_switch_mm)
+	stmfd	sp!, {r0 - r3}
+	movw	r0, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1
+	movt	r0, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1
+	smc	#0
+	ldmfd	sp!, {r0 - r3}
+	b	cpu_v7_switch_mm
+ENDPROC(cpu_v7_smc_switch_mm)
+	.arch_extension virt
+ENTRY(cpu_v7_hvc_switch_mm)
+	stmfd	sp!, {r0 - r3}
+	movw	r0, #:lower16:ARM_SMCCC_ARCH_WORKAROUND_1
+	movt	r0, #:upper16:ARM_SMCCC_ARCH_WORKAROUND_1
+	hvc	#0
+	ldmfd	sp!, {r0 - r3}
+	b	cpu_v7_switch_mm
+ENDPROC(cpu_v7_hvc_switch_mm)
+#endif
+ENTRY(cpu_v7_iciallu_switch_mm)
+	mov	r3, #0
+	mcr	p15, 0, r3, c7, c5, 0		@ ICIALLU
+	b	cpu_v7_switch_mm
+ENDPROC(cpu_v7_iciallu_switch_mm)
+ENTRY(cpu_v7_bpiall_switch_mm)
+	mov	r3, #0
+	mcr	p15, 0, r3, c7, c5, 6		@ flush BTAC/BTB
+	b	cpu_v7_switch_mm
+ENDPROC(cpu_v7_bpiall_switch_mm)
+
+	string	cpu_v7_name, "ARMv7 Processor"
+	.align
+
+/* Suspend/resume support: derived from arch/arm/mach-s5pv210/sleep.S */
+.globl	cpu_v7_suspend_size
+.equ	cpu_v7_suspend_size, 4 * 9
+#ifdef CONFIG_ARM_CPU_SUSPEND
+ENTRY(cpu_v7_do_suspend)
+	stmfd	sp!, {r4 - r11, lr}
+	mrc	p15, 0, r4, c13, c0, 0	@ FCSE/PID
+	mrc	p15, 0, r5, c13, c0, 3	@ User r/o thread ID
+	stmia	r0!, {r4 - r5}
+#ifdef CONFIG_MMU
+	mrc	p15, 0, r6, c3, c0, 0	@ Domain ID
+#ifdef CONFIG_ARM_LPAE
+	mrrc	p15, 1, r5, r7, c2	@ TTB 1
+#else
+	mrc	p15, 0, r7, c2, c0, 1	@ TTB 1
+#endif
+	mrc	p15, 0, r11, c2, c0, 2	@ TTB control register
+#endif
+	mrc	p15, 0, r8, c1, c0, 0	@ Control register
+	mrc	p15, 0, r9, c1, c0, 1	@ Auxiliary control register
+	mrc	p15, 0, r10, c1, c0, 2	@ Co-processor access control
+	stmia	r0, {r5 - r11}
+	ldmfd	sp!, {r4 - r11, pc}
+ENDPROC(cpu_v7_do_suspend)
+
+ENTRY(cpu_v7_do_resume)
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c5, 0	@ invalidate I cache
+	mcr	p15, 0, ip, c13, c0, 1	@ set reserved context ID
+	ldmia	r0!, {r4 - r5}
+	mcr	p15, 0, r4, c13, c0, 0	@ FCSE/PID
+	mcr	p15, 0, r5, c13, c0, 3	@ User r/o thread ID
+	ldmia	r0, {r5 - r11}
+#ifdef CONFIG_MMU
+	mcr	p15, 0, ip, c8, c7, 0	@ invalidate TLBs
+	mcr	p15, 0, r6, c3, c0, 0	@ Domain ID
+#ifdef CONFIG_ARM_LPAE
+	mcrr	p15, 0, r1, ip, c2	@ TTB 0
+	mcrr	p15, 1, r5, r7, c2	@ TTB 1
+#else
+	ALT_SMP(orr	r1, r1, #TTB_FLAGS_SMP)
+	ALT_UP(orr	r1, r1, #TTB_FLAGS_UP)
+	mcr	p15, 0, r1, c2, c0, 0	@ TTB 0
+	mcr	p15, 0, r7, c2, c0, 1	@ TTB 1
+#endif
+	mcr	p15, 0, r11, c2, c0, 2	@ TTB control register
+	ldr	r4, =PRRR		@ PRRR
+	ldr	r5, =NMRR		@ NMRR
+	mcr	p15, 0, r4, c10, c2, 0	@ write PRRR
+	mcr	p15, 0, r5, c10, c2, 1	@ write NMRR
+#endif	/* CONFIG_MMU */
+	mrc	p15, 0, r4, c1, c0, 1	@ Read Auxiliary control register
+	teq	r4, r9			@ Is it already set?
+	mcrne	p15, 0, r9, c1, c0, 1	@ No, so write it
+	mcr	p15, 0, r10, c1, c0, 2	@ Co-processor access control
+	isb
+	dsb
+	mov	r0, r8			@ control register
+	b	cpu_resume_mmu
+ENDPROC(cpu_v7_do_resume)
+#endif
+
+.globl	cpu_ca9mp_suspend_size
+.equ	cpu_ca9mp_suspend_size, cpu_v7_suspend_size + 4 * 2
+#ifdef CONFIG_ARM_CPU_SUSPEND
+ENTRY(cpu_ca9mp_do_suspend)
+	stmfd	sp!, {r4 - r5}
+	mrc	p15, 0, r4, c15, c0, 1		@ Diagnostic register
+	mrc	p15, 0, r5, c15, c0, 0		@ Power register
+	stmia	r0!, {r4 - r5}
+	ldmfd	sp!, {r4 - r5}
+	b	cpu_v7_do_suspend
+ENDPROC(cpu_ca9mp_do_suspend)
+
+ENTRY(cpu_ca9mp_do_resume)
+	ldmia	r0!, {r4 - r5}
+	mrc	p15, 0, r10, c15, c0, 1		@ Read Diagnostic register
+	teq	r4, r10				@ Already restored?
+	mcrne	p15, 0, r4, c15, c0, 1		@ No, so restore it
+	mrc	p15, 0, r10, c15, c0, 0		@ Read Power register
+	teq	r5, r10				@ Already restored?
+	mcrne	p15, 0, r5, c15, c0, 0		@ No, so restore it
+	b	cpu_v7_do_resume
+ENDPROC(cpu_ca9mp_do_resume)
+#endif
+
+#ifdef CONFIG_CPU_PJ4B
+	globl_equ	cpu_pj4b_switch_mm,     cpu_v7_switch_mm
+	globl_equ	cpu_pj4b_set_pte_ext,	cpu_v7_set_pte_ext
+	globl_equ	cpu_pj4b_proc_init,	cpu_v7_proc_init
+	globl_equ	cpu_pj4b_proc_fin, 	cpu_v7_proc_fin
+	globl_equ	cpu_pj4b_reset,	   	cpu_v7_reset
+#ifdef CONFIG_PJ4B_ERRATA_4742
+ENTRY(cpu_pj4b_do_idle)
+	dsb					@ WFI may enter a low-power mode
+	wfi
+	dsb					@ barrier
+	ret	lr
+ENDPROC(cpu_pj4b_do_idle)
+#else
+	globl_equ	cpu_pj4b_do_idle,  	cpu_v7_do_idle
+#endif
+	globl_equ	cpu_pj4b_dcache_clean_area,	cpu_v7_dcache_clean_area
+#ifdef CONFIG_ARM_CPU_SUSPEND
+ENTRY(cpu_pj4b_do_suspend)
+	stmfd	sp!, {r6 - r10}
+	mrc	p15, 1, r6, c15, c1, 0  @ save CP15 - extra features
+	mrc	p15, 1, r7, c15, c2, 0	@ save CP15 - Aux Func Modes Ctrl 0
+	mrc	p15, 1, r8, c15, c1, 2	@ save CP15 - Aux Debug Modes Ctrl 2
+	mrc	p15, 1, r9, c15, c1, 1  @ save CP15 - Aux Debug Modes Ctrl 1
+	mrc	p15, 0, r10, c9, c14, 0  @ save CP15 - PMC
+	stmia	r0!, {r6 - r10}
+	ldmfd	sp!, {r6 - r10}
+	b cpu_v7_do_suspend
+ENDPROC(cpu_pj4b_do_suspend)
+
+ENTRY(cpu_pj4b_do_resume)
+	ldmia	r0!, {r6 - r10}
+	mcr	p15, 1, r6, c15, c1, 0  @ restore CP15 - extra features
+	mcr	p15, 1, r7, c15, c2, 0	@ restore CP15 - Aux Func Modes Ctrl 0
+	mcr	p15, 1, r8, c15, c1, 2	@ restore CP15 - Aux Debug Modes Ctrl 2
+	mcr	p15, 1, r9, c15, c1, 1  @ restore CP15 - Aux Debug Modes Ctrl 1
+	mcr	p15, 0, r10, c9, c14, 0  @ restore CP15 - PMC
+	b cpu_v7_do_resume
+ENDPROC(cpu_pj4b_do_resume)
+#endif
+.globl	cpu_pj4b_suspend_size
+.equ	cpu_pj4b_suspend_size, cpu_v7_suspend_size + 4 * 5
+
+#endif
+
+/*
+ *	__v7_setup
+ *
+ *	Initialise TLB, Caches, and MMU state ready to switch the MMU
+ *	on.  Return in r0 the new CP15 C1 control register setting.
+ *
+ *	r1, r2, r4, r5, r9, r13 must be preserved - r13 is not a stack
+ *	r4: TTBR0 (low word)
+ *	r5: TTBR0 (high word if LPAE)
+ *	r8: TTBR1
+ *	r9: Main ID register
+ *
+ *	This should be able to cover all ARMv7 cores.
+ *
+ *	It is assumed that:
+ *	- cache type register is implemented
+ */
+__v7_ca5mp_setup:
+__v7_ca9mp_setup:
+__v7_cr7mp_setup:
+__v7_cr8mp_setup:
+	mov	r10, #(1 << 0)			@ Cache/TLB ops broadcasting
+	b	1f
+__v7_ca7mp_setup:
+__v7_ca12mp_setup:
+__v7_ca15mp_setup:
+__v7_b15mp_setup:
+__v7_ca17mp_setup:
+	mov	r10, #0
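+	@ No stack is available this early, so r1-r6 and lr are spilled to a
+	@ small static save area (found via __v7_setup_stack_ptr) around the
+	@ call to v7_invalidate_l1, which clobbers r0-r6.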
+1:	adr	r0, __v7_setup_stack_ptr
+	ldr	r12, [r0]
+	add	r12, r12, r0			@ the local stack
+	stmia	r12, {r1-r6, lr}		@ v7_invalidate_l1 touches r0-r6
+	bl      v7_invalidate_l1
+	ldmia	r12, {r1-r6, lr}
+#ifdef CONFIG_SMP
+	orr	r10, r10, #(1 << 6)		@ Enable SMP/nAMP mode
+	ALT_SMP(mrc	p15, 0, r0, c1, c0, 1)
+	ALT_UP(mov	r0, r10)		@ fake it for UP
+	orr	r10, r10, r0			@ Set required bits
+	teq	r10, r0				@ Were they already set?
+	mcrne	p15, 0, r10, c1, c0, 1		@ No, update register
+#endif
+	b	__v7_setup_cont
+
+/*
+ * Errata:
+ *  r0, r10 available for use
+ *  r1, r2, r4, r5, r9, r13: must be preserved
+ *  r3: contains MIDR rX number in bits 23-20
+ *  r6: contains MIDR rXpY as 8-bit XY number
+ *  r9: MIDR
+ */
+__ca8_errata:
+#if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM)
+	teq	r3, #0x00100000			@ only present in r1p*
+	mrceq	p15, 0, r0, c1, c0, 1		@ read aux control register
+	orreq	r0, r0, #(1 << 6)		@ set IBE to 1
+	mcreq	p15, 0, r0, c1, c0, 1		@ write aux control register
+#endif
+#ifdef CONFIG_ARM_ERRATA_458693
+	teq	r6, #0x20			@ only present in r2p0
+	mrceq	p15, 0, r0, c1, c0, 1		@ read aux control register
+	orreq	r0, r0, #(1 << 5)		@ set L1NEON to 1
+	orreq	r0, r0, #(1 << 9)		@ set PLDNOP to 1
+	mcreq	p15, 0, r0, c1, c0, 1		@ write aux control register
+#endif
+#ifdef CONFIG_ARM_ERRATA_460075
+	teq	r6, #0x20			@ only present in r2p0
+	mrceq	p15, 1, r0, c9, c0, 2		@ read L2 cache aux ctrl register
+	tsteq	r0, #1 << 22
+	orreq	r0, r0, #(1 << 22)		@ set the Write Allocate disable bit
+	mcreq	p15, 1, r0, c9, c0, 2		@ write the L2 cache aux ctrl register
+#endif
+	b	__errata_finish
+
+__ca9_errata:
+#ifdef CONFIG_ARM_ERRATA_742230
+	cmp	r6, #0x22			@ only present up to r2p2
+	mrcle	p15, 0, r0, c15, c0, 1		@ read diagnostic register
+	orrle	r0, r0, #1 << 4			@ set bit #4
+	mcrle	p15, 0, r0, c15, c0, 1		@ write diagnostic register
+#endif
+#ifdef CONFIG_ARM_ERRATA_742231
+	teq	r6, #0x20			@ present in r2p0
+	teqne	r6, #0x21			@ present in r2p1
+	teqne	r6, #0x22			@ present in r2p2
+	mrceq	p15, 0, r0, c15, c0, 1		@ read diagnostic register
+	orreq	r0, r0, #1 << 12		@ set bit #12
+	orreq	r0, r0, #1 << 22		@ set bit #22
+	mcreq	p15, 0, r0, c15, c0, 1		@ write diagnostic register
+#endif
+#ifdef CONFIG_ARM_ERRATA_743622
+	teq	r3, #0x00200000			@ only present in r2p*
+	mrceq	p15, 0, r0, c15, c0, 1		@ read diagnostic register
+	orreq	r0, r0, #1 << 6			@ set bit #6
+	mcreq	p15, 0, r0, c15, c0, 1		@ write diagnostic register
+#endif
+#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP)
+	ALT_SMP(cmp r6, #0x30)			@ present prior to r3p0
+	ALT_UP_B(1f)
+	mrclt	p15, 0, r0, c15, c0, 1		@ read diagnostic register
+	orrlt	r0, r0, #1 << 11		@ set bit #11
+	mcrlt	p15, 0, r0, c15, c0, 1		@ write diagnostic register
+1:
+#endif
+	b	__errata_finish
+
+__ca15_errata:
+#ifdef CONFIG_ARM_ERRATA_773022
+	cmp	r6, #0x4			@ only present up to r0p4
+	mrcle	p15, 0, r0, c1, c0, 1		@ read aux control register
+	orrle	r0, r0, #1 << 1			@ disable loop buffer
+	mcrle	p15, 0, r0, c1, c0, 1		@ write aux control register
+#endif
+	b	__errata_finish
+
+__ca12_errata:
+#ifdef CONFIG_ARM_ERRATA_818325_852422
+	mrc	p15, 0, r10, c15, c0, 1		@ read diagnostic register
+	orr	r10, r10, #1 << 12		@ set bit #12
+	mcr	p15, 0, r10, c15, c0, 1		@ write diagnostic register
+#endif
+#ifdef CONFIG_ARM_ERRATA_821420
+	mrc	p15, 0, r10, c15, c0, 2		@ read internal feature reg
+	orr	r10, r10, #1 << 1		@ set bit #1
+	mcr	p15, 0, r10, c15, c0, 2		@ write internal feature reg
+#endif
+#ifdef CONFIG_ARM_ERRATA_825619
+	mrc	p15, 0, r10, c15, c0, 1		@ read diagnostic register
+	orr	r10, r10, #1 << 24		@ set bit #24
+	mcr	p15, 0, r10, c15, c0, 1		@ write diagnostic register
+#endif
+	b	__errata_finish
+
+__ca17_errata:
+#ifdef CONFIG_ARM_ERRATA_852421
+	cmp	r6, #0x12			@ only present up to r1p2
+	mrcle	p15, 0, r10, c15, c0, 1		@ read diagnostic register
+	orrle	r10, r10, #1 << 24		@ set bit #24
+	mcrle	p15, 0, r10, c15, c0, 1		@ write diagnostic register
+#endif
+#ifdef CONFIG_ARM_ERRATA_852423
+	cmp	r6, #0x12			@ only present up to r1p2
+	mrcle	p15, 0, r10, c15, c0, 1		@ read diagnostic register
+	orrle	r10, r10, #1 << 12		@ set bit #12
+	mcrle	p15, 0, r10, c15, c0, 1		@ write diagnostic register
+#endif
+	b	__errata_finish
+
+__v7_pj4b_setup:
+#ifdef CONFIG_CPU_PJ4B
+
+/* Auxiliary Debug Modes Control 1 Register */
+#define PJ4B_STATIC_BP (1 << 2) /* Enable Static BP */
+#define PJ4B_INTER_PARITY (1 << 8) /* Disable Internal Parity Handling */
+#define PJ4B_CLEAN_LINE (1 << 16) /* Disable data transfer for clean line */
+
+/* Auxiliary Debug Modes Control 2 Register */
+#define PJ4B_FAST_LDR (1 << 23) /* Disable fast LDR */
+#define PJ4B_SNOOP_DATA (1 << 25) /* Do not interleave write and snoop data */
+#define PJ4B_CWF (1 << 27) /* Disable Critical Word First feature */
+#define PJ4B_OUTSDNG_NC (1 << 29) /* Disable outstanding non cacheable rqst */
+#define PJ4B_L1_REP_RR (1 << 30) /* L1 replacement - Strict round robin */
+#define PJ4B_AUX_DBG_CTRL2 (PJ4B_SNOOP_DATA | PJ4B_CWF |\
+			    PJ4B_OUTSDNG_NC | PJ4B_L1_REP_RR)
+
+/* Auxiliary Functional Modes Control Register 0 */
+#define PJ4B_SMP_CFB (1 << 1) /* Set SMP mode. Join the coherency fabric */
+#define PJ4B_L1_PAR_CHK (1 << 2) /* Support L1 parity checking */
+#define PJ4B_BROADCAST_CACHE (1 << 8) /* Broadcast Cache and TLB maintenance */
+
+/* Auxiliary Debug Modes Control 0 Register */
+#define PJ4B_WFI_WFE (1 << 22) /* WFI/WFE - serve the DVM and back to idle */
+
+	/* Auxiliary Debug Modes Control 1 Register */
+	mrc	p15, 1,	r0, c15, c1, 1
+	orr     r0, r0, #PJ4B_CLEAN_LINE
+	orr     r0, r0, #PJ4B_INTER_PARITY
+	bic	r0, r0, #PJ4B_STATIC_BP
+	mcr	p15, 1,	r0, c15, c1, 1
+
+	/* Auxiliary Debug Modes Control 2 Register */
+	mrc	p15, 1,	r0, c15, c1, 2
+	bic	r0, r0, #PJ4B_FAST_LDR
+	orr	r0, r0, #PJ4B_AUX_DBG_CTRL2
+	mcr	p15, 1,	r0, c15, c1, 2
+
+	/* Auxiliary Functional Modes Control Register 0 */
+	mrc	p15, 1,	r0, c15, c2, 0
+#ifdef CONFIG_SMP
+	orr	r0, r0, #PJ4B_SMP_CFB
+#endif
+	orr	r0, r0, #PJ4B_L1_PAR_CHK
+	orr	r0, r0, #PJ4B_BROADCAST_CACHE
+	mcr	p15, 1,	r0, c15, c2, 0
+
+	/* Auxiliary Debug Modes Control 0 Register */
+	mrc	p15, 1,	r0, c15, c1, 0
+	orr	r0, r0, #PJ4B_WFI_WFE
+	mcr	p15, 1,	r0, c15, c1, 0
+
+#endif /* CONFIG_CPU_PJ4B */
+
+__v7_setup:
+	adr	r0, __v7_setup_stack_ptr
+	ldr	r12, [r0]
+	add	r12, r12, r0			@ the local stack
+	stmia	r12, {r1-r6, lr}		@ v7_invalidate_l1 touches r0-r6
+	bl      v7_invalidate_l1
+	ldmia	r12, {r1-r6, lr}
+
+__v7_setup_cont:
+	and	r0, r9, #0xff000000		@ ARM?
+	teq	r0, #0x41000000
+	bne	__errata_finish
+	and	r3, r9, #0x00f00000		@ variant
+	and	r6, r9, #0x0000000f		@ revision
+	orr	r6, r6, r3, lsr #20-4		@ combine variant and revision
+	ubfx	r0, r9, #4, #12			@ primary part number
+
+	/* Cortex-A8 Errata */
+	ldr	r10, =0x00000c08		@ Cortex-A8 primary part number
+	teq	r0, r10
+	beq	__ca8_errata
+
+	/* Cortex-A9 Errata */
+	ldr	r10, =0x00000c09		@ Cortex-A9 primary part number
+	teq	r0, r10
+	beq	__ca9_errata
+
+	/* Cortex-A12 Errata */
+	ldr	r10, =0x00000c0d		@ Cortex-A12 primary part number
+	teq	r0, r10
+	beq	__ca12_errata
+
+	/* Cortex-A17 Errata */
+	ldr	r10, =0x00000c0e		@ Cortex-A17 primary part number
+	teq	r0, r10
+	beq	__ca17_errata
+
+	/* Cortex-A15 Errata */
+	ldr	r10, =0x00000c0f		@ Cortex-A15 primary part number
+	teq	r0, r10
+	beq	__ca15_errata
+
+__errata_finish:
+	mov	r10, #0
+	mcr	p15, 0, r10, c7, c5, 0		@ I+BTB cache invalidate
+#ifdef CONFIG_MMU
+	mcr	p15, 0, r10, c8, c7, 0		@ invalidate I + D TLBs
+	v7_ttb_setup r10, r4, r5, r8, r3	@ TTBCR, TTBRx setup
+	ldr	r3, =PRRR			@ PRRR
+	ldr	r6, =NMRR			@ NMRR
+	mcr	p15, 0, r3, c10, c2, 0		@ write PRRR
+	mcr	p15, 0, r6, c10, c2, 1		@ write NMRR
+#endif
+	dsb					@ Complete invalidations
+#ifndef CONFIG_ARM_THUMBEE
+	mrc	p15, 0, r0, c0, c1, 0		@ read ID_PFR0 for ThumbEE
+	and	r0, r0, #(0xf << 12)		@ ThumbEE enabled field
+	teq	r0, #(1 << 12)			@ check if ThumbEE is present
+	bne	1f
+	mov	r3, #0
+	mcr	p14, 6, r3, c1, c0, 0		@ Initialize TEEHBR to 0
+	mrc	p14, 6, r0, c0, c0, 0		@ load TEECR
+	orr	r0, r0, #1			@ set the 1st bit in order to
+	mcr	p14, 6, r0, c0, c0, 0		@ stop userspace TEEHBR access
+1:
+#endif
+	adr	r3, v7_crval
+	ldmia	r3, {r3, r6}
+ ARM_BE8(orr	r6, r6, #1 << 25)		@ big-endian page tables
+#ifdef CONFIG_SWP_EMULATE
+	orr     r3, r3, #(1 << 10)              @ set SW bit in "clear"
+	bic     r6, r6, #(1 << 10)              @ clear it in "mmuset"
+#endif
+   	mrc	p15, 0, r0, c1, c0, 0		@ read control register
+	bic	r0, r0, r3			@ clear the bits we must clear
+	orr	r0, r0, r6			@ set them
+ THUMB(	orr	r0, r0, #1 << 30	)	@ Thumb exceptions
+	ret	lr				@ return to head.S:__ret
+
+	.align	2
+__v7_setup_stack_ptr:
+	.word	PHYS_RELATIVE(__v7_setup_stack, .)
+ENDPROC(__v7_setup)
+
+	.bss
+	.align	2
+__v7_setup_stack:
+	.space	4 * 7				@ 7 registers
+
+	__INITDATA
+
+	.weak cpu_v7_bugs_init
+
+	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+	define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_bugs_init
+
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+	@ generic v7 bpiall on context switch
+	globl_equ	cpu_v7_bpiall_proc_init,	cpu_v7_proc_init
+	globl_equ	cpu_v7_bpiall_proc_fin,		cpu_v7_proc_fin
+	globl_equ	cpu_v7_bpiall_reset,		cpu_v7_reset
+	globl_equ	cpu_v7_bpiall_do_idle,		cpu_v7_do_idle
+	globl_equ	cpu_v7_bpiall_dcache_clean_area, cpu_v7_dcache_clean_area
+	globl_equ	cpu_v7_bpiall_set_pte_ext,	cpu_v7_set_pte_ext
+	globl_equ	cpu_v7_bpiall_suspend_size,	cpu_v7_suspend_size
+#ifdef CONFIG_ARM_CPU_SUSPEND
+	globl_equ	cpu_v7_bpiall_do_suspend,	cpu_v7_do_suspend
+	globl_equ	cpu_v7_bpiall_do_resume,	cpu_v7_do_resume
+#endif
+	define_processor_functions v7_bpiall, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_bugs_init
+
+#define HARDENED_BPIALL_PROCESSOR_FUNCTIONS v7_bpiall_processor_functions
+#else
+#define HARDENED_BPIALL_PROCESSOR_FUNCTIONS v7_processor_functions
+#endif
+
+#ifndef CONFIG_ARM_LPAE
+	@ Cortex-A8 - always needs bpiall switch_mm implementation
+	globl_equ	cpu_ca8_proc_init,	cpu_v7_proc_init
+	globl_equ	cpu_ca8_proc_fin,	cpu_v7_proc_fin
+	globl_equ	cpu_ca8_reset,		cpu_v7_reset
+	globl_equ	cpu_ca8_do_idle,	cpu_v7_do_idle
+	globl_equ	cpu_ca8_dcache_clean_area, cpu_v7_dcache_clean_area
+	globl_equ	cpu_ca8_set_pte_ext,	cpu_v7_set_pte_ext
+	globl_equ	cpu_ca8_switch_mm,	cpu_v7_bpiall_switch_mm
+	globl_equ	cpu_ca8_suspend_size,	cpu_v7_suspend_size
+#ifdef CONFIG_ARM_CPU_SUSPEND
+	globl_equ	cpu_ca8_do_suspend,	cpu_v7_do_suspend
+	globl_equ	cpu_ca8_do_resume,	cpu_v7_do_resume
+#endif
+	define_processor_functions ca8, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_ca8_ibe
+
+	@ Cortex-A9 - needs more registers preserved across suspend/resume
+	@ and bpiall switch_mm for hardening
+	globl_equ	cpu_ca9mp_proc_init,	cpu_v7_proc_init
+	globl_equ	cpu_ca9mp_proc_fin,	cpu_v7_proc_fin
+	globl_equ	cpu_ca9mp_reset,	cpu_v7_reset
+	globl_equ	cpu_ca9mp_do_idle,	cpu_v7_do_idle
+	globl_equ	cpu_ca9mp_dcache_clean_area, cpu_v7_dcache_clean_area
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+	globl_equ	cpu_ca9mp_switch_mm,	cpu_v7_bpiall_switch_mm
+#else
+	globl_equ	cpu_ca9mp_switch_mm,	cpu_v7_switch_mm
+#endif
+	globl_equ	cpu_ca9mp_set_pte_ext,	cpu_v7_set_pte_ext
+	define_processor_functions ca9mp, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_bugs_init
+#endif
+
+	@ Cortex-A15 - needs iciallu switch_mm for hardening
+	globl_equ	cpu_ca15_proc_init,	cpu_v7_proc_init
+	globl_equ	cpu_ca15_proc_fin,	cpu_v7_proc_fin
+	globl_equ	cpu_ca15_reset,		cpu_v7_reset
+	globl_equ	cpu_ca15_do_idle,	cpu_v7_do_idle
+	globl_equ	cpu_ca15_dcache_clean_area, cpu_v7_dcache_clean_area
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+	globl_equ	cpu_ca15_switch_mm,	cpu_v7_iciallu_switch_mm
+#else
+	globl_equ	cpu_ca15_switch_mm,	cpu_v7_switch_mm
+#endif
+	globl_equ	cpu_ca15_set_pte_ext,	cpu_v7_set_pte_ext
+	globl_equ	cpu_ca15_suspend_size,	cpu_v7_suspend_size
+	globl_equ	cpu_ca15_do_suspend,	cpu_v7_do_suspend
+	globl_equ	cpu_ca15_do_resume,	cpu_v7_do_resume
+	define_processor_functions ca15, dabort=v7_early_abort, pabort=v7_pabort, suspend=1, bugs=cpu_v7_ca15_ibe
+#ifdef CONFIG_CPU_PJ4B
+	define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
+#endif
+
+	.section ".rodata"
+
+	string	cpu_arch_name, "armv7"
+	string	cpu_elf_name, "v7"
+	.align
+
+	.section ".proc.info.init", #alloc
+
+	/*
+	 * Standard v7 proc info content
+	 */
+.macro __v7_proc name, initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions, cache_fns = v7_cache_fns
+	ALT_SMP(.long	PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \
+			PMD_SECT_AF | PMD_FLAGS_SMP | \mm_mmuflags)
+	ALT_UP(.long	PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \
+			PMD_SECT_AF | PMD_FLAGS_UP | \mm_mmuflags)
+	.long	PMD_TYPE_SECT | PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ | PMD_SECT_AF | \io_mmuflags
+	initfn	\initfunc, \name
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \
+		HWCAP_EDSP | HWCAP_TLS | \hwcaps
+	.long	cpu_v7_name
+	.long	\proc_fns
+	.long	v7wbi_tlb_fns
+	.long	v6_user_fns
+	.long	\cache_fns
+.endm
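
For illustration only (not kernel source; the struct is a simplified stand-in for the real proc_info_list record laid out by the macro above), the ID/mask matching that selects one of the processor records below works like this:

#include <stdint.h>

/* Illustrative subset of a proc info record: just the match fields. */
struct cpu_match {
	uint32_t cpu_val;	/* required ID value, e.g. 0x410fc090 */
	uint32_t cpu_mask;	/* mask applied to the MIDR, e.g. 0xff0ffff0 */
};

static int cpu_matches(const struct cpu_match *m, uint32_t midr)
{
	return (midr & m->cpu_mask) == m->cpu_val;
}

/* Example: a Cortex-A9 r2p10 MIDR of 0x412fc09a matches the
 * __v7_ca9mp_proc_info record (0x410fc090 / 0xff0ffff0) below. */
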
+
+#ifndef CONFIG_ARM_LPAE
+	/*
+	 * ARM Ltd. Cortex A5 processor.
+	 */
+	.type   __v7_ca5mp_proc_info, #object
+__v7_ca5mp_proc_info:
+	.long	0x410fc050
+	.long	0xff0ffff0
+	__v7_proc __v7_ca5mp_proc_info, __v7_ca5mp_setup
+	.size	__v7_ca5mp_proc_info, . - __v7_ca5mp_proc_info
+
+	/*
+	 * ARM Ltd. Cortex A9 processor.
+	 */
+	.type   __v7_ca9mp_proc_info, #object
+__v7_ca9mp_proc_info:
+	.long	0x410fc090
+	.long	0xff0ffff0
+	__v7_proc __v7_ca9mp_proc_info, __v7_ca9mp_setup, proc_fns = ca9mp_processor_functions
+	.size	__v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info
+
+	/*
+	 * ARM Ltd. Cortex A8 processor.
+	 */
+	.type	__v7_ca8_proc_info, #object
+__v7_ca8_proc_info:
+	.long	0x410fc080
+	.long	0xff0ffff0
+	__v7_proc __v7_ca8_proc_info, __v7_setup, proc_fns = ca8_processor_functions
+	.size	__v7_ca8_proc_info, . - __v7_ca8_proc_info
+
+#endif	/* CONFIG_ARM_LPAE */
+
+	/*
+	 * Marvell PJ4B processor.
+	 */
+#ifdef CONFIG_CPU_PJ4B
+	.type   __v7_pj4b_proc_info, #object
+__v7_pj4b_proc_info:
+	.long	0x560f5800
+	.long	0xff0fff00
+	__v7_proc __v7_pj4b_proc_info, __v7_pj4b_setup, proc_fns = pj4b_processor_functions
+	.size	__v7_pj4b_proc_info, . - __v7_pj4b_proc_info
+#endif
+
+	/*
+	 * ARM Ltd. Cortex R7 processor.
+	 */
+	.type	__v7_cr7mp_proc_info, #object
+__v7_cr7mp_proc_info:
+	.long	0x410fc170
+	.long	0xff0ffff0
+	__v7_proc __v7_cr7mp_proc_info, __v7_cr7mp_setup
+	.size	__v7_cr7mp_proc_info, . - __v7_cr7mp_proc_info
+
+	/*
+	 * ARM Ltd. Cortex R8 processor.
+	 */
+	.type	__v7_cr8mp_proc_info, #object
+__v7_cr8mp_proc_info:
+	.long	0x410fc180
+	.long	0xff0ffff0
+	__v7_proc __v7_cr8mp_proc_info, __v7_cr8mp_setup
+	.size	__v7_cr8mp_proc_info, . - __v7_cr8mp_proc_info
+
+	/*
+	 * ARM Ltd. Cortex A7 processor.
+	 */
+	.type	__v7_ca7mp_proc_info, #object
+__v7_ca7mp_proc_info:
+	.long	0x410fc070
+	.long	0xff0ffff0
+	__v7_proc __v7_ca7mp_proc_info, __v7_ca7mp_setup
+	.size	__v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info
+
+	/*
+	 * ARM Ltd. Cortex A12 processor.
+	 */
+	.type	__v7_ca12mp_proc_info, #object
+__v7_ca12mp_proc_info:
+	.long	0x410fc0d0
+	.long	0xff0ffff0
+	__v7_proc __v7_ca12mp_proc_info, __v7_ca12mp_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS
+	.size	__v7_ca12mp_proc_info, . - __v7_ca12mp_proc_info
+
+	/*
+	 * ARM Ltd. Cortex A15 processor.
+	 */
+	.type	__v7_ca15mp_proc_info, #object
+__v7_ca15mp_proc_info:
+	.long	0x410fc0f0
+	.long	0xff0ffff0
+	__v7_proc __v7_ca15mp_proc_info, __v7_ca15mp_setup, proc_fns = ca15_processor_functions
+	.size	__v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info
+
+	/*
+	 * Broadcom Corporation Brahma-B15 processor.
+	 */
+	.type	__v7_b15mp_proc_info, #object
+__v7_b15mp_proc_info:
+	.long	0x420f00f0
+	.long	0xff0ffff0
+	__v7_proc __v7_b15mp_proc_info, __v7_b15mp_setup, proc_fns = ca15_processor_functions, cache_fns = b15_cache_fns
+	.size	__v7_b15mp_proc_info, . - __v7_b15mp_proc_info
+
+	/*
+	 * ARM Ltd. Cortex A17 processor.
+	 */
+	.type	__v7_ca17mp_proc_info, #object
+__v7_ca17mp_proc_info:
+	.long	0x410fc0e0
+	.long	0xff0ffff0
+	__v7_proc __v7_ca17mp_proc_info, __v7_ca17mp_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS
+	.size	__v7_ca17mp_proc_info, . - __v7_ca17mp_proc_info
+
+	/* ARM Ltd. Cortex A73 processor */
+	.type	__v7_ca73_proc_info, #object
+__v7_ca73_proc_info:
+	.long	0x410fd090
+	.long	0xff0ffff0
+	__v7_proc __v7_ca73_proc_info, __v7_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS
+	.size	__v7_ca73_proc_info, . - __v7_ca73_proc_info
+
+	/* ARM Ltd. Cortex A75 processor */
+	.type	__v7_ca75_proc_info, #object
+__v7_ca75_proc_info:
+	.long	0x410fd0a0
+	.long	0xff0ffff0
+	__v7_proc __v7_ca75_proc_info, __v7_setup, proc_fns = HARDENED_BPIALL_PROCESSOR_FUNCTIONS
+	.size	__v7_ca75_proc_info, . - __v7_ca75_proc_info
+
+	/*
+	 * Qualcomm Inc. Krait processors.
+	 */
+	.type	__krait_proc_info, #object
+__krait_proc_info:
+	.long	0x510f0400		@ Required ID value
+	.long	0xff0ffc00		@ Mask for ID
+	/*
+	 * Some Krait processors don't indicate support for SDIV and UDIV
+	 * instructions in the ARM instruction set, even though they actually
+	 * do support them. They also don't indicate support for fused multiply
+	 * instructions even though they actually do support them.
+	 */
+	__v7_proc __krait_proc_info, __v7_setup, hwcaps = HWCAP_IDIV | HWCAP_VFPv4
+	.size	__krait_proc_info, . - __krait_proc_info
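
For illustration only (a userspace sketch, not kernel source; the HWCAP bit values are copied from the ARM uapi hwcap header), the effect of forcing these hwcaps is that userspace sees the features as present even though the ID registers do not advertise them:

#include <stdio.h>
#include <sys/auxv.h>

/* Bit values mirror arch/arm/include/uapi/asm/hwcap.h. */
#define HWCAP_VFPv4	(1 << 16)
#define HWCAP_IDIVA	(1 << 17)
#define HWCAP_IDIVT	(1 << 18)

int main(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);

	/* On Krait these read as set because of the hwcaps forced above. */
	printf("idiv: %s\n", (hwcap & (HWCAP_IDIVA | HWCAP_IDIVT)) ? "yes" : "no");
	printf("vfpv4: %s\n", (hwcap & HWCAP_VFPv4) ? "yes" : "no");
	return 0;
}
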
+
+	/*
+	 * Match any ARMv7 processor core.
+	 */
+	.type	__v7_proc_info, #object
+__v7_proc_info:
+	.long	0x000f0000		@ Required ID value
+	.long	0x000f0000		@ Mask for ID
+	__v7_proc __v7_proc_info, __v7_setup
+	.size	__v7_proc_info, . - __v7_proc_info
diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
new file mode 100644
index 0000000..47a5acc
--- /dev/null
+++ b/arch/arm/mm/proc-v7m.S
@@ -0,0 +1,237 @@
+/*
+ *  linux/arch/arm/mm/proc-v7m.S
+ *
+ *  Copyright (C) 2008 ARM Ltd.
+ *  Copyright (C) 2001 Deep Blue Solutions Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  This is the "shell" of the ARMv7-M processor support.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/v7m.h>
+#include "proc-macros.S"
+
+ENTRY(cpu_v7m_proc_init)
+	ret	lr
+ENDPROC(cpu_v7m_proc_init)
+
+ENTRY(cpu_v7m_proc_fin)
+	ret	lr
+ENDPROC(cpu_v7m_proc_fin)
+
+/*
+ *	cpu_v7m_reset(loc)
+ *
+ *	Perform a soft reset of the system.  Put the CPU into the
+ *	same state as it would be if it had been reset, and branch
+ *	to what would be the reset vector.
+ *
+ *	- loc   - location to jump to for soft reset
+ */
+	.align	5
+ENTRY(cpu_v7m_reset)
+	ret	r0
+ENDPROC(cpu_v7m_reset)
+
+/*
+ *	cpu_v7m_do_idle()
+ *
+ *	Idle the processor (eg, wait for interrupt).
+ *
+ *	IRQs are already disabled.
+ */
+ENTRY(cpu_v7m_do_idle)
+	wfi
+	ret	lr
+ENDPROC(cpu_v7m_do_idle)
+
+ENTRY(cpu_v7m_dcache_clean_area)
+	ret	lr
+ENDPROC(cpu_v7m_dcache_clean_area)
+
+/*
+ * There is no MMU, so there is nothing to do.
+ */
+ENTRY(cpu_v7m_switch_mm)
+	ret	lr
+ENDPROC(cpu_v7m_switch_mm)
+
+.globl	cpu_v7m_suspend_size
+.equ	cpu_v7m_suspend_size, 0
+
+#ifdef CONFIG_ARM_CPU_SUSPEND
+ENTRY(cpu_v7m_do_suspend)
+	ret	lr
+ENDPROC(cpu_v7m_do_suspend)
+
+ENTRY(cpu_v7m_do_resume)
+	ret	lr
+ENDPROC(cpu_v7m_do_resume)
+#endif
+
+ENTRY(cpu_cm7_dcache_clean_area)
+	dcache_line_size r2, r3
+	movw	r3, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_DCCMVAC
+	movt	r3, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_DCCMVAC
+
+1:	str	r0, [r3]		@ clean D entry
+	add	r0, r0, r2
+	subs	r1, r1, r2
+	bhi	1b
+	dsb
+	ret	lr
+ENDPROC(cpu_cm7_dcache_clean_area)
+
+ENTRY(cpu_cm7_proc_fin)
+	movw	r2, #:lower16:(BASEADDR_V7M_SCB + V7M_SCB_CCR)
+	movt	r2, #:upper16:(BASEADDR_V7M_SCB + V7M_SCB_CCR)
+	ldr	r0, [r2]
+	bic	r0, r0, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC)
+	str	r0, [r2]
+	ret	lr
+ENDPROC(cpu_cm7_proc_fin)
+
+	.section ".init.text", #alloc, #execinstr
+
+__v7m_cm7_setup:
+	mov	r8, #(V7M_SCB_CCR_DC | V7M_SCB_CCR_IC | V7M_SCB_CCR_BP)
+	b	__v7m_setup_cont
+/*
+ *	__v7m_setup
+ *
+ *	This should be able to cover all ARMv7-M cores.
+ */
+__v7m_setup:
+	mov	r8, #0
+
+__v7m_setup_cont:
+	@ Configure the vector table base address
+	ldr	r0, =BASEADDR_V7M_SCB
+	ldr	r12, =vector_table
+	str	r12, [r0, V7M_SCB_VTOR]
+
+	@ enable UsageFault, BusFault and MemManage fault.
+	ldr	r5, [r0, #V7M_SCB_SHCSR]
+	orr	r5, #(V7M_SCB_SHCSR_USGFAULTENA | V7M_SCB_SHCSR_BUSFAULTENA | V7M_SCB_SHCSR_MEMFAULTENA)
+	str	r5, [r0, #V7M_SCB_SHCSR]
+
+	@ Lower the priority of the SVC and PendSV exceptions
+	mov	r5, #0x80000000
+	str	r5, [r0, V7M_SCB_SHPR2]	@ set SVC priority
+	mov	r5, #0x00800000
+	str	r5, [r0, V7M_SCB_SHPR3]	@ set PendSV priority
+
+	@ SVC to switch to handler mode. Notice that this requires sp to
+	@ point to writeable memory because the processor saves
+	@ some registers to the stack.
+	badr	r1, 1f
+	ldr	r5, [r12, #11 * 4]	@ read the SVC vector entry
+	str	r1, [r12, #11 * 4]	@ write the temporary SVC vector entry
+	dsb
+	mov	r6, lr			@ save LR
+	ldr	sp, =init_thread_union + THREAD_START_SP
+	stmia	sp, {r0-r3, r12}
+	cpsie	i
+	svc	#0
+1:	cpsid	i
+	ldmia	sp, {r0-r3, r12}
+	str	r5, [r12, #11 * 4]	@ restore the original SVC vector entry
+	mov	lr, r6			@ restore LR
+
+	@ Special-purpose control register
+	mov	r1, #1
+	msr	control, r1		@ Thread mode has unprivileged access
+
+	@ Configure caches (if implemented)
+	teq     r8, #0
+	stmneia	sp, {r0-r6, lr}		@ v7m_invalidate_l1 touches r0-r6
+	blne	v7m_invalidate_l1
+	teq     r8, #0			@ re-evaluate condition
+	ldmneia	sp, {r0-r6, lr}
+
+	@ Configure the System Control Register to ensure 8-byte stack alignment
+	@ Note the STKALIGN bit is either RW or RAO.
+	ldr	r0, [r0, V7M_SCB_CCR]   @ system control register
+	orr	r0, #V7M_SCB_CCR_STKALIGN
+	orr	r0, r0, r8
+
+	ret	lr
+ENDPROC(__v7m_setup)
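
For illustration only (not kernel source; the names below are invented), two details of __v7m_setup expressed in C terms: the temporary SVC handler is installed at vector-table entry 11, the ARMv7-M SVCall exception number, and the SHPR2/SHPR3 writes set the SVCall and PendSV priorities to 0x80:

#include <stdint.h>

#define V7M_EXC_SVCALL	11		/* vector table index used above */

static inline uint32_t svc_vector_offset(void)
{
	return V7M_EXC_SVCALL * 4;	/* the "#11 * 4" byte offset above */
}

/* SHPR2[31:24] holds the SVCall priority and SHPR3[23:16] the PendSV
 * priority; writing 0x80000000 and 0x00800000 therefore sets both to
 * 0x80, i.e. lower priority than the zero-priority fault handlers. */
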
+
+/*
+ * Cortex-M7 processor functions
+ */
+	globl_equ	cpu_cm7_proc_init,	cpu_v7m_proc_init
+	globl_equ	cpu_cm7_reset,		cpu_v7m_reset
+	globl_equ	cpu_cm7_do_idle,	cpu_v7m_do_idle
+	globl_equ	cpu_cm7_switch_mm,	cpu_v7m_switch_mm
+
+	define_processor_functions v7m, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1
+	define_processor_functions cm7, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1
+
+	.section ".rodata"
+	string cpu_arch_name, "armv7m"
+	string cpu_elf_name, "v7m"
+	string cpu_v7m_name, "ARMv7-M"
+
+	.section ".proc.info.init", #alloc
+
+.macro __v7m_proc name, initfunc, cache_fns = nop_cache_fns, hwcaps = 0,  proc_fns = v7m_processor_functions
+	.long	0			/* proc_info_list.__cpu_mm_mmu_flags */
+	.long	0			/* proc_info_list.__cpu_io_mmu_flags */
+	initfn	\initfunc, \name
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \hwcaps
+	.long	cpu_v7m_name
+	.long   \proc_fns
+	.long	0			/* proc_info_list.tlb */
+	.long	0			/* proc_info_list.user */
+	.long	\cache_fns
+.endm
+
+	/*
+	 * Match ARM Cortex-M7 processor.
+	 */
+	.type	__v7m_cm7_proc_info, #object
+__v7m_cm7_proc_info:
+	.long	0x410fc270		/* ARM Cortex-M7 0xC27 */
+	.long	0xff0ffff0		/* Mask off revision, patch release */
+	__v7m_proc __v7m_cm7_proc_info, __v7m_cm7_setup, hwcaps = HWCAP_EDSP, cache_fns = v7m_cache_fns, proc_fns = cm7_processor_functions
+	.size	__v7m_cm7_proc_info, . - __v7m_cm7_proc_info
+
+	/*
+	 * Match ARM Cortex-M4 processor.
+	 */
+	.type	__v7m_cm4_proc_info, #object
+__v7m_cm4_proc_info:
+	.long	0x410fc240		/* ARM Cortex-M4 0xC24 */
+	.long	0xff0ffff0		/* Mask off revision, patch release */
+	__v7m_proc __v7m_cm4_proc_info, __v7m_setup, hwcaps = HWCAP_EDSP
+	.size	__v7m_cm4_proc_info, . - __v7m_cm4_proc_info
+
+	/*
+	 * Match ARM Cortex-M3 processor.
+	 */
+	.type	__v7m_cm3_proc_info, #object
+__v7m_cm3_proc_info:
+	.long	0x410fc230		/* ARM Cortex-M3 0xC23 */
+	.long	0xff0ffff0		/* Mask off revision, patch release */
+	__v7m_proc __v7m_cm3_proc_info, __v7m_setup
+	.size	__v7m_cm3_proc_info, . - __v7m_cm3_proc_info
+
+	/*
+	 * Match any ARMv7-M processor core.
+	 */
+	.type	__v7m_proc_info, #object
+__v7m_proc_info:
+	.long	0x000f0000		@ Required ID value
+	.long	0x000f0000		@ Mask for ID
+	__v7m_proc __v7m_proc_info, __v7m_setup
+	.size	__v7m_proc_info, . - __v7m_proc_info
+
diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S
new file mode 100644
index 0000000..293dcc2
--- /dev/null
+++ b/arch/arm/mm/proc-xsc3.S
@@ -0,0 +1,532 @@
+/*
+ * linux/arch/arm/mm/proc-xsc3.S
+ *
+ * Original Author: Matthew Gilbert
+ * Current Maintainer: Lennert Buytenhek <buytenh@wantstofly.org>
+ *
+ * Copyright 2004 (C) Intel Corp.
+ * Copyright 2005 (C) MontaVista Software, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * MMU functions for the Intel XScale3 Core (XSC3).  The XSC3 core is
+ * an extension to Intel's original XScale core that adds the following
+ * features:
+ *
+ * - ARMv6 Supersections
+ * - Low Locality Reference pages (replaces mini-cache)
+ * - 36-bit addressing
+ * - L2 cache
+ * - Cache coherency if chipset supports it
+ *
+ * Based on original XScale code by Nicolas Pitre.
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include "proc-macros.S"
+
+/*
+ * This is the maximum size of an area which will be flushed.  If the
+ * area is larger than this, then we flush the whole cache.
+ */
+#define MAX_AREA_SIZE	32768
+
+/*
+ * The cache line size of the L1 I, L1 D and unified L2 cache.
+ */
+#define CACHELINESIZE	32
+
+/*
+ * The size of the L1 D cache.
+ */
+#define CACHESIZE	32768
+
+/*
+ * This macro is used to wait for a CP15 write and is needed when we
+ * have to ensure that the last operation to the coprocessor was
+ * completed before continuing with operation.
+ */
+	.macro	cpwait_ret, lr, rd
+	mrc	p15, 0, \rd, c2, c0, 0		@ arbitrary read of cp15
+	sub	pc, \lr, \rd, LSR #32		@ wait for completion and
+						@ flush instruction pipeline
+	.endm
+
+/*
+ * This macro cleans and invalidates the entire L1 D cache.
+ */
+
+ 	.macro  clean_d_cache rd, rs
+	mov	\rd, #0x1f00
+	orr	\rd, \rd, #0x00e0
+1:	mcr	p15, 0, \rd, c7, c14, 2		@ clean/invalidate L1 D line
+	adds	\rd, \rd, #0x40000000
+	bcc	1b
+	subs	\rd, \rd, #0x20
+	bpl	1b
+	.endm
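
For illustration only (not kernel source), the iteration structure of the macro above in C: the set index sits in bits [12:5] of the operand (256 sets) and the way in bits [31:30] (4 ways), so the loop covers exactly CACHESIZE bytes of 32-byte lines:

#include <assert.h>

#define CACHELINESIZE	32
#define CACHESIZE	32768

int main(void)
{
	int ops = 0;

	for (int set = 255; set >= 0; set--)		/* subs #0x20 / bpl */
		for (int way = 0; way < 4; way++)	/* adds #0x40000000 / bcc */
			ops++;				/* one mcr c7, c14, 2 */

	assert(ops * CACHELINESIZE == CACHESIZE);	/* 256 * 4 * 32 = 32 KiB */
	return 0;
}
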
+
+	.text
+
+/*
+ * cpu_xsc3_proc_init()
+ *
+ * Nothing too exciting at the moment
+ */
+ENTRY(cpu_xsc3_proc_init)
+	ret	lr
+
+/*
+ * cpu_xsc3_proc_fin()
+ */
+ENTRY(cpu_xsc3_proc_fin)
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1800			@ ...IZ...........
+	bic	r0, r0, #0x0006			@ .............CA.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ret	lr
+
+/*
+ * cpu_xsc3_reset(loc)
+ *
+ * Perform a soft reset of the system.  Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ */
+	.align	5
+	.pushsection	.idmap.text, "ax"
+ENTRY(cpu_xsc3_reset)
+	mov	r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
+	msr	cpsr_c, r1			@ reset CPSR
+	mrc	p15, 0, r1, c1, c0, 0		@ ctrl register
+	bic	r1, r1, #0x3900			@ ..VIZ..S........
+	bic	r1, r1, #0x0086			@ ........B....CA.
+	mcr	p15, 0, r1, c1, c0, 0		@ ctrl register
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate L1 caches and BTB
+	bic	r1, r1, #0x0001			@ ...............M
+	mcr	p15, 0, r1, c1, c0, 0		@ ctrl register
+	@ CAUTION: MMU turned off from this point.  We count on the pipeline
+	@ already containing those two last instructions to survive.
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
+	ret	r0
+ENDPROC(cpu_xsc3_reset)
+	.popsection
+
+/*
+ * cpu_xsc3_do_idle()
+ *
+ * Cause the processor to idle
+ *
+ * For now we simply enter idle mode in every case
+ *
+ * XScale supports clock switching, but using idle mode support
+ * allows external hardware to react to system state changes.
+ */
+	.align	5
+
+ENTRY(cpu_xsc3_do_idle)
+	mov	r0, #1
+	mcr	p14, 0, r0, c7, c0, 0		@ go to idle
+	ret	lr
+
+/* ================================= CACHE ================================ */
+
+/*
+ *	flush_icache_all()
+ *
+ *	Unconditionally clean and invalidate the entire icache.
+ */
+ENTRY(xsc3_flush_icache_all)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	ret	lr
+ENDPROC(xsc3_flush_icache_all)
+
+/*
+ *	flush_user_cache_all()
+ *
+ *	Invalidate all cache entries in a particular address
+ *	space.
+ */
+ENTRY(xsc3_flush_user_cache_all)
+	/* FALLTHROUGH */
+
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(xsc3_flush_kern_cache_all)
+	mov	r2, #VM_EXEC
+	mov	ip, #0
+__flush_whole_cache:
+	clean_d_cache r0, r1
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate L1 I cache and BTB
+	mcrne	p15, 0, ip, c7, c10, 4		@ data write barrier
+	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
+	ret	lr
+
+/*
+ *	flush_user_cache_range(start, end, vm_flags)
+ *
+ *	Invalidate a range of cache entries in the specified
+ *	address space.
+ *
+ *	- start - start address (may not be aligned)
+ *	- end	- end address (exclusive, may not be aligned)
+ *	- vm_flags - vm_area_struct flags for the address space
+ */
+	.align	5
+ENTRY(xsc3_flush_user_cache_range)
+	mov	ip, #0
+	sub	r3, r1, r0			@ calculate total size
+	cmp	r3, #MAX_AREA_SIZE
+	bhs	__flush_whole_cache
+
+1:	tst	r2, #VM_EXEC
+	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate L1 I line
+	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
+	add	r0, r0, #CACHELINESIZE
+	cmp	r0, r1
+	blo	1b
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c5, 6		@ invalidate BTB
+	mcrne	p15, 0, ip, c7, c10, 4		@ data write barrier
+	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
+	ret	lr
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the I cache and the D cache in the
+ *	region described by start.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ *
+ *	Note: single I-cache line invalidation isn't used here since
+ *	it also trashes the mini I-cache used by JTAG debuggers.
+ */
+ENTRY(xsc3_coherent_kern_range)
+/* FALLTHROUGH */
+ENTRY(xsc3_coherent_user_range)
+	bic	r0, r0, #CACHELINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
+	add	r0, r0, #CACHELINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate L1 I cache and BTB
+	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
+	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
+	ret	lr
+
+/*
+ *	flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure no D cache aliasing occurs, either with itself or
+ *	the I cache.
+ *
+ *	- addr	- kernel address
+ *	- size	- region size
+ */
+ENTRY(xsc3_flush_kern_dcache_area)
+	add	r1, r0, r1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
+	add	r0, r0, #CACHELINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate L1 I cache and BTB
+	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
+	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
+	ret	lr
+
+/*
+ *	dma_inv_range(start, end)
+ *
+ *	Invalidate (discard) the specified virtual address range.
+ *	May not write back any entries.  If 'start' or 'end'
+ *	are not cache line aligned, those lines must be written
+ *	back.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+xsc3_dma_inv_range:
+	tst	r0, #CACHELINESIZE - 1
+	bic	r0, r0, #CACHELINESIZE - 1
+	mcrne	p15, 0, r0, c7, c10, 1		@ clean L1 D line
+	tst	r1, #CACHELINESIZE - 1
+	mcrne	p15, 0, r1, c7, c10, 1		@ clean L1 D line
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate L1 D line
+	add	r0, r0, #CACHELINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
+	ret	lr
+
+/*
+ *	dma_clean_range(start, end)
+ *
+ *	Clean the specified virtual address range.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+xsc3_dma_clean_range:
+	bic	r0, r0, #CACHELINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
+	add	r0, r0, #CACHELINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
+	ret	lr
+
+/*
+ *	dma_flush_range(start, end)
+ *
+ *	Clean and invalidate the specified virtual address range.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(xsc3_dma_flush_range)
+	bic	r0, r0, #CACHELINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
+	add	r0, r0, #CACHELINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
+	ret	lr
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(xsc3_dma_map_area)
+	add	r1, r1, r0
+	cmp	r2, #DMA_TO_DEVICE
+	beq	xsc3_dma_clean_range
+	bcs	xsc3_dma_inv_range
+	b	xsc3_dma_flush_range
+ENDPROC(xsc3_dma_map_area)
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(xsc3_dma_unmap_area)
+	ret	lr
+ENDPROC(xsc3_dma_unmap_area)
+
+	.globl	xsc3_flush_kern_cache_louis
+	.equ	xsc3_flush_kern_cache_louis, xsc3_flush_kern_cache_all
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions xsc3
+
+ENTRY(cpu_xsc3_dcache_clean_area)
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
+	add	r0, r0, #CACHELINESIZE
+	subs	r1, r1, #CACHELINESIZE
+	bhi	1b
+	ret	lr
+
+/* =============================== PageTable ============================== */
+
+/*
+ * cpu_xsc3_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+	.align	5
+ENTRY(cpu_xsc3_switch_mm)
+	clean_d_cache r1, r2
+	mcr	p15, 0, ip, c7, c5, 0		@ invalidate L1 I cache and BTB
+	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
+	mcr	p15, 0, ip, c7, c5, 4		@ prefetch flush
+	orr	r0, r0, #0x18			@ cache the page table in L2
+	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
+	cpwait_ret lr, ip
+
+/*
+ * cpu_xsc3_set_pte_ext(ptep, pte, ext)
+ *
+ * Set a PTE and flush it out
+ */
+cpu_xsc3_mt_table:
+	.long	0x00						@ L_PTE_MT_UNCACHED
+	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_BUFFERABLE
+	.long	PTE_EXT_TEX(5) | PTE_CACHEABLE			@ L_PTE_MT_WRITETHROUGH
+	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_WRITEBACK
+	.long	PTE_EXT_TEX(1) | PTE_BUFFERABLE			@ L_PTE_MT_DEV_SHARED
+	.long	0x00						@ unused
+	.long	0x00						@ L_PTE_MT_MINICACHE (not present)
+	.long	PTE_EXT_TEX(5) | PTE_CACHEABLE | PTE_BUFFERABLE	@ L_PTE_MT_WRITEALLOC (not present?)
+	.long	0x00						@ unused
+	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_DEV_WC
+	.long	0x00						@ unused
+	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_DEV_CACHED
+	.long	PTE_EXT_TEX(2)					@ L_PTE_MT_DEV_NONSHARED
+	.long	0x00						@ unused
+	.long	0x00						@ unused
+	.long	0x00						@ unused
+
+	.align	5
+ENTRY(cpu_xsc3_set_pte_ext)
+	xscale_set_pte_ext_prologue
+
+	tst	r1, #L_PTE_SHARED		@ shared?
+	and	r1, r1, #L_PTE_MT_MASK
+	adr	ip, cpu_xsc3_mt_table
+	ldr	ip, [ip, r1]
+	orrne	r2, r2, #PTE_EXT_COHERENT	@ interlock: mask in coherent bit
+	bic	r2, r2, #0x0c			@ clear old C,B bits
+	orr	r2, r2, ip
+
+	xscale_set_pte_ext_epilogue
+	ret	lr
+
+	.ltorg
+	.align
+
+.globl	cpu_xsc3_suspend_size
+.equ	cpu_xsc3_suspend_size, 4 * 6
+#ifdef CONFIG_ARM_CPU_SUSPEND
+ENTRY(cpu_xsc3_do_suspend)
+	stmfd	sp!, {r4 - r9, lr}
+	mrc	p14, 0, r4, c6, c0, 0	@ clock configuration, for turbo mode
+	mrc	p15, 0, r5, c15, c1, 0	@ CP access reg
+	mrc	p15, 0, r6, c13, c0, 0	@ PID
+	mrc 	p15, 0, r7, c3, c0, 0	@ domain ID
+	mrc	p15, 0, r8, c1, c0, 1	@ auxiliary control reg
+	mrc 	p15, 0, r9, c1, c0, 0	@ control reg
+	bic	r4, r4, #2		@ clear frequency change bit
+	stmia	r0, {r4 - r9}		@ store cp regs
+	ldmia	sp!, {r4 - r9, pc}
+ENDPROC(cpu_xsc3_do_suspend)
+
+ENTRY(cpu_xsc3_do_resume)
+	ldmia	r0, {r4 - r9}		@ load cp regs
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c7, 0	@ invalidate I & D caches, BTB
+	mcr	p15, 0, ip, c7, c10, 4	@ drain write (&fill) buffer
+	mcr	p15, 0, ip, c7, c5, 4	@ flush prefetch buffer
+	mcr	p15, 0, ip, c8, c7, 0	@ invalidate I & D TLBs
+	mcr	p14, 0, r4, c6, c0, 0	@ clock configuration, turbo mode.
+	mcr	p15, 0, r5, c15, c1, 0	@ CP access reg
+	mcr	p15, 0, r6, c13, c0, 0	@ PID
+	mcr	p15, 0, r7, c3, c0, 0	@ domain ID
+	orr	r1, r1, #0x18		@ cache the page table in L2
+	mcr	p15, 0, r1, c2, c0, 0	@ translation table base addr
+	mcr	p15, 0, r8, c1, c0, 1	@ auxiliary control reg
+	mov	r0, r9			@ control register
+	b	cpu_resume_mmu
+ENDPROC(cpu_xsc3_do_resume)
+#endif
+
+	.type	__xsc3_setup, #function
+__xsc3_setup:
+	mov	r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
+	msr	cpsr_c, r0
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate L1 caches and BTB
+	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
+	mcr	p15, 0, ip, c7, c5, 4		@ prefetch flush
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
+	orr	r4, r4, #0x18			@ cache the page table in L2
+	mcr	p15, 0, r4, c2, c0, 0		@ load page table pointer
+
+	mov	r0, #1 << 6			@ cp6 access for early sched_clock
+	mcr	p15, 0, r0, c15, c1, 0		@ write CP access register
+
+	mrc	p15, 0, r0, c1, c0, 1		@ get auxiliary control reg
+	and	r0, r0, #2			@ preserve the P bit setting
+	orr	r0, r0, #(1 << 10)		@ enable L2 for LLR cache
+	mcr	p15, 0, r0, c1, c0, 1		@ set auxiliary control reg
+
+	adr	r5, xsc3_crval
+	ldmia	r5, {r5, r6}
+
+#ifdef CONFIG_CACHE_XSC3L2
+	mrc	p15, 1, r0, c0, c0, 1		@ get L2 present information
+	ands	r0, r0, #0xf8
+	orrne	r6, r6, #(1 << 26)		@ enable L2 if present
+#endif
+
+	mrc	p15, 0, r0, c1, c0, 0		@ get control register
+	bic	r0, r0, r5			@ ..V. ..R. .... ..A.
+	orr	r0, r0, r6			@ ..VI Z..S .... .C.M (mmu)
+						@ ...I Z..S .... .... (uc)
+	ret	lr
+
+	.size	__xsc3_setup, . - __xsc3_setup
+
+	.type	xsc3_crval, #object
+xsc3_crval:
+	crval	clear=0x04002202, mmuset=0x00003905, ucset=0x00001900
+
+	__INITDATA
+
+	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+	define_processor_functions xsc3, dabort=v5t_early_abort, pabort=legacy_pabort, suspend=1
+
+	.section ".rodata"
+
+	string	cpu_arch_name, "armv5te"
+	string	cpu_elf_name, "v5"
+	string	cpu_xsc3_name, "XScale-V3 based processor"
+
+	.align
+
+	.section ".proc.info.init", #alloc
+
+.macro xsc3_proc_info name:req, cpu_val:req, cpu_mask:req
+	.type	__\name\()_proc_info,#object
+__\name\()_proc_info:
+	.long	\cpu_val
+	.long	\cpu_mask
+	.long	PMD_TYPE_SECT | \
+		PMD_SECT_BUFFERABLE | \
+		PMD_SECT_CACHEABLE | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	.long	PMD_TYPE_SECT | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	initfn	__xsc3_setup, __\name\()_proc_info
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
+	.long	cpu_xsc3_name
+	.long	xsc3_processor_functions
+	.long	v4wbi_tlb_fns
+	.long	xsc3_mc_user_fns
+	.long	xsc3_cache_fns
+	.size	__\name\()_proc_info, . - __\name\()_proc_info
+.endm
+
+	xsc3_proc_info xsc3, 0x69056000, 0xffffe000
+
+/* Note: PXA935 changed its implementor ID from Intel to Marvell */
+	xsc3_proc_info xsc3_pxa935, 0x56056000, 0xffffe000
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
new file mode 100644
index 0000000..3d75b79
--- /dev/null
+++ b/arch/arm/mm/proc-xscale.S
@@ -0,0 +1,661 @@
+/*
+ *  linux/arch/arm/mm/proc-xscale.S
+ *
+ *  Author:	Nicolas Pitre
+ *  Created:	November 2000
+ *  Copyright:	(C) 2000, 2001 MontaVista Software Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * MMU functions for the Intel XScale CPUs
+ *
+ * 2001 Aug 21:
+ *	some contributions by Brett Gaines <brett.w.gaines@intel.com>
+ *	Copyright 2001 by Intel Corp.
+ *
+ * 2001 Sep 08:
+ *	Completely revisited, many important fixes
+ *	Nicolas Pitre <nico@fluxnic.net>
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/hwcap.h>
+#include <asm/pgtable.h>
+#include <asm/pgtable-hwdef.h>
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include "proc-macros.S"
+
+/*
+ * This is the maximum size of an area which will be flushed.  If the area
+ * is larger than this, then we flush the whole cache
+ */
+#define MAX_AREA_SIZE	32768
+
+/*
+ * the cache line size of the I and D cache
+ */
+#define CACHELINESIZE	32
+
+/*
+ * the size of the data cache
+ */
+#define CACHESIZE	32768
+
+/*
+ * Virtual address used to allocate the cache when flushed
+ *
+ * This must be an address range which is _never_ used.  It should
+ * apparently have a mapping in the corresponding page table for
+ * compatibility with future CPUs that _could_ require it.  For the time
+ * being, we don't care.
+ *
+ * This must be aligned on a 2*CACHESIZE boundary.  The code selects one of
+ * the two areas alternately each time the clean_d_cache macro is used.
+ * Without this the XScale core exhibits cache eviction problems and no one
+ * knows why.
+ *
+ * Reminder: the vector table is located at 0xffff0000-0xffff0fff.
+ */
+#define CLEAN_ADDR	0xfffe0000
+
+/*
+ * This macro is used to wait for a CP15 write and is needed
+ * when we have to ensure that the last operation to the co-pro
+ * was completed before continuing with operation.
+ */
+	.macro	cpwait, rd
+	mrc	p15, 0, \rd, c2, c0, 0		@ arbitrary read of cp15
+	mov	\rd, \rd			@ wait for completion
+	sub 	pc, pc, #4			@ flush instruction pipeline
+	.endm
+
+	.macro	cpwait_ret, lr, rd
+	mrc	p15, 0, \rd, c2, c0, 0		@ arbitrary read of cp15
+	sub	pc, \lr, \rd, LSR #32		@ wait for completion and
+						@ flush instruction pipeline
+	.endm
+
+/*
+ * This macro cleans the entire dcache using line allocate.
+ * The main loop has been unrolled to reduce loop overhead.
+ * rd and rs are two scratch registers.
+ */
+	.macro  clean_d_cache, rd, rs
+	ldr	\rs, =clean_addr
+	ldr	\rd, [\rs]
+	eor	\rd, \rd, #CACHESIZE
+	str	\rd, [\rs]
+	add	\rs, \rd, #CACHESIZE
+1:	mcr	p15, 0, \rd, c7, c2, 5		@ allocate D cache line
+	add	\rd, \rd, #CACHELINESIZE
+	mcr	p15, 0, \rd, c7, c2, 5		@ allocate D cache line
+	add	\rd, \rd, #CACHELINESIZE
+	mcr	p15, 0, \rd, c7, c2, 5		@ allocate D cache line
+	add	\rd, \rd, #CACHELINESIZE
+	mcr	p15, 0, \rd, c7, c2, 5		@ allocate D cache line
+	add	\rd, \rd, #CACHELINESIZE
+	teq	\rd, \rs
+	bne	1b
+	.endm
+
+	.data
+	.align	2
+clean_addr:	.word	CLEAN_ADDR
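
For illustration only (not kernel source; the helper name is invented), the alternation performed by clean_d_cache on the word above: the stored address is XORed with CACHESIZE so successive flushes use the two CACHESIZE-sized windows at CLEAN_ADDR and CLEAN_ADDR + CACHESIZE, each then walked one allocated line per CACHELINESIZE step:

#include <stdint.h>

#define CLEAN_ADDR	0xfffe0000u
#define CACHESIZE	32768u

static uint32_t clean_addr = CLEAN_ADDR;

static uint32_t next_clean_window(void)
{
	clean_addr ^= CACHESIZE;	/* alternate 0xfffe0000 / 0xfffe8000 */
	return clean_addr;		/* start of the window to allocate from */
}
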
+
+	.text
+
+/*
+ * cpu_xscale_proc_init()
+ *
+ * Nothing too exciting at the moment
+ */
+ENTRY(cpu_xscale_proc_init)
+	@ enable write buffer coalescing. Some bootloaders disable it
+	mrc	p15, 0, r1, c1, c0, 1
+	bic	r1, r1, #1
+	mcr	p15, 0, r1, c1, c0, 1
+	ret	lr
+
+/*
+ * cpu_xscale_proc_fin()
+ */
+ENTRY(cpu_xscale_proc_fin)
+	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
+	bic	r0, r0, #0x1800			@ ...IZ...........
+	bic	r0, r0, #0x0006			@ .............CA.
+	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
+	ret	lr
+
+/*
+ * cpu_xscale_reset(loc)
+ *
+ * Perform a soft reset of the system.  Put the CPU into the
+ * same state as it would be if it had been reset, and branch
+ * to what would be the reset vector.
+ *
+ * loc: location to jump to for soft reset
+ *
+ * Beware PXA270 erratum E7.
+ */
+	.align	5
+	.pushsection	.idmap.text, "ax"
+ENTRY(cpu_xscale_reset)
+	mov	r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
+	msr	cpsr_c, r1			@ reset CPSR
+	mcr	p15, 0, r1, c10, c4, 1		@ unlock I-TLB
+	mcr	p15, 0, r1, c8, c5, 0		@ invalidate I-TLB
+	mrc	p15, 0, r1, c1, c0, 0		@ ctrl register
+	bic	r1, r1, #0x0086			@ ........B....CA.
+	bic	r1, r1, #0x3900			@ ..VIZ..S........
+	sub	pc, pc, #4			@ flush pipeline
+	@ *** cache line aligned ***
+	mcr	p15, 0, r1, c1, c0, 0		@ ctrl register
+	bic	r1, r1, #0x0001			@ ...............M
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I,D caches & BTB
+	mcr	p15, 0, r1, c1, c0, 0		@ ctrl register
+	@ CAUTION: MMU turned off from this point. We count on the pipeline
+	@ already containing those two last instructions to survive.
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+	ret	r0
+ENDPROC(cpu_xscale_reset)
+	.popsection
+
+/*
+ * cpu_xscale_do_idle()
+ *
+ * Cause the processor to idle
+ *
+ * For now we simply enter idle mode in every case
+ *
+ * XScale supports clock switching, but using idle mode support
+ * allows external hardware to react to system state changes.
+ */
+	.align	5
+
+ENTRY(cpu_xscale_do_idle)
+	mov	r0, #1
+	mcr	p14, 0, r0, c7, c0, 0		@ Go to IDLE
+	ret	lr
+
+/* ================================= CACHE ================================ */
+
+/*
+ *	flush_icache_all()
+ *
+ *	Unconditionally clean and invalidate the entire icache.
+ */
+ENTRY(xscale_flush_icache_all)
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
+	ret	lr
+ENDPROC(xscale_flush_icache_all)
+
+/*
+ *	flush_user_cache_all()
+ *
+ *	Invalidate all cache entries in a particular address
+ *	space.
+ */
+ENTRY(xscale_flush_user_cache_all)
+	/* FALLTHROUGH */
+
+/*
+ *	flush_kern_cache_all()
+ *
+ *	Clean and invalidate the entire cache.
+ */
+ENTRY(xscale_flush_kern_cache_all)
+	mov	r2, #VM_EXEC
+	mov	ip, #0
+__flush_whole_cache:
+	clean_d_cache r0, r1
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c5, 0		@ Invalidate I cache & BTB
+	mcrne	p15, 0, ip, c7, c10, 4		@ Drain Write (& Fill) Buffer
+	ret	lr
+
+/*
+ *	flush_user_cache_range(start, end, vm_flags)
+ *
+ *	Invalidate a range of cache entries in the specified
+ *	address space.
+ *
+ *	- start - start address (may not be aligned)
+ *	- end	- end address (exclusive, may not be aligned)
+ *	- vm_flags - vm_area_struct flags for the address space
+ */
+	.align	5
+ENTRY(xscale_flush_user_cache_range)
+	mov	ip, #0
+	sub	r3, r1, r0			@ calculate total size
+	cmp	r3, #MAX_AREA_SIZE
+	bhs	__flush_whole_cache
+
+1:	tst	r2, #VM_EXEC
+	mcrne	p15, 0, r0, c7, c5, 1		@ Invalidate I cache line
+	mcr	p15, 0, r0, c7, c10, 1		@ Clean D cache line
+	mcr	p15, 0, r0, c7, c6, 1		@ Invalidate D cache line
+	add	r0, r0, #CACHELINESIZE
+	cmp	r0, r1
+	blo	1b
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, ip, c7, c5, 6		@ Invalidate BTB
+	mcrne	p15, 0, ip, c7, c10, 4		@ Drain Write (& Fill) Buffer
+	ret	lr
+
+/*
+ *	coherent_kern_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ *
+ *	Note: single I-cache line invalidation isn't used here since
+ *	it also trashes the mini I-cache used by JTAG debuggers.
+ */
+ENTRY(xscale_coherent_kern_range)
+	bic	r0, r0, #CACHELINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHELINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ Invalidate I cache & BTB
+	mcr	p15, 0, r0, c7, c10, 4		@ Drain Write (& Fill) Buffer
+	ret	lr
+
+/*
+ *	coherent_user_range(start, end)
+ *
+ *	Ensure coherency between the Icache and the Dcache in the
+ *	region described by start.  If you have non-snooping
+ *	Harvard caches, you need to implement this function.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(xscale_coherent_user_range)
+	bic	r0, r0, #CACHELINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, r0, c7, c5, 1		@ Invalidate I cache entry
+	add	r0, r0, #CACHELINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 6		@ Invalidate BTB
+	mcr	p15, 0, r0, c7, c10, 4		@ Drain Write (& Fill) Buffer
+	ret	lr
+
+/*
+ *	flush_kern_dcache_area(void *addr, size_t size)
+ *
+ *	Ensure no D cache aliasing occurs, either with itself or
+ *	the I cache
+ *
+ *	- addr	- kernel address
+ *	- size	- region size
+ */
+ENTRY(xscale_flush_kern_dcache_area)
+	add	r1, r0, r1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHELINESIZE
+	cmp	r0, r1
+	blo	1b
+	mov	r0, #0
+	mcr	p15, 0, r0, c7, c5, 0		@ Invalidate I cache & BTB
+	mcr	p15, 0, r0, c7, c10, 4		@ Drain Write (& Fill) Buffer
+	ret	lr
+
+/*
+ *	dma_inv_range(start, end)
+ *
+ *	Invalidate (discard) the specified virtual address range.
+ *	May not write back any entries.  If 'start' or 'end'
+ *	are not cache line aligned, those lines must be written
+ *	back.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+xscale_dma_inv_range:
+	tst	r0, #CACHELINESIZE - 1
+	bic	r0, r0, #CACHELINESIZE - 1
+	mcrne	p15, 0, r0, c7, c10, 1		@ clean D entry
+	tst	r1, #CACHELINESIZE - 1
+	mcrne	p15, 0, r1, c7, c10, 1		@ clean D entry
+1:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHELINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ Drain Write (& Fill) Buffer
+	ret	lr
+
+/*
+ *	dma_clean_range(start, end)
+ *
+ *	Clean the specified virtual address range.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+xscale_dma_clean_range:
+	bic	r0, r0, #CACHELINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHELINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ Drain Write (& Fill) Buffer
+	ret	lr
+
+/*
+ *	dma_flush_range(start, end)
+ *
+ *	Clean and invalidate the specified virtual address range.
+ *
+ *	- start  - virtual start address
+ *	- end	 - virtual end address
+ */
+ENTRY(xscale_dma_flush_range)
+	bic	r0, r0, #CACHELINESIZE - 1
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	mcr	p15, 0, r0, c7, c6, 1		@ invalidate D entry
+	add	r0, r0, #CACHELINESIZE
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r0, c7, c10, 4		@ Drain Write (& Fill) Buffer
+	ret	lr
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(xscale_dma_map_area)
+	add	r1, r1, r0
+	cmp	r2, #DMA_TO_DEVICE
+	beq	xscale_dma_clean_range
+	bcs	xscale_dma_inv_range
+	b	xscale_dma_flush_range
+ENDPROC(xscale_dma_map_area)
+
+/*
+ *	dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(xscale_80200_A0_A1_dma_map_area)
+	add	r1, r1, r0
+	teq	r2, #DMA_TO_DEVICE
+	beq	xscale_dma_clean_range
+	b	xscale_dma_flush_range
+ENDPROC(xscale_80200_A0_A1_dma_map_area)
+
+/*
+ *	dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(xscale_dma_unmap_area)
+	ret	lr
+ENDPROC(xscale_dma_unmap_area)
+
+	.globl	xscale_flush_kern_cache_louis
+	.equ	xscale_flush_kern_cache_louis, xscale_flush_kern_cache_all
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions xscale
+
+/*
+ * On stepping A0/A1 of the 80200, invalidating D-cache by line doesn't
+ * clear the dirty bits, which means that if we invalidate a dirty line,
+ * the dirty data can still be written back to external memory later on.
+ *
+ * The recommended workaround is to always do a clean D-cache line before
+ * doing an invalidate D-cache line, so on the affected processors,
+ * dma_inv_range() is implemented as dma_flush_range().
+ *
+ * See erratum #25 of "Intel 80200 Processor Specification Update",
+ * revision January 22, 2003, available at:
+ *     http://www.intel.com/design/iio/specupdt/273415.htm
+ */
+.macro a0_alias basename
+	.globl xscale_80200_A0_A1_\basename
+	.type xscale_80200_A0_A1_\basename , %function
+	.equ xscale_80200_A0_A1_\basename , xscale_\basename
+.endm
+
+/*
+ * Most of the cache functions are unchanged for these processor revisions.
+ * Export suitable alias symbols for the unchanged functions:
+ */
+	a0_alias flush_icache_all
+	a0_alias flush_user_cache_all
+	a0_alias flush_kern_cache_all
+	a0_alias flush_kern_cache_louis
+	a0_alias flush_user_cache_range
+	a0_alias coherent_kern_range
+	a0_alias coherent_user_range
+	a0_alias flush_kern_dcache_area
+	a0_alias dma_flush_range
+	a0_alias dma_unmap_area
+
+	@ define struct cpu_cache_fns (see <asm/cacheflush.h> and proc-macros.S)
+	define_cache_functions xscale_80200_A0_A1
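
For illustration only (not kernel source; the struct below is a simplified stand-in for struct cpu_cache_fns in <asm/cacheflush.h>), the effect of the aliases above is a per-variant function table in which only the DMA map hook differs:

#include <stddef.h>

struct dma_cache_ops {
	void (*dma_map_area)(const void *start, size_t size, int dir);
};

extern void xscale_dma_map_area(const void *, size_t, int);
extern void xscale_80200_A0_A1_dma_map_area(const void *, size_t, int);

/* Ordinary steppings may invalidate without cleaning for DMA_FROM_DEVICE;
 * 80200 A0/A1 must clean first (erratum #25), so its entry points at the
 * clean+invalidate implementation instead. */
static const struct dma_cache_ops xscale_ops = {
	.dma_map_area = xscale_dma_map_area,
};

static const struct dma_cache_ops xscale_80200_a0_a1_ops = {
	.dma_map_area = xscale_80200_A0_A1_dma_map_area,
};
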
+
+ENTRY(cpu_xscale_dcache_clean_area)
+1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	add	r0, r0, #CACHELINESIZE
+	subs	r1, r1, #CACHELINESIZE
+	bhi	1b
+	ret	lr
+
+/* =============================== PageTable ============================== */
+
+/*
+ * cpu_xscale_switch_mm(pgd)
+ *
+ * Set the translation base pointer to be as described by pgd.
+ *
+ * pgd: new page tables
+ */
+	.align	5
+ENTRY(cpu_xscale_switch_mm)
+	clean_d_cache r1, r2
+	mcr	p15, 0, ip, c7, c5, 0		@ Invalidate I cache & BTB
+	mcr	p15, 0, ip, c7, c10, 4		@ Drain Write (& Fill) Buffer
+	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
+	cpwait_ret lr, ip
+
+/*
+ * cpu_xscale_set_pte_ext(ptep, pte, ext)
+ *
+ * Set a PTE and flush it out
+ *
+ * Errata 40: must set memory to write-through for user read-only pages.
+ */
+cpu_xscale_mt_table:
+	.long	0x00						@ L_PTE_MT_UNCACHED
+	.long	PTE_BUFFERABLE					@ L_PTE_MT_BUFFERABLE
+	.long	PTE_CACHEABLE					@ L_PTE_MT_WRITETHROUGH
+	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_WRITEBACK
+	.long	PTE_EXT_TEX(1) | PTE_BUFFERABLE			@ L_PTE_MT_DEV_SHARED
+	.long	0x00						@ unused
+	.long	PTE_EXT_TEX(1) | PTE_CACHEABLE			@ L_PTE_MT_MINICACHE
+	.long	PTE_EXT_TEX(1) | PTE_CACHEABLE | PTE_BUFFERABLE	@ L_PTE_MT_WRITEALLOC
+	.long	0x00						@ unused
+	.long	PTE_BUFFERABLE					@ L_PTE_MT_DEV_WC
+	.long	0x00						@ unused
+	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_DEV_CACHED
+	.long	0x00						@ L_PTE_MT_DEV_NONSHARED
+	.long	0x00						@ unused
+	.long	0x00						@ unused
+	.long	0x00						@ unused
+
+	.align	5
+ENTRY(cpu_xscale_set_pte_ext)
+	xscale_set_pte_ext_prologue
+
+	@
+	@ Erratum 40: must set memory to write-through for user read-only pages
+	@
+	and	ip, r1, #(L_PTE_MT_MASK | L_PTE_USER | L_PTE_RDONLY) & ~(4 << 2)
+	teq	ip, #L_PTE_MT_WRITEBACK | L_PTE_USER | L_PTE_RDONLY
+
+	moveq	r1, #L_PTE_MT_WRITETHROUGH
+	and	r1, r1, #L_PTE_MT_MASK
+	adr	ip, cpu_xscale_mt_table
+	ldr	ip, [ip, r1]
+	bic	r2, r2, #0x0c
+	orr	r2, r2, ip
+
+	xscale_set_pte_ext_epilogue
+	ret	lr
+
+	.ltorg
+	.align
+
+.globl	cpu_xscale_suspend_size
+.equ	cpu_xscale_suspend_size, 4 * 6
+#ifdef CONFIG_ARM_CPU_SUSPEND
+ENTRY(cpu_xscale_do_suspend)
+	stmfd	sp!, {r4 - r9, lr}
+	mrc	p14, 0, r4, c6, c0, 0	@ clock configuration, for turbo mode
+	mrc	p15, 0, r5, c15, c1, 0	@ CP access reg
+	mrc	p15, 0, r6, c13, c0, 0	@ PID
+	mrc	p15, 0, r7, c3, c0, 0	@ domain ID
+	mrc	p15, 0, r8, c1, c0, 1	@ auxiliary control reg
+	mrc	p15, 0, r9, c1, c0, 0	@ control reg
+	bic	r4, r4, #2		@ clear frequency change bit
+	stmia	r0, {r4 - r9}		@ store cp regs
+	ldmfd	sp!, {r4 - r9, pc}
+ENDPROC(cpu_xscale_do_suspend)
+
+ENTRY(cpu_xscale_do_resume)
+	ldmia	r0, {r4 - r9}		@ load cp regs
+	mov	ip, #0
+	mcr	p15, 0, ip, c8, c7, 0	@ invalidate I & D TLBs
+	mcr	p15, 0, ip, c7, c7, 0	@ invalidate I & D caches, BTB
+	mcr	p14, 0, r4, c6, c0, 0	@ clock configuration, turbo mode.
+	mcr	p15, 0, r5, c15, c1, 0	@ CP access reg
+	mcr	p15, 0, r6, c13, c0, 0	@ PID
+	mcr	p15, 0, r7, c3, c0, 0	@ domain ID
+	mcr	p15, 0, r1, c2, c0, 0	@ translation table base addr
+	mcr	p15, 0, r8, c1, c0, 1	@ auxiliary control reg
+	mov	r0, r9			@ control register
+	b	cpu_resume_mmu
+ENDPROC(cpu_xscale_do_resume)
+#endif
+
+	.type	__xscale_setup, #function
+__xscale_setup:
+	mcr	p15, 0, ip, c7, c7, 0		@ invalidate I, D caches & BTB
+	mcr	p15, 0, ip, c7, c10, 4		@ Drain Write (& Fill) Buffer
+	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I, D TLBs
+	mov	r0, #1 << 6			@ cp6 for IOP3xx and Bulverde
+	orr	r0, r0, #1 << 13		@ It's undefined whether this
+	mcr	p15, 0, r0, c15, c1, 0		@ affects USR or SVC modes
+
+	adr	r5, xscale_crval
+	ldmia	r5, {r5, r6}
+	mrc	p15, 0, r0, c1, c0, 0		@ get control register
+	bic	r0, r0, r5
+	orr	r0, r0, r6
+	ret	lr
+	.size	__xscale_setup, . - __xscale_setup
+
+	/*
+	 *  R
+	 * .RVI ZFRS BLDP WCAM
+	 * ..11 1.01 .... .101
+	 * 
+	 */
+	.type	xscale_crval, #object
+xscale_crval:
+	crval	clear=0x00003b07, mmuset=0x00003905, ucset=0x00001900
+
+	__INITDATA
+
+	@ define struct processor (see <asm/proc-fns.h> and proc-macros.S)
+	define_processor_functions xscale, dabort=v5t_early_abort, pabort=legacy_pabort, suspend=1
+
+	.section ".rodata"
+
+	string	cpu_arch_name, "armv5te"
+	string	cpu_elf_name, "v5"
+
+	string	cpu_80200_A0_A1_name, "XScale-80200 A0/A1"
+	string	cpu_80200_name, "XScale-80200"
+	string	cpu_80219_name, "XScale-80219"
+	string	cpu_8032x_name, "XScale-IOP8032x Family"
+	string	cpu_8033x_name, "XScale-IOP8033x Family"
+	string	cpu_pxa250_name, "XScale-PXA250"
+	string	cpu_pxa210_name, "XScale-PXA210"
+	string	cpu_ixp42x_name, "XScale-IXP42x Family"
+	string	cpu_ixp43x_name, "XScale-IXP43x Family"
+	string	cpu_ixp46x_name, "XScale-IXP46x Family"
+	string	cpu_ixp2400_name, "XScale-IXP2400"
+	string	cpu_ixp2800_name, "XScale-IXP2800"
+	string	cpu_pxa255_name, "XScale-PXA255"
+	string	cpu_pxa270_name, "XScale-PXA270"
+
+	.align
+
+	.section ".proc.info.init", #alloc
+
+.macro xscale_proc_info name:req, cpu_val:req, cpu_mask:req, cpu_name:req, cache
+	.type	__\name\()_proc_info,#object
+__\name\()_proc_info:
+	.long	\cpu_val
+	.long	\cpu_mask
+	.long	PMD_TYPE_SECT | \
+		PMD_SECT_BUFFERABLE | \
+		PMD_SECT_CACHEABLE | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	.long	PMD_TYPE_SECT | \
+		PMD_SECT_AP_WRITE | \
+		PMD_SECT_AP_READ
+	initfn	__xscale_setup, __\name\()_proc_info
+	.long	cpu_arch_name
+	.long	cpu_elf_name
+	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
+	.long	\cpu_name
+	.long	xscale_processor_functions
+	.long	v4wbi_tlb_fns
+	.long	xscale_mc_user_fns
+	.ifb \cache
+		.long	xscale_cache_fns
+	.else
+		.long	\cache
+	.endif
+	.size	__\name\()_proc_info, . - __\name\()_proc_info
+.endm
+
+	xscale_proc_info 80200_A0_A1, 0x69052000, 0xfffffffe, cpu_80200_name, \
+		cache=xscale_80200_A0_A1_cache_fns
+	xscale_proc_info 80200, 0x69052000, 0xfffffff0, cpu_80200_name
+	xscale_proc_info 80219, 0x69052e20, 0xffffffe0, cpu_80219_name
+	xscale_proc_info 8032x, 0x69052420, 0xfffff7e0, cpu_8032x_name
+	xscale_proc_info 8033x, 0x69054010, 0xfffffd30, cpu_8033x_name
+	xscale_proc_info pxa250, 0x69052100, 0xfffff7f0, cpu_pxa250_name
+	xscale_proc_info pxa210, 0x69052120, 0xfffff3f0, cpu_pxa210_name
+	xscale_proc_info ixp2400, 0x69054190, 0xfffffff0, cpu_ixp2400_name
+	xscale_proc_info ixp2800, 0x690541a0, 0xfffffff0, cpu_ixp2800_name
+	xscale_proc_info ixp42x, 0x690541c0, 0xffffffc0, cpu_ixp42x_name
+	xscale_proc_info ixp43x, 0x69054040, 0xfffffff0, cpu_ixp43x_name
+	xscale_proc_info ixp46x, 0x69054200, 0xffffff00, cpu_ixp46x_name
+	xscale_proc_info pxa255, 0x69052d00, 0xfffffff0, cpu_pxa255_name
+	xscale_proc_info pxa270, 0x69054110, 0xfffffff0, cpu_pxa270_name
diff --git a/arch/arm/mm/ptdump_debugfs.c b/arch/arm/mm/ptdump_debugfs.c
new file mode 100644
index 0000000..be8d87b
--- /dev/null
+++ b/arch/arm/mm/ptdump_debugfs.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include <asm/ptdump.h>
+
+static int ptdump_show(struct seq_file *m, void *v)
+{
+	struct ptdump_info *info = m->private;
+
+	ptdump_walk_pgd(m, info);
+	return 0;
+}
+
+static int ptdump_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ptdump_show, inode->i_private);
+}
+
+static const struct file_operations ptdump_fops = {
+	.open		= ptdump_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
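+/*
+ * Register a page-table dump file at the debugfs root.  The file is
+ * root-readable (0400); each read walks the pgd through the seq_file
+ * hooks above.  An illustrative caller:
+ *
+ *	ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables");
+ */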
+int ptdump_debugfs_register(struct ptdump_info *info, const char *name)
+{
+	struct dentry *pe;
+
+	pe = debugfs_create_file(name, 0400, NULL, info, &ptdump_fops);
+	return pe ? 0 : -ENOMEM;
+}
diff --git a/arch/arm/mm/pv-fixup-asm.S b/arch/arm/mm/pv-fixup-asm.S
new file mode 100644
index 0000000..1867f3e
--- /dev/null
+++ b/arch/arm/mm/pv-fixup-asm.S
@@ -0,0 +1,95 @@
+/*
+ *  Copyright (C) 2015 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This assembly is required to safely remap the physical address space
+ * for Keystone 2.
+ */
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/cp15.h>
+#include <asm/memory.h>
+#include <asm/pgtable.h>
+
+	.section ".idmap.text", "ax"
+
+#define L1_ORDER 3
+#define L2_ORDER 3
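+/* both levels use 64-bit LPAE descriptors: 1 << 3 = 8 bytes per entry */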
+
+ENTRY(lpae_pgtables_remap_asm)
+	stmfd	sp!, {r4-r8, lr}
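+	/*
+	 * r0/r1 = 64-bit physical address offset to apply
+	 * r2    = physical address of the level 1 page table
+	 * r3    = physical address of the boot data
+	 */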
+
+	mrc	p15, 0, r8, c1, c0, 0		@ read control reg
+	bic	ip, r8, #CR_M			@ disable caches and MMU
+	mcr	p15, 0, ip, c1, c0, 0
+	dsb
+	isb
+
+	/* Update level 2 entries covering the kernel */
+	ldr	r6, =(_end - 1)
+	add	r7, r2, #0x1000
+	add	r6, r7, r6, lsr #SECTION_SHIFT - L2_ORDER
+	add	r7, r7, #PAGE_OFFSET >> (SECTION_SHIFT - L2_ORDER)
+1:	ldrd	r4, [r7]
+	adds	r4, r4, r0
+	adc	r5, r5, r1
+	strd	r4, [r7], #1 << L2_ORDER
+	cmp	r7, r6
+	bls	1b
+
+	/* Update level 2 entries for the boot data */
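+	/* (two consecutive 64-bit descriptors are rewritten below) */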
+	add	r7, r2, #0x1000
+	add	r7, r7, r3, lsr #SECTION_SHIFT - L2_ORDER
+	bic	r7, r7, #(1 << L2_ORDER) - 1
+	ldrd	r4, [r7]
+	adds	r4, r4, r0
+	adc	r5, r5, r1
+	strd	r4, [r7], #1 << L2_ORDER
+	ldrd	r4, [r7]
+	adds	r4, r4, r0
+	adc	r5, r5, r1
+	strd	r4, [r7]
+
+	/* Update level 1 entries */
+	mov	r6, #4
+	mov	r7, r2
+2:	ldrd	r4, [r7]
+	adds	r4, r4, r0
+	adc	r5, r5, r1
+	strd	r4, [r7], #1 << L1_ORDER
+	subs	r6, r6, #1
+	bne	2b
+
+	mrrc	p15, 0, r4, r5, c2		@ read TTBR0
+	adds	r4, r4, r0			@ update physical address
+	adc	r5, r5, r1
+	mcrr	p15, 0, r4, r5, c2		@ write back TTBR0
+	mrrc	p15, 1, r4, r5, c2		@ read TTBR1
+	adds	r4, r4, r0			@ update physical address
+	adc	r5, r5, r1
+	mcrr	p15, 1, r4, r5, c2		@ write back TTBR1
+
+	dsb
+
+	mov	ip, #0
+	mcr	p15, 0, ip, c7, c5, 0		@ I+BTB cache invalidate
+	mcr	p15, 0, ip, c8, c7, 0		@ local_flush_tlb_all()
+	dsb
+	isb
+
+	mcr	p15, 0, r8, c1, c0, 0		@ re-enable MMU
+	dsb
+	isb
+
+	ldmfd	sp!, {r4-r8, pc}
+ENDPROC(lpae_pgtables_remap_asm)
diff --git a/arch/arm/mm/tcm.h b/arch/arm/mm/tcm.h
new file mode 100644
index 0000000..2410192
--- /dev/null
+++ b/arch/arm/mm/tcm.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2008-2009 ST-Ericsson AB
+ * License terms: GNU General Public License (GPL) version 2
+ * TCM memory handling for ARM systems
+ *
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ * Author: Rickard Andersson <rickard.andersson@stericsson.com>
+ */
+
+#ifdef CONFIG_HAVE_TCM
+void __init tcm_init(void);
+#else
+/* No TCM support, just blank inlines to be optimized out */
+static inline void tcm_init(void)
+{
+}
+#endif
diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S
new file mode 100644
index 0000000..d2d9ecb
--- /dev/null
+++ b/arch/arm/mm/tlb-fa.S
@@ -0,0 +1,81 @@
+/*
+ *  linux/arch/arm/mm/tlb-fa.S
+ *
+ *  Copyright (C) 2005 Faraday Corp.
+ *  Copyright (C) 2008-2009 Paulius Zaleckas <paulius.zaleckas@teltonika.lt>
+ *
+ * Based on tlb-v4wbi.S:
+ *  Copyright (C) 1997-2002 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ARM architecture version 4, Faraday variation.
+ *  These assume a unified TLB, with a write buffer and a branch target buffer (BTB).
+ *
+ *  Processors: FA520 FA526 FA626
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/tlbflush.h>
+#include "proc-macros.S"
+
+
+/*
+ *	fa_flush_user_tlb_range(start, end, mm)
+ *
+ *	Invalidate a range of TLB entries in the specified address space.
+ *
+ *	- start - range start address
+ *	- end   - range end address
+ *	- mm    - mm_struct describing address space
+ */
+	.align	4
+ENTRY(fa_flush_user_tlb_range)
+	vma_vm_mm ip, r2
+	act_mm	r3				@ get current->active_mm
+	eors	r3, ip, r3			@ == mm ?
+	retne	lr				@ no, we don't do anything
+	mov	r3, #0
+	mcr	p15, 0, r3, c7, c10, 4		@ drain WB
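+	@ clear the low 12 bits in two steps, as an ARM immediate is an
+	@ 8-bit rotated value: this rounds start down to a page boundary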
+	bic	r0, r0, #0x0ff
+	bic	r0, r0, #0xf00
+1:	mcr	p15, 0, r0, c8, c7, 1		@ invalidate UTLB entry
+	add	r0, r0, #PAGE_SZ
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r3, c7, c10, 4		@ data write barrier
+	ret	lr
+
+
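+/*
+ *	fa_flush_kern_tlb_range(start, end)
+ *
+ *	Invalidate a range of TLB entries in the specified kernel
+ *	address range.
+ *
+ *	- start - virtual address (may not be aligned)
+ *	- end   - virtual address (may not be aligned)
+ */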
+ENTRY(fa_flush_kern_tlb_range)
+	mov	r3, #0
+	mcr	p15, 0, r3, c7, c10, 4		@ drain WB
+	bic	r0, r0, #0x0ff
+	bic	r0, r0, #0xf00
+1:	mcr	p15, 0, r0, c8, c7, 1		@ invalidate UTLB entry
+	add	r0, r0, #PAGE_SZ
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r3, c7, c10, 4		@ data write barrier
+	mcr	p15, 0, r3, c7, c5, 4		@ prefetch flush (isb)
+	ret	lr
+
+	__INITDATA
+
+	/* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */
+	define_tlb_functions fa, fa_tlb_flags
diff --git a/arch/arm/mm/tlb-v4.S b/arch/arm/mm/tlb-v4.S
new file mode 100644
index 0000000..a2b5dca
--- /dev/null
+++ b/arch/arm/mm/tlb-v4.S
@@ -0,0 +1,64 @@
+/*
+ *  linux/arch/arm/mm/tlb-v4.S
+ *
+ *  Copyright (C) 1997-2002 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ARM architecture version 4 TLB handling functions.
+ *  These assume split I/D TLBs, and no write buffer.
+ *
+ * Processors: ARM720T
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/tlbflush.h>
+#include "proc-macros.S"
+
+	.align	5
+/*
+ *	v4_flush_user_tlb_range(start, end, mm)
+ *
+ *	Invalidate a range of TLB entries in the specified user address space.
+ *
+ *	- start - range start address
+ *	- end   - range end address
+ *	- mm    - mm_struct describing address space
+ */
+	.align	5
+ENTRY(v4_flush_user_tlb_range)
+	vma_vm_mm ip, r2
+	act_mm	r3				@ get current->active_mm
+	eors	r3, ip, r3			@ == mm ?
+	retne	lr				@ no, we don't do anything
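+	@ the kernel-range flush shares this loop; it skips only the
+	@ address space check above (see the .equ alias below)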
+.v4_flush_kern_tlb_range:
+	bic	r0, r0, #0x0ff
+	bic	r0, r0, #0xf00
+1:	mcr	p15, 0, r0, c8, c7, 1		@ invalidate TLB entry
+	add	r0, r0, #PAGE_SZ
+	cmp	r0, r1
+	blo	1b
+	ret	lr
+
+/*
+ *	v4_flush_kern_tlb_range(start, end)
+ *
+ *	Invalidate a range of TLB entries in the specified kernel
+ *	address range.
+ *
+ *	- start - virtual address (may not be aligned)
+ *	- end   - virtual address (may not be aligned)
+ */
+.globl v4_flush_kern_tlb_range
+.equ v4_flush_kern_tlb_range, .v4_flush_kern_tlb_range
+
+	__INITDATA
+
+	/* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */
+	define_tlb_functions v4, v4_tlb_flags
diff --git a/arch/arm/mm/tlb-v4wb.S b/arch/arm/mm/tlb-v4wb.S
new file mode 100644
index 0000000..5a093b4
--- /dev/null
+++ b/arch/arm/mm/tlb-v4wb.S
@@ -0,0 +1,74 @@
+/*
+ *  linux/arch/arm/mm/tlb-v4wb.S
+ *
+ *  Copyright (C) 1997-2002 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ARM architecture version 4 TLB handling functions.
+ *  These assume split I/D TLBs without single-entry I TLB invalidation, with a write buffer.
+ *
+ *  Processors: SA110 SA1100 SA1110
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/tlbflush.h>
+#include "proc-macros.S"
+
+	.align	5
+/*
+ *	v4wb_flush_user_tlb_range(start, end, mm)
+ *
+ *	Invalidate a range of TLB entries in the specified address space.
+ *
+ *	- start - range start address
+ *	- end   - range end address
+ *	- mm    - mm_struct describing address space
+ */
+	.align	5
+ENTRY(v4wb_flush_user_tlb_range)
+	vma_vm_mm ip, r2
+	act_mm	r3				@ get current->active_mm
+	eors	r3, ip, r3			@ == mm ?
+	retne	lr				@ no, we don't do anything
+	vma_vm_flags r2, r2
+	mcr	p15, 0, r3, c7, c10, 4		@ drain WB
+	tst	r2, #VM_EXEC
+	mcrne	p15, 0, r3, c8, c5, 0		@ invalidate I TLB
+	bic	r0, r0, #0x0ff
+	bic	r0, r0, #0xf00
+1:	mcr	p15, 0, r0, c8, c6, 1		@ invalidate D TLB entry
+	add	r0, r0, #PAGE_SZ
+	cmp	r0, r1
+	blo	1b
+	ret	lr
+
+/*
+ *	v4wb_flush_kern_tlb_range(start, end)
+ *
+ *	Invalidate a range of TLB entries in the specified kernel
+ *	address range.
+ *
+ *	- start - virtual address (may not be aligned)
+ *	- end   - virtual address (may not be aligned)
+ */
+ENTRY(v4wb_flush_kern_tlb_range)
+	mov	r3, #0
+	mcr	p15, 0, r3, c7, c10, 4		@ drain WB
+	bic	r0, r0, #0x0ff
+	bic	r0, r0, #0xf00
+	mcr	p15, 0, r3, c8, c5, 0		@ invalidate I TLB
+1:	mcr	p15, 0, r0, c8, c6, 1		@ invalidate D TLB entry
+	add	r0, r0, #PAGE_SZ
+	cmp	r0, r1
+	blo	1b
+	ret	lr
+
+	__INITDATA
+
+	/* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */
+	define_tlb_functions v4wb, v4wb_tlb_flags
diff --git a/arch/arm/mm/tlb-v4wbi.S b/arch/arm/mm/tlb-v4wbi.S
new file mode 100644
index 0000000..0588615
--- /dev/null
+++ b/arch/arm/mm/tlb-v4wbi.S
@@ -0,0 +1,74 @@
+/*
+ *  linux/arch/arm/mm/tlb-v4wbi.S
+ *
+ *  Copyright (C) 1997-2002 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ARM architecture version 4 and version 5 TLB handling functions.
+ *  These assume split I/D TLBs, with a write buffer.
+ *
+ *  Processors: ARM920 ARM922 ARM925 ARM926 XScale
+ */
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/tlbflush.h>
+#include "proc-macros.S"
+
+/*
+ *	v4wbi_flush_user_tlb_range(start, end, mm)
+ *
+ *	Invalidate a range of TLB entries in the specified address space.
+ *
+ *	- start - range start address
+ *	- end   - range end address
+ *	- mm    - mm_struct describing address space
+ */
+	.align	5
+ENTRY(v4wbi_flush_user_tlb_range)
+	vma_vm_mm ip, r2
+	act_mm	r3				@ get current->active_mm
+	eors	r3, ip, r3			@ == mm ?
+	retne	lr				@ no, we don't do anything
+	mov	r3, #0
+	mcr	p15, 0, r3, c7, c10, 4		@ drain WB
+	vma_vm_flags r2, r2
+	bic	r0, r0, #0x0ff
+	bic	r0, r0, #0xf00
+1:	tst	r2, #VM_EXEC
+	mcrne	p15, 0, r0, c8, c5, 1		@ invalidate I TLB entry
+	mcr	p15, 0, r0, c8, c6, 1		@ invalidate D TLB entry
+	add	r0, r0, #PAGE_SZ
+	cmp	r0, r1
+	blo	1b
+	ret	lr
+
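+/*
+ *	v4wbi_flush_kern_tlb_range(start, end)
+ *
+ *	Invalidate a range of TLB entries in the specified kernel
+ *	address range.
+ *
+ *	- start - virtual address (may not be aligned)
+ *	- end   - virtual address (may not be aligned)
+ */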
+ENTRY(v4wbi_flush_kern_tlb_range)
+	mov	r3, #0
+	mcr	p15, 0, r3, c7, c10, 4		@ drain WB
+	bic	r0, r0, #0x0ff
+	bic	r0, r0, #0xf00
+1:	mcr	p15, 0, r0, c8, c5, 1		@ invalidate I TLB entry
+	mcr	p15, 0, r0, c8, c6, 1		@ invalidate D TLB entry
+	add	r0, r0, #PAGE_SZ
+	cmp	r0, r1
+	blo	1b
+	ret	lr
+
+	__INITDATA
+
+	/* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */
+	define_tlb_functions v4wbi, v4wbi_tlb_flags
diff --git a/arch/arm/mm/tlb-v6.S b/arch/arm/mm/tlb-v6.S
new file mode 100644
index 0000000..6f689be
--- /dev/null
+++ b/arch/arm/mm/tlb-v6.S
@@ -0,0 +1,96 @@
+/*
+ *  linux/arch/arm/mm/tlb-v6.S
+ *
+ *  Copyright (C) 1997-2002 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ARM architecture version 6 TLB handling functions.
+ *  These assume a split I/D TLB.
+ */
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
+#include <asm/page.h>
+#include <asm/tlbflush.h>
+#include "proc-macros.S"
+
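+/* use the separate I and D invalidate-by-MVA ops, not the unified one */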
+#define HARVARD_TLB
+
+/*
+ *	v6wbi_flush_user_tlb_range(start, end, vma)
+ *
+ *	Invalidate a range of TLB entries in the specified address space.
+ *
+ *	- start - start address (may not be aligned)
+ *	- end   - end address (exclusive, may not be aligned)
+ *	- vma   - vma_struct describing address range
+ *
+ *	It is assumed that:
+ *	- the "Invalidate single entry" instruction will invalidate
+ *	  both the I and the D TLBs on Harvard-style TLBs
+ */
+ENTRY(v6wbi_flush_user_tlb_range)
+	vma_vm_mm r3, r2			@ get vma->vm_mm
+	mov	ip, #0
+	mmid	r3, r3				@ get vm_mm->context.id
+	mcr	p15, 0, ip, c7, c10, 4		@ drain write buffer
+	mov	r0, r0, lsr #PAGE_SHIFT		@ align address
+	mov	r1, r1, lsr #PAGE_SHIFT
+	asid	r3, r3				@ mask ASID
+	orr	r0, r3, r0, lsl #PAGE_SHIFT	@ Create initial MVA
+	mov	r1, r1, lsl #PAGE_SHIFT
+	vma_vm_flags r2, r2			@ get vma->vm_flags
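+	@ each loop iteration invalidates one page: the MVA argument holds
+	@ the page address in bits [31:12] and the ASID in bits [7:0]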
+1:
+#ifdef HARVARD_TLB
+	mcr	p15, 0, r0, c8, c6, 1		@ TLB invalidate D MVA
+	tst	r2, #VM_EXEC			@ Executable area ?
+	mcrne	p15, 0, r0, c8, c5, 1		@ TLB invalidate I MVA
+#else
+	mcr	p15, 0, r0, c8, c7, 1		@ TLB invalidate MVA
+#endif
+	add	r0, r0, #PAGE_SZ
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, ip, c7, c10, 4		@ data synchronization barrier
+	ret	lr
+
+/*
+ *	v6wbi_flush_kern_tlb_range(start,end)
+ *
+ *	Invalidate a range of kernel TLB entries
+ *
+ *	- start - start address (may not be aligned)
+ *	- end   - end address (exclusive, may not be aligned)
+ */
+ENTRY(v6wbi_flush_kern_tlb_range)
+	mov	r2, #0
+	mcr	p15, 0, r2, c7, c10, 4		@ drain write buffer
+	mov	r0, r0, lsr #PAGE_SHIFT		@ align address
+	mov	r1, r1, lsr #PAGE_SHIFT
+	mov	r0, r0, lsl #PAGE_SHIFT
+	mov	r1, r1, lsl #PAGE_SHIFT
+1:
+#ifdef HARVARD_TLB
+	mcr	p15, 0, r0, c8, c6, 1		@ TLB invalidate D MVA
+	mcr	p15, 0, r0, c8, c5, 1		@ TLB invalidate I MVA
+#else
+	mcr	p15, 0, r0, c8, c7, 1		@ TLB invalidate MVA
+#endif
+	add	r0, r0, #PAGE_SZ
+	cmp	r0, r1
+	blo	1b
+	mcr	p15, 0, r2, c7, c10, 4		@ data synchronization barrier
+	mcr	p15, 0, r2, c7, c5, 4		@ prefetch flush (isb)
+	ret	lr
+
+	__INIT
+
+	/* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */
+	define_tlb_functions v6wbi, v6wbi_tlb_flags
diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S
new file mode 100644
index 0000000..e5101a3
--- /dev/null
+++ b/arch/arm/mm/tlb-v7.S
@@ -0,0 +1,98 @@
+/*
+ *  linux/arch/arm/mm/tlb-v7.S
+ *
+ *  Copyright (C) 1997-2002 Russell King
+ *  Modified for ARMv7 by Catalin Marinas
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ *  ARM architecture version 7 TLB handling functions.
+ *  These assume a split I/D TLB.
+ */
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+#include <asm/page.h>
+#include <asm/tlbflush.h>
+#include "proc-macros.S"
+
+/*
+ *	v7wbi_flush_user_tlb_range(start, end, vma)
+ *
+ *	Invalidate a range of TLB entries in the specified address space.
+ *
+ *	- start - start address (may not be aligned)
+ *	- end   - end address (exclusive, may not be aligned)
+ *	- vma   - vma_struct describing address range
+ *
+ *	It is assumed that:
+ *	- the "Invalidate single entry" instruction will invalidate
+ *	  both the I and the D TLBs on Harvard-style TLBs
+ */
+ENTRY(v7wbi_flush_user_tlb_range)
+	vma_vm_mm r3, r2			@ get vma->vm_mm
+	mmid	r3, r3				@ get vm_mm->context.id
+	dsb	ish
+	mov	r0, r0, lsr #PAGE_SHIFT		@ align address
+	mov	r1, r1, lsr #PAGE_SHIFT
+	asid	r3, r3				@ mask ASID
+#ifdef CONFIG_ARM_ERRATA_720789
+	ALT_SMP(W(mov)	r3, #0	)
+	ALT_UP(W(nop)		)
+#endif
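+	@ with erratum 720789 the ASID was cleared above: the all-ASID
+	@ invalidate used below ignores the ASID field of the MVA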
+	orr	r0, r3, r0, lsl #PAGE_SHIFT	@ Create initial MVA
+	mov	r1, r1, lsl #PAGE_SHIFT
+1:
+#ifdef CONFIG_ARM_ERRATA_720789
+	ALT_SMP(mcr	p15, 0, r0, c8, c3, 3)	@ TLB invalidate U MVA all ASID (shareable)
+#else
+	ALT_SMP(mcr	p15, 0, r0, c8, c3, 1)	@ TLB invalidate U MVA (shareable)
+#endif
+	ALT_UP(mcr	p15, 0, r0, c8, c7, 1)	@ TLB invalidate U MVA
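+	@ (ALT_SMP/ALT_UP sites are patched at boot for SMP or UP operation)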
+
+	add	r0, r0, #PAGE_SZ
+	cmp	r0, r1
+	blo	1b
+	dsb	ish
+	ret	lr
+ENDPROC(v7wbi_flush_user_tlb_range)
+
+/*
+ *	v7wbi_flush_kern_tlb_range(start,end)
+ *
+ *	Invalidate a range of kernel TLB entries
+ *
+ *	- start - start address (may not be aligned)
+ *	- end   - end address (exclusive, may not be aligned)
+ */
+ENTRY(v7wbi_flush_kern_tlb_range)
+	dsb	ish
+	mov	r0, r0, lsr #PAGE_SHIFT		@ align address
+	mov	r1, r1, lsr #PAGE_SHIFT
+	mov	r0, r0, lsl #PAGE_SHIFT
+	mov	r1, r1, lsl #PAGE_SHIFT
+1:
+#ifdef CONFIG_ARM_ERRATA_720789
+	ALT_SMP(mcr	p15, 0, r0, c8, c3, 3)	@ TLB invalidate U MVA all ASID (shareable)
+#else
+	ALT_SMP(mcr	p15, 0, r0, c8, c3, 1)	@ TLB invalidate U MVA (shareable)
+#endif
+	ALT_UP(mcr	p15, 0, r0, c8, c7, 1)	@ TLB invalidate U MVA
+	add	r0, r0, #PAGE_SZ
+	cmp	r0, r1
+	blo	1b
+	dsb	ish
+	isb
+	ret	lr
+ENDPROC(v7wbi_flush_kern_tlb_range)
+
+	__INIT
+
+	/* define struct cpu_tlb_fns (see <asm/tlbflush.h> and proc-macros.S) */
+	define_tlb_functions v7wbi, v7wbi_tlb_flags_up, flags_smp=v7wbi_tlb_flags_smp