v4.19.13 snapshot.
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
new file mode 100644
index 0000000..dacf3f4
--- /dev/null
+++ b/drivers/dma/Kconfig
@@ -0,0 +1,664 @@
+#
+# DMA engine configuration
+#
+
+menuconfig DMADEVICES
+	bool "DMA Engine support"
+	depends on HAS_DMA
+	help
+	  DMA engines can do asynchronous data transfers without
+	  involving the host CPU.  Currently, this framework can be
+	  used to offload memory copies in the network stack and
+	  RAID operations in the MD driver.  This menu only presents
+	  DMA device drivers supported by the configured arch; it may
+	  be empty in some cases.
+
+config DMADEVICES_DEBUG
+        bool "DMA Engine debugging"
+        depends on DMADEVICES != n
+        help
+          This is an option for use by developers; most people should
+          say N here.  This enables DMA engine core and driver debugging.
+
+config DMADEVICES_VDEBUG
+        bool "DMA Engine verbose debugging"
+        depends on DMADEVICES_DEBUG != n
+        help
+          This is an option for use by developers; most people should
+          say N here.  This enables deeper (more verbose) debugging of
+          the DMA engine core and drivers.
+
+
+if DMADEVICES
+
+comment "DMA Devices"
+
+#core
+config ASYNC_TX_ENABLE_CHANNEL_SWITCH
+	bool
+
+config ARCH_HAS_ASYNC_TX_FIND_CHANNEL
+	bool
+
+config DMA_ENGINE
+	bool
+
+config DMA_VIRTUAL_CHANNELS
+	tristate
+
+config DMA_ACPI
+	def_bool y
+	depends on ACPI
+
+config DMA_OF
+	def_bool y
+	depends on OF
+	select DMA_ENGINE
+
+#devices
+config ALTERA_MSGDMA
+	tristate "Altera / Intel mSGDMA Engine"
+	select DMA_ENGINE
+	help
+	  Enable support for Altera / Intel mSGDMA controller.
+
+config AMBA_PL08X
+	bool "ARM PrimeCell PL080 or PL081 support"
+	depends on ARM_AMBA
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Say yes if your platform has a PL08x DMAC device which can
+	  provide DMA engine support. This includes the original ARM
+	  PL080 and PL081, Samsung's PL080 derivative and Faraday
+	  Technology's FTDMAC020 PL080 derivative.
+
+config AMCC_PPC440SPE_ADMA
+	tristate "AMCC PPC440SPe ADMA support"
+	depends on 440SPe || 440SP
+	select DMA_ENGINE
+	select DMA_ENGINE_RAID
+	select ARCH_HAS_ASYNC_TX_FIND_CHANNEL
+	select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+	help
+	  Enable support for the AMCC PPC440SPe RAID engines.
+
+config AT_HDMAC
+	tristate "Atmel AHB DMA support"
+	depends on ARCH_AT91
+	select DMA_ENGINE
+	help
+	  Support the Atmel AHB DMA controller.
+
+config AT_XDMAC
+	tristate "Atmel XDMA support"
+	depends on ARCH_AT91
+	select DMA_ENGINE
+	help
+	  Support the Atmel XDMA controller.
+
+config AXI_DMAC
+	tristate "Analog Devices AXI-DMAC DMA support"
+	depends on MICROBLAZE || NIOS2 || ARCH_ZYNQ || ARCH_SOCFPGA || COMPILE_TEST
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Enable support for the Analog Devices AXI-DMAC peripheral. This DMA
+	  controller is often used in Analog Devices' reference designs for FPGA
+	  platforms.
+
+config BCM_SBA_RAID
+	tristate "Broadcom SBA RAID engine support"
+	depends on ARM64 || COMPILE_TEST
+	depends on MAILBOX && RAID6_PQ
+	select DMA_ENGINE
+	select DMA_ENGINE_RAID
+	select ASYNC_TX_DISABLE_XOR_VAL_DMA
+	select ASYNC_TX_DISABLE_PQ_VAL_DMA
+	default m if ARCH_BCM_IPROC
+	help
+	  Enable support for Broadcom SBA RAID Engine. The SBA RAID
+	  engine is available on most of the Broadcom iProc SoCs. It
+	  has the capability to offload memcpy, xor and pq computation
+	  for raid5/6.
+
+config COH901318
+	bool "ST-Ericsson COH901318 DMA support"
+	select DMA_ENGINE
+	depends on ARCH_U300 || COMPILE_TEST
+	help
+	  Enable support for ST-Ericsson COH 901 318 DMA.
+
+config DMA_BCM2835
+	tristate "BCM2835 DMA engine support"
+	depends on ARCH_BCM2835
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+
+config DMA_JZ4740
+	tristate "JZ4740 DMA support"
+	depends on MACH_JZ4740 || COMPILE_TEST
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+
+config DMA_JZ4780
+	tristate "JZ4780 DMA support"
+	depends on MACH_JZ4780 || COMPILE_TEST
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  This selects support for the DMA controller in Ingenic JZ4780 SoCs.
+	  If you have a board based on such a SoC and wish to use DMA for
+	  devices which can use the DMA controller, say Y or M here.
+
+config DMA_SA11X0
+	tristate "SA-11x0 DMA support"
+	depends on ARCH_SA1100 || COMPILE_TEST
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Support the DMA engine found on Intel StrongARM SA-1100 and
+	  SA-1110 SoCs.  This DMA engine can only be used with on-chip
+	  devices.
+
+config DMA_SUN4I
+	tristate "Allwinner A10 DMA SoCs support"
+	depends on MACH_SUN4I || MACH_SUN5I || MACH_SUN7I
+	default (MACH_SUN4I || MACH_SUN5I || MACH_SUN7I)
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Enable support for the DMA controller present in the sun4i,
+	  sun5i and sun7i Allwinner ARM SoCs.
+
+config DMA_SUN6I
+	tristate "Allwinner A31 SoCs DMA support"
+	depends on MACH_SUN6I || MACH_SUN8I || (ARM64 && ARCH_SUNXI) || COMPILE_TEST
+	depends on RESET_CONTROLLER
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Support for the DMA engine first found in Allwinner A31 SoCs.
+
+config DW_AXI_DMAC
+	tristate "Synopsys DesignWare AXI DMA support"
+	depends on OF || COMPILE_TEST
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Enable support for Synopsys DesignWare AXI DMA controller.
+	  NOTE: This driver has not been tested on a 64-bit platform because
+	  no 64-bit platform with a Synopsys DW AXI DMAC was available.
+
+config EP93XX_DMA
+	bool "Cirrus Logic EP93xx DMA support"
+	depends on ARCH_EP93XX || COMPILE_TEST
+	select DMA_ENGINE
+	help
+	  Enable support for the Cirrus Logic EP93xx M2P/M2M DMA controller.
+
+config FSL_DMA
+	tristate "Freescale Elo series DMA support"
+	depends on FSL_SOC
+	select DMA_ENGINE
+	select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+	---help---
+	  Enable support for the Freescale Elo series DMA controllers.
+	  The Elo is the DMA controller on some mpc82xx and mpc83xx parts, the
+	  EloPlus is on mpc85xx and mpc86xx and Pxxx parts, and the Elo3 is on
+	  some Txxx and Bxxx parts.
+
+config FSL_EDMA
+	tristate "Freescale eDMA engine support"
+	depends on OF
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Support the Freescale eDMA engine with programmable channel
+	  multiplexing capability for DMA request sources (slots).
+	  This module can be found on Freescale Vybrid and LS-1 SoCs.
+
+config FSL_RAID
+        tristate "Freescale RAID engine Support"
+        depends on FSL_SOC && !ASYNC_TX_ENABLE_CHANNEL_SWITCH
+        select DMA_ENGINE
+        select DMA_ENGINE_RAID
+        ---help---
+          Enable support for Freescale RAID Engine. RAID Engine is
+          available on some QorIQ SoCs (like P5020/P5040). It has
+          the capability to offload memcpy, xor and pq computation
+	  for raid5/6.
+
+config IMG_MDC_DMA
+	tristate "IMG MDC support"
+	depends on MIPS || COMPILE_TEST
+	depends on MFD_SYSCON
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Enable support for the IMG multi-threaded DMA controller (MDC).
+
+config IMX_DMA
+	tristate "i.MX DMA support"
+	depends on ARCH_MXC
+	select DMA_ENGINE
+	help
+	  Support the i.MX DMA engine. This engine is integrated into
+	  Freescale i.MX1/21/27 chips.
+
+config IMX_SDMA
+	tristate "i.MX SDMA support"
+	depends on ARCH_MXC
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Support the i.MX SDMA engine. This engine is integrated into
+	  Freescale i.MX25/31/35/51/53/6 chips.
+
+config INTEL_IDMA64
+	tristate "Intel integrated DMA 64-bit support"
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Enable DMA support for the Intel Low Power Subsystem such as that
+	  found on the Intel Skylake PCH.
+
+config INTEL_IOATDMA
+	tristate "Intel I/OAT DMA support"
+	depends on PCI && X86_64
+	select DMA_ENGINE
+	select DMA_ENGINE_RAID
+	select DCA
+	help
+	  Enable support for the Intel(R) I/OAT DMA engine present
+	  in recent Intel Xeon chipsets.
+
+	  Say Y here if you have such a chipset.
+
+	  If unsure, say N.
+
+config INTEL_IOP_ADMA
+	tristate "Intel IOP ADMA support"
+	depends on ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX
+	select DMA_ENGINE
+	select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+	help
+	  Enable support for the Intel(R) IOP Series RAID engines.
+
+config INTEL_MIC_X100_DMA
+	tristate "Intel MIC X100 DMA Driver"
+	depends on 64BIT && X86 && INTEL_MIC_BUS
+	select DMA_ENGINE
+	help
+	  This enables DMA support for the Intel Many Integrated Core
+	  (MIC) family of PCIe form factor coprocessor X100 devices that
+	  run a 64 bit Linux OS. This driver will be used by both MIC
+	  host and card drivers.
+
+	  If you are building host kernel with a MIC device or a card
+	  kernel for a MIC device, then say M (recommended) or Y, else
+	  say N. If unsure say N.
+
+	  More information about the Intel MIC family as well as the Linux
+	  OS and tools for MIC to use with this driver are available from
+	  <http://software.intel.com/en-us/mic-developer>.
+
+config K3_DMA
+	tristate "Hisilicon K3 DMA support"
+	depends on ARCH_HI3xxx || ARCH_HISI || COMPILE_TEST
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Support the DMA engine for Hisilicon K3 platform
+	  devices.
+
+config LPC18XX_DMAMUX
+	bool "NXP LPC18xx/43xx DMA MUX for PL080"
+	depends on ARCH_LPC18XX || COMPILE_TEST
+	depends on OF && AMBA_PL08X
+	select MFD_SYSCON
+	help
+	  Enable support for DMA on NXP LPC18xx/43xx platforms
+	  with PL080 and multiplexed DMA request lines.
+
+config MMP_PDMA
+	bool "MMP PDMA support"
+	depends on ARCH_MMP || ARCH_PXA || COMPILE_TEST
+	select DMA_ENGINE
+	help
+	  Support the MMP PDMA engine for PXA and MMP platforms.
+
+config MMP_TDMA
+	bool "MMP Two-Channel DMA support"
+	depends on ARCH_MMP || COMPILE_TEST
+	select DMA_ENGINE
+	select MMP_SRAM if ARCH_MMP
+	select GENERIC_ALLOCATOR
+	help
+	  Support the MMP Two-Channel DMA engine.
+	  This engine is used for MMP Audio DMA and the pxa910 SQU.
+	  It needs the SRAM driver under mach-mmp.
+
+config MOXART_DMA
+	tristate "MOXART DMA support"
+	depends on ARCH_MOXART
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Enable support for the MOXA ART SoC DMA controller.
+
+	  Say Y here if you have a MOXA ART based platform and want to use
+	  its DMA controller, otherwise say N.
+
+config MPC512X_DMA
+	tristate "Freescale MPC512x built-in DMA engine support"
+	depends on PPC_MPC512x || PPC_MPC831x
+	select DMA_ENGINE
+	---help---
+	  Enable support for the Freescale MPC512x built-in DMA engine.
+
+config MV_XOR
+	bool "Marvell XOR engine support"
+	depends on PLAT_ORION || ARCH_MVEBU || COMPILE_TEST
+	select DMA_ENGINE
+	select DMA_ENGINE_RAID
+	select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+	---help---
+	  Enable support for the Marvell XOR engine.
+
+config MV_XOR_V2
+	bool "Marvell XOR engine version 2 support "
+	depends on ARM64
+	select DMA_ENGINE
+	select DMA_ENGINE_RAID
+	select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+	select GENERIC_MSI_IRQ_DOMAIN
+	---help---
+	  Enable support for the Marvell version 2 XOR engine.
+
+	  This engine provides acceleration for copy, XOR and RAID6
+	  operations, and is available on Marvell Armada 7K and 8K
+	  platforms.
+
+config MXS_DMA
+	bool "MXS DMA support"
+	depends on ARCH_MXS || ARCH_MXC || COMPILE_TEST
+	select STMP_DEVICE
+	select DMA_ENGINE
+	help
+	  Support the MXS DMA engine. This engine, including APBH-DMA
+	  and APBX-DMA, is integrated into some Freescale chips.
+
+config MX3_IPU
+	bool "MX3x Image Processing Unit support"
+	depends on ARCH_MXC
+	select DMA_ENGINE
+	default y
+	help
+	  If you plan to use the Image Processing Unit in the i.MX3x, say
+	  Y here. If unsure, say Y.
+
+config MX3_IPU_IRQS
+	int "Number of dynamically mapped interrupts for IPU"
+	depends on MX3_IPU
+	range 2 137
+	default 4
+	help
+	  Out of 137 interrupt sources on i.MX31 IPU only very few are used.
+	  To avoid bloating the irq_desc[] array we allocate a sufficient
+	  number of IRQ slots and map them dynamically to specific sources.
+
+config NBPFAXI_DMA
+	tristate "Renesas Type-AXI NBPF DMA support"
+	select DMA_ENGINE
+	depends on ARM || COMPILE_TEST
+	help
+	  Support for "Type-AXI" NBPF DMA IPs from Renesas
+
+config OWL_DMA
+	tristate "Actions Semi Owl SoCs DMA support"
+	depends on ARCH_ACTIONS
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Enable support for the Actions Semi Owl SoCs DMA controller.
+
+config PCH_DMA
+	tristate "Intel EG20T PCH / LAPIS Semicon IOH(ML7213/ML7223/ML7831) DMA"
+	depends on PCI && (X86_32 || COMPILE_TEST)
+	select DMA_ENGINE
+	help
+	  Enable support for Intel EG20T PCH DMA engine.
+
+	  This driver can also be used for the LAPIS Semiconductor IOH (Input/
+	  Output Hub) devices ML7213, ML7223 and ML7831.
+	  The ML7213 IOH is for IVI (In-Vehicle Infotainment) use, the ML7223
+	  IOH is for MP (Media Phone) use and the ML7831 IOH is for general
+	  purpose use.  ML7213/ML7223/ML7831 are companion chips for the Intel
+	  Atom E6xx series and are fully compatible with the Intel EG20T PCH.
+
+config PL330_DMA
+	tristate "DMA API Driver for PL330"
+	select DMA_ENGINE
+	depends on ARM_AMBA
+	help
+	  Select if your platform has one or more PL330 DMACs.
+	  You need to provide platform specific settings via
+	  platform_data for a dma-pl330 device.
+
+config PXA_DMA
+	bool "PXA DMA support"
+	depends on (ARCH_MMP || ARCH_PXA)
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Support the DMA engine for PXA. It is also compatible with the MMP
+	  PDMA platform. The internal DMA IP of all PXA variants is supported,
+	  with 16 to 32 channels for peripheral to memory or memory to memory
+	  transfers.
+
+config SIRF_DMA
+	tristate "CSR SiRFprimaII/SiRFmarco DMA support"
+	depends on ARCH_SIRF
+	select DMA_ENGINE
+	help
+	  Enable support for the CSR SiRFprimaII DMA engine.
+
+config STE_DMA40
+	bool "ST-Ericsson DMA40 support"
+	depends on ARCH_U8500
+	select DMA_ENGINE
+	help
+	  Support for the ST-Ericsson DMA40 controller.
+
+config ST_FDMA
+	tristate "ST FDMA dmaengine support"
+	depends on ARCH_STI
+	depends on REMOTEPROC
+	select ST_SLIM_REMOTEPROC
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Enable support for the ST FDMA controller.
+	  It supports 16 independent DMA channels and accepts up to 32 DMA
+	  requests.
+
+	  Say Y here if you have such a chipset.
+	  If unsure, say N.
+
+config STM32_DMA
+	bool "STMicroelectronics STM32 DMA support"
+	depends on ARCH_STM32 || COMPILE_TEST
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Enable support for the on-chip DMA controller on STMicroelectronics
+	  STM32 MCUs.
+	  If you have a board based on such an MCU and wish to use DMA, say Y
+	  here.
+
+config STM32_DMAMUX
+	bool "STMicroelectronics STM32 dma multiplexer support"
+	depends on STM32_DMA || COMPILE_TEST
+	help
+	  Enable support for the on-chip DMA multiplexer on STMicroelectronics
+	  STM32 MCUs.
+	  If you have a board based on such an MCU and wish to use the DMAMUX,
+	  say Y here.
+
+config STM32_MDMA
+	bool "STMicroelectronics STM32 master dma support"
+	depends on ARCH_STM32 || COMPILE_TEST
+	depends on OF
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Enable support for the on-chip MDMA controller on STMicroelectronics
+	  STM32 platforms.
+	  If you have a board based on an STM32 SoC and wish to use the master
+	  DMA, say Y here.
+
+config SPRD_DMA
+	tristate "Spreadtrum DMA support"
+	depends on ARCH_SPRD || COMPILE_TEST
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Enable support for the on-chip DMA controller on the Spreadtrum platform.
+
+config S3C24XX_DMAC
+	bool "Samsung S3C24XX DMA support"
+	depends on ARCH_S3C24XX || COMPILE_TEST
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Support for the Samsung S3C24XX DMA controller driver. The
+	  DMA controller has multiple DMA channels which can be
+	  configured for different peripherals like audio, UART and SPI.
+	  The DMA controller can transfer data from memory to peripheral,
+	  peripheral to memory, peripheral to peripheral and memory to memory.
+
+config TXX9_DMAC
+	tristate "Toshiba TXx9 SoC DMA support"
+	depends on MACH_TX49XX || MACH_TX39XX
+	select DMA_ENGINE
+	help
+	  Support the TXx9 SoC internal DMA controller.  This can be
+	  integrated in chips such as the Toshiba TX4927/38/39.
+
+config TEGRA20_APB_DMA
+	bool "NVIDIA Tegra20 APB DMA support"
+	depends on ARCH_TEGRA
+	select DMA_ENGINE
+	help
+	  Support for the NVIDIA Tegra20 APB DMA controller driver. The
+	  DMA controller has multiple DMA channels which can be
+	  configured for different peripherals like audio, UART, SPI,
+	  I2C etc. that sit on the APB bus.
+	  This DMA controller transfers data from memory to a peripheral FIFO
+	  or vice versa. It does not support memory to memory data transfer.
+
+config TEGRA210_ADMA
+	tristate "NVIDIA Tegra210 ADMA support"
+	depends on (ARCH_TEGRA_210_SOC || COMPILE_TEST) && PM_CLK
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Support for the NVIDIA Tegra210 ADMA controller driver. The
+	  DMA controller has multiple DMA channels and is used to service
+	  various audio clients in the Tegra210 audio processing engine
+	  (APE). This DMA controller transfers data from memory to
+	  peripheral and vice versa. It does not support memory to
+	  memory data transfer.
+
+config TIMB_DMA
+	tristate "Timberdale FPGA DMA support"
+	depends on MFD_TIMBERDALE || COMPILE_TEST
+	select DMA_ENGINE
+	help
+	  Enable support for the Timberdale FPGA DMA engine.
+
+config XGENE_DMA
+	tristate "APM X-Gene DMA support"
+	depends on ARCH_XGENE || COMPILE_TEST
+	select DMA_ENGINE
+	select DMA_ENGINE_RAID
+	select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+	help
+	  Enable support for the APM X-Gene SoC DMA engine.
+
+config XILINX_DMA
+	tristate "Xilinx AXI DMAS Engine"
+	depends on (ARCH_ZYNQ || MICROBLAZE || ARM64)
+	select DMA_ENGINE
+	help
+	  Enable support for the Xilinx AXI DMA, CDMA and VDMA soft IP cores.
+
+	  The AXI VDMA engine provides high-bandwidth direct memory access
+	  between memory and AXI4-Stream video type target peripherals,
+	  including peripherals which support the AXI4-Stream Video Protocol.
+	  It has two stream interfaces/channels, Memory Mapped to Stream
+	  (MM2S) and Stream to Memory Mapped (S2MM), for the data transfers.
+	  The AXI CDMA engine provides high-bandwidth direct memory access
+	  between a memory-mapped source address and a memory-mapped
+	  destination address.
+	  The AXI DMA engine provides high-bandwidth one-dimensional direct
+	  memory access between memory and AXI4-Stream target peripherals.
+
+config XILINX_ZYNQMP_DMA
+	tristate "Xilinx ZynqMP DMA Engine"
+	depends on (ARCH_ZYNQ || MICROBLAZE || ARM64)
+	select DMA_ENGINE
+	help
+	  Enable support for Xilinx ZynqMP DMA controller.
+
+config ZX_DMA
+	tristate "ZTE ZX DMA support"
+	depends on ARCH_ZX || COMPILE_TEST
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Support the DMA engine for ZTE ZX family platform devices.
+
+
+# driver files
+source "drivers/dma/bestcomm/Kconfig"
+
+source "drivers/dma/mediatek/Kconfig"
+
+source "drivers/dma/qcom/Kconfig"
+
+source "drivers/dma/dw/Kconfig"
+
+source "drivers/dma/hsu/Kconfig"
+
+source "drivers/dma/sh/Kconfig"
+
+source "drivers/dma/ti/Kconfig"
+
+# clients
+comment "DMA Clients"
+	depends on DMA_ENGINE
+
+config ASYNC_TX_DMA
+	bool "Async_tx: Offload support for the async_tx api"
+	depends on DMA_ENGINE
+	help
+	  This allows the async_tx api to take advantage of offload engines for
+	  memcpy, memset, xor, and raid6 p+q operations.  If your platform has
+	  a dma engine that can perform raid operations and you have enabled
+	  MD_RAID456 say Y.
+
+	  If unsure, say N.
+
+config DMATEST
+	tristate "DMA Test client"
+	depends on DMA_ENGINE
+	select DMA_ENGINE_RAID
+	help
+	  Simple DMA test client. Say N unless you're debugging a
+	  DMA Device driver.
+
+config DMA_ENGINE_RAID
+	bool
+
+endif
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
new file mode 100644
index 0000000..c91702d
--- /dev/null
+++ b/drivers/dma/Makefile
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: GPL-2.0
+#dmaengine debug flags
+subdir-ccflags-$(CONFIG_DMADEVICES_DEBUG)  := -DDEBUG
+subdir-ccflags-$(CONFIG_DMADEVICES_VDEBUG) += -DVERBOSE_DEBUG
+
+#core
+obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
+obj-$(CONFIG_DMA_VIRTUAL_CHANNELS) += virt-dma.o
+obj-$(CONFIG_DMA_ACPI) += acpi-dma.o
+obj-$(CONFIG_DMA_OF) += of-dma.o
+
+#dmatest
+obj-$(CONFIG_DMATEST) += dmatest.o
+
+#devices
+obj-$(CONFIG_ALTERA_MSGDMA) += altera-msgdma.o
+obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o
+obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/
+obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
+obj-$(CONFIG_AT_XDMAC) += at_xdmac.o
+obj-$(CONFIG_AXI_DMAC) += dma-axi-dmac.o
+obj-$(CONFIG_BCM_SBA_RAID) += bcm-sba-raid.o
+obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o
+obj-$(CONFIG_DMA_BCM2835) += bcm2835-dma.o
+obj-$(CONFIG_DMA_JZ4740) += dma-jz4740.o
+obj-$(CONFIG_DMA_JZ4780) += dma-jz4780.o
+obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o
+obj-$(CONFIG_DMA_SUN4I) += sun4i-dma.o
+obj-$(CONFIG_DMA_SUN6I) += sun6i-dma.o
+obj-$(CONFIG_DW_AXI_DMAC) += dw-axi-dmac/
+obj-$(CONFIG_DW_DMAC_CORE) += dw/
+obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o
+obj-$(CONFIG_FSL_DMA) += fsldma.o
+obj-$(CONFIG_FSL_EDMA) += fsl-edma.o
+obj-$(CONFIG_FSL_RAID) += fsl_raid.o
+obj-$(CONFIG_HSU_DMA) += hsu/
+obj-$(CONFIG_IMG_MDC_DMA) += img-mdc-dma.o
+obj-$(CONFIG_IMX_DMA) += imx-dma.o
+obj-$(CONFIG_IMX_SDMA) += imx-sdma.o
+obj-$(CONFIG_INTEL_IDMA64) += idma64.o
+obj-$(CONFIG_INTEL_IOATDMA) += ioat/
+obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
+obj-$(CONFIG_INTEL_MIC_X100_DMA) += mic_x100_dma.o
+obj-$(CONFIG_K3_DMA) += k3dma.o
+obj-$(CONFIG_LPC18XX_DMAMUX) += lpc18xx-dmamux.o
+obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o
+obj-$(CONFIG_MMP_TDMA) += mmp_tdma.o
+obj-$(CONFIG_MOXART_DMA) += moxart-dma.o
+obj-$(CONFIG_MPC512X_DMA) += mpc512x_dma.o
+obj-$(CONFIG_MV_XOR) += mv_xor.o
+obj-$(CONFIG_MV_XOR_V2) += mv_xor_v2.o
+obj-$(CONFIG_MXS_DMA) += mxs-dma.o
+obj-$(CONFIG_MX3_IPU) += ipu/
+obj-$(CONFIG_NBPFAXI_DMA) += nbpfaxi.o
+obj-$(CONFIG_OWL_DMA) += owl-dma.o
+obj-$(CONFIG_PCH_DMA) += pch_dma.o
+obj-$(CONFIG_PL330_DMA) += pl330.o
+obj-$(CONFIG_PPC_BESTCOMM) += bestcomm/
+obj-$(CONFIG_PXA_DMA) += pxa_dma.o
+obj-$(CONFIG_RENESAS_DMA) += sh/
+obj-$(CONFIG_SIRF_DMA) += sirf-dma.o
+obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o
+obj-$(CONFIG_STM32_DMA) += stm32-dma.o
+obj-$(CONFIG_STM32_DMAMUX) += stm32-dmamux.o
+obj-$(CONFIG_STM32_MDMA) += stm32-mdma.o
+obj-$(CONFIG_SPRD_DMA) += sprd-dma.o
+obj-$(CONFIG_S3C24XX_DMAC) += s3c24xx-dma.o
+obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o
+obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o
+obj-$(CONFIG_TEGRA210_ADMA) += tegra210-adma.o
+obj-$(CONFIG_TIMB_DMA) += timb_dma.o
+obj-$(CONFIG_XGENE_DMA) += xgene-dma.o
+obj-$(CONFIG_ZX_DMA) += zx_dma.o
+obj-$(CONFIG_ST_FDMA) += st_fdma.o
+
+obj-y += mediatek/
+obj-y += qcom/
+obj-y += ti/
+obj-y += xilinx/
diff --git a/drivers/dma/TODO b/drivers/dma/TODO
new file mode 100644
index 0000000..b8045cd
--- /dev/null
+++ b/drivers/dma/TODO
@@ -0,0 +1,12 @@
+TODO for slave dma
+
+1. Move remaining drivers to use the new slave interface
+2. Remove the old slave pointer mechanism
+3. Make issue_pending start the transaction in the drivers below
+	- mpc512x_dma
+	- imx-dma
+	- imx-sdma
+	- mxs-dma.c
+	- intel_mid_dma
+4. Check other subsystems for dma drivers and merge/move to dmaengine
+5. Remove dma_slave_config's dma direction.
diff --git a/drivers/dma/acpi-dma.c b/drivers/dma/acpi-dma.c
new file mode 100644
index 0000000..4a748c3
--- /dev/null
+++ b/drivers/dma/acpi-dma.c
@@ -0,0 +1,467 @@
+/*
+ * ACPI helpers for DMA request / controller
+ *
+ * Based on of-dma.c
+ *
+ * Copyright (C) 2013, Intel Corporation
+ * Authors: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ *	    Mika Westerberg <mika.westerberg@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/acpi.h>
+#include <linux/acpi_dma.h>
+#include <linux/property.h>
+
+static LIST_HEAD(acpi_dma_list);
+static DEFINE_MUTEX(acpi_dma_lock);
+
+/**
+ * acpi_dma_parse_resource_group - match device and parse resource group
+ * @grp:	CSRT resource group
+ * @adev:	ACPI device to match with
+ * @adma:	struct acpi_dma of the given DMA controller
+ *
+ * In order to match a device from DSDT table to the corresponding CSRT device
+ * we use MMIO address and IRQ.
+ *
+ * Return:
+ * 1 on success, 0 when no information is available, or appropriate errno value
+ * on error.
+ */
+static int acpi_dma_parse_resource_group(const struct acpi_csrt_group *grp,
+		struct acpi_device *adev, struct acpi_dma *adma)
+{
+	const struct acpi_csrt_shared_info *si;
+	struct list_head resource_list;
+	struct resource_entry *rentry;
+	resource_size_t mem = 0, irq = 0;
+	int ret;
+
+	if (grp->shared_info_length != sizeof(struct acpi_csrt_shared_info))
+		return -ENODEV;
+
+	INIT_LIST_HEAD(&resource_list);
+	ret = acpi_dev_get_resources(adev, &resource_list, NULL, NULL);
+	if (ret <= 0)
+		return 0;
+
+	list_for_each_entry(rentry, &resource_list, node) {
+		if (resource_type(rentry->res) == IORESOURCE_MEM)
+			mem = rentry->res->start;
+		else if (resource_type(rentry->res) == IORESOURCE_IRQ)
+			irq = rentry->res->start;
+	}
+
+	acpi_dev_free_resource_list(&resource_list);
+
+	/* Consider initial zero values as resource not found */
+	if (mem == 0 && irq == 0)
+		return 0;
+
+	si = (const struct acpi_csrt_shared_info *)&grp[1];
+
+	/* Match device by MMIO and IRQ */
+	if (si->mmio_base_low != lower_32_bits(mem) ||
+	    si->mmio_base_high != upper_32_bits(mem) ||
+	    si->gsi_interrupt != irq)
+		return 0;
+
+	dev_dbg(&adev->dev, "matches with %.4s%04X (rev %u)\n",
+		(char *)&grp->vendor_id, grp->device_id, grp->revision);
+
+	/* Check if the request line range is available */
+	if (si->base_request_line == 0 && si->num_handshake_signals == 0)
+		return 0;
+
+	adma->base_request_line = si->base_request_line;
+	adma->end_request_line = si->base_request_line +
+				 si->num_handshake_signals - 1;
+
+	dev_dbg(&adev->dev, "request line base: 0x%04x end: 0x%04x\n",
+		adma->base_request_line, adma->end_request_line);
+
+	return 1;
+}
+
+/**
+ * acpi_dma_parse_csrt - parse CSRT to extract additional DMA resources
+ * @adev:	ACPI device to match with
+ * @adma:	struct acpi_dma of the given DMA controller
+ *
+ * CSRT or Core System Resources Table is a proprietary ACPI table
+ * introduced by Microsoft. This table can contain devices that are not in
+ * the system DSDT table. In particular DMA controllers might be described
+ * here.
+ *
+ * We are using this table to get the request line range of the specific DMA
+ * controller to be used later.
+ */
+static void acpi_dma_parse_csrt(struct acpi_device *adev, struct acpi_dma *adma)
+{
+	struct acpi_csrt_group *grp, *end;
+	struct acpi_table_csrt *csrt;
+	acpi_status status;
+	int ret;
+
+	status = acpi_get_table(ACPI_SIG_CSRT, 0,
+				(struct acpi_table_header **)&csrt);
+	if (ACPI_FAILURE(status)) {
+		if (status != AE_NOT_FOUND)
+			dev_warn(&adev->dev, "failed to get the CSRT table\n");
+		return;
+	}
+
+	grp = (struct acpi_csrt_group *)(csrt + 1);
+	end = (struct acpi_csrt_group *)((void *)csrt + csrt->header.length);
+
+	while (grp < end) {
+		ret = acpi_dma_parse_resource_group(grp, adev, adma);
+		if (ret < 0) {
+			dev_warn(&adev->dev,
+				 "error in parsing resource group\n");
+			return;
+		}
+
+		grp = (struct acpi_csrt_group *)((void *)grp + grp->length);
+	}
+}
+
+/**
+ * acpi_dma_controller_register - Register a DMA controller to ACPI DMA helpers
+ * @dev:		struct device of DMA controller
+ * @acpi_dma_xlate:	translation function which converts a dma specifier
+ *			into a dma_chan structure
+ * @data:		pointer to controller specific data to be used by
+ *			the translation function
+ *
+ * Allocated memory should be freed with appropriate acpi_dma_controller_free()
+ * call.
+ *
+ * Return:
+ * 0 on success or appropriate errno value on error.
+ */
+int acpi_dma_controller_register(struct device *dev,
+		struct dma_chan *(*acpi_dma_xlate)
+		(struct acpi_dma_spec *, struct acpi_dma *),
+		void *data)
+{
+	struct acpi_device *adev;
+	struct acpi_dma	*adma;
+
+	if (!dev || !acpi_dma_xlate)
+		return -EINVAL;
+
+	/* Check if the device was enumerated by ACPI */
+	adev = ACPI_COMPANION(dev);
+	if (!adev)
+		return -EINVAL;
+
+	adma = kzalloc(sizeof(*adma), GFP_KERNEL);
+	if (!adma)
+		return -ENOMEM;
+
+	adma->dev = dev;
+	adma->acpi_dma_xlate = acpi_dma_xlate;
+	adma->data = data;
+
+	acpi_dma_parse_csrt(adev, adma);
+
+	/* Now queue acpi_dma controller structure in list */
+	mutex_lock(&acpi_dma_lock);
+	list_add_tail(&adma->dma_controllers, &acpi_dma_list);
+	mutex_unlock(&acpi_dma_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(acpi_dma_controller_register);
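+
+/*
+ * Illustrative sketch (not part of the original driver code): a DMA
+ * controller driver would typically register itself from its probe
+ * routine, often pairing acpi_dma_simple_xlate() with a
+ * struct acpi_dma_filter_info.  The names foo_filter_fn() and foo_probe()
+ * below are hypothetical.
+ *
+ *	static bool foo_filter_fn(struct dma_chan *chan, void *param)
+ *	{
+ *		struct acpi_dma_spec *dma_spec = param;
+ *
+ *		return chan->chan_id == dma_spec->chan_id;
+ *	}
+ *
+ *	static int foo_probe(struct platform_device *pdev)
+ *	{
+ *		static struct acpi_dma_filter_info info;
+ *
+ *		dma_cap_zero(info.dma_cap);
+ *		dma_cap_set(DMA_SLAVE, info.dma_cap);
+ *		info.filter_fn = foo_filter_fn;
+ *
+ *		return acpi_dma_controller_register(&pdev->dev,
+ *						    acpi_dma_simple_xlate,
+ *						    &info);
+ *	}
+ */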
+
+/**
+ * acpi_dma_controller_free - Remove a DMA controller from ACPI DMA helpers list
+ * @dev:	struct device of DMA controller
+ *
+ * Memory allocated by acpi_dma_controller_register() is freed here.
+ *
+ * Return:
+ * 0 on success or appropriate errno value on error.
+ */
+int acpi_dma_controller_free(struct device *dev)
+{
+	struct acpi_dma *adma;
+
+	if (!dev)
+		return -EINVAL;
+
+	mutex_lock(&acpi_dma_lock);
+
+	list_for_each_entry(adma, &acpi_dma_list, dma_controllers)
+		if (adma->dev == dev) {
+			list_del(&adma->dma_controllers);
+			mutex_unlock(&acpi_dma_lock);
+			kfree(adma);
+			return 0;
+		}
+
+	mutex_unlock(&acpi_dma_lock);
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(acpi_dma_controller_free);
+
+static void devm_acpi_dma_release(struct device *dev, void *res)
+{
+	acpi_dma_controller_free(dev);
+}
+
+/**
+ * devm_acpi_dma_controller_register - resource managed acpi_dma_controller_register()
+ * @dev:		device that is registering this DMA controller
+ * @acpi_dma_xlate:	translation function
+ * @data:		pointer to controller specific data
+ *
+ * Managed acpi_dma_controller_register(). DMA controllers registered by this
+ * function are automatically freed on driver detach. See
+ * acpi_dma_controller_register() for more information.
+ *
+ * Return:
+ * 0 on success or appropriate errno value on error.
+ */
+int devm_acpi_dma_controller_register(struct device *dev,
+		struct dma_chan *(*acpi_dma_xlate)
+		(struct acpi_dma_spec *, struct acpi_dma *),
+		void *data)
+{
+	void *res;
+	int ret;
+
+	res = devres_alloc(devm_acpi_dma_release, 0, GFP_KERNEL);
+	if (!res)
+		return -ENOMEM;
+
+	ret = acpi_dma_controller_register(dev, acpi_dma_xlate, data);
+	if (ret) {
+		devres_free(res);
+		return ret;
+	}
+	devres_add(dev, res);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devm_acpi_dma_controller_register);
+
+/**
+ * devm_acpi_dma_controller_free - resource managed acpi_dma_controller_free()
+ * @dev:	struct device of DMA controller
+ *
+ * Unregister a DMA controller registered with
+ * devm_acpi_dma_controller_register(). Normally this function will not need to
+ * be called and the resource management code will ensure that the resource is
+ * freed.
+ */
+void devm_acpi_dma_controller_free(struct device *dev)
+{
+	WARN_ON(devres_release(dev, devm_acpi_dma_release, NULL, NULL));
+}
+EXPORT_SYMBOL_GPL(devm_acpi_dma_controller_free);
+
+/**
+ * acpi_dma_update_dma_spec - prepare dma specifier to pass to translation function
+ * @adma:	struct acpi_dma of DMA controller
+ * @dma_spec:	dma specifier to update
+ *
+ * According to the ACPI 5.0 Specification, Table 6-170 "Fixed DMA Resource
+ * Descriptor":
+ *	DMA Request Line bits is a platform-relative number uniquely
+ *	identifying the request line assigned. Request line-to-Controller
+ *	mapping is done in a controller-specific OS driver.
+ * That's why we can safely adjust slave_id when the appropriate controller is
+ * found.
+ *
+ * Return:
+ * 0, if no information is available, -1 on mismatch, and 1 otherwise.
+ */
+static int acpi_dma_update_dma_spec(struct acpi_dma *adma,
+		struct acpi_dma_spec *dma_spec)
+{
+	/* Set link to the DMA controller device */
+	dma_spec->dev = adma->dev;
+
+	/* Check if the request line range is available */
+	if (adma->base_request_line == 0 && adma->end_request_line == 0)
+		return 0;
+
+	/* Check if slave_id falls into the range */
+	if (dma_spec->slave_id < adma->base_request_line ||
+	    dma_spec->slave_id > adma->end_request_line)
+		return -1;
+
+	/*
+	 * Here we adjust slave_id. It should be a relative number to the base
+	 * request line.
+	 */
+	dma_spec->slave_id -= adma->base_request_line;
+
+	return 1;
+}
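+
+/*
+ * Worked example (illustrative numbers only, not taken from any particular
+ * platform): if the CSRT gave this controller base_request_line = 16 and
+ * end_request_line = 23, a FixedDMA descriptor carrying request line 18
+ * matches the range and its slave_id is rewritten to the controller-relative
+ * value 18 - 16 = 2 before the translation function is called.
+ */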
+
+struct acpi_dma_parser_data {
+	struct acpi_dma_spec dma_spec;
+	size_t index;
+	size_t n;
+};
+
+/**
+ * acpi_dma_parse_fixed_dma - Parse FixedDMA ACPI resources to a DMA specifier
+ * @res:	struct acpi_resource to get FixedDMA resources from
+ * @data:	pointer to a helper struct acpi_dma_parser_data
+ */
+static int acpi_dma_parse_fixed_dma(struct acpi_resource *res, void *data)
+{
+	struct acpi_dma_parser_data *pdata = data;
+
+	if (res->type == ACPI_RESOURCE_TYPE_FIXED_DMA) {
+		struct acpi_resource_fixed_dma *dma = &res->data.fixed_dma;
+
+		if (pdata->n++ == pdata->index) {
+			pdata->dma_spec.chan_id = dma->channels;
+			pdata->dma_spec.slave_id = dma->request_lines;
+		}
+	}
+
+	/* Tell the ACPI core to skip this resource */
+	return 1;
+}
+
+/**
+ * acpi_dma_request_slave_chan_by_index - Get the DMA slave channel
+ * @dev:	struct device to get DMA request from
+ * @index:	index of FixedDMA descriptor for @dev
+ *
+ * Return:
+ * Pointer to appropriate dma channel on success or an error pointer.
+ */
+struct dma_chan *acpi_dma_request_slave_chan_by_index(struct device *dev,
+		size_t index)
+{
+	struct acpi_dma_parser_data pdata;
+	struct acpi_dma_spec *dma_spec = &pdata.dma_spec;
+	struct list_head resource_list;
+	struct acpi_device *adev;
+	struct acpi_dma *adma;
+	struct dma_chan *chan = NULL;
+	int found;
+
+	/* Check if the device was enumerated by ACPI */
+	if (!dev)
+		return ERR_PTR(-ENODEV);
+
+	adev = ACPI_COMPANION(dev);
+	if (!adev)
+		return ERR_PTR(-ENODEV);
+
+	memset(&pdata, 0, sizeof(pdata));
+	pdata.index = index;
+
+	/* Initial values for the request line and channel */
+	dma_spec->chan_id = -1;
+	dma_spec->slave_id = -1;
+
+	INIT_LIST_HEAD(&resource_list);
+	acpi_dev_get_resources(adev, &resource_list,
+			acpi_dma_parse_fixed_dma, &pdata);
+	acpi_dev_free_resource_list(&resource_list);
+
+	if (dma_spec->slave_id < 0 || dma_spec->chan_id < 0)
+		return ERR_PTR(-ENODEV);
+
+	mutex_lock(&acpi_dma_lock);
+
+	list_for_each_entry(adma, &acpi_dma_list, dma_controllers) {
+		/*
+		 * We are not going to call the translation function if slave_id
+		 * doesn't fall into the request line range.
+		 */
+		found = acpi_dma_update_dma_spec(adma, dma_spec);
+		if (found < 0)
+			continue;
+		chan = adma->acpi_dma_xlate(dma_spec, adma);
+		/*
+		 * Try to get a channel only from the DMA controller that
+		 * matches the slave_id. See acpi_dma_update_dma_spec()
+		 * description for the details.
+		 */
+		if (found > 0 || chan)
+			break;
+	}
+
+	mutex_unlock(&acpi_dma_lock);
+	return chan ? chan : ERR_PTR(-EPROBE_DEFER);
+}
+EXPORT_SYMBOL_GPL(acpi_dma_request_slave_chan_by_index);
+
+/**
+ * acpi_dma_request_slave_chan_by_name - Get the DMA slave channel
+ * @dev:	struct device to get DMA request from
+ * @name:	represents corresponding FixedDMA descriptor for @dev
+ *
+ * In order to support both Device Tree and ACPI in a single driver we
+ * translate the names "tx" and "rx" here based on the most common case where
+ * the first FixedDMA descriptor is TX and second is RX.
+ *
+ * If the device has "dma-names" property the FixedDMA descriptor indices
+ * are retrieved based on those. Otherwise the function falls back using
+ * hardcoded indices.
+ *
+ * Return:
+ * Pointer to appropriate dma channel on success or an error pointer.
+ */
+struct dma_chan *acpi_dma_request_slave_chan_by_name(struct device *dev,
+		const char *name)
+{
+	int index;
+
+	index = device_property_match_string(dev, "dma-names", name);
+	if (index < 0) {
+		if (!strcmp(name, "tx"))
+			index = 0;
+		else if (!strcmp(name, "rx"))
+			index = 1;
+		else
+			return ERR_PTR(-ENODEV);
+	}
+
+	dev_dbg(dev, "Looking for DMA channel \"%s\" at index %d...\n", name, index);
+	return acpi_dma_request_slave_chan_by_index(dev, index);
+}
+EXPORT_SYMBOL_GPL(acpi_dma_request_slave_chan_by_name);
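+
+/*
+ * Illustrative sketch (not part of the original driver code): a slave driver
+ * whose ACPI device carries a pair of FixedDMA descriptors usually reaches
+ * this helper through the generic dma_request_chan() path, but it can also
+ * be called directly, e.g.:
+ *
+ *	struct dma_chan *rx = acpi_dma_request_slave_chan_by_name(dev, "rx");
+ *
+ *	if (IS_ERR(rx))
+ *		return PTR_ERR(rx);
+ *
+ * where the error may be -EPROBE_DEFER if no matching controller has been
+ * registered yet.
+ */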
+
+/**
+ * acpi_dma_simple_xlate - Simple ACPI DMA engine translation helper
+ * @dma_spec: pointer to ACPI DMA specifier
+ * @adma: pointer to ACPI DMA controller data
+ *
+ * A simple translation function for ACPI based devices. Passes &struct
+ * dma_spec to the DMA controller driver provided filter function.
+ *
+ * Return:
+ * Pointer to the channel if found or %NULL otherwise.
+ */
+struct dma_chan *acpi_dma_simple_xlate(struct acpi_dma_spec *dma_spec,
+		struct acpi_dma *adma)
+{
+	struct acpi_dma_filter_info *info = adma->data;
+
+	if (!info || !info->filter_fn)
+		return NULL;
+
+	return dma_request_channel(info->dma_cap, info->filter_fn, dma_spec);
+}
+EXPORT_SYMBOL_GPL(acpi_dma_simple_xlate);
diff --git a/drivers/dma/altera-msgdma.c b/drivers/dma/altera-msgdma.c
new file mode 100644
index 0000000..55f9c62
--- /dev/null
+++ b/drivers/dma/altera-msgdma.c
@@ -0,0 +1,934 @@
+/*
+ * DMA driver for Altera mSGDMA IP core
+ *
+ * Copyright (C) 2017 Stefan Roese <sr@denx.de>
+ *
+ * Based on drivers/dma/xilinx/zynqmp_dma.c, which is:
+ * Copyright (C) 2016 Xilinx, Inc. All rights reserved.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "dmaengine.h"
+
+#define MSGDMA_MAX_TRANS_LEN		U32_MAX
+#define MSGDMA_DESC_NUM			1024
+
+/**
+ * struct msgdma_extended_desc - implements an extended descriptor
+ * @read_addr_lo: data buffer source address low bits
+ * @write_addr_lo: data buffer destination address low bits
+ * @len: the number of bytes to transfer per descriptor
+ * @burst_seq_num: bit 31:24 write burst
+ *		   bit 23:16 read burst
+ *		   bit 15:00 sequence number
+ * @stride: bit 31:16 write stride
+ *	    bit 15:00 read stride
+ * @read_addr_hi: data buffer source address high bits
+ * @write_addr_hi: data buffer destination address high bits
+ * @control: characteristics of the transfer
+ */
+struct msgdma_extended_desc {
+	u32 read_addr_lo;
+	u32 write_addr_lo;
+	u32 len;
+	u32 burst_seq_num;
+	u32 stride;
+	u32 read_addr_hi;
+	u32 write_addr_hi;
+	u32 control;
+};
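+
+/*
+ * Illustrative field encoding (values chosen arbitrarily, not taken from the
+ * original sources): a descriptor with write burst 8, read burst 4 and
+ * sequence number 1 would carry
+ *
+ *	burst_seq_num = (8 << 24) | (4 << 16) | 1;
+ *
+ * (this driver always writes 0 here to request the maximum burst length),
+ * and a memory-to-memory transfer that advances both the read and the write
+ * address uses stride = 0x00010001, i.e. MSGDMA_DESC_STRIDE_RW below.
+ */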
+
+/* mSGDMA descriptor control field bit definitions */
+#define MSGDMA_DESC_CTL_SET_CH(x)	((x) & 0xff)
+#define MSGDMA_DESC_CTL_GEN_SOP		BIT(8)
+#define MSGDMA_DESC_CTL_GEN_EOP		BIT(9)
+#define MSGDMA_DESC_CTL_PARK_READS	BIT(10)
+#define MSGDMA_DESC_CTL_PARK_WRITES	BIT(11)
+#define MSGDMA_DESC_CTL_END_ON_EOP	BIT(12)
+#define MSGDMA_DESC_CTL_END_ON_LEN	BIT(13)
+#define MSGDMA_DESC_CTL_TR_COMP_IRQ	BIT(14)
+#define MSGDMA_DESC_CTL_EARLY_IRQ	BIT(15)
+#define MSGDMA_DESC_CTL_TR_ERR_IRQ	GENMASK(23, 16)
+#define MSGDMA_DESC_CTL_EARLY_DONE	BIT(24)
+
+/*
+ * Writing "1" the "go" bit commits the entire descriptor into the
+ * descriptor FIFO(s)
+ */
+#define MSGDMA_DESC_CTL_GO		BIT(31)
+
+/* Tx buffer control flags */
+#define MSGDMA_DESC_CTL_TX_FIRST	(MSGDMA_DESC_CTL_GEN_SOP |	\
+					 MSGDMA_DESC_CTL_TR_ERR_IRQ |	\
+					 MSGDMA_DESC_CTL_GO)
+
+#define MSGDMA_DESC_CTL_TX_MIDDLE	(MSGDMA_DESC_CTL_TR_ERR_IRQ |	\
+					 MSGDMA_DESC_CTL_GO)
+
+#define MSGDMA_DESC_CTL_TX_LAST		(MSGDMA_DESC_CTL_GEN_EOP |	\
+					 MSGDMA_DESC_CTL_TR_COMP_IRQ |	\
+					 MSGDMA_DESC_CTL_TR_ERR_IRQ |	\
+					 MSGDMA_DESC_CTL_GO)
+
+#define MSGDMA_DESC_CTL_TX_SINGLE	(MSGDMA_DESC_CTL_GEN_SOP |	\
+					 MSGDMA_DESC_CTL_GEN_EOP |	\
+					 MSGDMA_DESC_CTL_TR_COMP_IRQ |	\
+					 MSGDMA_DESC_CTL_TR_ERR_IRQ |	\
+					 MSGDMA_DESC_CTL_GO)
+
+#define MSGDMA_DESC_CTL_RX_SINGLE	(MSGDMA_DESC_CTL_END_ON_EOP |	\
+					 MSGDMA_DESC_CTL_END_ON_LEN |	\
+					 MSGDMA_DESC_CTL_TR_COMP_IRQ |	\
+					 MSGDMA_DESC_CTL_EARLY_IRQ |	\
+					 MSGDMA_DESC_CTL_TR_ERR_IRQ |	\
+					 MSGDMA_DESC_CTL_GO)
+
+/* mSGDMA extended descriptor stride definitions */
+#define MSGDMA_DESC_STRIDE_RD		0x00000001
+#define MSGDMA_DESC_STRIDE_WR		0x00010000
+#define MSGDMA_DESC_STRIDE_RW		0x00010001
+
+/* mSGDMA dispatcher control and status register map */
+#define MSGDMA_CSR_STATUS		0x00	/* Read / Clear */
+#define MSGDMA_CSR_CONTROL		0x04	/* Read / Write */
+#define MSGDMA_CSR_RW_FILL_LEVEL	0x08	/* 31:16 - write fill level */
+						/* 15:00 - read fill level */
+#define MSGDMA_CSR_RESP_FILL_LEVEL	0x0c	/* response FIFO fill level */
+#define MSGDMA_CSR_RW_SEQ_NUM		0x10	/* 31:16 - write seq number */
+						/* 15:00 - read seq number */
+
+/* mSGDMA CSR status register bit definitions */
+#define MSGDMA_CSR_STAT_BUSY			BIT(0)
+#define MSGDMA_CSR_STAT_DESC_BUF_EMPTY		BIT(1)
+#define MSGDMA_CSR_STAT_DESC_BUF_FULL		BIT(2)
+#define MSGDMA_CSR_STAT_RESP_BUF_EMPTY		BIT(3)
+#define MSGDMA_CSR_STAT_RESP_BUF_FULL		BIT(4)
+#define MSGDMA_CSR_STAT_STOPPED			BIT(5)
+#define MSGDMA_CSR_STAT_RESETTING		BIT(6)
+#define MSGDMA_CSR_STAT_STOPPED_ON_ERR		BIT(7)
+#define MSGDMA_CSR_STAT_STOPPED_ON_EARLY	BIT(8)
+#define MSGDMA_CSR_STAT_IRQ			BIT(9)
+#define MSGDMA_CSR_STAT_MASK			GENMASK(9, 0)
+#define MSGDMA_CSR_STAT_MASK_WITHOUT_IRQ	GENMASK(8, 0)
+
+#define DESC_EMPTY	(MSGDMA_CSR_STAT_DESC_BUF_EMPTY | \
+			 MSGDMA_CSR_STAT_RESP_BUF_EMPTY)
+
+/* mSGDMA CSR control register bit definitions */
+#define MSGDMA_CSR_CTL_STOP			BIT(0)
+#define MSGDMA_CSR_CTL_RESET			BIT(1)
+#define MSGDMA_CSR_CTL_STOP_ON_ERR		BIT(2)
+#define MSGDMA_CSR_CTL_STOP_ON_EARLY		BIT(3)
+#define MSGDMA_CSR_CTL_GLOBAL_INTR		BIT(4)
+#define MSGDMA_CSR_CTL_STOP_DESCS		BIT(5)
+
+/* mSGDMA CSR fill level bits */
+#define MSGDMA_CSR_WR_FILL_LEVEL_GET(v)		(((v) & 0xffff0000) >> 16)
+#define MSGDMA_CSR_RD_FILL_LEVEL_GET(v)		((v) & 0x0000ffff)
+#define MSGDMA_CSR_RESP_FILL_LEVEL_GET(v)	((v) & 0x0000ffff)
+
+#define MSGDMA_CSR_SEQ_NUM_GET(v)		(((v) & 0xffff0000) >> 16)
+
+/* mSGDMA response register map */
+#define MSGDMA_RESP_BYTES_TRANSFERRED	0x00
+#define MSGDMA_RESP_STATUS		0x04
+
+/* mSGDMA response register bit definitions */
+#define MSGDMA_RESP_EARLY_TERM	BIT(8)
+#define MSGDMA_RESP_ERR_MASK	0xff
+
+/**
+ * struct msgdma_sw_desc - implements a sw descriptor
+ * @async_tx: support for the async_tx api
+ * @hw_desc: associated HW descriptor
+ * @node: node to move the descriptor between the driver's descriptor lists
+ * @tx_list: list of chained descriptors belonging to this transfer
+ */
+struct msgdma_sw_desc {
+	struct dma_async_tx_descriptor async_tx;
+	struct msgdma_extended_desc hw_desc;
+	struct list_head node;
+	struct list_head tx_list;
+};
+
+/**
+ * struct msgdma_device - DMA device structure
+ */
+struct msgdma_device {
+	spinlock_t lock;
+	struct device *dev;
+	struct tasklet_struct irq_tasklet;
+	struct list_head pending_list;
+	struct list_head free_list;
+	struct list_head active_list;
+	struct list_head done_list;
+	u32 desc_free_cnt;
+	bool idle;
+
+	struct dma_device dmadev;
+	struct dma_chan	dmachan;
+	dma_addr_t hw_desq;
+	struct msgdma_sw_desc *sw_desq;
+	unsigned int npendings;
+
+	struct dma_slave_config slave_cfg;
+
+	int irq;
+
+	/* mSGDMA controller */
+	void __iomem *csr;
+
+	/* mSGDMA descriptors */
+	void __iomem *desc;
+
+	/* mSGDMA response */
+	void __iomem *resp;
+};
+
+#define to_mdev(chan)	container_of(chan, struct msgdma_device, dmachan)
+#define tx_to_desc(tx)	container_of(tx, struct msgdma_sw_desc, async_tx)
+
+/**
+ * msgdma_get_descriptor - Get the sw descriptor from the pool
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ *
+ * Return: The sw descriptor
+ */
+static struct msgdma_sw_desc *msgdma_get_descriptor(struct msgdma_device *mdev)
+{
+	struct msgdma_sw_desc *desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&mdev->lock, flags);
+	desc = list_first_entry(&mdev->free_list, struct msgdma_sw_desc, node);
+	list_del(&desc->node);
+	spin_unlock_irqrestore(&mdev->lock, flags);
+
+	INIT_LIST_HEAD(&desc->tx_list);
+
+	return desc;
+}
+
+/**
+ * msgdma_free_descriptor - Free a descriptor back to the free list
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ * @desc: Transaction descriptor pointer
+ */
+static void msgdma_free_descriptor(struct msgdma_device *mdev,
+				   struct msgdma_sw_desc *desc)
+{
+	struct msgdma_sw_desc *child, *next;
+
+	mdev->desc_free_cnt++;
+	list_add_tail(&desc->node, &mdev->free_list);
+	list_for_each_entry_safe(child, next, &desc->tx_list, node) {
+		mdev->desc_free_cnt++;
+		list_move_tail(&child->node, &mdev->free_list);
+	}
+}
+
+/**
+ * msgdma_free_desc_list - Free descriptors list
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ * @list: List to parse and delete the descriptor
+ */
+static void msgdma_free_desc_list(struct msgdma_device *mdev,
+				  struct list_head *list)
+{
+	struct msgdma_sw_desc *desc, *next;
+
+	list_for_each_entry_safe(desc, next, list, node)
+		msgdma_free_descriptor(mdev, desc);
+}
+
+/**
+ * msgdma_desc_config - Configure the descriptor
+ * @desc: Hw descriptor pointer
+ * @dst: Destination buffer address
+ * @src: Source buffer address
+ * @len: Transfer length
+ * @stride: Read/write stride value to program into the descriptor
+ */
+static void msgdma_desc_config(struct msgdma_extended_desc *desc,
+			       dma_addr_t dst, dma_addr_t src, size_t len,
+			       u32 stride)
+{
+	/* Set lower 32bits of src & dst addresses in the descriptor */
+	desc->read_addr_lo = lower_32_bits(src);
+	desc->write_addr_lo = lower_32_bits(dst);
+
+	/* Set upper 32bits of src & dst addresses in the descriptor */
+	desc->read_addr_hi = upper_32_bits(src);
+	desc->write_addr_hi = upper_32_bits(dst);
+
+	desc->len = len;
+	desc->stride = stride;
+	desc->burst_seq_num = 0;	/* 0 will result in max burst length */
+
+	/*
+	 * Don't set interrupt on xfer end yet, this will be done later
+	 * for the "last" descriptor
+	 */
+	desc->control = MSGDMA_DESC_CTL_TR_ERR_IRQ | MSGDMA_DESC_CTL_GO |
+		MSGDMA_DESC_CTL_END_ON_LEN;
+}
+
+/**
+ * msgdma_desc_config_eod - Mark the descriptor as end descriptor
+ * @desc: Hw descriptor pointer
+ */
+static void msgdma_desc_config_eod(struct msgdma_extended_desc *desc)
+{
+	desc->control |= MSGDMA_DESC_CTL_TR_COMP_IRQ;
+}
+
+/**
+ * msgdma_tx_submit - Submit DMA transaction
+ * @tx: Async transaction descriptor pointer
+ *
+ * Return: cookie value
+ */
+static dma_cookie_t msgdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct msgdma_device *mdev = to_mdev(tx->chan);
+	struct msgdma_sw_desc *new;
+	dma_cookie_t cookie;
+	unsigned long flags;
+
+	new = tx_to_desc(tx);
+	spin_lock_irqsave(&mdev->lock, flags);
+	cookie = dma_cookie_assign(tx);
+
+	list_add_tail(&new->node, &mdev->pending_list);
+	spin_unlock_irqrestore(&mdev->lock, flags);
+
+	return cookie;
+}
+
+/**
+ * msgdma_prep_memcpy - prepare descriptors for memcpy transaction
+ * @dchan: DMA channel
+ * @dma_dst: Destination buffer address
+ * @dma_src: Source buffer address
+ * @len: Transfer length
+ * @flags: transfer ack flags
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *
+msgdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst,
+		   dma_addr_t dma_src, size_t len, ulong flags)
+{
+	struct msgdma_device *mdev = to_mdev(dchan);
+	struct msgdma_sw_desc *new, *first = NULL;
+	struct msgdma_extended_desc *desc;
+	size_t copy;
+	u32 desc_cnt;
+	unsigned long irqflags;
+
+	desc_cnt = DIV_ROUND_UP(len, MSGDMA_MAX_TRANS_LEN);
+
+	spin_lock_irqsave(&mdev->lock, irqflags);
+	if (desc_cnt > mdev->desc_free_cnt) {
+		spin_unlock_irqrestore(&mdev->lock, irqflags);
+		dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev);
+		return NULL;
+	}
+	mdev->desc_free_cnt -= desc_cnt;
+	spin_unlock_irqrestore(&mdev->lock, irqflags);
+
+	do {
+		/* Allocate and populate the descriptor */
+		new = msgdma_get_descriptor(mdev);
+
+		copy = min_t(size_t, len, MSGDMA_MAX_TRANS_LEN);
+		desc = &new->hw_desc;
+		msgdma_desc_config(desc, dma_dst, dma_src, copy,
+				   MSGDMA_DESC_STRIDE_RW);
+		len -= copy;
+		dma_src += copy;
+		dma_dst += copy;
+		if (!first)
+			first = new;
+		else
+			list_add_tail(&new->node, &first->tx_list);
+	} while (len);
+
+	msgdma_desc_config_eod(desc);
+	async_tx_ack(&first->async_tx);
+	first->async_tx.flags = flags;
+
+	return &first->async_tx;
+}
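+
+/*
+ * Illustrative client-side sketch (assumed generic dmaengine usage, not part
+ * of this driver): after requesting a DMA_MEMCPY capable channel (e.g. via
+ * dma_request_chan_by_mask()), a memcpy consumer would typically do
+ *
+ *	tx = dmaengine_prep_dma_memcpy(chan, dst, src, len,
+ *				       DMA_PREP_INTERRUPT);
+ *	cookie = dmaengine_submit(tx);
+ *	dma_async_issue_pending(chan);
+ *
+ * which ends up in msgdma_prep_memcpy(), msgdma_tx_submit() and
+ * msgdma_issue_pending() in this driver.
+ */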
+
+/**
+ * msgdma_prep_slave_sg - prepare descriptors for a slave sg transaction
+ *
+ * @dchan: DMA channel
+ * @sgl: Destination scatter list
+ * @sg_len: Number of entries in destination scatter list
+ * @dir: DMA transfer direction
+ * @flags: transfer ack flags
+ * @context: transfer context (unused)
+ */
+static struct dma_async_tx_descriptor *
+msgdma_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
+		     unsigned int sg_len, enum dma_transfer_direction dir,
+		     unsigned long flags, void *context)
+{
+	struct msgdma_device *mdev = to_mdev(dchan);
+	struct dma_slave_config *cfg = &mdev->slave_cfg;
+	struct msgdma_sw_desc *new, *first = NULL;
+	void *desc = NULL;
+	size_t len, avail;
+	dma_addr_t dma_dst, dma_src;
+	u32 desc_cnt = 0, i;
+	struct scatterlist *sg;
+	u32 stride;
+	unsigned long irqflags;
+
+	for_each_sg(sgl, sg, sg_len, i)
+		desc_cnt += DIV_ROUND_UP(sg_dma_len(sg), MSGDMA_MAX_TRANS_LEN);
+
+	spin_lock_irqsave(&mdev->lock, irqflags);
+	if (desc_cnt > mdev->desc_free_cnt) {
+		spin_unlock_irqrestore(&mdev->lock, irqflags);
+		dev_dbg(mdev->dev, "mdev %p descs are not available\n", mdev);
+		return NULL;
+	}
+	mdev->desc_free_cnt -= desc_cnt;
+	spin_unlock_irqrestore(&mdev->lock, irqflags);
+
+	avail = sg_dma_len(sgl);
+
+	/* Run until we are out of scatterlist entries */
+	while (true) {
+		/* Allocate and populate the descriptor */
+		new = msgdma_get_descriptor(mdev);
+
+		desc = &new->hw_desc;
+		len = min_t(size_t, avail, MSGDMA_MAX_TRANS_LEN);
+
+		if (dir == DMA_MEM_TO_DEV) {
+			dma_src = sg_dma_address(sgl) + sg_dma_len(sgl) - avail;
+			dma_dst = cfg->dst_addr;
+			stride = MSGDMA_DESC_STRIDE_RD;
+		} else {
+			dma_src = cfg->src_addr;
+			dma_dst = sg_dma_address(sgl) + sg_dma_len(sgl) - avail;
+			stride = MSGDMA_DESC_STRIDE_WR;
+		}
+		msgdma_desc_config(desc, dma_dst, dma_src, len, stride);
+		avail -= len;
+
+		if (!first)
+			first = new;
+		else
+			list_add_tail(&new->node, &first->tx_list);
+
+		/* Fetch the next scatterlist entry */
+		if (avail == 0) {
+			if (sg_len == 0)
+				break;
+			sgl = sg_next(sgl);
+			if (sgl == NULL)
+				break;
+			sg_len--;
+			avail = sg_dma_len(sgl);
+		}
+	}
+
+	msgdma_desc_config_eod(desc);
+	first->async_tx.flags = flags;
+
+	return &first->async_tx;
+}
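+
+/*
+ * Illustrative client-side sketch (assumed generic dmaengine usage, not part
+ * of this driver): a peripheral driver doing a mem-to-device transfer would
+ * roughly do
+ *
+ *	struct dma_slave_config cfg = { .dst_addr = fifo_phys_addr };
+ *
+ *	dmaengine_slave_config(chan, &cfg);
+ *	desc = dmaengine_prep_slave_sg(chan, sgl, nents, DMA_MEM_TO_DEV,
+ *				       DMA_PREP_INTERRUPT);
+ *	dmaengine_submit(desc);
+ *	dma_async_issue_pending(chan);
+ *
+ * where chan, sgl, nents and fifo_phys_addr are placeholders; the slave
+ * configuration ends up in msgdma_dma_config() below.
+ */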
+
+static int msgdma_dma_config(struct dma_chan *dchan,
+			     struct dma_slave_config *config)
+{
+	struct msgdma_device *mdev = to_mdev(dchan);
+
+	memcpy(&mdev->slave_cfg, config, sizeof(*config));
+
+	return 0;
+}
+
+static void msgdma_reset(struct msgdma_device *mdev)
+{
+	u32 val;
+	int ret;
+
+	/* Reset mSGDMA */
+	iowrite32(MSGDMA_CSR_STAT_MASK, mdev->csr + MSGDMA_CSR_STATUS);
+	iowrite32(MSGDMA_CSR_CTL_RESET, mdev->csr + MSGDMA_CSR_CONTROL);
+
+	ret = readl_poll_timeout(mdev->csr + MSGDMA_CSR_STATUS, val,
+				 (val & MSGDMA_CSR_STAT_RESETTING) == 0,
+				 1, 10000);
+	if (ret)
+		dev_err(mdev->dev, "DMA channel did not reset\n");
+
+	/* Clear all status bits */
+	iowrite32(MSGDMA_CSR_STAT_MASK, mdev->csr + MSGDMA_CSR_STATUS);
+
+	/* Enable the DMA controller including interrupts */
+	iowrite32(MSGDMA_CSR_CTL_STOP_ON_ERR | MSGDMA_CSR_CTL_STOP_ON_EARLY |
+		  MSGDMA_CSR_CTL_GLOBAL_INTR, mdev->csr + MSGDMA_CSR_CONTROL);
+
+	mdev->idle = true;
+};
+
+static void msgdma_copy_one(struct msgdma_device *mdev,
+			    struct msgdma_sw_desc *desc)
+{
+	void __iomem *hw_desc = mdev->desc;
+
+	/*
+	 * Check if the DESC FIFO is not full. If it is full, we need to wait
+	 * for at least one entry to become free again.
+	 */
+	while (ioread32(mdev->csr + MSGDMA_CSR_STATUS) &
+	       MSGDMA_CSR_STAT_DESC_BUF_FULL)
+		mdelay(1);
+
+	/*
+	 * The descriptor needs to get copied into the descriptor FIFO
+	 * of the DMA controller. The descriptor will get flushed to the
+	 * FIFO, once the last word (control word) is written. Since we
+	 * are not 100% sure that memcpy() writes all words in the "correct"
+	 * order (addresses from low to high) on all architectures, we make
+	 * sure this control word is written last, by writing it separately
+	 * and adding write barriers here.
+	 */
+	memcpy((void __force *)hw_desc, &desc->hw_desc,
+	       sizeof(desc->hw_desc) - sizeof(u32));
+
+	/* Write control word last to flush this descriptor into the FIFO */
+	mdev->idle = false;
+	wmb();
+	iowrite32(desc->hw_desc.control, hw_desc +
+		  offsetof(struct msgdma_extended_desc, control));
+	wmb();
+}
+
+/**
+ * msgdma_copy_desc_to_fifo - copy descriptor(s) into controller FIFO
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ * @desc: Transaction descriptor pointer
+ */
+static void msgdma_copy_desc_to_fifo(struct msgdma_device *mdev,
+				     struct msgdma_sw_desc *desc)
+{
+	struct msgdma_sw_desc *sdesc, *next;
+
+	msgdma_copy_one(mdev, desc);
+
+	list_for_each_entry_safe(sdesc, next, &desc->tx_list, node)
+		msgdma_copy_one(mdev, sdesc);
+}
+
+/**
+ * msgdma_start_transfer - Initiate the new transfer
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ */
+static void msgdma_start_transfer(struct msgdma_device *mdev)
+{
+	struct msgdma_sw_desc *desc;
+
+	if (!mdev->idle)
+		return;
+
+	desc = list_first_entry_or_null(&mdev->pending_list,
+					struct msgdma_sw_desc, node);
+	if (!desc)
+		return;
+
+	list_splice_tail_init(&mdev->pending_list, &mdev->active_list);
+	msgdma_copy_desc_to_fifo(mdev, desc);
+}
+
+/**
+ * msgdma_issue_pending - Issue pending transactions
+ * @chan: DMA channel pointer
+ */
+static void msgdma_issue_pending(struct dma_chan *chan)
+{
+	struct msgdma_device *mdev = to_mdev(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&mdev->lock, flags);
+	msgdma_start_transfer(mdev);
+	spin_unlock_irqrestore(&mdev->lock, flags);
+}
+
+/**
+ * msgdma_chan_desc_cleanup - Cleanup the completed descriptors
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ */
+static void msgdma_chan_desc_cleanup(struct msgdma_device *mdev)
+{
+	struct msgdma_sw_desc *desc, *next;
+
+	list_for_each_entry_safe(desc, next, &mdev->done_list, node) {
+		dma_async_tx_callback callback;
+		void *callback_param;
+
+		list_del(&desc->node);
+
+		callback = desc->async_tx.callback;
+		callback_param = desc->async_tx.callback_param;
+		if (callback) {
+			spin_unlock(&mdev->lock);
+			callback(callback_param);
+			spin_lock(&mdev->lock);
+		}
+
+		/* Run any dependencies, then free the descriptor */
+		msgdma_free_descriptor(mdev, desc);
+	}
+}
+
+/**
+ * msgdma_complete_descriptor - Mark the active descriptor as complete
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ */
+static void msgdma_complete_descriptor(struct msgdma_device *mdev)
+{
+	struct msgdma_sw_desc *desc;
+
+	desc = list_first_entry_or_null(&mdev->active_list,
+					struct msgdma_sw_desc, node);
+	if (!desc)
+		return;
+	list_del(&desc->node);
+	dma_cookie_complete(&desc->async_tx);
+	list_add_tail(&desc->node, &mdev->done_list);
+}
+
+/**
+ * msgdma_free_descriptors - Free channel descriptors
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ */
+static void msgdma_free_descriptors(struct msgdma_device *mdev)
+{
+	msgdma_free_desc_list(mdev, &mdev->active_list);
+	msgdma_free_desc_list(mdev, &mdev->pending_list);
+	msgdma_free_desc_list(mdev, &mdev->done_list);
+}
+
+/**
+ * msgdma_free_chan_resources - Free channel resources
+ * @dchan: DMA channel pointer
+ */
+static void msgdma_free_chan_resources(struct dma_chan *dchan)
+{
+	struct msgdma_device *mdev = to_mdev(dchan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&mdev->lock, flags);
+	msgdma_free_descriptors(mdev);
+	spin_unlock_irqrestore(&mdev->lock, flags);
+	kfree(mdev->sw_desq);
+}
+
+/**
+ * msgdma_alloc_chan_resources - Allocate channel resources
+ * @dchan: DMA channel
+ *
+ * Return: Number of descriptors on success and failure value on error
+ */
+static int msgdma_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct msgdma_device *mdev = to_mdev(dchan);
+	struct msgdma_sw_desc *desc;
+	int i;
+
+	mdev->sw_desq = kcalloc(MSGDMA_DESC_NUM, sizeof(*desc), GFP_NOWAIT);
+	if (!mdev->sw_desq)
+		return -ENOMEM;
+
+	mdev->idle = true;
+	mdev->desc_free_cnt = MSGDMA_DESC_NUM;
+
+	INIT_LIST_HEAD(&mdev->free_list);
+
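+	/*
+	 * Pre-allocate all MSGDMA_DESC_NUM software descriptors up front and
+	 * park them on the free list; the prep routines later take them from
+	 * there instead of allocating at prepare time.
+	 */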
+	for (i = 0; i < MSGDMA_DESC_NUM; i++) {
+		desc = mdev->sw_desq + i;
+		dma_async_tx_descriptor_init(&desc->async_tx, &mdev->dmachan);
+		desc->async_tx.tx_submit = msgdma_tx_submit;
+		list_add_tail(&desc->node, &mdev->free_list);
+	}
+
+	return MSGDMA_DESC_NUM;
+}
+
+/**
+ * msgdma_tasklet - Tasklet handler that processes completed descriptors
+ * @data: Pointer to the Altera mSGDMA device structure
+ */
+static void msgdma_tasklet(unsigned long data)
+{
+	struct msgdma_device *mdev = (struct msgdma_device *)data;
+	u32 count;
+	u32 __maybe_unused size;
+	u32 __maybe_unused status;
+	unsigned long flags;
+
+	spin_lock_irqsave(&mdev->lock, flags);
+
+	/* Read number of responses that are available */
+	count = ioread32(mdev->csr + MSGDMA_CSR_RESP_FILL_LEVEL);
+	dev_dbg(mdev->dev, "%s (%d): response count=%d\n",
+		__func__, __LINE__, count);
+
+	while (count--) {
+		/*
+		 * Read both longwords to purge this response from the FIFO
+		 * On Avalon-MM implementations, size and status do not
+		 * have any real values, like transferred bytes or error
+		 * bits. So we need to just drop these values.
+		 */
+		size = ioread32(mdev->resp + MSGDMA_RESP_BYTES_TRANSFERRED);
+		status = ioread32(mdev->resp + MSGDMA_RESP_STATUS);
+
+		msgdma_complete_descriptor(mdev);
+		msgdma_chan_desc_cleanup(mdev);
+	}
+
+	spin_unlock_irqrestore(&mdev->lock, flags);
+}
+
+/**
+ * msgdma_irq_handler - Altera mSGDMA Interrupt handler
+ * @irq: IRQ number
+ * @data: Pointer to the Altera mSGDMA device structure
+ *
+ * Return: IRQ_HANDLED/IRQ_NONE
+ */
+static irqreturn_t msgdma_irq_handler(int irq, void *data)
+{
+	struct msgdma_device *mdev = data;
+	u32 status;
+
+	status = ioread32(mdev->csr + MSGDMA_CSR_STATUS);
+	if ((status & MSGDMA_CSR_STAT_BUSY) == 0) {
+		/* Start next transfer if the DMA controller is idle */
+		spin_lock(&mdev->lock);
+		mdev->idle = true;
+		msgdma_start_transfer(mdev);
+		spin_unlock(&mdev->lock);
+	}
+
+	tasklet_schedule(&mdev->irq_tasklet);
+
+	/* Clear interrupt in mSGDMA controller */
+	iowrite32(MSGDMA_CSR_STAT_IRQ, mdev->csr + MSGDMA_CSR_STATUS);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * msgdma_dev_remove - Device remove function
+ * @mdev: Pointer to the Altera mSGDMA device structure
+ */
+static void msgdma_dev_remove(struct msgdma_device *mdev)
+{
+	if (!mdev)
+		return;
+
+	devm_free_irq(mdev->dev, mdev->irq, mdev);
+	tasklet_kill(&mdev->irq_tasklet);
+	list_del(&mdev->dmachan.device_node);
+}
+
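+/*
+ * request_and_map - look up a named MMIO resource, reserve the region and
+ * ioremap it.  Everything is device-managed (devm_*), so the mappings are
+ * released automatically when the driver is unbound.
+ */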
+static int request_and_map(struct platform_device *pdev, const char *name,
+			   struct resource **res, void __iomem **ptr)
+{
+	struct resource *region;
+	struct device *device = &pdev->dev;
+
+	*res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name);
+	if (*res == NULL) {
+		dev_err(device, "resource %s not defined\n", name);
+		return -ENODEV;
+	}
+
+	region = devm_request_mem_region(device, (*res)->start,
+					 resource_size(*res), dev_name(device));
+	if (region == NULL) {
+		dev_err(device, "unable to request %s\n", name);
+		return -EBUSY;
+	}
+
+	*ptr = devm_ioremap_nocache(device, region->start,
+				    resource_size(region));
+	if (*ptr == NULL) {
+		dev_err(device, "ioremap_nocache of %s failed!", name);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * msgdma_probe - Driver probe function
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Return: 0 on success, negative errno on error
+ */
+static int msgdma_probe(struct platform_device *pdev)
+{
+	struct msgdma_device *mdev;
+	struct dma_device *dma_dev;
+	struct resource *dma_res;
+	int ret;
+
+	mdev = devm_kzalloc(&pdev->dev, sizeof(*mdev), GFP_NOWAIT);
+	if (!mdev)
+		return -ENOMEM;
+
+	mdev->dev = &pdev->dev;
+
+	/* Map CSR space */
+	ret = request_and_map(pdev, "csr", &dma_res, &mdev->csr);
+	if (ret)
+		return ret;
+
+	/* Map (extended) descriptor space */
+	ret = request_and_map(pdev, "desc", &dma_res, &mdev->desc);
+	if (ret)
+		return ret;
+
+	/* Map response space */
+	ret = request_and_map(pdev, "resp", &dma_res, &mdev->resp);
+	if (ret)
+		return ret;
+
+	platform_set_drvdata(pdev, mdev);
+
+	/* Get interrupt nr from platform data */
+	mdev->irq = platform_get_irq(pdev, 0);
+	if (mdev->irq < 0)
+		return -ENXIO;
+
+	ret = devm_request_irq(&pdev->dev, mdev->irq, msgdma_irq_handler,
+			       0, dev_name(&pdev->dev), mdev);
+	if (ret)
+		return ret;
+
+	tasklet_init(&mdev->irq_tasklet, msgdma_tasklet, (unsigned long)mdev);
+
+	dma_cookie_init(&mdev->dmachan);
+
+	spin_lock_init(&mdev->lock);
+
+	INIT_LIST_HEAD(&mdev->active_list);
+	INIT_LIST_HEAD(&mdev->pending_list);
+	INIT_LIST_HEAD(&mdev->done_list);
+	INIT_LIST_HEAD(&mdev->free_list);
+
+	dma_dev = &mdev->dmadev;
+
+	/* Set DMA capabilities */
+	dma_cap_zero(dma_dev->cap_mask);
+	dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+	dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
+
+	dma_dev->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	dma_dev->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	dma_dev->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM) |
+		BIT(DMA_MEM_TO_MEM);
+	dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+
+	/* Init DMA link list */
+	INIT_LIST_HEAD(&dma_dev->channels);
+
+	/* Set base routines */
+	dma_dev->device_tx_status = dma_cookie_status;
+	dma_dev->device_issue_pending = msgdma_issue_pending;
+	dma_dev->dev = &pdev->dev;
+
+	dma_dev->copy_align = DMAENGINE_ALIGN_4_BYTES;
+	dma_dev->device_prep_dma_memcpy = msgdma_prep_memcpy;
+	dma_dev->device_prep_slave_sg = msgdma_prep_slave_sg;
+	dma_dev->device_config = msgdma_dma_config;
+
+	dma_dev->device_alloc_chan_resources = msgdma_alloc_chan_resources;
+	dma_dev->device_free_chan_resources = msgdma_free_chan_resources;
+
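+	/* The mSGDMA exposes a single DMA channel per controller instance */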
+	mdev->dmachan.device = dma_dev;
+	list_add_tail(&mdev->dmachan.device_node, &dma_dev->channels);
+
+	/* Set DMA mask to 64 bits */
+	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (ret) {
+		dev_warn(&pdev->dev, "unable to set coherent mask to 64");
+		ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+		if (ret)
+			goto fail;
+	}
+
+	msgdma_reset(mdev);
+
+	ret = dma_async_device_register(dma_dev);
+	if (ret)
+		goto fail;
+
+	dev_notice(&pdev->dev, "Altera mSGDMA driver probe success\n");
+
+	return 0;
+
+fail:
+	msgdma_dev_remove(mdev);
+
+	return ret;
+}
+
+/**
+ * msgdma_remove - Driver remove function
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Return: Always '0'
+ */
+static int msgdma_remove(struct platform_device *pdev)
+{
+	struct msgdma_device *mdev = platform_get_drvdata(pdev);
+
+	dma_async_device_unregister(&mdev->dmadev);
+	msgdma_dev_remove(mdev);
+
+	dev_notice(&pdev->dev, "Altera mSGDMA driver removed\n");
+
+	return 0;
+}
+
+static struct platform_driver msgdma_driver = {
+	.driver = {
+		.name = "altera-msgdma",
+	},
+	.probe = msgdma_probe,
+	.remove = msgdma_remove,
+};
+
+module_platform_driver(msgdma_driver);
+
+MODULE_ALIAS("platform:altera-msgdma");
+MODULE_DESCRIPTION("Altera mSGDMA driver");
+MODULE_AUTHOR("Stefan Roese <sr@denx.de>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
new file mode 100644
index 0000000..97483df
--- /dev/null
+++ b/drivers/dma/amba-pl08x.c
@@ -0,0 +1,3097 @@
+/*
+ * Copyright (c) 2006 ARM Ltd.
+ * Copyright (c) 2010 ST-Ericsson SA
+ * Copyright (c) 2017 Linaro Ltd.
+ *
+ * Author: Peter Pearse <peter.pearse@arm.com>
+ * Author: Linus Walleij <linus.walleij@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is in this distribution in the file
+ * called COPYING.
+ *
+ * Documentation: ARM DDI 0196G == PL080
+ * Documentation: ARM DDI 0218E == PL081
+ * Documentation: S3C6410 User's Manual == PL080S
+ *
+ * PL080 & PL081 both have 16 sets of DMA signals that can be routed to any
+ * channel.
+ *
+ * The PL080 has 8 channels available for simultaneous use, and the PL081
+ * has only two channels. So on these DMA controllers the number of channels
+ * and the number of incoming DMA signals are two totally different things.
+ * It is usually not possible to theoretically handle all physical signals,
+ * so a multiplexing scheme with possible denial of use is necessary.
+ *
+ * The PL080 has a dual bus master, PL081 has a single master.
+ *
+ * PL080S is a version modified by Samsung and used in S3C64xx SoCs.
+ * It differs in following aspects:
+ * - CH_CONFIG register at different offset,
+ * - separate CH_CONTROL2 register for transfer size,
+ * - bigger maximum transfer size,
+ * - 8-word aligned LLI, instead of 4-word, due to extra CCTL2 word,
+ * - no support for peripheral flow control.
+ *
+ * Memory to peripheral transfer may be visualized as
+ *	Get data from memory to DMAC
+ *	Until no data left
+ *		On burst request from peripheral
+ *			Destination burst from DMAC to peripheral
+ *			Clear burst request
+ *	Raise terminal count interrupt
+ *
+ * For peripherals with a FIFO:
+ * Source      burst size == half the depth of the peripheral FIFO
+ * Destination burst size == the depth of the peripheral FIFO
+ *
+ * (Bursts are irrelevant for mem to mem transfers - there are no burst
+ * signals, the DMA controller will simply facilitate its AHB master.)
+ *
+ * ASSUMES default (little) endianness for DMA transfers
+ *
+ * The PL08x has two flow control settings:
+ *  - DMAC flow control: the transfer size defines the number of transfers
+ *    which occur for the current LLI entry, and the DMAC raises TC at the
+ *    end of every LLI entry.  Observed behaviour shows the DMAC listening
+ *    to both the BREQ and SREQ signals (contrary to documented),
+ *    transferring data if either is active.  The LBREQ and LSREQ signals
+ *    are ignored.
+ *
+ *  - Peripheral flow control: the transfer size is ignored (and should be
+ *    zero).  The data is transferred from the current LLI entry, until
+ *    after the final transfer signalled by LBREQ or LSREQ.  The DMAC
+ *    will then move to the next LLI entry. Unsupported by PL080S.
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/pl08x.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/dma-mapping.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/pm_runtime.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/amba/pl080.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+#define DRIVER_NAME	"pl08xdmac"
+
+#define PL80X_DMA_BUSWIDTHS \
+	BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \
+	BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+	BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+	BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)
+
+static struct amba_driver pl08x_amba_driver;
+struct pl08x_driver_data;
+
+/**
+ * struct vendor_data - vendor-specific config parameters for PL08x derivatives
+ * @config_offset: offset to the configuration register
+ * @channels: the number of channels available in this variant
+ * @signals: the number of request signals available from the hardware
+ * @dualmaster: whether this version supports dual AHB masters or not.
+ * @nomadik: whether this variant is an ST Microelectronics Nomadik, where the
+ *	channels have Nomadik security extension bits that need to be checked
+ *	for permission before use and some registers are missing
+ * @pl080s: whether this variant is a Samsung PL080S, which has separate
+ *	register and LLI word for transfer size.
+ * @ftdmac020: whether this variant is a Faraday Technology FTDMAC020
+ * @max_transfer_size: the maximum single element transfer size for this
+ *	PL08x variant.
+ */
+struct vendor_data {
+	u8 config_offset;
+	u8 channels;
+	u8 signals;
+	bool dualmaster;
+	bool nomadik;
+	bool pl080s;
+	bool ftdmac020;
+	u32 max_transfer_size;
+};
+
+/**
+ * struct pl08x_bus_data - information of source or destination
+ * busses for a transfer
+ * @addr: current address
+ * @maxwidth: the maximum width of a transfer on this bus
+ * @buswidth: the width of this bus in bytes: 1, 2 or 4
+ */
+struct pl08x_bus_data {
+	dma_addr_t addr;
+	u8 maxwidth;
+	u8 buswidth;
+};
+
+#define IS_BUS_ALIGNED(bus) IS_ALIGNED((bus)->addr, (bus)->buswidth)
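+/* e.g. with buswidth == 4 the bus is aligned when (addr & 3) == 0 */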
+
+/**
+ * struct pl08x_phy_chan - holder for the physical channels
+ * @id: physical index to this channel
+ * @base: memory base address for this physical channel
+ * @reg_config: configuration address for this physical channel
+ * @reg_control: control address for this physical channel
+ * @reg_src: transfer source address register
+ * @reg_dst: transfer destination address register
+ * @reg_lli: transfer LLI address register
+ * @reg_busy: if the variant has a special per-channel busy register,
+ * this contains a pointer to it
+ * @lock: a lock to use when altering an instance of this struct
+ * @serving: the virtual channel currently being served by this physical
+ * channel
+ * @locked: channel unavailable for the system, e.g. dedicated to secure
+ * world
+ * @ftdmac020: channel is on a FTDMAC020
+ * @pl080s: channel is on a PL080S
+ */
+struct pl08x_phy_chan {
+	unsigned int id;
+	void __iomem *base;
+	void __iomem *reg_config;
+	void __iomem *reg_control;
+	void __iomem *reg_src;
+	void __iomem *reg_dst;
+	void __iomem *reg_lli;
+	void __iomem *reg_busy;
+	spinlock_t lock;
+	struct pl08x_dma_chan *serving;
+	bool locked;
+	bool ftdmac020;
+	bool pl080s;
+};
+
+/**
+ * struct pl08x_sg - structure containing data per sg
+ * @src_addr: src address of sg
+ * @dst_addr: dst address of sg
+ * @len: transfer len in bytes
+ * @node: node for txd's dsg_list
+ */
+struct pl08x_sg {
+	dma_addr_t src_addr;
+	dma_addr_t dst_addr;
+	size_t len;
+	struct list_head node;
+};
+
+/**
+ * struct pl08x_txd - wrapper for struct dma_async_tx_descriptor
+ * @vd: virtual DMA descriptor
+ * @dsg_list: list of children sg's
+ * @llis_bus: DMA memory address (physical) start for the LLIs
+ * @llis_va: virtual memory address start for the LLIs
+ * @cctl: control reg values for current txd
+ * @ccfg: config reg values for current txd
+ * @done: this marks completed descriptors, which should not have their
+ *   mux released.
+ * @cyclic: indicate cyclic transfers
+ */
+struct pl08x_txd {
+	struct virt_dma_desc vd;
+	struct list_head dsg_list;
+	dma_addr_t llis_bus;
+	u32 *llis_va;
+	/* Default cctl value for LLIs */
+	u32 cctl;
+	/*
+	 * Settings to be put into the physical channel when we
+	 * trigger this txd.  Other registers are in llis_va[0].
+	 */
+	u32 ccfg;
+	bool done;
+	bool cyclic;
+};
+
+/**
+ * enum pl08x_dma_chan_state - holds the PL08x specific virtual channel
+ * states
+ * @PL08X_CHAN_IDLE: the channel is idle
+ * @PL08X_CHAN_RUNNING: the channel has allocated a physical transport
+ * channel and is running a transfer on it
+ * @PL08X_CHAN_PAUSED: the channel has allocated a physical transport
+ * channel, but the transfer is currently paused
+ * @PL08X_CHAN_WAITING: the channel is waiting for a physical transport
+ * channel to become available (only pertains to memcpy channels)
+ */
+enum pl08x_dma_chan_state {
+	PL08X_CHAN_IDLE,
+	PL08X_CHAN_RUNNING,
+	PL08X_CHAN_PAUSED,
+	PL08X_CHAN_WAITING,
+};
+
+/**
+ * struct pl08x_dma_chan - this structure wraps a DMA ENGINE channel
+ * @vc: wrapped virtual channel
+ * @phychan: the physical channel utilized by this channel, if there is one
+ * @name: name of channel
+ * @cd: channel platform data
+ * @cfg: slave configuration
+ * @at: active transaction on this channel
+ * @host: a pointer to the host (internal use)
+ * @state: whether the channel is idle, paused, running etc
+ * @slave: whether this channel is a device (slave) or for memcpy
+ * @signal: the physical DMA request signal which this channel is using
+ * @mux_use: count of descriptors using this DMA request signal setting
+ */
+struct pl08x_dma_chan {
+	struct virt_dma_chan vc;
+	struct pl08x_phy_chan *phychan;
+	const char *name;
+	struct pl08x_channel_data *cd;
+	struct dma_slave_config cfg;
+	struct pl08x_txd *at;
+	struct pl08x_driver_data *host;
+	enum pl08x_dma_chan_state state;
+	bool slave;
+	int signal;
+	unsigned mux_use;
+};
+
+/**
+ * struct pl08x_driver_data - the local state holder for the PL08x
+ * @slave: optional slave engine for this instance
+ * @memcpy: memcpy engine for this instance
+ * @has_slave: the PL08x has a slave engine (routed signals)
+ * @base: virtual memory base (remapped) for the PL08x
+ * @adev: the corresponding AMBA (PrimeCell) bus entry
+ * @vd: vendor data for this PL08x variant
+ * @pd: platform data passed in from the platform/machine
+ * @phy_chans: array of data for the physical channels
+ * @pool: a pool for the LLI descriptors
+ * @lli_buses: bitmask to or in to LLI pointer selecting AHB port for LLI
+ * fetches
+ * @mem_buses: set to indicate memory transfers on AHB2.
+ * @lli_words: how many words are used in each LLI item for this variant
+ */
+struct pl08x_driver_data {
+	struct dma_device slave;
+	struct dma_device memcpy;
+	bool has_slave;
+	void __iomem *base;
+	struct amba_device *adev;
+	const struct vendor_data *vd;
+	struct pl08x_platform_data *pd;
+	struct pl08x_phy_chan *phy_chans;
+	struct dma_pool *pool;
+	u8 lli_buses;
+	u8 mem_buses;
+	u8 lli_words;
+};
+
+/*
+ * PL08X specific defines
+ */
+
+/* The order of words in an LLI. */
+#define PL080_LLI_SRC		0
+#define PL080_LLI_DST		1
+#define PL080_LLI_LLI		2
+#define PL080_LLI_CCTL		3
+#define PL080S_LLI_CCTL2	4
+
+/* Total words in an LLI. */
+#define PL080_LLI_WORDS		4
+#define PL080S_LLI_WORDS	8
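+
+/*
+ * One PL080 LLI in the DMA pool is therefore laid out as:
+ *   word [0] source address, word [1] destination address,
+ *   word [2] next-LLI address (with the AHB master select bit ORed in),
+ *   word [3] the CCTL control word.
+ * The PL080S appends CCTL2 in word [4] and pads the item to 8 words.
+ */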
+
+/*
+ * Number of LLIs in each LLI buffer allocated for one transfer
+ * (maximum times we call dma_pool_alloc on this pool without freeing)
+ */
+#define MAX_NUM_TSFR_LLIS	512
+#define PL08X_ALIGN		8
+
+static inline struct pl08x_dma_chan *to_pl08x_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct pl08x_dma_chan, vc.chan);
+}
+
+static inline struct pl08x_txd *to_pl08x_txd(struct dma_async_tx_descriptor *tx)
+{
+	return container_of(tx, struct pl08x_txd, vd.tx);
+}
+
+/*
+ * Mux handling.
+ *
+ * This gives us the DMA request input to the PL08x primecell which the
+ * peripheral described by the channel data will be routed to, possibly
+ * via a board/SoC specific external MUX.  One important point to note
+ * here is that this does not depend on the physical channel.
+ */
+static int pl08x_request_mux(struct pl08x_dma_chan *plchan)
+{
+	const struct pl08x_platform_data *pd = plchan->host->pd;
+	int ret;
+
+	if (plchan->mux_use++ == 0 && pd->get_xfer_signal) {
+		ret = pd->get_xfer_signal(plchan->cd);
+		if (ret < 0) {
+			plchan->mux_use = 0;
+			return ret;
+		}
+
+		plchan->signal = ret;
+	}
+	return 0;
+}
+
+static void pl08x_release_mux(struct pl08x_dma_chan *plchan)
+{
+	const struct pl08x_platform_data *pd = plchan->host->pd;
+
+	if (plchan->signal >= 0) {
+		WARN_ON(plchan->mux_use == 0);
+
+		if (--plchan->mux_use == 0 && pd->put_xfer_signal) {
+			pd->put_xfer_signal(plchan->cd, plchan->signal);
+			plchan->signal = -1;
+		}
+	}
+}
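+
+/*
+ * A minimal sketch of how board code could wire up the two mux callbacks
+ * used above (board_mux_claim()/board_mux_release() are hypothetical
+ * helpers, not part of this driver):
+ *
+ *	static int board_get_signal(const struct pl08x_channel_data *cd)
+ *	{
+ *		return board_mux_claim(cd);
+ *	}
+ *
+ *	static void board_put_signal(const struct pl08x_channel_data *cd,
+ *				     int signal)
+ *	{
+ *		board_mux_release(signal);
+ *	}
+ *
+ * These are then passed in struct pl08x_platform_data as .get_xfer_signal
+ * and .put_xfer_signal.
+ */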
+
+/*
+ * Physical channel handling
+ */
+
+/* Whether a certain channel is busy or not */
+static int pl08x_phy_channel_busy(struct pl08x_phy_chan *ch)
+{
+	unsigned int val;
+
+	/* If we have a special busy register, take a shortcut */
+	if (ch->reg_busy) {
+		val = readl(ch->reg_busy);
+		return !!(val & BIT(ch->id));
+	}
+	val = readl(ch->reg_config);
+	return val & PL080_CONFIG_ACTIVE;
+}
+
+/*
+ * pl08x_write_lli() - Write an LLI into the DMA controller.
+ *
+ * The PL08x derivatives support linked lists, but the first item of the
+ * list containing the source, destination, control word and next LLI is
+ * ignored. Instead the driver has to write those values directly into the
+ * SRC, DST, LLI and control registers. On the FTDMAC020 the SIZE
+ * register also needs to be set up for the first transfer.
+ */
+static void pl08x_write_lli(struct pl08x_driver_data *pl08x,
+		struct pl08x_phy_chan *phychan, const u32 *lli, u32 ccfg)
+{
+	if (pl08x->vd->pl080s)
+		dev_vdbg(&pl08x->adev->dev,
+			"WRITE channel %d: csrc=0x%08x, cdst=0x%08x, "
+			"clli=0x%08x, cctl=0x%08x, cctl2=0x%08x, ccfg=0x%08x\n",
+			phychan->id, lli[PL080_LLI_SRC], lli[PL080_LLI_DST],
+			lli[PL080_LLI_LLI], lli[PL080_LLI_CCTL],
+			lli[PL080S_LLI_CCTL2], ccfg);
+	else
+		dev_vdbg(&pl08x->adev->dev,
+			"WRITE channel %d: csrc=0x%08x, cdst=0x%08x, "
+			"clli=0x%08x, cctl=0x%08x, ccfg=0x%08x\n",
+			phychan->id, lli[PL080_LLI_SRC], lli[PL080_LLI_DST],
+			lli[PL080_LLI_LLI], lli[PL080_LLI_CCTL], ccfg);
+
+	writel_relaxed(lli[PL080_LLI_SRC], phychan->reg_src);
+	writel_relaxed(lli[PL080_LLI_DST], phychan->reg_dst);
+	writel_relaxed(lli[PL080_LLI_LLI], phychan->reg_lli);
+
+	/*
+	 * The FTMAC020 has a different layout in the CCTL word of the LLI
+	 * and the CCTL register which is split in CSR and SIZE registers.
+	 * Convert the LLI item CCTL into the proper values to write into
+	 * the CSR and SIZE registers.
+	 */
+	if (phychan->ftdmac020) {
+		u32 llictl = lli[PL080_LLI_CCTL];
+		u32 val = 0;
+
+		/* Write the transfer size (12 bits) to the size register */
+		writel_relaxed(llictl & FTDMAC020_LLI_TRANSFER_SIZE_MASK,
+			       phychan->base + FTDMAC020_CH_SIZE);
+		/*
+		 * Then write the control bits 28..16 to the control register
+		 * by shuffling the bits around to where they are in the
+		 * main register. The mapping is as follows:
+		 * Bit 28: TC_MSK - mask on all except last LLI
+		 * Bit 27..25: SRC_WIDTH
+		 * Bit 24..22: DST_WIDTH
+		 * Bit 21..20: SRCAD_CTRL
+		 * Bit 19..18: DSTAD_CTRL
+		 * Bit 17: SRC_SEL
+		 * Bit 16: DST_SEL
+		 */
+		if (llictl & FTDMAC020_LLI_TC_MSK)
+			val |= FTDMAC020_CH_CSR_TC_MSK;
+		val |= ((llictl  & FTDMAC020_LLI_SRC_WIDTH_MSK) >>
+			(FTDMAC020_LLI_SRC_WIDTH_SHIFT -
+			 FTDMAC020_CH_CSR_SRC_WIDTH_SHIFT));
+		val |= ((llictl  & FTDMAC020_LLI_DST_WIDTH_MSK) >>
+			(FTDMAC020_LLI_DST_WIDTH_SHIFT -
+			 FTDMAC020_CH_CSR_DST_WIDTH_SHIFT));
+		val |= ((llictl  & FTDMAC020_LLI_SRCAD_CTL_MSK) >>
+			(FTDMAC020_LLI_SRCAD_CTL_SHIFT -
+			 FTDMAC020_CH_CSR_SRCAD_CTL_SHIFT));
+		val |= ((llictl  & FTDMAC020_LLI_DSTAD_CTL_MSK) >>
+			(FTDMAC020_LLI_DSTAD_CTL_SHIFT -
+			 FTDMAC020_CH_CSR_DSTAD_CTL_SHIFT));
+		if (llictl & FTDMAC020_LLI_SRC_SEL)
+			val |= FTDMAC020_CH_CSR_SRC_SEL;
+		if (llictl & FTDMAC020_LLI_DST_SEL)
+			val |= FTDMAC020_CH_CSR_DST_SEL;
+
+		/*
+		 * Set up the bits that exist in the CSR but are not
+		 * part of the LLI, i.e. they only get written to the control
+		 * register right here.
+		 *
+		 * FIXME: do not just handle memcpy, also handle slave DMA.
+		 */
+		switch (pl08x->pd->memcpy_burst_size) {
+		default:
+		case PL08X_BURST_SZ_1:
+			val |= PL080_BSIZE_1 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		case PL08X_BURST_SZ_4:
+			val |= PL080_BSIZE_4 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		case PL08X_BURST_SZ_8:
+			val |= PL080_BSIZE_8 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		case PL08X_BURST_SZ_16:
+			val |= PL080_BSIZE_16 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		case PL08X_BURST_SZ_32:
+			val |= PL080_BSIZE_32 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		case PL08X_BURST_SZ_64:
+			val |= PL080_BSIZE_64 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		case PL08X_BURST_SZ_128:
+			val |= PL080_BSIZE_128 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		case PL08X_BURST_SZ_256:
+			val |= PL080_BSIZE_256 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		}
+
+		/* Protection flags */
+		if (pl08x->pd->memcpy_prot_buff)
+			val |= FTDMAC020_CH_CSR_PROT2;
+		if (pl08x->pd->memcpy_prot_cache)
+			val |= FTDMAC020_CH_CSR_PROT3;
+		/* We are the kernel, so we are in privileged mode */
+		val |= FTDMAC020_CH_CSR_PROT1;
+
+		writel_relaxed(val, phychan->reg_control);
+	} else {
+		/* Bits are just identical */
+		writel_relaxed(lli[PL080_LLI_CCTL], phychan->reg_control);
+	}
+
+	/* Second control word on the PL080s */
+	if (pl08x->vd->pl080s)
+		writel_relaxed(lli[PL080S_LLI_CCTL2],
+				phychan->base + PL080S_CH_CONTROL2);
+
+	writel(ccfg, phychan->reg_config);
+}
+
+/*
+ * Set the initial DMA register values i.e. those for the first LLI
+ * The next LLI pointer and the configuration interrupt bit have
+ * been set when the LLIs were constructed.  Poke them into the hardware
+ * and start the transfer.
+ */
+static void pl08x_start_next_txd(struct pl08x_dma_chan *plchan)
+{
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_phy_chan *phychan = plchan->phychan;
+	struct virt_dma_desc *vd = vchan_next_desc(&plchan->vc);
+	struct pl08x_txd *txd = to_pl08x_txd(&vd->tx);
+	u32 val;
+
+	list_del(&txd->vd.node);
+
+	plchan->at = txd;
+
+	/* Wait for channel inactive */
+	while (pl08x_phy_channel_busy(phychan))
+		cpu_relax();
+
+	pl08x_write_lli(pl08x, phychan, &txd->llis_va[0], txd->ccfg);
+
+	/* Enable the DMA channel */
+	/* Do not access config register until channel shows as disabled */
+	while (readl(pl08x->base + PL080_EN_CHAN) & BIT(phychan->id))
+		cpu_relax();
+
+	/* Do not access config register until channel shows as inactive */
+	if (phychan->ftdmac020) {
+		val = readl(phychan->reg_config);
+		while (val & FTDMAC020_CH_CFG_BUSY)
+			val = readl(phychan->reg_config);
+
+		val = readl(phychan->reg_control);
+		while (val & FTDMAC020_CH_CSR_EN)
+			val = readl(phychan->reg_control);
+
+		writel(val | FTDMAC020_CH_CSR_EN,
+		       phychan->reg_control);
+	} else {
+		val = readl(phychan->reg_config);
+		while ((val & PL080_CONFIG_ACTIVE) ||
+		       (val & PL080_CONFIG_ENABLE))
+			val = readl(phychan->reg_config);
+
+		writel(val | PL080_CONFIG_ENABLE, phychan->reg_config);
+	}
+}
+
+/*
+ * Pause the channel by setting the HALT bit.
+ *
+ * For M->P transfers, pause the DMAC first and then stop the peripheral -
+ * the FIFO can only drain if the peripheral is still requesting data.
+ * (note: this can still timeout if the DMAC FIFO never drains of data.)
+ *
+ * For P->M transfers, disable the peripheral first to stop it filling
+ * the DMAC FIFO, and then pause the DMAC.
+ */
+static void pl08x_pause_phy_chan(struct pl08x_phy_chan *ch)
+{
+	u32 val;
+	int timeout;
+
+	if (ch->ftdmac020) {
+		/* Use the enable bit on the FTDMAC020 */
+		val = readl(ch->reg_control);
+		val &= ~FTDMAC020_CH_CSR_EN;
+		writel(val, ch->reg_control);
+		return;
+	}
+
+	/* Set the HALT bit and wait for the FIFO to drain */
+	val = readl(ch->reg_config);
+	val |= PL080_CONFIG_HALT;
+	writel(val, ch->reg_config);
+
+	/* Wait for channel inactive */
+	for (timeout = 1000; timeout; timeout--) {
+		if (!pl08x_phy_channel_busy(ch))
+			break;
+		udelay(1);
+	}
+	if (pl08x_phy_channel_busy(ch))
+		pr_err("pl08x: channel%u timeout waiting for pause\n", ch->id);
+}
+
+static void pl08x_resume_phy_chan(struct pl08x_phy_chan *ch)
+{
+	u32 val;
+
+	/* Use the enable bit on the FTDMAC020 */
+	if (ch->ftdmac020) {
+		val = readl(ch->reg_control);
+		val |= FTDMAC020_CH_CSR_EN;
+		writel(val, ch->reg_control);
+		return;
+	}
+
+	/* Clear the HALT bit */
+	val = readl(ch->reg_config);
+	val &= ~PL080_CONFIG_HALT;
+	writel(val, ch->reg_config);
+}
+
+/*
+ * pl08x_terminate_phy_chan() stops the channel, clears the FIFO and
+ * clears any pending interrupt status.  This should not be used for
+ * an on-going transfer, but as a method of shutting down a channel
+ * (e.g. when it's no longer used) or terminating a transfer.
+ */
+static void pl08x_terminate_phy_chan(struct pl08x_driver_data *pl08x,
+	struct pl08x_phy_chan *ch)
+{
+	u32 val;
+
+	/* The layout for the FTDMAC020 is different */
+	if (ch->ftdmac020) {
+		/* Disable all interrupts */
+		val = readl(ch->reg_config);
+		val |= (FTDMAC020_CH_CFG_INT_ABT_MASK |
+			FTDMAC020_CH_CFG_INT_ERR_MASK |
+			FTDMAC020_CH_CFG_INT_TC_MASK);
+		writel(val, ch->reg_config);
+
+		/* Abort and disable channel */
+		val = readl(ch->reg_control);
+		val &= ~FTDMAC020_CH_CSR_EN;
+		val |= FTDMAC020_CH_CSR_ABT;
+		writel(val, ch->reg_control);
+
+		/* Clear ABT and ERR interrupt flags */
+		writel(BIT(ch->id) | BIT(ch->id + 16),
+		       pl08x->base + PL080_ERR_CLEAR);
+		writel(BIT(ch->id), pl08x->base + PL080_TC_CLEAR);
+
+		return;
+	}
+
+	val = readl(ch->reg_config);
+	val &= ~(PL080_CONFIG_ENABLE | PL080_CONFIG_ERR_IRQ_MASK |
+		 PL080_CONFIG_TC_IRQ_MASK);
+	writel(val, ch->reg_config);
+
+	writel(BIT(ch->id), pl08x->base + PL080_ERR_CLEAR);
+	writel(BIT(ch->id), pl08x->base + PL080_TC_CLEAR);
+}
+
+static u32 get_bytes_in_phy_channel(struct pl08x_phy_chan *ch)
+{
+	u32 val;
+	u32 bytes;
+
+	if (ch->ftdmac020) {
+		bytes = readl(ch->base + FTDMAC020_CH_SIZE);
+
+		val = readl(ch->reg_control);
+		val &= FTDMAC020_CH_CSR_SRC_WIDTH_MSK;
+		val >>= FTDMAC020_CH_CSR_SRC_WIDTH_SHIFT;
+	} else if (ch->pl080s) {
+		val = readl(ch->base + PL080S_CH_CONTROL2);
+		bytes = val & PL080S_CONTROL_TRANSFER_SIZE_MASK;
+
+		val = readl(ch->reg_control);
+		val &= PL080_CONTROL_SWIDTH_MASK;
+		val >>= PL080_CONTROL_SWIDTH_SHIFT;
+	} else {
+		/* Plain PL08x */
+		val = readl(ch->reg_control);
+		bytes = val & PL080_CONTROL_TRANSFER_SIZE_MASK;
+
+		val &= PL080_CONTROL_SWIDTH_MASK;
+		val >>= PL080_CONTROL_SWIDTH_SHIFT;
+	}
+
+	switch (val) {
+	case PL080_WIDTH_8BIT:
+		break;
+	case PL080_WIDTH_16BIT:
+		bytes *= 2;
+		break;
+	case PL080_WIDTH_32BIT:
+		bytes *= 4;
+		break;
+	}
+	return bytes;
+}
+
+static u32 get_bytes_in_lli(struct pl08x_phy_chan *ch, const u32 *llis_va)
+{
+	u32 val;
+	u32 bytes;
+
+	if (ch->ftdmac020) {
+		val = llis_va[PL080_LLI_CCTL];
+		bytes = val & FTDMAC020_LLI_TRANSFER_SIZE_MASK;
+
+		val = llis_va[PL080_LLI_CCTL];
+		val &= FTDMAC020_LLI_SRC_WIDTH_MSK;
+		val >>= FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+	} else if (ch->pl080s) {
+		val = llis_va[PL080S_LLI_CCTL2];
+		bytes = val & PL080S_CONTROL_TRANSFER_SIZE_MASK;
+
+		val = llis_va[PL080_LLI_CCTL];
+		val &= PL080_CONTROL_SWIDTH_MASK;
+		val >>= PL080_CONTROL_SWIDTH_SHIFT;
+	} else {
+		/* Plain PL08x */
+		val = llis_va[PL080_LLI_CCTL];
+		bytes = val & PL080_CONTROL_TRANSFER_SIZE_MASK;
+
+		val &= PL080_CONTROL_SWIDTH_MASK;
+		val >>= PL080_CONTROL_SWIDTH_SHIFT;
+	}
+
+	switch (val) {
+	case PL080_WIDTH_8BIT:
+		break;
+	case PL080_WIDTH_16BIT:
+		bytes *= 2;
+		break;
+	case PL080_WIDTH_32BIT:
+		bytes *= 4;
+		break;
+	}
+	return bytes;
+}
+
+/* The channel should be paused when calling this */
+static u32 pl08x_getbytes_chan(struct pl08x_dma_chan *plchan)
+{
+	struct pl08x_driver_data *pl08x = plchan->host;
+	const u32 *llis_va, *llis_va_limit;
+	struct pl08x_phy_chan *ch;
+	dma_addr_t llis_bus;
+	struct pl08x_txd *txd;
+	u32 llis_max_words;
+	size_t bytes;
+	u32 clli;
+
+	ch = plchan->phychan;
+	txd = plchan->at;
+
+	if (!ch || !txd)
+		return 0;
+
+	/*
+	 * Follow the LLIs to get the number of remaining
+	 * bytes in the currently active transaction.
+	 */
+	clli = readl(ch->reg_lli) & ~PL080_LLI_LM_AHB2;
+
+	/* First get the remaining bytes in the active transfer */
+	bytes = get_bytes_in_phy_channel(ch);
+
+	if (!clli)
+		return bytes;
+
+	llis_va = txd->llis_va;
+	llis_bus = txd->llis_bus;
+
+	llis_max_words = pl08x->lli_words * MAX_NUM_TSFR_LLIS;
+	BUG_ON(clli < llis_bus || clli >= llis_bus +
+						sizeof(u32) * llis_max_words);
+
+	/*
+	 * Locate the next LLI - as this is an array,
+	 * it's simple maths to find.
+	 */
+	llis_va += (clli - llis_bus) / sizeof(u32);
+
+	llis_va_limit = llis_va + llis_max_words;
+
+	for (; llis_va < llis_va_limit; llis_va += pl08x->lli_words) {
+		bytes += get_bytes_in_lli(ch, llis_va);
+
+		/*
+		 * An LLI pointer going backward terminates the LLI list
+		 */
+		if (llis_va[PL080_LLI_LLI] <= clli)
+			break;
+	}
+
+	return bytes;
+}
+
+/*
+ * Allocate a physical channel for a virtual channel
+ *
+ * Try to locate a physical channel to be used for this transfer. If all
+ * are taken return NULL and the requester will have to cope by using
+ * some fallback PIO mode or retrying later.
+ */
+static struct pl08x_phy_chan *
+pl08x_get_phy_channel(struct pl08x_driver_data *pl08x,
+		      struct pl08x_dma_chan *virt_chan)
+{
+	struct pl08x_phy_chan *ch = NULL;
+	unsigned long flags;
+	int i;
+
+	for (i = 0; i < pl08x->vd->channels; i++) {
+		ch = &pl08x->phy_chans[i];
+
+		spin_lock_irqsave(&ch->lock, flags);
+
+		if (!ch->locked && !ch->serving) {
+			ch->serving = virt_chan;
+			spin_unlock_irqrestore(&ch->lock, flags);
+			break;
+		}
+
+		spin_unlock_irqrestore(&ch->lock, flags);
+	}
+
+	if (i == pl08x->vd->channels) {
+		/* No physical channel available, cope with it */
+		return NULL;
+	}
+
+	return ch;
+}
+
+/* Mark the physical channel as free.  Note, this write is atomic. */
+static inline void pl08x_put_phy_channel(struct pl08x_driver_data *pl08x,
+					 struct pl08x_phy_chan *ch)
+{
+	ch->serving = NULL;
+}
+
+/*
+ * Try to allocate a physical channel.  When successful, assign it to
+ * this virtual channel, and initiate the next descriptor.  The
+ * virtual channel lock must be held at this point.
+ */
+static void pl08x_phy_alloc_and_start(struct pl08x_dma_chan *plchan)
+{
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_phy_chan *ch;
+
+	ch = pl08x_get_phy_channel(pl08x, plchan);
+	if (!ch) {
+		dev_dbg(&pl08x->adev->dev, "no physical channel available for xfer on %s\n", plchan->name);
+		plchan->state = PL08X_CHAN_WAITING;
+		return;
+	}
+
+	dev_dbg(&pl08x->adev->dev, "allocated physical channel %d for xfer on %s\n",
+		ch->id, plchan->name);
+
+	plchan->phychan = ch;
+	plchan->state = PL08X_CHAN_RUNNING;
+	pl08x_start_next_txd(plchan);
+}
+
+static void pl08x_phy_reassign_start(struct pl08x_phy_chan *ch,
+	struct pl08x_dma_chan *plchan)
+{
+	struct pl08x_driver_data *pl08x = plchan->host;
+
+	dev_dbg(&pl08x->adev->dev, "reassigned physical channel %d for xfer on %s\n",
+		ch->id, plchan->name);
+
+	/*
+	 * We do this without taking the lock; we're really only concerned
+	 * about whether this pointer is NULL or not, and we're guaranteed
+	 * that this will only be called when it _already_ is non-NULL.
+	 */
+	ch->serving = plchan;
+	plchan->phychan = ch;
+	plchan->state = PL08X_CHAN_RUNNING;
+	pl08x_start_next_txd(plchan);
+}
+
+/*
+ * Free a physical DMA channel, potentially reallocating it to another
+ * virtual channel if we have any pending.
+ */
+static void pl08x_phy_free(struct pl08x_dma_chan *plchan)
+{
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_dma_chan *p, *next;
+
+ retry:
+	next = NULL;
+
+	/* Find a waiting virtual channel for the next transfer. */
+	list_for_each_entry(p, &pl08x->memcpy.channels, vc.chan.device_node)
+		if (p->state == PL08X_CHAN_WAITING) {
+			next = p;
+			break;
+		}
+
+	if (!next && pl08x->has_slave) {
+		list_for_each_entry(p, &pl08x->slave.channels, vc.chan.device_node)
+			if (p->state == PL08X_CHAN_WAITING) {
+				next = p;
+				break;
+			}
+	}
+
+	/* Ensure that the physical channel is stopped */
+	pl08x_terminate_phy_chan(pl08x, plchan->phychan);
+
+	if (next) {
+		bool success;
+
+		/*
+		 * Eww.  We know this isn't going to deadlock
+		 * but lockdep probably doesn't.
+		 */
+		spin_lock(&next->vc.lock);
+		/* Re-check the state now that we have the lock */
+		success = next->state == PL08X_CHAN_WAITING;
+		if (success)
+			pl08x_phy_reassign_start(plchan->phychan, next);
+		spin_unlock(&next->vc.lock);
+
+		/* If the state changed, try to find another channel */
+		if (!success)
+			goto retry;
+	} else {
+		/* No more jobs, so free up the physical channel */
+		pl08x_put_phy_channel(pl08x, plchan->phychan);
+	}
+
+	plchan->phychan = NULL;
+	plchan->state = PL08X_CHAN_IDLE;
+}
+
+/*
+ * LLI handling
+ */
+
+static inline unsigned int
+pl08x_get_bytes_for_lli(struct pl08x_driver_data *pl08x,
+			u32 cctl,
+			bool source)
+{
+	u32 val;
+
+	if (pl08x->vd->ftdmac020) {
+		if (source)
+			val = (cctl & FTDMAC020_LLI_SRC_WIDTH_MSK) >>
+				FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+		else
+			val = (cctl & FTDMAC020_LLI_DST_WIDTH_MSK) >>
+				FTDMAC020_LLI_DST_WIDTH_SHIFT;
+	} else {
+		if (source)
+			val = (cctl & PL080_CONTROL_SWIDTH_MASK) >>
+				PL080_CONTROL_SWIDTH_SHIFT;
+		else
+			val = (cctl & PL080_CONTROL_DWIDTH_MASK) >>
+				PL080_CONTROL_DWIDTH_SHIFT;
+	}
+
+	switch (val) {
+	case PL080_WIDTH_8BIT:
+		return 1;
+	case PL080_WIDTH_16BIT:
+		return 2;
+	case PL080_WIDTH_32BIT:
+		return 4;
+	default:
+		break;
+	}
+	BUG();
+	return 0;
+}
+
+static inline u32 pl08x_lli_control_bits(struct pl08x_driver_data *pl08x,
+					 u32 cctl,
+					 u8 srcwidth, u8 dstwidth,
+					 size_t tsize)
+{
+	u32 retbits = cctl;
+
+	/*
+	 * Remove all src, dst and transfer size bits, then set the
+	 * width and size according to the parameters. The bit offsets
+	 * are different in the FTDMAC020 so we need to account for this.
+	 */
+	if (pl08x->vd->ftdmac020) {
+		retbits &= ~FTDMAC020_LLI_DST_WIDTH_MSK;
+		retbits &= ~FTDMAC020_LLI_SRC_WIDTH_MSK;
+		retbits &= ~FTDMAC020_LLI_TRANSFER_SIZE_MASK;
+
+		switch (srcwidth) {
+		case 1:
+			retbits |= PL080_WIDTH_8BIT <<
+				FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+			break;
+		case 2:
+			retbits |= PL080_WIDTH_16BIT <<
+				FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+			break;
+		case 4:
+			retbits |= PL080_WIDTH_32BIT <<
+				FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+			break;
+		default:
+			BUG();
+			break;
+		}
+
+		switch (dstwidth) {
+		case 1:
+			retbits |= PL080_WIDTH_8BIT <<
+				FTDMAC020_LLI_DST_WIDTH_SHIFT;
+			break;
+		case 2:
+			retbits |= PL080_WIDTH_16BIT <<
+				FTDMAC020_LLI_DST_WIDTH_SHIFT;
+			break;
+		case 4:
+			retbits |= PL080_WIDTH_32BIT <<
+				FTDMAC020_LLI_DST_WIDTH_SHIFT;
+			break;
+		default:
+			BUG();
+			break;
+		}
+
+		tsize &= FTDMAC020_LLI_TRANSFER_SIZE_MASK;
+		retbits |= tsize << FTDMAC020_LLI_TRANSFER_SIZE_SHIFT;
+	} else {
+		retbits &= ~PL080_CONTROL_DWIDTH_MASK;
+		retbits &= ~PL080_CONTROL_SWIDTH_MASK;
+		retbits &= ~PL080_CONTROL_TRANSFER_SIZE_MASK;
+
+		switch (srcwidth) {
+		case 1:
+			retbits |= PL080_WIDTH_8BIT <<
+				PL080_CONTROL_SWIDTH_SHIFT;
+			break;
+		case 2:
+			retbits |= PL080_WIDTH_16BIT <<
+				PL080_CONTROL_SWIDTH_SHIFT;
+			break;
+		case 4:
+			retbits |= PL080_WIDTH_32BIT <<
+				PL080_CONTROL_SWIDTH_SHIFT;
+			break;
+		default:
+			BUG();
+			break;
+		}
+
+		switch (dstwidth) {
+		case 1:
+			retbits |= PL080_WIDTH_8BIT <<
+				PL080_CONTROL_DWIDTH_SHIFT;
+			break;
+		case 2:
+			retbits |= PL080_WIDTH_16BIT <<
+				PL080_CONTROL_DWIDTH_SHIFT;
+			break;
+		case 4:
+			retbits |= PL080_WIDTH_32BIT <<
+				PL080_CONTROL_DWIDTH_SHIFT;
+			break;
+		default:
+			BUG();
+			break;
+		}
+
+		tsize &= PL080_CONTROL_TRANSFER_SIZE_MASK;
+		retbits |= tsize << PL080_CONTROL_TRANSFER_SIZE_SHIFT;
+	}
+
+	return retbits;
+}
+
+struct pl08x_lli_build_data {
+	struct pl08x_txd *txd;
+	struct pl08x_bus_data srcbus;
+	struct pl08x_bus_data dstbus;
+	size_t remainder;
+	u32 lli_bus;
+};
+
+/*
+ * Autoselect a master bus to use for the transfer. The slave bus is chosen
+ * as the victim when src & dst are not similarly aligned, i.e. if after
+ * aligning the master's address to the transfer's width requirements (by
+ * sending a few bytes byte by byte) the slave is still not aligned, its
+ * width is reduced to BYTE.
+ * - prefers the destination bus if both are available
+ * - prefers the bus with a fixed address (i.e. a peripheral)
+ */
+static void pl08x_choose_master_bus(struct pl08x_driver_data *pl08x,
+				    struct pl08x_lli_build_data *bd,
+				    struct pl08x_bus_data **mbus,
+				    struct pl08x_bus_data **sbus,
+				    u32 cctl)
+{
+	bool dst_incr;
+	bool src_incr;
+
+	/*
+	 * The FTDMAC020 only supports memory-to-memory transfer, so
+	 * source and destination always increase.
+	 */
+	if (pl08x->vd->ftdmac020) {
+		dst_incr = true;
+		src_incr = true;
+	} else {
+		dst_incr = !!(cctl & PL080_CONTROL_DST_INCR);
+		src_incr = !!(cctl & PL080_CONTROL_SRC_INCR);
+	}
+
+	/*
+	 * If either bus is not advancing, i.e. it is a peripheral, that
+	 * one becomes master
+	 */
+	if (!dst_incr) {
+		*mbus = &bd->dstbus;
+		*sbus = &bd->srcbus;
+	} else if (!src_incr) {
+		*mbus = &bd->srcbus;
+		*sbus = &bd->dstbus;
+	} else {
+		if (bd->dstbus.buswidth >= bd->srcbus.buswidth) {
+			*mbus = &bd->dstbus;
+			*sbus = &bd->srcbus;
+		} else {
+			*mbus = &bd->srcbus;
+			*sbus = &bd->dstbus;
+		}
+	}
+}
+
+/*
+ * Fills in one LLI for a certain transfer descriptor and advances the counter
+ */
+static void pl08x_fill_lli_for_desc(struct pl08x_driver_data *pl08x,
+				    struct pl08x_lli_build_data *bd,
+				    int num_llis, int len, u32 cctl, u32 cctl2)
+{
+	u32 offset = num_llis * pl08x->lli_words;
+	u32 *llis_va = bd->txd->llis_va + offset;
+	dma_addr_t llis_bus = bd->txd->llis_bus;
+
+	BUG_ON(num_llis >= MAX_NUM_TSFR_LLIS);
+
+	/* Advance the offset to next LLI. */
+	offset += pl08x->lli_words;
+
+	llis_va[PL080_LLI_SRC] = bd->srcbus.addr;
+	llis_va[PL080_LLI_DST] = bd->dstbus.addr;
+	llis_va[PL080_LLI_LLI] = (llis_bus + sizeof(u32) * offset);
+	llis_va[PL080_LLI_LLI] |= bd->lli_bus;
+	llis_va[PL080_LLI_CCTL] = cctl;
+	if (pl08x->vd->pl080s)
+		llis_va[PL080S_LLI_CCTL2] = cctl2;
+
+	if (pl08x->vd->ftdmac020) {
+		/* FIXME: only memcpy so far so both increase */
+		bd->srcbus.addr += len;
+		bd->dstbus.addr += len;
+	} else {
+		if (cctl & PL080_CONTROL_SRC_INCR)
+			bd->srcbus.addr += len;
+		if (cctl & PL080_CONTROL_DST_INCR)
+			bd->dstbus.addr += len;
+	}
+
+	BUG_ON(bd->remainder < len);
+
+	bd->remainder -= len;
+}
+
+static inline void prep_byte_width_lli(struct pl08x_driver_data *pl08x,
+			struct pl08x_lli_build_data *bd, u32 *cctl, u32 len,
+			int num_llis, size_t *total_bytes)
+{
+	*cctl = pl08x_lli_control_bits(pl08x, *cctl, 1, 1, len);
+	pl08x_fill_lli_for_desc(pl08x, bd, num_llis, len, *cctl, len);
+	(*total_bytes) += len;
+}
+
+#if 1
+static void pl08x_dump_lli(struct pl08x_driver_data *pl08x,
+			   const u32 *llis_va, int num_llis)
+{
+	int i;
+
+	if (pl08x->vd->pl080s) {
+		dev_vdbg(&pl08x->adev->dev,
+			"%-3s %-9s  %-10s %-10s %-10s %-10s %s\n",
+			"lli", "", "csrc", "cdst", "clli", "cctl", "cctl2");
+		for (i = 0; i < num_llis; i++) {
+			dev_vdbg(&pl08x->adev->dev,
+				"%3d @%p: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
+				i, llis_va, llis_va[PL080_LLI_SRC],
+				llis_va[PL080_LLI_DST], llis_va[PL080_LLI_LLI],
+				llis_va[PL080_LLI_CCTL],
+				llis_va[PL080S_LLI_CCTL2]);
+			llis_va += pl08x->lli_words;
+		}
+	} else {
+		dev_vdbg(&pl08x->adev->dev,
+			"%-3s %-9s  %-10s %-10s %-10s %s\n",
+			"lli", "", "csrc", "cdst", "clli", "cctl");
+		for (i = 0; i < num_llis; i++) {
+			dev_vdbg(&pl08x->adev->dev,
+				"%3d @%p: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+				i, llis_va, llis_va[PL080_LLI_SRC],
+				llis_va[PL080_LLI_DST], llis_va[PL080_LLI_LLI],
+				llis_va[PL080_LLI_CCTL]);
+			llis_va += pl08x->lli_words;
+		}
+	}
+}
+#else
+static inline void pl08x_dump_lli(struct pl08x_driver_data *pl08x,
+				  const u32 *llis_va, int num_llis) {}
+#endif
+
+/*
+ * This fills in the table of LLIs for the transfer descriptor
+ * Note that we assume we never have to change the burst sizes
+ * Return 0 for error
+ */
+static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
+			      struct pl08x_txd *txd)
+{
+	struct pl08x_bus_data *mbus, *sbus;
+	struct pl08x_lli_build_data bd;
+	int num_llis = 0;
+	u32 cctl, early_bytes = 0;
+	size_t max_bytes_per_lli, total_bytes;
+	u32 *llis_va, *last_lli;
+	struct pl08x_sg *dsg;
+
+	txd->llis_va = dma_pool_alloc(pl08x->pool, GFP_NOWAIT, &txd->llis_bus);
+	if (!txd->llis_va) {
+		dev_err(&pl08x->adev->dev, "%s no memory for llis\n", __func__);
+		return 0;
+	}
+
+	bd.txd = txd;
+	bd.lli_bus = (pl08x->lli_buses & PL08X_AHB2) ? PL080_LLI_LM_AHB2 : 0;
+	cctl = txd->cctl;
+
+	/* Find maximum width of the source bus */
+	bd.srcbus.maxwidth = pl08x_get_bytes_for_lli(pl08x, cctl, true);
+
+	/* Find maximum width of the destination bus */
+	bd.dstbus.maxwidth = pl08x_get_bytes_for_lli(pl08x, cctl, false);
+
+	list_for_each_entry(dsg, &txd->dsg_list, node) {
+		total_bytes = 0;
+		cctl = txd->cctl;
+
+		bd.srcbus.addr = dsg->src_addr;
+		bd.dstbus.addr = dsg->dst_addr;
+		bd.remainder = dsg->len;
+		bd.srcbus.buswidth = bd.srcbus.maxwidth;
+		bd.dstbus.buswidth = bd.dstbus.maxwidth;
+
+		pl08x_choose_master_bus(pl08x, &bd, &mbus, &sbus, cctl);
+
+		dev_vdbg(&pl08x->adev->dev,
+			"src=0x%08llx%s/%u dst=0x%08llx%s/%u len=%zu\n",
+			(u64)bd.srcbus.addr,
+			cctl & PL080_CONTROL_SRC_INCR ? "+" : "",
+			bd.srcbus.buswidth,
+			(u64)bd.dstbus.addr,
+			cctl & PL080_CONTROL_DST_INCR ? "+" : "",
+			bd.dstbus.buswidth,
+			bd.remainder);
+		dev_vdbg(&pl08x->adev->dev, "mbus=%s sbus=%s\n",
+			mbus == &bd.srcbus ? "src" : "dst",
+			sbus == &bd.srcbus ? "src" : "dst");
+
+		/*
+		 * Zero length is only allowed if all these requirements are
+		 * met:
+		 * - flow controller is peripheral.
+		 * - src.addr is aligned to src.width
+		 * - dst.addr is aligned to dst.width
+		 *
+		 * sg_len == 1 should be true, as there are two cases here:
+		 *
+		 * - Memory addresses are contiguous and are not scattered.
+		 *   Here only one sg will be passed by the client driver,
+		 *   with the memory address and zero length. We pass this
+		 *   to the controller; after the transfer it receives the
+		 *   last burst request from the peripheral and the
+		 *   transfer finishes.
+		 *
+		 * - Memory addresses are scattered and not contiguous.
+		 *   Since the DMA controller does not know when an LLI's
+		 *   transfer is over, it cannot load the next LLI, so only
+		 *   one LLI is supported and scattered addresses cannot be
+		 *   handled.
+		 */
+		if (!bd.remainder) {
+			u32 fc;
+
+			/* FTDMAC020 only does memory-to-memory */
+			if (pl08x->vd->ftdmac020)
+				fc = PL080_FLOW_MEM2MEM;
+			else
+				fc = (txd->ccfg & PL080_CONFIG_FLOW_CONTROL_MASK) >>
+					PL080_CONFIG_FLOW_CONTROL_SHIFT;
+			if (!((fc >= PL080_FLOW_SRC2DST_DST) &&
+					(fc <= PL080_FLOW_SRC2DST_SRC))) {
+				dev_err(&pl08x->adev->dev, "%s sg len can't be zero",
+					__func__);
+				return 0;
+			}
+
+			if (!IS_BUS_ALIGNED(&bd.srcbus) ||
+				!IS_BUS_ALIGNED(&bd.dstbus)) {
+				dev_err(&pl08x->adev->dev,
+					"%s src & dst address must be aligned to src"
+					" & dst width if peripheral is flow controller",
+					__func__);
+				return 0;
+			}
+
+			cctl = pl08x_lli_control_bits(pl08x, cctl,
+					bd.srcbus.buswidth, bd.dstbus.buswidth,
+					0);
+			pl08x_fill_lli_for_desc(pl08x, &bd, num_llis++,
+					0, cctl, 0);
+			break;
+		}
+
+		/*
+		 * Send byte by byte in the following cases:
+		 * - less than a bus width is available
+		 * - until the master bus is aligned
+		 */
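+		/*
+		 * For instance, copying 7 bytes starting at address 0x1002
+		 * over a 4-byte wide master bus yields 2 early bytes (to
+		 * reach 0x1004), one full-width LLI carrying 4 bytes, and
+		 * a final byte-width LLI for the one odd byte left over.
+		 */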
+		if (bd.remainder < mbus->buswidth)
+			early_bytes = bd.remainder;
+		else if (!IS_BUS_ALIGNED(mbus)) {
+			early_bytes = mbus->buswidth -
+				(mbus->addr & (mbus->buswidth - 1));
+			if ((bd.remainder - early_bytes) < mbus->buswidth)
+				early_bytes = bd.remainder;
+		}
+
+		if (early_bytes) {
+			dev_vdbg(&pl08x->adev->dev,
+				"%s byte width LLIs (remain 0x%08zx)\n",
+				__func__, bd.remainder);
+			prep_byte_width_lli(pl08x, &bd, &cctl, early_bytes,
+				num_llis++, &total_bytes);
+		}
+
+		if (bd.remainder) {
+			/*
+			 * Master now aligned
+			 * - if slave is not then we must set its width down
+			 */
+			if (!IS_BUS_ALIGNED(sbus)) {
+				dev_dbg(&pl08x->adev->dev,
+					"%s set down bus width to one byte\n",
+					__func__);
+
+				sbus->buswidth = 1;
+			}
+
+			/*
+			 * Bytes transferred = tsize * src width, not
+			 * MIN(buswidths)
+			 */
+			max_bytes_per_lli = bd.srcbus.buswidth *
+						pl08x->vd->max_transfer_size;
+			dev_vdbg(&pl08x->adev->dev,
+				"%s max bytes per lli = %zu\n",
+				__func__, max_bytes_per_lli);
+
+			/*
+			 * Make largest possible LLIs until less than one bus
+			 * width left
+			 */
+			while (bd.remainder > (mbus->buswidth - 1)) {
+				size_t lli_len, tsize, width;
+
+				/*
+				 * If enough left try to send max possible,
+				 * otherwise try to send the remainder
+				 */
+				lli_len = min(bd.remainder, max_bytes_per_lli);
+
+				/*
+				 * Check against maximum bus alignment:
+				 * Calculate actual transfer size in relation to
+				 * bus width and get a maximum remainder of the
+				 * highest bus width - 1
+				 */
+				width = max(mbus->buswidth, sbus->buswidth);
+				lli_len = (lli_len / width) * width;
+				tsize = lli_len / bd.srcbus.buswidth;
+
+				dev_vdbg(&pl08x->adev->dev,
+					"%s fill lli with single lli chunk of "
+					"size 0x%08zx (remainder 0x%08zx)\n",
+					__func__, lli_len, bd.remainder);
+
+				cctl = pl08x_lli_control_bits(pl08x, cctl,
+					bd.srcbus.buswidth, bd.dstbus.buswidth,
+					tsize);
+				pl08x_fill_lli_for_desc(pl08x, &bd, num_llis++,
+						lli_len, cctl, tsize);
+				total_bytes += lli_len;
+			}
+
+			/*
+			 * Send any odd bytes
+			 */
+			if (bd.remainder) {
+				dev_vdbg(&pl08x->adev->dev,
+					"%s align with boundary, send odd bytes (remain %zu)\n",
+					__func__, bd.remainder);
+				prep_byte_width_lli(pl08x, &bd, &cctl,
+					bd.remainder, num_llis++, &total_bytes);
+			}
+		}
+
+		if (total_bytes != dsg->len) {
+			dev_err(&pl08x->adev->dev,
+				"%s size of encoded lli:s don't match total txd, transferred 0x%08zx from size 0x%08zx\n",
+				__func__, total_bytes, dsg->len);
+			return 0;
+		}
+
+		if (num_llis >= MAX_NUM_TSFR_LLIS) {
+			dev_err(&pl08x->adev->dev,
+				"%s need to increase MAX_NUM_TSFR_LLIS from 0x%08x\n",
+				__func__, MAX_NUM_TSFR_LLIS);
+			return 0;
+		}
+	}
+
+	llis_va = txd->llis_va;
+	last_lli = llis_va + (num_llis - 1) * pl08x->lli_words;
+
+	if (txd->cyclic) {
+		/* Link back to the first LLI. */
+		last_lli[PL080_LLI_LLI] = txd->llis_bus | bd.lli_bus;
+	} else {
+		/* The final LLI terminates the LLI list. */
+		last_lli[PL080_LLI_LLI] = 0;
+		/* The final LLI element shall also fire an interrupt. */
+		if (pl08x->vd->ftdmac020)
+			last_lli[PL080_LLI_CCTL] &= ~FTDMAC020_LLI_TC_MSK;
+		else
+			last_lli[PL080_LLI_CCTL] |= PL080_CONTROL_TC_IRQ_EN;
+	}
+
+	pl08x_dump_lli(pl08x, llis_va, num_llis);
+
+	return num_llis;
+}
+
+static void pl08x_free_txd(struct pl08x_driver_data *pl08x,
+			   struct pl08x_txd *txd)
+{
+	struct pl08x_sg *dsg, *_dsg;
+
+	if (txd->llis_va)
+		dma_pool_free(pl08x->pool, txd->llis_va, txd->llis_bus);
+
+	list_for_each_entry_safe(dsg, _dsg, &txd->dsg_list, node) {
+		list_del(&dsg->node);
+		kfree(dsg);
+	}
+
+	kfree(txd);
+}
+
+static void pl08x_desc_free(struct virt_dma_desc *vd)
+{
+	struct pl08x_txd *txd = to_pl08x_txd(&vd->tx);
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(vd->tx.chan);
+
+	dma_descriptor_unmap(&vd->tx);
+	if (!txd->done)
+		pl08x_release_mux(plchan);
+
+	pl08x_free_txd(plchan->host, txd);
+}
+
+static void pl08x_free_txd_list(struct pl08x_driver_data *pl08x,
+				struct pl08x_dma_chan *plchan)
+{
+	LIST_HEAD(head);
+
+	vchan_get_all_descriptors(&plchan->vc, &head);
+	vchan_dma_desc_free_list(&plchan->vc, &head);
+}
+
+/*
+ * The DMA ENGINE API
+ */
+static void pl08x_free_chan_resources(struct dma_chan *chan)
+{
+	/* Ensure all queued descriptors are freed */
+	vchan_free_chan_resources(to_virt_chan(chan));
+}
+
+static struct dma_async_tx_descriptor *pl08x_prep_dma_interrupt(
+		struct dma_chan *chan, unsigned long flags)
+{
+	struct dma_async_tx_descriptor *retval = NULL;
+
+	return retval;
+}
+
+/*
+ * Code accessing dma_async_is_complete() in a tight loop may cause problems.
+ * If slaves are relying on interrupts to signal completion this function
+ * must not be called with interrupts disabled.
+ */
+static enum dma_status pl08x_dma_tx_status(struct dma_chan *chan,
+		dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+	enum dma_status ret;
+	size_t bytes = 0;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	/*
+	 * There's no point calculating the residue if there's
+	 * no txstate to store the value.
+	 */
+	if (!txstate) {
+		if (plchan->state == PL08X_CHAN_PAUSED)
+			ret = DMA_PAUSED;
+		return ret;
+	}
+
+	spin_lock_irqsave(&plchan->vc.lock, flags);
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret != DMA_COMPLETE) {
+		vd = vchan_find_desc(&plchan->vc, cookie);
+		if (vd) {
+			/* On the issued list, so hasn't been processed yet */
+			struct pl08x_txd *txd = to_pl08x_txd(&vd->tx);
+			struct pl08x_sg *dsg;
+
+			list_for_each_entry(dsg, &txd->dsg_list, node)
+				bytes += dsg->len;
+		} else {
+			bytes = pl08x_getbytes_chan(plchan);
+		}
+	}
+	spin_unlock_irqrestore(&plchan->vc.lock, flags);
+
+	/*
+	 * This cookie is not complete yet.
+	 * Report the number of bytes left in the active transactions and queue.
+	 */
+	dma_set_residue(txstate, bytes);
+
+	if (plchan->state == PL08X_CHAN_PAUSED && ret == DMA_IN_PROGRESS)
+		ret = DMA_PAUSED;
+
+	/* Whether waiting or running, we're in progress */
+	return ret;
+}
+
+/* PrimeCell DMA extension */
+struct burst_table {
+	u32 burstwords;
+	u32 reg;
+};
+
+static const struct burst_table burst_sizes[] = {
+	{
+		.burstwords = 256,
+		.reg = PL080_BSIZE_256,
+	},
+	{
+		.burstwords = 128,
+		.reg = PL080_BSIZE_128,
+	},
+	{
+		.burstwords = 64,
+		.reg = PL080_BSIZE_64,
+	},
+	{
+		.burstwords = 32,
+		.reg = PL080_BSIZE_32,
+	},
+	{
+		.burstwords = 16,
+		.reg = PL080_BSIZE_16,
+	},
+	{
+		.burstwords = 8,
+		.reg = PL080_BSIZE_8,
+	},
+	{
+		.burstwords = 4,
+		.reg = PL080_BSIZE_4,
+	},
+	{
+		.burstwords = 0,
+		.reg = PL080_BSIZE_1,
+	},
+};
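+
+/*
+ * The table above is sorted in descending order: pl08x_burst() below picks
+ * the largest hardware burst that does not exceed the requested maxburst,
+ * and anything smaller than 4 words falls back to single transfers
+ * (PL080_BSIZE_1).
+ */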
+
+/*
+ * Given the source and destination available bus masks, select which
+ * will be routed to each port.  We try to have source and destination
+ * on separate ports, but always respect the allowable settings.
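+ *
+ * For example, with src = PL08X_AHB1 and dst = PL08X_AHB1 | PL08X_AHB2,
+ * the destination is routed to AHB2 while the source stays on AHB1, so
+ * source reads and destination writes go out on different masters.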
+ */
+static u32 pl08x_select_bus(bool ftdmac020, u8 src, u8 dst)
+{
+	u32 cctl = 0;
+	u32 dst_ahb2;
+	u32 src_ahb2;
+
+	/* The FTDMAC020 use different bits to indicate src/dst bus */
+	if (ftdmac020) {
+		dst_ahb2 = FTDMAC020_LLI_DST_SEL;
+		src_ahb2 = FTDMAC020_LLI_SRC_SEL;
+	} else {
+		dst_ahb2 = PL080_CONTROL_DST_AHB2;
+		src_ahb2 = PL080_CONTROL_SRC_AHB2;
+	}
+
+	if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1)))
+		cctl |= dst_ahb2;
+	if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2)))
+		cctl |= src_ahb2;
+
+	return cctl;
+}
+
+static u32 pl08x_cctl(u32 cctl)
+{
+	cctl &= ~(PL080_CONTROL_SRC_AHB2 | PL080_CONTROL_DST_AHB2 |
+		  PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR |
+		  PL080_CONTROL_PROT_MASK);
+
+	/* Access the cell in privileged mode, non-bufferable, non-cacheable */
+	return cctl | PL080_CONTROL_PROT_SYS;
+}
+
+static u32 pl08x_width(enum dma_slave_buswidth width)
+{
+	switch (width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		return PL080_WIDTH_8BIT;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		return PL080_WIDTH_16BIT;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		return PL080_WIDTH_32BIT;
+	default:
+		return ~0;
+	}
+}
+
+static u32 pl08x_burst(u32 maxburst)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(burst_sizes); i++)
+		if (burst_sizes[i].burstwords <= maxburst)
+			break;
+
+	return burst_sizes[i].reg;
+}
+
+static u32 pl08x_get_cctl(struct pl08x_dma_chan *plchan,
+	enum dma_slave_buswidth addr_width, u32 maxburst)
+{
+	u32 width, burst, cctl = 0;
+
+	width = pl08x_width(addr_width);
+	if (width == ~0)
+		return ~0;
+
+	cctl |= width << PL080_CONTROL_SWIDTH_SHIFT;
+	cctl |= width << PL080_CONTROL_DWIDTH_SHIFT;
+
+	/*
+	 * If this channel will only request single transfers, set this
+	 * down to ONE element.  Also select one element if no maxburst
+	 * is specified.
+	 */
+	if (plchan->cd->single)
+		maxburst = 1;
+
+	burst = pl08x_burst(maxburst);
+	cctl |= burst << PL080_CONTROL_SB_SIZE_SHIFT;
+	cctl |= burst << PL080_CONTROL_DB_SIZE_SHIFT;
+
+	return pl08x_cctl(cctl);
+}
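+
+/*
+ * Rough client-side sketch (hypothetical peripheral driver, where
+ * fifo_phys stands for the peripheral's FIFO bus address): the address
+ * width and maxburst passed via dmaengine_slave_config() are what
+ * pl08x_get_cctl() above folds into the CCTL width and burst fields:
+ *
+ *	struct dma_slave_config cfg = {
+ *		.direction = DMA_MEM_TO_DEV,
+ *		.dst_addr = fifo_phys,
+ *		.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+ *		.dst_maxburst = 16,
+ *	};
+ *	dmaengine_slave_config(chan, &cfg);
+ */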
+
+/*
+ * Slave transactions callback to the slave device to allow
+ * synchronization of slave DMA signals with the DMAC enable
+ */
+static void pl08x_issue_pending(struct dma_chan *chan)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&plchan->vc.lock, flags);
+	if (vchan_issue_pending(&plchan->vc)) {
+		if (!plchan->phychan && plchan->state != PL08X_CHAN_WAITING)
+			pl08x_phy_alloc_and_start(plchan);
+	}
+	spin_unlock_irqrestore(&plchan->vc.lock, flags);
+}
+
+static struct pl08x_txd *pl08x_get_txd(struct pl08x_dma_chan *plchan)
+{
+	struct pl08x_txd *txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+
+	if (txd)
+		INIT_LIST_HEAD(&txd->dsg_list);
+	return txd;
+}
+
+static u32 pl08x_memcpy_cctl(struct pl08x_driver_data *pl08x)
+{
+	u32 cctl = 0;
+
+	/* Conjure cctl */
+	switch (pl08x->pd->memcpy_burst_size) {
+	default:
+		dev_err(&pl08x->adev->dev,
+			"illegal burst size for memcpy, set to 1\n");
+		/* Fall through */
+	case PL08X_BURST_SZ_1:
+		cctl |= PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	case PL08X_BURST_SZ_4:
+		cctl |= PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	case PL08X_BURST_SZ_8:
+		cctl |= PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	case PL08X_BURST_SZ_16:
+		cctl |= PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	case PL08X_BURST_SZ_32:
+		cctl |= PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	case PL08X_BURST_SZ_64:
+		cctl |= PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	case PL08X_BURST_SZ_128:
+		cctl |= PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	case PL08X_BURST_SZ_256:
+		cctl |= PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	}
+
+	switch (pl08x->pd->memcpy_bus_width) {
+	default:
+		dev_err(&pl08x->adev->dev,
+			"illegal bus width for memcpy, set to 8 bits\n");
+		/* Fall through */
+	case PL08X_BUS_WIDTH_8_BITS:
+		cctl |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT |
+			PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		break;
+	case PL08X_BUS_WIDTH_16_BITS:
+		cctl |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT |
+			PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		break;
+	case PL08X_BUS_WIDTH_32_BITS:
+		cctl |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT |
+			PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		break;
+	}
+
+	/* Protection flags */
+	if (pl08x->pd->memcpy_prot_buff)
+		cctl |= PL080_CONTROL_PROT_BUFF;
+	if (pl08x->pd->memcpy_prot_cache)
+		cctl |= PL080_CONTROL_PROT_CACHE;
+
+	/* We are the kernel, so we are in privileged mode */
+	cctl |= PL080_CONTROL_PROT_SYS;
+
+	/* Both to be incremented or the code will break */
+	cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR;
+
+	if (pl08x->vd->dualmaster)
+		cctl |= pl08x_select_bus(false,
+					 pl08x->mem_buses,
+					 pl08x->mem_buses);
+
+	return cctl;
+}
+
+static u32 pl08x_ftdmac020_memcpy_cctl(struct pl08x_driver_data *pl08x)
+{
+	u32 cctl = 0;
+
+	/* Conjure cctl */
+	switch (pl08x->pd->memcpy_bus_width) {
+	default:
+		dev_err(&pl08x->adev->dev,
+			"illegal bus width for memcpy, set to 8 bits\n");
+		/* Fall through */
+	case PL08X_BUS_WIDTH_8_BITS:
+		cctl |= PL080_WIDTH_8BIT << FTDMAC020_LLI_SRC_WIDTH_SHIFT |
+			PL080_WIDTH_8BIT << FTDMAC020_LLI_DST_WIDTH_SHIFT;
+		break;
+	case PL08X_BUS_WIDTH_16_BITS:
+		cctl |= PL080_WIDTH_16BIT << FTDMAC020_LLI_SRC_WIDTH_SHIFT |
+			PL080_WIDTH_16BIT << FTDMAC020_LLI_DST_WIDTH_SHIFT;
+		break;
+	case PL08X_BUS_WIDTH_32_BITS:
+		cctl |= PL080_WIDTH_32BIT << FTDMAC020_LLI_SRC_WIDTH_SHIFT |
+			PL080_WIDTH_32BIT << FTDMAC020_LLI_DST_WIDTH_SHIFT;
+		break;
+	}
+
+	/*
+	 * By default mask the TC IRQ on all LLIs, it will be unmasked on
+	 * the last LLI item by other code.
+	 */
+	cctl |= FTDMAC020_LLI_TC_MSK;
+
+	/*
+	 * Both to be incremented so leave bits FTDMAC020_LLI_SRCAD_CTL
+	 * and FTDMAC020_LLI_DSTAD_CTL as zero
+	 */
+	if (pl08x->vd->dualmaster)
+		cctl |= pl08x_select_bus(true,
+					 pl08x->mem_buses,
+					 pl08x->mem_buses);
+
+	return cctl;
+}
+
+/*
+ * Initialize a descriptor to be used by memcpy submit
+ */
+static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
+		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_txd *txd;
+	struct pl08x_sg *dsg;
+	int ret;
+
+	txd = pl08x_get_txd(plchan);
+	if (!txd) {
+		dev_err(&pl08x->adev->dev,
+			"%s no memory for descriptor\n", __func__);
+		return NULL;
+	}
+
+	dsg = kzalloc(sizeof(struct pl08x_sg), GFP_NOWAIT);
+	if (!dsg) {
+		pl08x_free_txd(pl08x, txd);
+		return NULL;
+	}
+	list_add_tail(&dsg->node, &txd->dsg_list);
+
+	dsg->src_addr = src;
+	dsg->dst_addr = dest;
+	dsg->len = len;
+	if (pl08x->vd->ftdmac020) {
+		/* Writing CCFG zero ENABLES all interrupts */
+		txd->ccfg = 0;
+		txd->cctl = pl08x_ftdmac020_memcpy_cctl(pl08x);
+	} else {
+		txd->ccfg = PL080_CONFIG_ERR_IRQ_MASK |
+			PL080_CONFIG_TC_IRQ_MASK |
+			PL080_FLOW_MEM2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+		txd->cctl = pl08x_memcpy_cctl(pl08x);
+	}
+
+	ret = pl08x_fill_llis_for_desc(plchan->host, txd);
+	if (!ret) {
+		pl08x_free_txd(pl08x, txd);
+		return NULL;
+	}
+
+	return vchan_tx_prep(&plchan->vc, &txd->vd, flags);
+}
+
+static struct pl08x_txd *pl08x_init_txd(
+		struct dma_chan *chan,
+		enum dma_transfer_direction direction,
+		dma_addr_t *slave_addr)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_txd *txd;
+	enum dma_slave_buswidth addr_width;
+	int ret, tmp;
+	u8 src_buses, dst_buses;
+	u32 maxburst, cctl;
+
+	txd = pl08x_get_txd(plchan);
+	if (!txd) {
+		dev_err(&pl08x->adev->dev, "%s no txd\n", __func__);
+		return NULL;
+	}
+
+	/*
+	 * Set up the addresses; the PrimeCell-configured address
+	 * takes precedence, since it may be used to configure the
+	 * channel target address dynamically at runtime.
+	 */
+	if (direction == DMA_MEM_TO_DEV) {
+		cctl = PL080_CONTROL_SRC_INCR;
+		*slave_addr = plchan->cfg.dst_addr;
+		addr_width = plchan->cfg.dst_addr_width;
+		maxburst = plchan->cfg.dst_maxburst;
+		src_buses = pl08x->mem_buses;
+		dst_buses = plchan->cd->periph_buses;
+	} else if (direction == DMA_DEV_TO_MEM) {
+		cctl = PL080_CONTROL_DST_INCR;
+		*slave_addr = plchan->cfg.src_addr;
+		addr_width = plchan->cfg.src_addr_width;
+		maxburst = plchan->cfg.src_maxburst;
+		src_buses = plchan->cd->periph_buses;
+		dst_buses = pl08x->mem_buses;
+	} else {
+		pl08x_free_txd(pl08x, txd);
+		dev_err(&pl08x->adev->dev,
+			"%s direction unsupported\n", __func__);
+		return NULL;
+	}
+
+	cctl |= pl08x_get_cctl(plchan, addr_width, maxburst);
+	if (cctl == ~0) {
+		pl08x_free_txd(pl08x, txd);
+		dev_err(&pl08x->adev->dev,
+			"DMA slave configuration botched?\n");
+		return NULL;
+	}
+
+	txd->cctl = cctl | pl08x_select_bus(false, src_buses, dst_buses);
+
+	if (plchan->cfg.device_fc)
+		tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER_PER :
+			PL080_FLOW_PER2MEM_PER;
+	else
+		tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER :
+			PL080_FLOW_PER2MEM;
+
+	txd->ccfg = PL080_CONFIG_ERR_IRQ_MASK |
+		PL080_CONFIG_TC_IRQ_MASK |
+		tmp << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+
+	ret = pl08x_request_mux(plchan);
+	if (ret < 0) {
+		pl08x_free_txd(pl08x, txd);
+		dev_dbg(&pl08x->adev->dev,
+			"unable to mux for transfer on %s due to platform restrictions\n",
+			plchan->name);
+		return NULL;
+	}
+
+	dev_dbg(&pl08x->adev->dev, "allocated DMA request signal %d for xfer on %s\n",
+		 plchan->signal, plchan->name);
+
+	/* Assign the flow control signal to this channel */
+	if (direction == DMA_MEM_TO_DEV)
+		txd->ccfg |= plchan->signal << PL080_CONFIG_DST_SEL_SHIFT;
+	else
+		txd->ccfg |= plchan->signal << PL080_CONFIG_SRC_SEL_SHIFT;
+
+	return txd;
+}
+
+static int pl08x_tx_add_sg(struct pl08x_txd *txd,
+			   enum dma_transfer_direction direction,
+			   dma_addr_t slave_addr,
+			   dma_addr_t buf_addr,
+			   unsigned int len)
+{
+	struct pl08x_sg *dsg;
+
+	dsg = kzalloc(sizeof(struct pl08x_sg), GFP_NOWAIT);
+	if (!dsg)
+		return -ENOMEM;
+
+	list_add_tail(&dsg->node, &txd->dsg_list);
+
+	dsg->len = len;
+	if (direction == DMA_MEM_TO_DEV) {
+		dsg->src_addr = buf_addr;
+		dsg->dst_addr = slave_addr;
+	} else {
+		dsg->src_addr = slave_addr;
+		dsg->dst_addr = buf_addr;
+	}
+
+	return 0;
+}
+
+static struct dma_async_tx_descriptor *pl08x_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_txd *txd;
+	struct scatterlist *sg;
+	int ret, tmp;
+	dma_addr_t slave_addr;
+
+	dev_dbg(&pl08x->adev->dev, "%s prepare transaction of %d bytes from %s\n",
+			__func__, sg_dma_len(sgl), plchan->name);
+
+	txd = pl08x_init_txd(chan, direction, &slave_addr);
+	if (!txd)
+		return NULL;
+
+	for_each_sg(sgl, sg, sg_len, tmp) {
+		ret = pl08x_tx_add_sg(txd, direction, slave_addr,
+				      sg_dma_address(sg),
+				      sg_dma_len(sg));
+		if (ret) {
+			pl08x_release_mux(plchan);
+			pl08x_free_txd(pl08x, txd);
+			dev_err(&pl08x->adev->dev, "%s no mem for pl080 sg\n",
+					__func__);
+			return NULL;
+		}
+	}
+
+	ret = pl08x_fill_llis_for_desc(plchan->host, txd);
+	if (!ret) {
+		pl08x_release_mux(plchan);
+		pl08x_free_txd(pl08x, txd);
+		return NULL;
+	}
+
+	return vchan_tx_prep(&plchan->vc, &txd->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *pl08x_prep_dma_cyclic(
+		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+		size_t period_len, enum dma_transfer_direction direction,
+		unsigned long flags)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_txd *txd;
+	int ret, tmp;
+	dma_addr_t slave_addr;
+
+	dev_dbg(&pl08x->adev->dev,
+		"%s prepare cyclic transaction of %zd/%zd bytes %s %s\n",
+		__func__, period_len, buf_len,
+		direction == DMA_MEM_TO_DEV ? "to" : "from",
+		plchan->name);
+
+	txd = pl08x_init_txd(chan, direction, &slave_addr);
+	if (!txd)
+		return NULL;
+
+	txd->cyclic = true;
+	txd->cctl |= PL080_CONTROL_TC_IRQ_EN;
+	for (tmp = 0; tmp < buf_len; tmp += period_len) {
+		ret = pl08x_tx_add_sg(txd, direction, slave_addr,
+				      buf_addr + tmp, period_len);
+		if (ret) {
+			pl08x_release_mux(plchan);
+			pl08x_free_txd(pl08x, txd);
+			return NULL;
+		}
+	}
+
+	ret = pl08x_fill_llis_for_desc(plchan->host, txd);
+	if (!ret) {
+		pl08x_release_mux(plchan);
+		pl08x_free_txd(pl08x, txd);
+		return NULL;
+	}
+
+	return vchan_tx_prep(&plchan->vc, &txd->vd, flags);
+}
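+
+/*
+ * Illustrative example (added note): a cyclic request with buf_len = 4096
+ * and period_len = 1024 is split by the loop above into four scatter
+ * entries of 1024 bytes each, so the client's cyclic callback can run once
+ * per 1024-byte period.
+ */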
+
+static int pl08x_config(struct dma_chan *chan,
+			struct dma_slave_config *config)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+
+	if (!plchan->slave)
+		return -EINVAL;
+
+	/* Reject definitely invalid configurations */
+	if (config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES ||
+	    config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)
+		return -EINVAL;
+
+	if (config->device_fc && pl08x->vd->pl080s) {
+		dev_err(&pl08x->adev->dev,
+			"%s: PL080S does not support peripheral flow control\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	plchan->cfg = *config;
+
+	return 0;
+}
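+
+/*
+ * Illustrative client usage (added note; fifo_phys_addr and chan are
+ * hypothetical, not part of this driver):
+ *
+ *	struct dma_slave_config cfg = {
+ *		.dst_addr = fifo_phys_addr,
+ *		.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+ *		.dst_maxburst = 4,
+ *	};
+ *
+ *	dmaengine_slave_config(chan, &cfg);
+ *
+ * ends up here via device_config; the settings are simply stored in
+ * plchan->cfg for the prep functions to use later.
+ */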
+
+static int pl08x_terminate_all(struct dma_chan *chan)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	unsigned long flags;
+
+	spin_lock_irqsave(&plchan->vc.lock, flags);
+	if (!plchan->phychan && !plchan->at) {
+		spin_unlock_irqrestore(&plchan->vc.lock, flags);
+		return 0;
+	}
+
+	plchan->state = PL08X_CHAN_IDLE;
+
+	if (plchan->phychan) {
+		/*
+		 * Mark physical channel as free and free any slave
+		 * signal
+		 */
+		pl08x_phy_free(plchan);
+	}
+	/* Dequeue jobs and free LLIs */
+	if (plchan->at) {
+		vchan_terminate_vdesc(&plchan->at->vd);
+		plchan->at = NULL;
+	}
+	/* Dequeue jobs not yet fired as well */
+	pl08x_free_txd_list(pl08x, plchan);
+
+	spin_unlock_irqrestore(&plchan->vc.lock, flags);
+
+	return 0;
+}
+
+static void pl08x_synchronize(struct dma_chan *chan)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+
+	vchan_synchronize(&plchan->vc);
+}
+
+static int pl08x_pause(struct dma_chan *chan)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	unsigned long flags;
+
+	/*
+	 * Anything succeeds on channels with no physical allocation and
+	 * no queued transfers.
+	 */
+	spin_lock_irqsave(&plchan->vc.lock, flags);
+	if (!plchan->phychan && !plchan->at) {
+		spin_unlock_irqrestore(&plchan->vc.lock, flags);
+		return 0;
+	}
+
+	pl08x_pause_phy_chan(plchan->phychan);
+	plchan->state = PL08X_CHAN_PAUSED;
+
+	spin_unlock_irqrestore(&plchan->vc.lock, flags);
+
+	return 0;
+}
+
+static int pl08x_resume(struct dma_chan *chan)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	unsigned long flags;
+
+	/*
+	 * Anything succeeds on channels with no physical allocation and
+	 * no queued transfers.
+	 */
+	spin_lock_irqsave(&plchan->vc.lock, flags);
+	if (!plchan->phychan && !plchan->at) {
+		spin_unlock_irqrestore(&plchan->vc.lock, flags);
+		return 0;
+	}
+
+	pl08x_resume_phy_chan(plchan->phychan);
+	plchan->state = PL08X_CHAN_RUNNING;
+
+	spin_unlock_irqrestore(&plchan->vc.lock, flags);
+
+	return 0;
+}
+
+bool pl08x_filter_id(struct dma_chan *chan, void *chan_id)
+{
+	struct pl08x_dma_chan *plchan;
+	char *name = chan_id;
+
+	/* Reject channels for devices not bound to this driver */
+	if (chan->device->dev->driver != &pl08x_amba_driver.drv)
+		return false;
+
+	plchan = to_pl08x_chan(chan);
+
+	/* Check that the channel is not taken! */
+	if (!strcmp(plchan->name, name))
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(pl08x_filter_id);
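+
+/*
+ * Illustrative example (added note): a client on a board using platform
+ * data could request a channel by its bus_id name with this filter, e.g.
+ *
+ *	struct dma_chan *chan;
+ *	dma_cap_mask_t mask;
+ *
+ *	dma_cap_zero(mask);
+ *	dma_cap_set(DMA_SLAVE, mask);
+ *	chan = dma_request_channel(mask, pl08x_filter_id, "uart0_tx");
+ *
+ * where "uart0_tx" is a hypothetical name taken from the platform's
+ * pl08x_channel_data.
+ */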
+
+static bool pl08x_filter_fn(struct dma_chan *chan, void *chan_id)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+
+	return plchan->cd == chan_id;
+}
+
+/*
+ * Just check that the device is there and active
+ * TODO: turn this bit on/off depending on the number of physical channels
+ * actually in use; if none are in use, shut it off to save some power and
+ * cut the clock at the same time.
+ */
+static void pl08x_ensure_on(struct pl08x_driver_data *pl08x)
+{
+	/* The Nomadik variant does not have the config register */
+	if (pl08x->vd->nomadik)
+		return;
+	/* The FTDMAC020 variant does this in another register */
+	if (pl08x->vd->ftdmac020) {
+		writel(PL080_CONFIG_ENABLE, pl08x->base + FTDMAC020_CSR);
+		return;
+	}
+	writel(PL080_CONFIG_ENABLE, pl08x->base + PL080_CONFIG);
+}
+
+static irqreturn_t pl08x_irq(int irq, void *dev)
+{
+	struct pl08x_driver_data *pl08x = dev;
+	u32 mask = 0, err, tc, i;
+
+	/* check & clear - ERR & TC interrupts */
+	err = readl(pl08x->base + PL080_ERR_STATUS);
+	if (err) {
+		dev_err(&pl08x->adev->dev, "%s error interrupt, register value 0x%08x\n",
+			__func__, err);
+		writel(err, pl08x->base + PL080_ERR_CLEAR);
+	}
+	tc = readl(pl08x->base + PL080_TC_STATUS);
+	if (tc)
+		writel(tc, pl08x->base + PL080_TC_CLEAR);
+
+	if (!err && !tc)
+		return IRQ_NONE;
+
+	for (i = 0; i < pl08x->vd->channels; i++) {
+		if ((BIT(i) & err) || (BIT(i) & tc)) {
+			/* Locate physical channel */
+			struct pl08x_phy_chan *phychan = &pl08x->phy_chans[i];
+			struct pl08x_dma_chan *plchan = phychan->serving;
+			struct pl08x_txd *tx;
+
+			if (!plchan) {
+				dev_err(&pl08x->adev->dev,
+					"%s Error TC interrupt on unused channel: 0x%08x\n",
+					__func__, i);
+				continue;
+			}
+
+			spin_lock(&plchan->vc.lock);
+			tx = plchan->at;
+			if (tx && tx->cyclic) {
+				vchan_cyclic_callback(&tx->vd);
+			} else if (tx) {
+				plchan->at = NULL;
+				/*
+				 * This descriptor is done, release its mux
+				 * reservation.
+				 */
+				pl08x_release_mux(plchan);
+				tx->done = true;
+				vchan_cookie_complete(&tx->vd);
+
+				/*
+				 * And start the next descriptor (if any),
+				 * otherwise free this channel.
+				 */
+				if (vchan_next_desc(&plchan->vc))
+					pl08x_start_next_txd(plchan);
+				else
+					pl08x_phy_free(plchan);
+			}
+			spin_unlock(&plchan->vc.lock);
+
+			mask |= BIT(i);
+		}
+	}
+
+	return mask ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static void pl08x_dma_slave_init(struct pl08x_dma_chan *chan)
+{
+	chan->slave = true;
+	chan->name = chan->cd->bus_id;
+	chan->cfg.src_addr = chan->cd->addr;
+	chan->cfg.dst_addr = chan->cd->addr;
+}
+
+/*
+ * Initialise the DMAC memcpy/slave channels.
+ * Make a local wrapper to hold required data
+ */
+static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x,
+		struct dma_device *dmadev, unsigned int channels, bool slave)
+{
+	struct pl08x_dma_chan *chan;
+	int i;
+
+	INIT_LIST_HEAD(&dmadev->channels);
+
+	/*
+	 * Register as many memcpy channels as we have physical channels;
+	 * we won't always be able to use all of them, but the code will
+	 * have to cope with that situation.
+	 */
+	for (i = 0; i < channels; i++) {
+		chan = kzalloc(sizeof(*chan), GFP_KERNEL);
+		if (!chan)
+			return -ENOMEM;
+
+		chan->host = pl08x;
+		chan->state = PL08X_CHAN_IDLE;
+		chan->signal = -1;
+
+		if (slave) {
+			chan->cd = &pl08x->pd->slave_channels[i];
+			/*
+			 * Some implementations have muxed signals, whereas some
+			 * use a mux in front of the signals and need dynamic
+			 * assignment of signals.
+			 */
+			chan->signal = i;
+			pl08x_dma_slave_init(chan);
+		} else {
+			chan->cd = kzalloc(sizeof(*chan->cd), GFP_KERNEL);
+			if (!chan->cd) {
+				kfree(chan);
+				return -ENOMEM;
+			}
+			chan->cd->bus_id = "memcpy";
+			chan->cd->periph_buses = pl08x->pd->mem_buses;
+			chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i);
+			if (!chan->name) {
+				kfree(chan->cd);
+				kfree(chan);
+				return -ENOMEM;
+			}
+		}
+		dev_dbg(&pl08x->adev->dev,
+			 "initialize virtual channel \"%s\"\n",
+			 chan->name);
+
+		chan->vc.desc_free = pl08x_desc_free;
+		vchan_init(&chan->vc, dmadev);
+	}
+	dev_info(&pl08x->adev->dev, "initialized %d virtual %s channels\n",
+		 i, slave ? "slave" : "memcpy");
+	return i;
+}
+
+static void pl08x_free_virtual_channels(struct dma_device *dmadev)
+{
+	struct pl08x_dma_chan *chan = NULL;
+	struct pl08x_dma_chan *next;
+
+	list_for_each_entry_safe(chan,
+				 next, &dmadev->channels, vc.chan.device_node) {
+		list_del(&chan->vc.chan.device_node);
+		kfree(chan);
+	}
+}
+
+#ifdef CONFIG_DEBUG_FS
+static const char *pl08x_state_str(enum pl08x_dma_chan_state state)
+{
+	switch (state) {
+	case PL08X_CHAN_IDLE:
+		return "idle";
+	case PL08X_CHAN_RUNNING:
+		return "running";
+	case PL08X_CHAN_PAUSED:
+		return "paused";
+	case PL08X_CHAN_WAITING:
+		return "waiting";
+	default:
+		break;
+	}
+	return "UNKNOWN STATE";
+}
+
+static int pl08x_debugfs_show(struct seq_file *s, void *data)
+{
+	struct pl08x_driver_data *pl08x = s->private;
+	struct pl08x_dma_chan *chan;
+	struct pl08x_phy_chan *ch;
+	unsigned long flags;
+	int i;
+
+	seq_printf(s, "PL08x physical channels:\n");
+	seq_printf(s, "CHANNEL:\tUSER:\n");
+	seq_printf(s, "--------\t-----\n");
+	for (i = 0; i < pl08x->vd->channels; i++) {
+		struct pl08x_dma_chan *virt_chan;
+
+		ch = &pl08x->phy_chans[i];
+
+		spin_lock_irqsave(&ch->lock, flags);
+		virt_chan = ch->serving;
+
+		seq_printf(s, "%d\t\t%s%s\n",
+			   ch->id,
+			   virt_chan ? virt_chan->name : "(none)",
+			   ch->locked ? " LOCKED" : "");
+
+		spin_unlock_irqrestore(&ch->lock, flags);
+	}
+
+	seq_printf(s, "\nPL08x virtual memcpy channels:\n");
+	seq_printf(s, "CHANNEL:\tSTATE:\n");
+	seq_printf(s, "--------\t------\n");
+	list_for_each_entry(chan, &pl08x->memcpy.channels, vc.chan.device_node) {
+		seq_printf(s, "%s\t\t%s\n", chan->name,
+			   pl08x_state_str(chan->state));
+	}
+
+	if (pl08x->has_slave) {
+		seq_printf(s, "\nPL08x virtual slave channels:\n");
+		seq_printf(s, "CHANNEL:\tSTATE:\n");
+		seq_printf(s, "--------\t------\n");
+		list_for_each_entry(chan, &pl08x->slave.channels,
+				    vc.chan.device_node) {
+			seq_printf(s, "%s\t\t%s\n", chan->name,
+				   pl08x_state_str(chan->state));
+		}
+	}
+
+	return 0;
+}
+
+static int pl08x_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, pl08x_debugfs_show, inode->i_private);
+}
+
+static const struct file_operations pl08x_debugfs_operations = {
+	.open		= pl08x_debugfs_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static void init_pl08x_debugfs(struct pl08x_driver_data *pl08x)
+{
+	/* Expose a simple debugfs interface to view the channel state */
+	(void) debugfs_create_file(dev_name(&pl08x->adev->dev),
+			S_IFREG | S_IRUGO, NULL, pl08x,
+			&pl08x_debugfs_operations);
+}
+
+#else
+static inline void init_pl08x_debugfs(struct pl08x_driver_data *pl08x)
+{
+}
+#endif
+
+#ifdef CONFIG_OF
+static struct dma_chan *pl08x_find_chan_id(struct pl08x_driver_data *pl08x,
+					 u32 id)
+{
+	struct pl08x_dma_chan *chan;
+
+	/* Trying to get a slave channel from something with no slave support */
+	if (!pl08x->has_slave)
+		return NULL;
+
+	list_for_each_entry(chan, &pl08x->slave.channels, vc.chan.device_node) {
+		if (chan->signal == id)
+			return &chan->vc.chan;
+	}
+
+	return NULL;
+}
+
+static struct dma_chan *pl08x_of_xlate(struct of_phandle_args *dma_spec,
+				       struct of_dma *ofdma)
+{
+	struct pl08x_driver_data *pl08x = ofdma->of_dma_data;
+	struct dma_chan *dma_chan;
+	struct pl08x_dma_chan *plchan;
+
+	if (!pl08x)
+		return NULL;
+
+	if (dma_spec->args_count != 2) {
+		dev_err(&pl08x->adev->dev,
+			"DMA channel translation requires two cells\n");
+		return NULL;
+	}
+
+	dma_chan = pl08x_find_chan_id(pl08x, dma_spec->args[0]);
+	if (!dma_chan) {
+		dev_err(&pl08x->adev->dev,
+			"DMA slave channel not found\n");
+		return NULL;
+	}
+
+	plchan = to_pl08x_chan(dma_chan);
+	dev_dbg(&pl08x->adev->dev,
+		"translated channel for signal %d\n",
+		dma_spec->args[0]);
+
+	/* Augment channel data for applicable AHB buses */
+	plchan->cd->periph_buses = dma_spec->args[1];
+	return dma_get_slave_channel(dma_chan);
+}
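+
+/*
+ * Illustrative device tree usage (added note, values are hypothetical):
+ * the two-cell specifier checked above carries the request signal number
+ * and the AHB master mask for the peripheral, e.g.
+ *
+ *	uart0: serial@80120000 {
+ *		dmas = <&dma 14 1>, <&dma 15 1>;
+ *		dma-names = "rx", "tx";
+ *	};
+ *
+ * where 14 and 15 are matched against chan->signal and 1 selects
+ * PL08X_AHB1 for periph_buses.
+ */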
+
+static int pl08x_of_probe(struct amba_device *adev,
+			  struct pl08x_driver_data *pl08x,
+			  struct device_node *np)
+{
+	struct pl08x_platform_data *pd;
+	struct pl08x_channel_data *chanp = NULL;
+	u32 val;
+	int ret;
+	int i;
+
+	pd = devm_kzalloc(&adev->dev, sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return -ENOMEM;
+
+	/* Eligible bus masters for fetching LLIs */
+	if (of_property_read_bool(np, "lli-bus-interface-ahb1"))
+		pd->lli_buses |= PL08X_AHB1;
+	if (of_property_read_bool(np, "lli-bus-interface-ahb2"))
+		pd->lli_buses |= PL08X_AHB2;
+	if (!pd->lli_buses) {
+		dev_info(&adev->dev, "no bus masters for LLIs stated, assume all\n");
+		pd->lli_buses |= PL08X_AHB1 | PL08X_AHB2;
+	}
+
+	/* Eligible bus masters for memory access */
+	if (of_property_read_bool(np, "mem-bus-interface-ahb1"))
+		pd->mem_buses |= PL08X_AHB1;
+	if (of_property_read_bool(np, "mem-bus-interface-ahb2"))
+		pd->mem_buses |= PL08X_AHB2;
+	if (!pd->mem_buses) {
+		dev_info(&adev->dev, "no bus masters for memory stated, assume all\n");
+		pd->mem_buses |= PL08X_AHB1 | PL08X_AHB2;
+	}
+
+	/* Parse the memcpy channel properties */
+	ret = of_property_read_u32(np, "memcpy-burst-size", &val);
+	if (ret) {
+		dev_info(&adev->dev, "no memcpy burst size specified, using 1 byte\n");
+		val = 1;
+	}
+	switch (val) {
+	default:
+		dev_err(&adev->dev, "illegal burst size for memcpy, set to 1\n");
+		/* Fall through */
+	case 1:
+		pd->memcpy_burst_size = PL08X_BURST_SZ_1;
+		break;
+	case 4:
+		pd->memcpy_burst_size = PL08X_BURST_SZ_4;
+		break;
+	case 8:
+		pd->memcpy_burst_size = PL08X_BURST_SZ_8;
+		break;
+	case 16:
+		pd->memcpy_burst_size = PL08X_BURST_SZ_16;
+		break;
+	case 32:
+		pd->memcpy_burst_size = PL08X_BURST_SZ_32;
+		break;
+	case 64:
+		pd->memcpy_burst_size = PL08X_BURST_SZ_64;
+		break;
+	case 128:
+		pd->memcpy_burst_size = PL08X_BURST_SZ_128;
+		break;
+	case 256:
+		pd->memcpy_burst_size = PL08X_BURST_SZ_256;
+		break;
+	}
+
+	ret = of_property_read_u32(np, "memcpy-bus-width", &val);
+	if (ret) {
+		dev_info(&adev->dev, "no memcpy bus width specified, using 8 bits\n");
+		val = 8;
+	}
+	switch (val) {
+	default:
+		dev_err(&adev->dev, "illegal bus width for memcpy, set to 8 bits\n");
+		/* Fall through */
+	case 8:
+		pd->memcpy_bus_width = PL08X_BUS_WIDTH_8_BITS;
+		break;
+	case 16:
+		pd->memcpy_bus_width = PL08X_BUS_WIDTH_16_BITS;
+		break;
+	case 32:
+		pd->memcpy_bus_width = PL08X_BUS_WIDTH_32_BITS;
+		break;
+	}
+
+	/*
+	 * Allocate channel data for all possible slave channels (one
+	 * for each possible signal); channels will then be allocated
+	 * for a device and have their AHB interfaces set up at
+	 * translation time.
+	 */
+	if (pl08x->vd->signals) {
+		chanp = devm_kcalloc(&adev->dev,
+				     pl08x->vd->signals,
+				     sizeof(struct pl08x_channel_data),
+				     GFP_KERNEL);
+		if (!chanp)
+			return -ENOMEM;
+
+		pd->slave_channels = chanp;
+		for (i = 0; i < pl08x->vd->signals; i++) {
+			/*
+			 * chanp->periph_buses will be assigned at translation
+			 */
+			chanp->bus_id = kasprintf(GFP_KERNEL, "slave%d", i);
+			chanp++;
+		}
+		pd->num_slave_channels = pl08x->vd->signals;
+	}
+
+	pl08x->pd = pd;
+
+	return of_dma_controller_register(adev->dev.of_node, pl08x_of_xlate,
+					  pl08x);
+}
+#else
+static inline int pl08x_of_probe(struct amba_device *adev,
+				 struct pl08x_driver_data *pl08x,
+				 struct device_node *np)
+{
+	return -EINVAL;
+}
+#endif
+
+static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	struct pl08x_driver_data *pl08x;
+	struct vendor_data *vd = id->data;
+	struct device_node *np = adev->dev.of_node;
+	u32 tsfr_size;
+	int ret = 0;
+	int i;
+
+	ret = amba_request_regions(adev, NULL);
+	if (ret)
+		return ret;
+
+	/* Ensure that we can do DMA */
+	ret = dma_set_mask_and_coherent(&adev->dev, DMA_BIT_MASK(32));
+	if (ret)
+		goto out_no_pl08x;
+
+	/* Create the driver state holder */
+	pl08x = kzalloc(sizeof(*pl08x), GFP_KERNEL);
+	if (!pl08x) {
+		ret = -ENOMEM;
+		goto out_no_pl08x;
+	}
+
+	/* Assign useful pointers to the driver state */
+	pl08x->adev = adev;
+	pl08x->vd = vd;
+
+	pl08x->base = ioremap(adev->res.start, resource_size(&adev->res));
+	if (!pl08x->base) {
+		ret = -ENOMEM;
+		goto out_no_ioremap;
+	}
+
+	if (vd->ftdmac020) {
+		u32 val;
+
+		val = readl(pl08x->base + FTDMAC020_REVISION);
+		dev_info(&pl08x->adev->dev, "FTDMAC020 %d.%d rel %d\n",
+			 (val >> 16) & 0xff, (val >> 8) & 0xff, val & 0xff);
+		val = readl(pl08x->base + FTDMAC020_FEATURE);
+		dev_info(&pl08x->adev->dev, "FTDMAC020 %d channels, "
+			 "%s built-in bridge, %s, %s linked lists\n",
+			 (val >> 12) & 0x0f,
+			 (val & BIT(10)) ? "no" : "has",
+			 (val & BIT(9)) ? "AHB0 and AHB1" : "AHB0",
+			 (val & BIT(8)) ? "supports" : "does not support");
+
+		/* Vendor data from feature register */
+		if (!(val & BIT(8)))
+			dev_warn(&pl08x->adev->dev,
+				 "linked lists not supported, required\n");
+		vd->channels = (val >> 12) & 0x0f;
+		vd->dualmaster = !!(val & BIT(9));
+	}
+
+	/* Initialize memcpy engine */
+	dma_cap_set(DMA_MEMCPY, pl08x->memcpy.cap_mask);
+	pl08x->memcpy.dev = &adev->dev;
+	pl08x->memcpy.device_free_chan_resources = pl08x_free_chan_resources;
+	pl08x->memcpy.device_prep_dma_memcpy = pl08x_prep_dma_memcpy;
+	pl08x->memcpy.device_prep_dma_interrupt = pl08x_prep_dma_interrupt;
+	pl08x->memcpy.device_tx_status = pl08x_dma_tx_status;
+	pl08x->memcpy.device_issue_pending = pl08x_issue_pending;
+	pl08x->memcpy.device_config = pl08x_config;
+	pl08x->memcpy.device_pause = pl08x_pause;
+	pl08x->memcpy.device_resume = pl08x_resume;
+	pl08x->memcpy.device_terminate_all = pl08x_terminate_all;
+	pl08x->memcpy.device_synchronize = pl08x_synchronize;
+	pl08x->memcpy.src_addr_widths = PL80X_DMA_BUSWIDTHS;
+	pl08x->memcpy.dst_addr_widths = PL80X_DMA_BUSWIDTHS;
+	pl08x->memcpy.directions = BIT(DMA_MEM_TO_MEM);
+	pl08x->memcpy.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+	if (vd->ftdmac020)
+		pl08x->memcpy.copy_align = DMAENGINE_ALIGN_4_BYTES;
+
+
+	/*
+	 * Initialize slave engine, if the block has no signals, that means
+	 * we have no slave support.
+	 */
+	if (vd->signals) {
+		pl08x->has_slave = true;
+		dma_cap_set(DMA_SLAVE, pl08x->slave.cap_mask);
+		dma_cap_set(DMA_CYCLIC, pl08x->slave.cap_mask);
+		pl08x->slave.dev = &adev->dev;
+		pl08x->slave.device_free_chan_resources =
+			pl08x_free_chan_resources;
+		pl08x->slave.device_prep_dma_interrupt =
+			pl08x_prep_dma_interrupt;
+		pl08x->slave.device_tx_status = pl08x_dma_tx_status;
+		pl08x->slave.device_issue_pending = pl08x_issue_pending;
+		pl08x->slave.device_prep_slave_sg = pl08x_prep_slave_sg;
+		pl08x->slave.device_prep_dma_cyclic = pl08x_prep_dma_cyclic;
+		pl08x->slave.device_config = pl08x_config;
+		pl08x->slave.device_pause = pl08x_pause;
+		pl08x->slave.device_resume = pl08x_resume;
+		pl08x->slave.device_terminate_all = pl08x_terminate_all;
+		pl08x->slave.device_synchronize = pl08x_synchronize;
+		pl08x->slave.src_addr_widths = PL80X_DMA_BUSWIDTHS;
+		pl08x->slave.dst_addr_widths = PL80X_DMA_BUSWIDTHS;
+		pl08x->slave.directions =
+			BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+		pl08x->slave.residue_granularity =
+			DMA_RESIDUE_GRANULARITY_SEGMENT;
+	}
+
+	/* Get the platform data */
+	pl08x->pd = dev_get_platdata(&adev->dev);
+	if (!pl08x->pd) {
+		if (np) {
+			ret = pl08x_of_probe(adev, pl08x, np);
+			if (ret)
+				goto out_no_platdata;
+		} else {
+			dev_err(&adev->dev, "no platform data supplied\n");
+			ret = -EINVAL;
+			goto out_no_platdata;
+		}
+	} else {
+		pl08x->slave.filter.map = pl08x->pd->slave_map;
+		pl08x->slave.filter.mapcnt = pl08x->pd->slave_map_len;
+		pl08x->slave.filter.fn = pl08x_filter_fn;
+	}
+
+	/* By default, AHB1 only.  If dualmaster, from platform */
+	pl08x->lli_buses = PL08X_AHB1;
+	pl08x->mem_buses = PL08X_AHB1;
+	if (pl08x->vd->dualmaster) {
+		pl08x->lli_buses = pl08x->pd->lli_buses;
+		pl08x->mem_buses = pl08x->pd->mem_buses;
+	}
+
+	if (vd->pl080s)
+		pl08x->lli_words = PL080S_LLI_WORDS;
+	else
+		pl08x->lli_words = PL080_LLI_WORDS;
+	tsfr_size = MAX_NUM_TSFR_LLIS * pl08x->lli_words * sizeof(u32);
+
+	/* A DMA memory pool for LLIs, aligned to PL08X_ALIGN bytes */
+	pl08x->pool = dma_pool_create(DRIVER_NAME, &pl08x->adev->dev,
+						tsfr_size, PL08X_ALIGN, 0);
+	if (!pl08x->pool) {
+		ret = -ENOMEM;
+		goto out_no_lli_pool;
+	}
+
+	/* Turn on the PL08x */
+	pl08x_ensure_on(pl08x);
+
+	/* Clear any pending interrupts */
+	if (vd->ftdmac020)
+		/* This variant has error IRQs in bits 16-19 */
+		writel(0x0000FFFF, pl08x->base + PL080_ERR_CLEAR);
+	else
+		writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR);
+	writel(0x000000FF, pl08x->base + PL080_TC_CLEAR);
+
+	/* Attach the interrupt handler */
+	ret = request_irq(adev->irq[0], pl08x_irq, 0, DRIVER_NAME, pl08x);
+	if (ret) {
+		dev_err(&adev->dev, "%s failed to request interrupt %d\n",
+			__func__, adev->irq[0]);
+		goto out_no_irq;
+	}
+
+	/* Initialize physical channels */
+	pl08x->phy_chans = kzalloc((vd->channels * sizeof(*pl08x->phy_chans)),
+			GFP_KERNEL);
+	if (!pl08x->phy_chans) {
+		ret = -ENOMEM;
+		goto out_no_phychans;
+	}
+
+	for (i = 0; i < vd->channels; i++) {
+		struct pl08x_phy_chan *ch = &pl08x->phy_chans[i];
+
+		ch->id = i;
+		ch->base = pl08x->base + PL080_Cx_BASE(i);
+		if (vd->ftdmac020) {
+			/* FTDMA020 has a special channel busy register */
+			ch->reg_busy = ch->base + FTDMAC020_CH_BUSY;
+			ch->reg_config = ch->base + FTDMAC020_CH_CFG;
+			ch->reg_control = ch->base + FTDMAC020_CH_CSR;
+			ch->reg_src = ch->base + FTDMAC020_CH_SRC_ADDR;
+			ch->reg_dst = ch->base + FTDMAC020_CH_DST_ADDR;
+			ch->reg_lli = ch->base + FTDMAC020_CH_LLP;
+			ch->ftdmac020 = true;
+		} else {
+			ch->reg_config = ch->base + vd->config_offset;
+			ch->reg_control = ch->base + PL080_CH_CONTROL;
+			ch->reg_src = ch->base + PL080_CH_SRC_ADDR;
+			ch->reg_dst = ch->base + PL080_CH_DST_ADDR;
+			ch->reg_lli = ch->base + PL080_CH_LLI;
+		}
+		if (vd->pl080s)
+			ch->pl080s = true;
+
+		spin_lock_init(&ch->lock);
+
+		/*
+		 * Nomadik variants can have channels that are locked
+		 * down for the secure world only. Lock up these channels
+		 * by perpetually serving a dummy virtual channel.
+		 */
+		if (vd->nomadik) {
+			u32 val;
+
+			val = readl(ch->reg_config);
+			if (val & (PL080N_CONFIG_ITPROT | PL080N_CONFIG_SECPROT)) {
+				dev_info(&adev->dev, "physical channel %d reserved for secure access only\n", i);
+				ch->locked = true;
+			}
+		}
+
+		dev_dbg(&adev->dev, "physical channel %d is %s\n",
+			i, pl08x_phy_channel_busy(ch) ? "BUSY" : "FREE");
+	}
+
+	/* Register as many memcpy channels as there are physical channels */
+	ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->memcpy,
+					      pl08x->vd->channels, false);
+	if (ret <= 0) {
+		dev_warn(&pl08x->adev->dev,
+			 "%s failed to enumerate memcpy channels - %d\n",
+			 __func__, ret);
+		goto out_no_memcpy;
+	}
+
+	/* Register slave channels */
+	if (pl08x->has_slave) {
+		ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->slave,
+					pl08x->pd->num_slave_channels, true);
+		if (ret < 0) {
+			dev_warn(&pl08x->adev->dev,
+				 "%s failed to enumerate slave channels - %d\n",
+				 __func__, ret);
+			goto out_no_slave;
+		}
+	}
+
+	ret = dma_async_device_register(&pl08x->memcpy);
+	if (ret) {
+		dev_warn(&pl08x->adev->dev,
+			"%s failed to register memcpy as an async device - %d\n",
+			__func__, ret);
+		goto out_no_memcpy_reg;
+	}
+
+	if (pl08x->has_slave) {
+		ret = dma_async_device_register(&pl08x->slave);
+		if (ret) {
+			dev_warn(&pl08x->adev->dev,
+			"%s failed to register slave as an async device - %d\n",
+			__func__, ret);
+			goto out_no_slave_reg;
+		}
+	}
+
+	amba_set_drvdata(adev, pl08x);
+	init_pl08x_debugfs(pl08x);
+	dev_info(&pl08x->adev->dev, "DMA: PL%03x%s rev%u at 0x%08llx irq %d\n",
+		 amba_part(adev), pl08x->vd->pl080s ? "s" : "", amba_rev(adev),
+		 (unsigned long long)adev->res.start, adev->irq[0]);
+
+	return 0;
+
+out_no_slave_reg:
+	dma_async_device_unregister(&pl08x->memcpy);
+out_no_memcpy_reg:
+	if (pl08x->has_slave)
+		pl08x_free_virtual_channels(&pl08x->slave);
+out_no_slave:
+	pl08x_free_virtual_channels(&pl08x->memcpy);
+out_no_memcpy:
+	kfree(pl08x->phy_chans);
+out_no_phychans:
+	free_irq(adev->irq[0], pl08x);
+out_no_irq:
+	dma_pool_destroy(pl08x->pool);
+out_no_lli_pool:
+out_no_platdata:
+	iounmap(pl08x->base);
+out_no_ioremap:
+	kfree(pl08x);
+out_no_pl08x:
+	amba_release_regions(adev);
+	return ret;
+}
+
+/* PL080 has 8 channels and the PL081 has just 2 */
+static struct vendor_data vendor_pl080 = {
+	.config_offset = PL080_CH_CONFIG,
+	.channels = 8,
+	.signals = 16,
+	.dualmaster = true,
+	.max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
+};
+
+static struct vendor_data vendor_nomadik = {
+	.config_offset = PL080_CH_CONFIG,
+	.channels = 8,
+	.signals = 32,
+	.dualmaster = true,
+	.nomadik = true,
+	.max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
+};
+
+static struct vendor_data vendor_pl080s = {
+	.config_offset = PL080S_CH_CONFIG,
+	.channels = 8,
+	.signals = 32,
+	.pl080s = true,
+	.max_transfer_size = PL080S_CONTROL_TRANSFER_SIZE_MASK,
+};
+
+static struct vendor_data vendor_pl081 = {
+	.config_offset = PL080_CH_CONFIG,
+	.channels = 2,
+	.signals = 16,
+	.dualmaster = false,
+	.max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
+};
+
+static struct vendor_data vendor_ftdmac020 = {
+	.config_offset = PL080_CH_CONFIG,
+	.ftdmac020 = true,
+	.max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
+};
+
+static const struct amba_id pl08x_ids[] = {
+	/* Samsung PL080S variant */
+	{
+		.id	= 0x0a141080,
+		.mask	= 0xffffffff,
+		.data	= &vendor_pl080s,
+	},
+	/* PL080 */
+	{
+		.id	= 0x00041080,
+		.mask	= 0x000fffff,
+		.data	= &vendor_pl080,
+	},
+	/* PL081 */
+	{
+		.id	= 0x00041081,
+		.mask	= 0x000fffff,
+		.data	= &vendor_pl081,
+	},
+	/* Nomadik 8815 PL080 variant */
+	{
+		.id	= 0x00280080,
+		.mask	= 0x00ffffff,
+		.data	= &vendor_nomadik,
+	},
+	/* Faraday Technology FTDMAC020 */
+	{
+		.id	= 0x0003b080,
+		.mask	= 0x000fffff,
+		.data	= &vendor_ftdmac020,
+	},
+	{ 0, 0 },
+};
+
+MODULE_DEVICE_TABLE(amba, pl08x_ids);
+
+static struct amba_driver pl08x_amba_driver = {
+	.drv.name	= DRIVER_NAME,
+	.id_table	= pl08x_ids,
+	.probe		= pl08x_probe,
+};
+
+static int __init pl08x_init(void)
+{
+	int retval;
+	retval = amba_driver_register(&pl08x_amba_driver);
+	if (retval)
+		printk(KERN_WARNING DRIVER_NAME
+		       ": failed to register as an AMBA device (%d)\n",
+		       retval);
+	return retval;
+}
+subsys_initcall(pl08x_init);
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
new file mode 100644
index 0000000..dbc5115
--- /dev/null
+++ b/drivers/dma/at_hdmac.c
@@ -0,0 +1,2183 @@
+/*
+ * Driver for the Atmel AHB DMA Controller (aka HDMA or DMAC on AT91 systems)
+ *
+ * Copyright (C) 2008 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ *
+ * This supports the Atmel AHB DMA Controller found in several Atmel SoCs.
+ * The only Atmel DMA Controller that is not covered by this driver is the one
+ * found on AT91SAM9263.
+ */
+
+#include <dt-bindings/dma/at91.h>
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+
+#include "at_hdmac_regs.h"
+#include "dmaengine.h"
+
+/*
+ * Glossary
+ * --------
+ *
+ * at_hdmac		: Name of the Atmel AHB DMA Controller
+ * at_dma_ / atdma	: Atmel DMA controller entity related
+ * atc_	/ atchan	: Atmel DMA Channel entity related
+ */
+
+#define	ATC_DEFAULT_CFG		(ATC_FIFOCFG_HALFFIFO)
+#define	ATC_DEFAULT_CTRLB	(ATC_SIF(AT_DMA_MEM_IF) \
+				|ATC_DIF(AT_DMA_MEM_IF))
+#define ATC_DMA_BUSWIDTHS\
+	(BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) |\
+	BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |\
+	BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |\
+	BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
+#define ATC_MAX_DSCR_TRIALS	10
+
+/*
+ * Initial number of descriptors to allocate for each channel. This could
+ * be increased during dma usage.
+ */
+static unsigned int init_nr_desc_per_channel = 64;
+module_param(init_nr_desc_per_channel, uint, 0644);
+MODULE_PARM_DESC(init_nr_desc_per_channel,
+		 "initial descriptors per channel (default: 64)");
+
+
+/* prototypes */
+static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx);
+static void atc_issue_pending(struct dma_chan *chan);
+
+
+/*----------------------------------------------------------------------*/
+
+static inline unsigned int atc_get_xfer_width(dma_addr_t src, dma_addr_t dst,
+						size_t len)
+{
+	unsigned int width;
+
+	if (!((src | dst  | len) & 3))
+		width = 2;
+	else if (!((src | dst | len) & 1))
+		width = 1;
+	else
+		width = 0;
+
+	return width;
+}
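+
+/*
+ * Note (added for clarity): the value returned above is the register
+ * encoding of the transfer width (0 = byte, 1 = half-word, 2 = word),
+ * chosen from the common alignment of src, dst and len.  For example,
+ * src = 0x1004, dst = 0x2008, len = 64 are all word aligned and yield 2.
+ */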
+
+static struct at_desc *atc_first_active(struct at_dma_chan *atchan)
+{
+	return list_first_entry(&atchan->active_list,
+				struct at_desc, desc_node);
+}
+
+static struct at_desc *atc_first_queued(struct at_dma_chan *atchan)
+{
+	return list_first_entry(&atchan->queue,
+				struct at_desc, desc_node);
+}
+
+/**
+ * atc_alloc_descriptor - allocate and return an initialized descriptor
+ * @chan: the channel to allocate descriptors for
+ * @gfp_flags: GFP allocation flags
+ *
+ * Note: The ack-bit is positioned in the descriptor flag at creation time
+ *       to make initial allocation more convenient. This bit will be cleared
+ *       and control will be given to client at usage time (during
+ *       preparation functions).
+ */
+static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan,
+					    gfp_t gfp_flags)
+{
+	struct at_desc	*desc = NULL;
+	struct at_dma	*atdma = to_at_dma(chan->device);
+	dma_addr_t phys;
+
+	desc = dma_pool_zalloc(atdma->dma_desc_pool, gfp_flags, &phys);
+	if (desc) {
+		INIT_LIST_HEAD(&desc->tx_list);
+		dma_async_tx_descriptor_init(&desc->txd, chan);
+		/* txd.flags will be overwritten in prep functions */
+		desc->txd.flags = DMA_CTRL_ACK;
+		desc->txd.tx_submit = atc_tx_submit;
+		desc->txd.phys = phys;
+	}
+
+	return desc;
+}
+
+/**
+ * atc_desc_get - get an unused descriptor from free_list
+ * @atchan: channel we want a new descriptor for
+ */
+static struct at_desc *atc_desc_get(struct at_dma_chan *atchan)
+{
+	struct at_desc *desc, *_desc;
+	struct at_desc *ret = NULL;
+	unsigned long flags;
+	unsigned int i = 0;
+	LIST_HEAD(tmp_list);
+
+	spin_lock_irqsave(&atchan->lock, flags);
+	list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) {
+		i++;
+		if (async_tx_test_ack(&desc->txd)) {
+			list_del(&desc->desc_node);
+			ret = desc;
+			break;
+		}
+		dev_dbg(chan2dev(&atchan->chan_common),
+				"desc %p not ACKed\n", desc);
+	}
+	spin_unlock_irqrestore(&atchan->lock, flags);
+	dev_vdbg(chan2dev(&atchan->chan_common),
+		"scanned %u descriptors on freelist\n", i);
+
+	/* no more descriptors available in the initial pool: create one more */
+	if (!ret) {
+		ret = atc_alloc_descriptor(&atchan->chan_common, GFP_ATOMIC);
+		if (ret) {
+			spin_lock_irqsave(&atchan->lock, flags);
+			atchan->descs_allocated++;
+			spin_unlock_irqrestore(&atchan->lock, flags);
+		} else {
+			dev_err(chan2dev(&atchan->chan_common),
+					"not enough descriptors available\n");
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * atc_desc_put - move a descriptor, including any children, to the free list
+ * @atchan: channel we work on
+ * @desc: descriptor, at the head of a chain, to move to free list
+ */
+static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc)
+{
+	if (desc) {
+		struct at_desc *child;
+		unsigned long flags;
+
+		spin_lock_irqsave(&atchan->lock, flags);
+		list_for_each_entry(child, &desc->tx_list, desc_node)
+			dev_vdbg(chan2dev(&atchan->chan_common),
+					"moving child desc %p to freelist\n",
+					child);
+		list_splice_init(&desc->tx_list, &atchan->free_list);
+		dev_vdbg(chan2dev(&atchan->chan_common),
+			 "moving desc %p to freelist\n", desc);
+		list_add(&desc->desc_node, &atchan->free_list);
+		spin_unlock_irqrestore(&atchan->lock, flags);
+	}
+}
+
+/**
+ * atc_desc_chain - build chain adding a descriptor
+ * @first: address of first descriptor of the chain
+ * @prev: address of previous descriptor of the chain
+ * @desc: descriptor to queue
+ *
+ * Called from prep_* functions
+ */
+static void atc_desc_chain(struct at_desc **first, struct at_desc **prev,
+			   struct at_desc *desc)
+{
+	if (!(*first)) {
+		*first = desc;
+	} else {
+		/* inform the HW lli about chaining */
+		(*prev)->lli.dscr = desc->txd.phys;
+		/* insert the link descriptor to the LD ring */
+		list_add_tail(&desc->desc_node,
+				&(*first)->tx_list);
+	}
+	*prev = desc;
+}
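+
+/*
+ * Note (added for clarity): the chain is built so that each hardware LLI
+ * points at the next one through lli.dscr, while the software side keeps
+ * every child on first->tx_list; *prev always tracks the last queued
+ * descriptor so the next call can link onto it.
+ */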
+
+/**
+ * atc_dostart - starts the DMA engine for real
+ * @atchan: the channel we want to start
+ * @first: first descriptor in the list we want to begin with
+ *
+ * Called with atchan->lock held and bh disabled
+ */
+static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first)
+{
+	struct at_dma	*atdma = to_at_dma(atchan->chan_common.device);
+
+	/* ASSERT:  channel is idle */
+	if (atc_chan_is_enabled(atchan)) {
+		dev_err(chan2dev(&atchan->chan_common),
+			"BUG: Attempted to start non-idle channel\n");
+		dev_err(chan2dev(&atchan->chan_common),
+			"  channel: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n",
+			channel_readl(atchan, SADDR),
+			channel_readl(atchan, DADDR),
+			channel_readl(atchan, CTRLA),
+			channel_readl(atchan, CTRLB),
+			channel_readl(atchan, DSCR));
+
+		/* The tasklet will hopefully advance the queue... */
+		return;
+	}
+
+	vdbg_dump_regs(atchan);
+
+	channel_writel(atchan, SADDR, 0);
+	channel_writel(atchan, DADDR, 0);
+	channel_writel(atchan, CTRLA, 0);
+	channel_writel(atchan, CTRLB, 0);
+	channel_writel(atchan, DSCR, first->txd.phys);
+	channel_writel(atchan, SPIP, ATC_SPIP_HOLE(first->src_hole) |
+		       ATC_SPIP_BOUNDARY(first->boundary));
+	channel_writel(atchan, DPIP, ATC_DPIP_HOLE(first->dst_hole) |
+		       ATC_DPIP_BOUNDARY(first->boundary));
+	dma_writel(atdma, CHER, atchan->mask);
+
+	vdbg_dump_regs(atchan);
+}
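+
+/*
+ * Note (added for clarity): SADDR/DADDR/CTRLA/CTRLB are cleared and only
+ * DSCR is pointed at the first descriptor, so the controller fetches all
+ * transfer parameters from the hardware linked list in memory once the
+ * channel is enabled through CHER.
+ */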
+
+/*
+ * atc_get_desc_by_cookie - get the descriptor of a cookie
+ * @atchan: the DMA channel
+ * @cookie: the cookie to get the descriptor for
+ */
+static struct at_desc *atc_get_desc_by_cookie(struct at_dma_chan *atchan,
+						dma_cookie_t cookie)
+{
+	struct at_desc *desc, *_desc;
+
+	list_for_each_entry_safe(desc, _desc, &atchan->queue, desc_node) {
+		if (desc->txd.cookie == cookie)
+			return desc;
+	}
+
+	list_for_each_entry_safe(desc, _desc, &atchan->active_list, desc_node) {
+		if (desc->txd.cookie == cookie)
+			return desc;
+	}
+
+	return NULL;
+}
+
+/**
+ * atc_calc_bytes_left - calculates the number of bytes left according to the
+ * value read from CTRLA.
+ *
+ * @current_len: the number of bytes left before reading CTRLA
+ * @ctrla: the value of CTRLA
+ */
+static inline int atc_calc_bytes_left(int current_len, u32 ctrla)
+{
+	u32 btsize = (ctrla & ATC_BTSIZE_MAX);
+	u32 src_width = ATC_REG_TO_SRC_WIDTH(ctrla);
+
+	/*
+	 * According to the datasheet, when reading the Control A Register
+	 * (ctrla), the Buffer Transfer Size (btsize) bitfield refers to the
+	 * number of transfers completed on the Source Interface.
+	 * So btsize is always a number of source width transfers.
+	 */
+	return current_len - (btsize << src_width);
+}
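+
+/*
+ * Worked example (added for clarity): if current_len is 256 bytes and the
+ * CTRLA read reports btsize = 16 with a 32-bit source width (src_width = 2),
+ * then 16 << 2 = 64 bytes have already been read from the source, so the
+ * residue is 256 - 64 = 192 bytes.
+ */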
+
+/**
+ * atc_get_bytes_left - get the number of bytes residue for a cookie
+ * @chan: DMA channel
+ * @cookie: transaction identifier to check status of
+ */
+static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie)
+{
+	struct at_dma_chan      *atchan = to_at_dma_chan(chan);
+	struct at_desc *desc_first = atc_first_active(atchan);
+	struct at_desc *desc;
+	int ret;
+	u32 ctrla, dscr, trials;
+
+	/*
+	 * If the cookie doesn't match the currently running transfer then
+	 * we can return the total length of the associated DMA transfer,
+	 * because it is still queued.
+	 */
+	desc = atc_get_desc_by_cookie(atchan, cookie);
+	if (desc == NULL)
+		return -EINVAL;
+	else if (desc != desc_first)
+		return desc->total_len;
+
+	/* cookie matches to the currently running transfer */
+	ret = desc_first->total_len;
+
+	if (desc_first->lli.dscr) {
+		/* hardware linked list transfer */
+
+		/*
+		 * Calculate the residue by removing the length of the child
+		 * descriptors already transferred from the total length.
+		 * To get the current child descriptor we can use the value of
+		 * the channel's DSCR register and compare it against the value
+		 * of the hardware linked list structure of each child
+		 * descriptor.
+		 *
+		 * The CTRLA register provides us with the amount of data
+		 * already read from the source for the current child
+		 * descriptor. So we can compute a more accurate residue by also
+		 * removing the number of bytes corresponding to this amount of
+		 * data.
+		 *
+		 * However, the DSCR and CTRLA registers cannot both be read
+		 * atomically. Hence a race condition may occur: the first
+		 * read may refer to one child descriptor whereas the second
+		 * read may refer to a later child descriptor in the list
+		 * because of the DMA transfer progression in between the
+		 * two reads.
+		 *
+		 * One solution could have been to pause the DMA transfer, read
+		 * the DSCR and CTRLA then resume the DMA transfer. Nonetheless,
+		 * this approach presents some drawbacks:
+		 * - If the DMA transfer is paused, RX overruns or TX underruns
+		 *   are more likely to occur depending on the system latency.
+		 *   Taking the USART driver as an example, it uses a cyclic DMA
+		 *   transfer to read data from the Receive Holding Register
+		 *   (RHR) to avoid RX overruns since the RHR is not protected
+		 *   by any FIFO on most Atmel SoCs. So pausing the DMA transfer
+		 *   to compute the residue would break the USART driver design.
+		 * - The atc_pause() function masks interrupts, but we'd rather
+		 *   avoid doing so for system latency reasons.
+		 *
+		 * So we use another solution instead: the DSCR is read a
+		 * first time, then the CTRLA is read, and the DSCR is read
+		 * a second time. If the two consecutive DSCR values are the
+		 * same, we assume they both refer to the very same child
+		 * descriptor, as does the CTRLA value read in between.
+		 * For cyclic transfers, the assumption is that a full loop
+		 * is "not so fast".
+		 * If the two DSCR values differ, we read the CTRLA and then
+		 * the DSCR again, until two consecutive DSCR reads are equal
+		 * or the maximum number of trials is reached.
+		 * This algorithm is very unlikely not to find a stable value
+		 * for DSCR.
+		 */
+
+		dscr = channel_readl(atchan, DSCR);
+		rmb(); /* ensure DSCR is read before CTRLA */
+		ctrla = channel_readl(atchan, CTRLA);
+		for (trials = 0; trials < ATC_MAX_DSCR_TRIALS; ++trials) {
+			u32 new_dscr;
+
+			rmb(); /* ensure DSCR is read after CTRLA */
+			new_dscr = channel_readl(atchan, DSCR);
+
+			/*
+			 * If the DSCR register value has not changed inside the
+			 * DMA controller since the previous read, we assume
+			 * that both the dscr and ctrla values refer to the
+			 * very same descriptor.
+			 */
+			if (likely(new_dscr == dscr))
+				break;
+
+			/*
+			 * DSCR has changed inside the DMA controller, so the
+			 * previously read value of CTRLA may refer to an already
+			 * processed descriptor and hence could be outdated.
+			 * We need to update ctrla to match the current
+			 * descriptor.
+			 */
+			dscr = new_dscr;
+			rmb(); /* ensure DSCR is read before CTRLA */
+			ctrla = channel_readl(atchan, CTRLA);
+		}
+		if (unlikely(trials >= ATC_MAX_DSCR_TRIALS))
+			return -ETIMEDOUT;
+
+		/* for the first descriptor we can be more accurate */
+		if (desc_first->lli.dscr == dscr)
+			return atc_calc_bytes_left(ret, ctrla);
+
+		ret -= desc_first->len;
+		list_for_each_entry(desc, &desc_first->tx_list, desc_node) {
+			if (desc->lli.dscr == dscr)
+				break;
+
+			ret -= desc->len;
+		}
+
+		/*
+		 * For the current descriptor in the chain we can calculate
+		 * the remaining bytes using the channel's register.
+		 */
+		ret = atc_calc_bytes_left(ret, ctrla);
+	} else {
+		/* single transfer */
+		ctrla = channel_readl(atchan, CTRLA);
+		ret = atc_calc_bytes_left(ret, ctrla);
+	}
+
+	return ret;
+}
+
+/**
+ * atc_chain_complete - finish work for one transaction chain
+ * @atchan: channel we work on
+ * @desc: descriptor at the head of the chain we want do complete
+ *
+ * Called with atchan->lock held and bh disabled */
+static void
+atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
+{
+	struct dma_async_tx_descriptor	*txd = &desc->txd;
+	struct at_dma			*atdma = to_at_dma(atchan->chan_common.device);
+
+	dev_vdbg(chan2dev(&atchan->chan_common),
+		"descriptor %u complete\n", txd->cookie);
+
+	/* mark the descriptor as complete for non cyclic cases only */
+	if (!atc_chan_is_cyclic(atchan))
+		dma_cookie_complete(txd);
+
+	/* If the transfer was a memset, free our temporary buffer */
+	if (desc->memset_buffer) {
+		dma_pool_free(atdma->memset_pool, desc->memset_vaddr,
+			      desc->memset_paddr);
+		desc->memset_buffer = false;
+	}
+
+	/* move children to free_list */
+	list_splice_init(&desc->tx_list, &atchan->free_list);
+	/* move myself to free_list */
+	list_move(&desc->desc_node, &atchan->free_list);
+
+	dma_descriptor_unmap(txd);
+	/* for cyclic transfers,
+	 * no need to replay callback function while stopping */
+	if (!atc_chan_is_cyclic(atchan)) {
+		/*
+		 * The API requires that no submissions are done from a
+		 * callback, so we don't need to drop the lock here
+		 */
+		dmaengine_desc_get_callback_invoke(txd, NULL);
+	}
+
+	dma_run_dependencies(txd);
+}
+
+/**
+ * atc_complete_all - finish work for all transactions
+ * @atchan: channel to complete transactions for
+ *
+ * Eventually submit queued descriptors if any
+ *
+ * Assume channel is idle while calling this function
+ * Called with atchan->lock held and bh disabled
+ */
+static void atc_complete_all(struct at_dma_chan *atchan)
+{
+	struct at_desc *desc, *_desc;
+	LIST_HEAD(list);
+
+	dev_vdbg(chan2dev(&atchan->chan_common), "complete all\n");
+
+	/*
+	 * Submit queued descriptors ASAP, i.e. before we go through
+	 * the completed ones.
+	 */
+	if (!list_empty(&atchan->queue))
+		atc_dostart(atchan, atc_first_queued(atchan));
+	/* empty active_list now it is completed */
+	list_splice_init(&atchan->active_list, &list);
+	/* empty queue list by moving descriptors (if any) to active_list */
+	list_splice_init(&atchan->queue, &atchan->active_list);
+
+	list_for_each_entry_safe(desc, _desc, &list, desc_node)
+		atc_chain_complete(atchan, desc);
+}
+
+/**
+ * atc_advance_work - at the end of a transaction, move forward
+ * @atchan: channel where the transaction ended
+ *
+ * Called with atchan->lock held and bh disabled
+ */
+static void atc_advance_work(struct at_dma_chan *atchan)
+{
+	dev_vdbg(chan2dev(&atchan->chan_common), "advance_work\n");
+
+	if (atc_chan_is_enabled(atchan))
+		return;
+
+	if (list_empty(&atchan->active_list) ||
+	    list_is_singular(&atchan->active_list)) {
+		atc_complete_all(atchan);
+	} else {
+		atc_chain_complete(atchan, atc_first_active(atchan));
+		/* advance work */
+		atc_dostart(atchan, atc_first_active(atchan));
+	}
+}
+
+
+/**
+ * atc_handle_error - handle errors reported by DMA controller
+ * @atchan: channel where error occurs
+ *
+ * Called with atchan->lock held and bh disabled
+ */
+static void atc_handle_error(struct at_dma_chan *atchan)
+{
+	struct at_desc *bad_desc;
+	struct at_desc *child;
+
+	/*
+	 * The descriptor currently at the head of the active list is
+	 * broken. Since we don't have any way to report errors, we'll
+	 * just have to scream loudly and try to carry on.
+	 */
+	bad_desc = atc_first_active(atchan);
+	list_del_init(&bad_desc->desc_node);
+
+	/* As we are stopped, take the opportunity to push queued descriptors
+	 * onto the active_list */
+	list_splice_init(&atchan->queue, atchan->active_list.prev);
+
+	/* Try to restart the controller */
+	if (!list_empty(&atchan->active_list))
+		atc_dostart(atchan, atc_first_active(atchan));
+
+	/*
+	 * KERN_CRIT may seem harsh, but since this only happens
+	 * when someone submits a bad physical address in a
+	 * descriptor, we should consider ourselves lucky that the
+	 * controller flagged an error instead of scribbling over
+	 * random memory locations.
+	 */
+	dev_crit(chan2dev(&atchan->chan_common),
+			"Bad descriptor submitted for DMA!\n");
+	dev_crit(chan2dev(&atchan->chan_common),
+			"  cookie: %d\n", bad_desc->txd.cookie);
+	atc_dump_lli(atchan, &bad_desc->lli);
+	list_for_each_entry(child, &bad_desc->tx_list, desc_node)
+		atc_dump_lli(atchan, &child->lli);
+
+	/* Pretend the descriptor completed successfully */
+	atc_chain_complete(atchan, bad_desc);
+}
+
+/**
+ * atc_handle_cyclic - at the end of a period, run callback function
+ * @atchan: channel used for cyclic operations
+ *
+ * Called with atchan->lock held and bh disabled
+ */
+static void atc_handle_cyclic(struct at_dma_chan *atchan)
+{
+	struct at_desc			*first = atc_first_active(atchan);
+	struct dma_async_tx_descriptor	*txd = &first->txd;
+
+	dev_vdbg(chan2dev(&atchan->chan_common),
+			"new cyclic period llp 0x%08x\n",
+			channel_readl(atchan, DSCR));
+
+	dmaengine_desc_get_callback_invoke(txd, NULL);
+}
+
+/*--  IRQ & Tasklet  ---------------------------------------------------*/
+
+static void atc_tasklet(unsigned long data)
+{
+	struct at_dma_chan *atchan = (struct at_dma_chan *)data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&atchan->lock, flags);
+	if (test_and_clear_bit(ATC_IS_ERROR, &atchan->status))
+		atc_handle_error(atchan);
+	else if (atc_chan_is_cyclic(atchan))
+		atc_handle_cyclic(atchan);
+	else
+		atc_advance_work(atchan);
+
+	spin_unlock_irqrestore(&atchan->lock, flags);
+}
+
+static irqreturn_t at_dma_interrupt(int irq, void *dev_id)
+{
+	struct at_dma		*atdma = (struct at_dma *)dev_id;
+	struct at_dma_chan	*atchan;
+	int			i;
+	u32			status, pending, imr;
+	int			ret = IRQ_NONE;
+
+	do {
+		imr = dma_readl(atdma, EBCIMR);
+		status = dma_readl(atdma, EBCISR);
+		pending = status & imr;
+
+		if (!pending)
+			break;
+
+		dev_vdbg(atdma->dma_common.dev,
+			"interrupt: status = 0x%08x, 0x%08x, 0x%08x\n",
+			 status, imr, pending);
+
+		for (i = 0; i < atdma->dma_common.chancnt; i++) {
+			atchan = &atdma->chan[i];
+			if (pending & (AT_DMA_BTC(i) | AT_DMA_ERR(i))) {
+				if (pending & AT_DMA_ERR(i)) {
+					/* Disable channel on AHB error */
+					dma_writel(atdma, CHDR,
+						AT_DMA_RES(i) | atchan->mask);
+					/* Give information to tasklet */
+					set_bit(ATC_IS_ERROR, &atchan->status);
+				}
+				tasklet_schedule(&atchan->tasklet);
+				ret = IRQ_HANDLED;
+			}
+		}
+
+	} while (pending);
+
+	return ret;
+}
+
+
+/*--  DMA Engine API  --------------------------------------------------*/
+
+/**
+ * atc_tx_submit - set the prepared descriptor(s) to be executed by the engine
+ * @desc: descriptor at the head of the transaction chain
+ *
+ * Queue chain if DMA engine is working already
+ *
+ * Cookie increment and adding to active_list or queue must be atomic
+ */
+static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct at_desc		*desc = txd_to_at_desc(tx);
+	struct at_dma_chan	*atchan = to_at_dma_chan(tx->chan);
+	dma_cookie_t		cookie;
+	unsigned long		flags;
+
+	spin_lock_irqsave(&atchan->lock, flags);
+	cookie = dma_cookie_assign(tx);
+
+	if (list_empty(&atchan->active_list)) {
+		dev_vdbg(chan2dev(tx->chan), "tx_submit: started %u\n",
+				desc->txd.cookie);
+		atc_dostart(atchan, desc);
+		list_add_tail(&desc->desc_node, &atchan->active_list);
+	} else {
+		dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n",
+				desc->txd.cookie);
+		list_add_tail(&desc->desc_node, &atchan->queue);
+	}
+
+	spin_unlock_irqrestore(&atchan->lock, flags);
+
+	return cookie;
+}
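+
+/*
+ * Illustrative example (not part of this driver): a client never calls
+ * atc_tx_submit() directly; it goes through the generic dmaengine
+ * wrappers, which reach this function via txd->tx_submit().  The buffer
+ * addresses, callback and context names below are hypothetical
+ * placeholders, everything else is standard dmaengine API:
+ *
+ *	txd = dmaengine_prep_dma_memcpy(chan, dst_phys, src_phys, len,
+ *					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ *	if (!txd)
+ *		return -ENOMEM;
+ *	txd->callback = my_done_callback;
+ *	txd->callback_param = my_ctx;
+ *	cookie = dmaengine_submit(txd);
+ *	dma_async_issue_pending(chan);
+ */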
+
+/**
+ * atc_prep_dma_interleaved - prepare memory to memory interleaved operation
+ * @chan: the channel to prepare operation on
+ * @xt: Interleaved transfer template
+ * @flags: tx descriptor status flags
+ */
+static struct dma_async_tx_descriptor *
+atc_prep_dma_interleaved(struct dma_chan *chan,
+			 struct dma_interleaved_template *xt,
+			 unsigned long flags)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	struct data_chunk	*first;
+	struct at_desc		*desc = NULL;
+	size_t			xfer_count;
+	unsigned int		dwidth;
+	u32			ctrla;
+	u32			ctrlb;
+	size_t			len = 0;
+	int			i;
+
+	if (unlikely(!xt || xt->numf != 1 || !xt->frame_size))
+		return NULL;
+
+	first = xt->sgl;
+
+	dev_info(chan2dev(chan),
+		 "%s: src=%pad, dest=%pad, numf=%zu, frame_size=%zu, flags=0x%lx\n",
+		__func__, &xt->src_start, &xt->dst_start, xt->numf,
+		xt->frame_size, flags);
+
+	/*
+	 * The controller can only "skip" X bytes every Y bytes, so we
+	 * need to make sure we are given a template that fits that
+	 * description, i.e. a template whose chunks all have the same
+	 * size and the same ICGs. See the illustrative template after
+	 * this function.
+	 */
+	for (i = 0; i < xt->frame_size; i++) {
+		struct data_chunk *chunk = xt->sgl + i;
+
+		if ((chunk->size != xt->sgl->size) ||
+		    (dmaengine_get_dst_icg(xt, chunk) != dmaengine_get_dst_icg(xt, first)) ||
+		    (dmaengine_get_src_icg(xt, chunk) != dmaengine_get_src_icg(xt, first))) {
+			dev_err(chan2dev(chan),
+				"%s: the controller can transfer only identical chunks\n",
+				__func__);
+			return NULL;
+		}
+
+		len += chunk->size;
+	}
+
+	dwidth = atc_get_xfer_width(xt->src_start,
+				    xt->dst_start, len);
+
+	xfer_count = len >> dwidth;
+	if (xfer_count > ATC_BTSIZE_MAX) {
+		dev_err(chan2dev(chan), "%s: buffer is too big\n", __func__);
+		return NULL;
+	}
+
+	ctrla = ATC_SRC_WIDTH(dwidth) |
+		ATC_DST_WIDTH(dwidth);
+
+	ctrlb =   ATC_DEFAULT_CTRLB | ATC_IEN
+		| ATC_SRC_ADDR_MODE_INCR
+		| ATC_DST_ADDR_MODE_INCR
+		| ATC_SRC_PIP
+		| ATC_DST_PIP
+		| ATC_FC_MEM2MEM;
+
+	/* create the transfer */
+	desc = atc_desc_get(atchan);
+	if (!desc) {
+		dev_err(chan2dev(chan),
+			"%s: couldn't allocate our descriptor\n", __func__);
+		return NULL;
+	}
+
+	desc->lli.saddr = xt->src_start;
+	desc->lli.daddr = xt->dst_start;
+	desc->lli.ctrla = ctrla | xfer_count;
+	desc->lli.ctrlb = ctrlb;
+
+	desc->boundary = first->size >> dwidth;
+	desc->dst_hole = (dmaengine_get_dst_icg(xt, first) >> dwidth) + 1;
+	desc->src_hole = (dmaengine_get_src_icg(xt, first) >> dwidth) + 1;
+
+	desc->txd.cookie = -EBUSY;
+	desc->total_len = desc->len = len;
+
+	/* set end-of-link on the last link descriptor of the list */
+	set_desc_eol(desc);
+
+	desc->txd.flags = flags; /* client is in control of this ack */
+
+	return &desc->txd;
+}
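+
+/*
+ * Illustrative template (not part of this driver): copying a 64 x 64
+ * byte tile out of a 256 byte wide frame into a packed buffer.  Every
+ * chunk has the same size and the same ICG, as required above; the
+ * tile_src_phys/tile_dst_phys addresses are hypothetical.
+ *
+ *	xt = kzalloc(sizeof(*xt) + 64 * sizeof(struct data_chunk),
+ *		     GFP_KERNEL);
+ *	xt->src_start = tile_src_phys;
+ *	xt->dst_start = tile_dst_phys;
+ *	xt->dir = DMA_MEM_TO_MEM;
+ *	xt->src_inc = true;
+ *	xt->src_sgl = true;
+ *	xt->dst_inc = true;
+ *	xt->numf = 1;
+ *	xt->frame_size = 64;
+ *	for (i = 0; i < 64; i++) {
+ *		xt->sgl[i].size = 64;
+ *		xt->sgl[i].icg = 256 - 64;
+ *	}
+ *	txd = dmaengine_prep_interleaved_dma(chan, xt, DMA_PREP_INTERRUPT);
+ *	kfree(xt);
+ */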
+
+/**
+ * atc_prep_dma_memcpy - prepare a memcpy operation
+ * @chan: the channel to prepare operation on
+ * @dest: operation virtual destination address
+ * @src: operation virtual source address
+ * @len: operation length
+ * @flags: tx descriptor status flags
+ */
+static struct dma_async_tx_descriptor *
+atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	struct at_desc		*desc = NULL;
+	struct at_desc		*first = NULL;
+	struct at_desc		*prev = NULL;
+	size_t			xfer_count;
+	size_t			offset;
+	unsigned int		src_width;
+	unsigned int		dst_width;
+	u32			ctrla;
+	u32			ctrlb;
+
+	dev_vdbg(chan2dev(chan), "prep_dma_memcpy: d%pad s%pad l0x%zx f0x%lx\n",
+			&dest, &src, len, flags);
+
+	if (unlikely(!len)) {
+		dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n");
+		return NULL;
+	}
+
+	ctrlb =   ATC_DEFAULT_CTRLB | ATC_IEN
+		| ATC_SRC_ADDR_MODE_INCR
+		| ATC_DST_ADDR_MODE_INCR
+		| ATC_FC_MEM2MEM;
+
+	/*
+	 * We can be a lot more clever here, but this should take care
+	 * of the most common optimization.
+	 */
+	src_width = dst_width = atc_get_xfer_width(src, dest, len);
+
+	ctrla = ATC_SRC_WIDTH(src_width) |
+		ATC_DST_WIDTH(dst_width);
+
+	for (offset = 0; offset < len; offset += xfer_count << src_width) {
+		xfer_count = min_t(size_t, (len - offset) >> src_width,
+				ATC_BTSIZE_MAX);
+
+		desc = atc_desc_get(atchan);
+		if (!desc)
+			goto err_desc_get;
+
+		desc->lli.saddr = src + offset;
+		desc->lli.daddr = dest + offset;
+		desc->lli.ctrla = ctrla | xfer_count;
+		desc->lli.ctrlb = ctrlb;
+
+		desc->txd.cookie = 0;
+		desc->len = xfer_count << src_width;
+
+		atc_desc_chain(&first, &prev, desc);
+	}
+
+	/* First descriptor of the chain embeds additional information */
+	first->txd.cookie = -EBUSY;
+	first->total_len = len;
+
+	/* set end-of-link on the last link descriptor of the list */
+	set_desc_eol(desc);
+
+	first->txd.flags = flags; /* client is in control of this ack */
+
+	return &first->txd;
+
+err_desc_get:
+	atc_desc_put(atchan, first);
+	return NULL;
+}
+
+static struct at_desc *atc_create_memset_desc(struct dma_chan *chan,
+					      dma_addr_t psrc,
+					      dma_addr_t pdst,
+					      size_t len)
+{
+	struct at_dma_chan *atchan = to_at_dma_chan(chan);
+	struct at_desc *desc;
+	size_t xfer_count;
+
+	u32 ctrla = ATC_SRC_WIDTH(2) | ATC_DST_WIDTH(2);
+	u32 ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN |
+		ATC_SRC_ADDR_MODE_FIXED |
+		ATC_DST_ADDR_MODE_INCR |
+		ATC_FC_MEM2MEM;
+
+	xfer_count = len >> 2;
+	if (xfer_count > ATC_BTSIZE_MAX) {
+		dev_err(chan2dev(chan), "%s: buffer is too big\n",
+			__func__);
+		return NULL;
+	}
+
+	desc = atc_desc_get(atchan);
+	if (!desc) {
+		dev_err(chan2dev(chan), "%s: can't get a descriptor\n",
+			__func__);
+		return NULL;
+	}
+
+	desc->lli.saddr = psrc;
+	desc->lli.daddr = pdst;
+	desc->lli.ctrla = ctrla | xfer_count;
+	desc->lli.ctrlb = ctrlb;
+
+	desc->txd.cookie = 0;
+	desc->len = len;
+
+	return desc;
+}
+
+/**
+ * atc_prep_dma_memset - prepare a memset operation
+ * @chan: the channel to prepare operation on
+ * @dest: operation virtual destination address
+ * @value: value to set memory buffer to
+ * @len: operation length
+ * @flags: tx descriptor status flags
+ */
+static struct dma_async_tx_descriptor *
+atc_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
+		    size_t len, unsigned long flags)
+{
+	struct at_dma		*atdma = to_at_dma(chan->device);
+	struct at_desc		*desc;
+	void __iomem		*vaddr;
+	dma_addr_t		paddr;
+
+	dev_vdbg(chan2dev(chan), "%s: d%pad v0x%x l0x%zx f0x%lx\n", __func__,
+		&dest, value, len, flags);
+
+	if (unlikely(!len)) {
+		dev_dbg(chan2dev(chan), "%s: length is zero!\n", __func__);
+		return NULL;
+	}
+
+	if (!is_dma_fill_aligned(chan->device, dest, 0, len)) {
+		dev_dbg(chan2dev(chan), "%s: buffer is not aligned\n",
+			__func__);
+		return NULL;
+	}
+
+	vaddr = dma_pool_alloc(atdma->memset_pool, GFP_ATOMIC, &paddr);
+	if (!vaddr) {
+		dev_err(chan2dev(chan), "%s: couldn't allocate buffer\n",
+			__func__);
+		return NULL;
+	}
+	*(u32*)vaddr = value;
+
+	desc = atc_create_memset_desc(chan, paddr, dest, len);
+	if (!desc) {
+		dev_err(chan2dev(chan), "%s: couldn't get a descriptor\n",
+			__func__);
+		goto err_free_buffer;
+	}
+
+	desc->memset_paddr = paddr;
+	desc->memset_vaddr = vaddr;
+	desc->memset_buffer = true;
+
+	desc->txd.cookie = -EBUSY;
+	desc->total_len = len;
+
+	/* set end-of-link on the descriptor */
+	set_desc_eol(desc);
+
+	desc->txd.flags = flags;
+
+	return &desc->txd;
+
+err_free_buffer:
+	dma_pool_free(atdma->memset_pool, vaddr, paddr);
+	return NULL;
+}
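+
+/*
+ * Illustrative example (not part of this driver): clearing a
+ * DMA-mapped buffer through the generic memset wrapper.  buf_phys and
+ * buf_len are hypothetical; both must be 4-byte aligned, matching the
+ * fill_align capability advertised in at_dma_probe().
+ *
+ *	txd = dmaengine_prep_dma_memset(chan, buf_phys, 0, buf_len,
+ *					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ *	if (txd) {
+ *		dmaengine_submit(txd);
+ *		dma_async_issue_pending(chan);
+ *	}
+ */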
+
+static struct dma_async_tx_descriptor *
+atc_prep_dma_memset_sg(struct dma_chan *chan,
+		       struct scatterlist *sgl,
+		       unsigned int sg_len, int value,
+		       unsigned long flags)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	struct at_dma		*atdma = to_at_dma(chan->device);
+	struct at_desc		*desc = NULL, *first = NULL, *prev = NULL;
+	struct scatterlist	*sg;
+	void __iomem		*vaddr;
+	dma_addr_t		paddr;
+	size_t			total_len = 0;
+	int			i;
+
+	dev_vdbg(chan2dev(chan), "%s: v0x%x l0x%x f0x%lx\n", __func__,
+		 value, sg_len, flags);
+
+	if (unlikely(!sgl || !sg_len)) {
+		dev_dbg(chan2dev(chan), "%s: scatterlist is empty!\n",
+			__func__);
+		return NULL;
+	}
+
+	vaddr = dma_pool_alloc(atdma->memset_pool, GFP_ATOMIC, &paddr);
+	if (!vaddr) {
+		dev_err(chan2dev(chan), "%s: couldn't allocate buffer\n",
+			__func__);
+		return NULL;
+	}
+	*(u32*)vaddr = value;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		dma_addr_t dest = sg_dma_address(sg);
+		size_t len = sg_dma_len(sg);
+
+		dev_vdbg(chan2dev(chan), "%s: d%pad, l0x%zx\n",
+			 __func__, &dest, len);
+
+		if (!is_dma_fill_aligned(chan->device, dest, 0, len)) {
+			dev_err(chan2dev(chan), "%s: buffer is not aligned\n",
+				__func__);
+			goto err_put_desc;
+		}
+
+		desc = atc_create_memset_desc(chan, paddr, dest, len);
+		if (!desc)
+			goto err_put_desc;
+
+		atc_desc_chain(&first, &prev, desc);
+
+		total_len += len;
+	}
+
+	/*
+	 * Only set the buffer pointers on the last descriptor to
+	 * avoid free'ing while we have our transfer still going
+	 */
+	desc->memset_paddr = paddr;
+	desc->memset_vaddr = vaddr;
+	desc->memset_buffer = true;
+
+	first->txd.cookie = -EBUSY;
+	first->total_len = total_len;
+
+	/* set end-of-link on the descriptor */
+	set_desc_eol(desc);
+
+	first->txd.flags = flags;
+
+	return &first->txd;
+
+err_put_desc:
+	atc_desc_put(atchan, first);
+	return NULL;
+}
+
+/**
+ * atc_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
+ * @chan: DMA channel
+ * @sgl: scatterlist to transfer to/from
+ * @sg_len: number of entries in @sgl
+ * @direction: DMA direction
+ * @flags: tx descriptor status flags
+ * @context: transaction context (ignored)
+ */
+static struct dma_async_tx_descriptor *
+atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	struct at_dma_slave	*atslave = chan->private;
+	struct dma_slave_config	*sconfig = &atchan->dma_sconfig;
+	struct at_desc		*first = NULL;
+	struct at_desc		*prev = NULL;
+	u32			ctrla;
+	u32			ctrlb;
+	dma_addr_t		reg;
+	unsigned int		reg_width;
+	unsigned int		mem_width;
+	unsigned int		i;
+	struct scatterlist	*sg;
+	size_t			total_len = 0;
+
+	dev_vdbg(chan2dev(chan), "prep_slave_sg (%d): %s f0x%lx\n",
+			sg_len,
+			direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE",
+			flags);
+
+	if (unlikely(!atslave || !sg_len)) {
+		dev_dbg(chan2dev(chan), "prep_slave_sg: sg length is zero!\n");
+		return NULL;
+	}
+
+	ctrla =   ATC_SCSIZE(sconfig->src_maxburst)
+		| ATC_DCSIZE(sconfig->dst_maxburst);
+	ctrlb = ATC_IEN;
+
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		reg_width = convert_buswidth(sconfig->dst_addr_width);
+		ctrla |=  ATC_DST_WIDTH(reg_width);
+		ctrlb |=  ATC_DST_ADDR_MODE_FIXED
+			| ATC_SRC_ADDR_MODE_INCR
+			| ATC_FC_MEM2PER
+			| ATC_SIF(atchan->mem_if) | ATC_DIF(atchan->per_if);
+		reg = sconfig->dst_addr;
+		for_each_sg(sgl, sg, sg_len, i) {
+			struct at_desc	*desc;
+			u32		len;
+			u32		mem;
+
+			desc = atc_desc_get(atchan);
+			if (!desc)
+				goto err_desc_get;
+
+			mem = sg_dma_address(sg);
+			len = sg_dma_len(sg);
+			if (unlikely(!len)) {
+				dev_dbg(chan2dev(chan),
+					"prep_slave_sg: sg(%d) data length is zero\n", i);
+				goto err;
+			}
+			mem_width = 2;
+			if (unlikely(mem & 3 || len & 3))
+				mem_width = 0;
+
+			desc->lli.saddr = mem;
+			desc->lli.daddr = reg;
+			desc->lli.ctrla = ctrla
+					| ATC_SRC_WIDTH(mem_width)
+					| len >> mem_width;
+			desc->lli.ctrlb = ctrlb;
+			desc->len = len;
+
+			atc_desc_chain(&first, &prev, desc);
+			total_len += len;
+		}
+		break;
+	case DMA_DEV_TO_MEM:
+		reg_width = convert_buswidth(sconfig->src_addr_width);
+		ctrla |=  ATC_SRC_WIDTH(reg_width);
+		ctrlb |=  ATC_DST_ADDR_MODE_INCR
+			| ATC_SRC_ADDR_MODE_FIXED
+			| ATC_FC_PER2MEM
+			| ATC_SIF(atchan->per_if) | ATC_DIF(atchan->mem_if);
+
+		reg = sconfig->src_addr;
+		for_each_sg(sgl, sg, sg_len, i) {
+			struct at_desc	*desc;
+			u32		len;
+			u32		mem;
+
+			desc = atc_desc_get(atchan);
+			if (!desc)
+				goto err_desc_get;
+
+			mem = sg_dma_address(sg);
+			len = sg_dma_len(sg);
+			if (unlikely(!len)) {
+				dev_dbg(chan2dev(chan),
+					"prep_slave_sg: sg(%d) data length is zero\n", i);
+				goto err;
+			}
+			mem_width = 2;
+			if (unlikely(mem & 3 || len & 3))
+				mem_width = 0;
+
+			desc->lli.saddr = reg;
+			desc->lli.daddr = mem;
+			desc->lli.ctrla = ctrla
+					| ATC_DST_WIDTH(mem_width)
+					| len >> reg_width;
+			desc->lli.ctrlb = ctrlb;
+			desc->len = len;
+
+			atc_desc_chain(&first, &prev, desc);
+			total_len += len;
+		}
+		break;
+	default:
+		return NULL;
+	}
+
+	/* set end-of-link on the last link descriptor of the list */
+	set_desc_eol(prev);
+
+	/* First descriptor of the chain embeds additional information */
+	first->txd.cookie = -EBUSY;
+	first->total_len = total_len;
+
+	/* first link descriptor of list is responsible of flags */
+	first->txd.flags = flags; /* client is in control of this ack */
+
+	return &first->txd;
+
+err_desc_get:
+	dev_err(chan2dev(chan), "not enough descriptors available\n");
+err:
+	atc_desc_put(atchan, first);
+	return NULL;
+}
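+
+/*
+ * Illustrative example (not part of this driver): a hypothetical
+ * peripheral driver preparing a memory-to-device transfer, assuming
+ * chan was obtained through dma_request_chan() so that at_dma_xlate()
+ * has attached the at_dma_slave data.  The FIFO address and the
+ * scatterlist are placeholders; the calls are the standard dmaengine
+ * slave API and end up in atc_config() and atc_prep_slave_sg() above.
+ *
+ *	struct dma_slave_config cfg = {
+ *		.direction	= DMA_MEM_TO_DEV,
+ *		.dst_addr	= periph_fifo_phys,
+ *		.dst_addr_width	= DMA_SLAVE_BUSWIDTH_4_BYTES,
+ *		.dst_maxburst	= 16,
+ *	};
+ *
+ *	dmaengine_slave_config(chan, &cfg);
+ *	txd = dmaengine_prep_slave_sg(chan, sgl, sg_len, DMA_MEM_TO_DEV,
+ *				      DMA_PREP_INTERRUPT);
+ *	if (txd) {
+ *		dmaengine_submit(txd);
+ *		dma_async_issue_pending(chan);
+ *	}
+ */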
+
+/**
+ * atc_dma_cyclic_check_values
+ * Check for too big/unaligned periods and unaligned DMA buffer
+ */
+static int
+atc_dma_cyclic_check_values(unsigned int reg_width, dma_addr_t buf_addr,
+		size_t period_len)
+{
+	if (period_len > (ATC_BTSIZE_MAX << reg_width))
+		goto err_out;
+	if (unlikely(period_len & ((1 << reg_width) - 1)))
+		goto err_out;
+	if (unlikely(buf_addr & ((1 << reg_width) - 1)))
+		goto err_out;
+
+	return 0;
+
+err_out:
+	return -EINVAL;
+}
+
+/**
+ * atc_dma_cyclic_fill_desc - Fill one period descriptor
+ */
+static int
+atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc,
+		unsigned int period_index, dma_addr_t buf_addr,
+		unsigned int reg_width, size_t period_len,
+		enum dma_transfer_direction direction)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	struct dma_slave_config	*sconfig = &atchan->dma_sconfig;
+	u32			ctrla;
+
+	/* prepare common CTRLA value */
+	ctrla =   ATC_SCSIZE(sconfig->src_maxburst)
+		| ATC_DCSIZE(sconfig->dst_maxburst)
+		| ATC_DST_WIDTH(reg_width)
+		| ATC_SRC_WIDTH(reg_width)
+		| period_len >> reg_width;
+
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		desc->lli.saddr = buf_addr + (period_len * period_index);
+		desc->lli.daddr = sconfig->dst_addr;
+		desc->lli.ctrla = ctrla;
+		desc->lli.ctrlb = ATC_DST_ADDR_MODE_FIXED
+				| ATC_SRC_ADDR_MODE_INCR
+				| ATC_FC_MEM2PER
+				| ATC_SIF(atchan->mem_if)
+				| ATC_DIF(atchan->per_if);
+		desc->len = period_len;
+		break;
+
+	case DMA_DEV_TO_MEM:
+		desc->lli.saddr = sconfig->src_addr;
+		desc->lli.daddr = buf_addr + (period_len * period_index);
+		desc->lli.ctrla = ctrla;
+		desc->lli.ctrlb = ATC_DST_ADDR_MODE_INCR
+				| ATC_SRC_ADDR_MODE_FIXED
+				| ATC_FC_PER2MEM
+				| ATC_SIF(atchan->per_if)
+				| ATC_DIF(atchan->mem_if);
+		desc->len = period_len;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * atc_prep_dma_cyclic - prepare the cyclic DMA transfer
+ * @chan: the DMA channel to prepare
+ * @buf_addr: physical DMA address where the buffer starts
+ * @buf_len: total number of bytes for the entire buffer
+ * @period_len: number of bytes for each period
+ * @direction: transfer direction, to or from device
+ * @flags: tx descriptor status flags
+ */
+static struct dma_async_tx_descriptor *
+atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+		size_t period_len, enum dma_transfer_direction direction,
+		unsigned long flags)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	struct at_dma_slave	*atslave = chan->private;
+	struct dma_slave_config	*sconfig = &atchan->dma_sconfig;
+	struct at_desc		*first = NULL;
+	struct at_desc		*prev = NULL;
+	unsigned long		was_cyclic;
+	unsigned int		reg_width;
+	unsigned int		periods = buf_len / period_len;
+	unsigned int		i;
+
+	dev_vdbg(chan2dev(chan), "prep_dma_cyclic: %s buf@%pad - %u (%zu/%zu)\n",
+			direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE",
+			&buf_addr,
+			periods, buf_len, period_len);
+
+	if (unlikely(!atslave || !buf_len || !period_len)) {
+		dev_dbg(chan2dev(chan), "prep_dma_cyclic: length is zero!\n");
+		return NULL;
+	}
+
+	was_cyclic = test_and_set_bit(ATC_IS_CYCLIC, &atchan->status);
+	if (was_cyclic) {
+		dev_dbg(chan2dev(chan), "prep_dma_cyclic: channel in use!\n");
+		return NULL;
+	}
+
+	if (unlikely(!is_slave_direction(direction)))
+		goto err_out;
+
+	if (sconfig->direction == DMA_MEM_TO_DEV)
+		reg_width = convert_buswidth(sconfig->dst_addr_width);
+	else
+		reg_width = convert_buswidth(sconfig->src_addr_width);
+
+	/* Check for too big/unaligned periods and unaligned DMA buffer */
+	if (atc_dma_cyclic_check_values(reg_width, buf_addr, period_len))
+		goto err_out;
+
+	/* build cyclic linked list */
+	for (i = 0; i < periods; i++) {
+		struct at_desc	*desc;
+
+		desc = atc_desc_get(atchan);
+		if (!desc)
+			goto err_desc_get;
+
+		if (atc_dma_cyclic_fill_desc(chan, desc, i, buf_addr,
+					     reg_width, period_len, direction))
+			goto err_desc_get;
+
+		atc_desc_chain(&first, &prev, desc);
+	}
+
+	/* let's make a cyclic list */
+	prev->lli.dscr = first->txd.phys;
+
+	/* First descriptor of the chain embeds additional information */
+	first->txd.cookie = -EBUSY;
+	first->total_len = buf_len;
+
+	return &first->txd;
+
+err_desc_get:
+	dev_err(chan2dev(chan), "not enough descriptors available\n");
+	atc_desc_put(atchan, first);
+err_out:
+	clear_bit(ATC_IS_CYCLIC, &atchan->status);
+	return NULL;
+}
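+
+/*
+ * Illustrative example (not part of this driver): a hypothetical audio
+ * driver running a ring buffer of 8 periods of 4 KiB each.  Every time
+ * a period completes, atc_handle_cyclic() invokes the callback set
+ * below (the callback and its argument are placeholders).
+ *
+ *	txd = dmaengine_prep_dma_cyclic(chan, ring_phys, 8 * SZ_4K,
+ *					SZ_4K, DMA_MEM_TO_DEV,
+ *					DMA_PREP_INTERRUPT);
+ *	if (!txd)
+ *		return -EBUSY;
+ *	txd->callback = my_period_elapsed;
+ *	txd->callback_param = my_substream;
+ *	dmaengine_submit(txd);
+ */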
+
+static int atc_config(struct dma_chan *chan,
+		      struct dma_slave_config *sconfig)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+
+	dev_vdbg(chan2dev(chan), "%s\n", __func__);
+
+	/* Check if chan is configured for slave transfers */
+	if (!chan->private)
+		return -EINVAL;
+
+	memcpy(&atchan->dma_sconfig, sconfig, sizeof(*sconfig));
+
+	convert_burst(&atchan->dma_sconfig.src_maxburst);
+	convert_burst(&atchan->dma_sconfig.dst_maxburst);
+
+	return 0;
+}
+
+static int atc_pause(struct dma_chan *chan)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	struct at_dma		*atdma = to_at_dma(chan->device);
+	int			chan_id = atchan->chan_common.chan_id;
+	unsigned long		flags;
+
+	LIST_HEAD(list);
+
+	dev_vdbg(chan2dev(chan), "%s\n", __func__);
+
+	spin_lock_irqsave(&atchan->lock, flags);
+
+	dma_writel(atdma, CHER, AT_DMA_SUSP(chan_id));
+	set_bit(ATC_IS_PAUSED, &atchan->status);
+
+	spin_unlock_irqrestore(&atchan->lock, flags);
+
+	return 0;
+}
+
+static int atc_resume(struct dma_chan *chan)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	struct at_dma		*atdma = to_at_dma(chan->device);
+	int			chan_id = atchan->chan_common.chan_id;
+	unsigned long		flags;
+
+	LIST_HEAD(list);
+
+	dev_vdbg(chan2dev(chan), "%s\n", __func__);
+
+	if (!atc_chan_is_paused(atchan))
+		return 0;
+
+	spin_lock_irqsave(&atchan->lock, flags);
+
+	dma_writel(atdma, CHDR, AT_DMA_RES(chan_id));
+	clear_bit(ATC_IS_PAUSED, &atchan->status);
+
+	spin_unlock_irqrestore(&atchan->lock, flags);
+
+	return 0;
+}
+
+static int atc_terminate_all(struct dma_chan *chan)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	struct at_dma		*atdma = to_at_dma(chan->device);
+	int			chan_id = atchan->chan_common.chan_id;
+	struct at_desc		*desc, *_desc;
+	unsigned long		flags;
+
+	LIST_HEAD(list);
+
+	dev_vdbg(chan2dev(chan), "%s\n", __func__);
+
+	/*
+	 * This is only called when something went wrong elsewhere, so
+	 * we don't really care about the data. Just disable the
+	 * channel. We still have to poll the channel enable bit due
+	 * to AHB/HSB limitations.
+	 */
+	spin_lock_irqsave(&atchan->lock, flags);
+
+	/* disabling channel: must also remove suspend state */
+	dma_writel(atdma, CHDR, AT_DMA_RES(chan_id) | atchan->mask);
+
+	/* confirm that this channel is disabled */
+	while (dma_readl(atdma, CHSR) & atchan->mask)
+		cpu_relax();
+
+	/* active_list entries will end up before queued entries */
+	list_splice_init(&atchan->queue, &list);
+	list_splice_init(&atchan->active_list, &list);
+
+	/* Flush all pending and queued descriptors */
+	list_for_each_entry_safe(desc, _desc, &list, desc_node)
+		atc_chain_complete(atchan, desc);
+
+	clear_bit(ATC_IS_PAUSED, &atchan->status);
+	/* if channel dedicated to cyclic operations, free it */
+	clear_bit(ATC_IS_CYCLIC, &atchan->status);
+
+	spin_unlock_irqrestore(&atchan->lock, flags);
+
+	return 0;
+}
+
+/**
+ * atc_tx_status - poll for transaction completion
+ * @chan: DMA channel
+ * @cookie: transaction identifier to check status of
+ * @txstate: if not %NULL updated with transaction state
+ *
+ * If @txstate is passed in, upon return it reflects the driver's
+ * internal state and can be used with dma_async_is_complete() to check
+ * the status of multiple cookies without re-checking hardware state.
+ */
+static enum dma_status
+atc_tx_status(struct dma_chan *chan,
+		dma_cookie_t cookie,
+		struct dma_tx_state *txstate)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	unsigned long		flags;
+	enum dma_status		ret;
+	int bytes = 0;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE)
+		return ret;
+	/*
+	 * There's no point calculating the residue if there's
+	 * no txstate to store the value.
+	 */
+	if (!txstate)
+		return DMA_ERROR;
+
+	spin_lock_irqsave(&atchan->lock, flags);
+
+	/*  Get number of bytes left in the active transactions */
+	bytes = atc_get_bytes_left(chan, cookie);
+
+	spin_unlock_irqrestore(&atchan->lock, flags);
+
+	if (unlikely(bytes < 0)) {
+		dev_vdbg(chan2dev(chan), "get residual bytes error\n");
+		return DMA_ERROR;
+	} else {
+		dma_set_residue(txstate, bytes);
+	}
+
+	dev_vdbg(chan2dev(chan), "tx_status %d: cookie = %d residue = %d\n",
+		 ret, cookie, bytes);
+
+	return ret;
+}
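+
+/*
+ * Illustrative example (not part of this driver): polling completion
+ * and residue from a client, using the cookie returned by
+ * dmaengine_submit().  Standard dmaengine API only.
+ *
+ *	struct dma_tx_state state;
+ *	enum dma_status status;
+ *
+ *	status = dmaengine_tx_status(chan, cookie, &state);
+ *	if (status != DMA_COMPLETE)
+ *		pr_debug("in flight, %u bytes left\n", state.residue);
+ */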
+
+/**
+ * atc_issue_pending - try to finish work
+ * @chan: target DMA channel
+ */
+static void atc_issue_pending(struct dma_chan *chan)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	unsigned long		flags;
+
+	dev_vdbg(chan2dev(chan), "issue_pending\n");
+
+	/* Not needed for cyclic transfers */
+	if (atc_chan_is_cyclic(atchan))
+		return;
+
+	spin_lock_irqsave(&atchan->lock, flags);
+	atc_advance_work(atchan);
+	spin_unlock_irqrestore(&atchan->lock, flags);
+}
+
+/**
+ * atc_alloc_chan_resources - allocate resources for DMA channel
+ * @chan: allocate descriptor resources for this channel
+ *
+ * return - the number of allocated descriptors
+ */
+static int atc_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	struct at_dma		*atdma = to_at_dma(chan->device);
+	struct at_desc		*desc;
+	struct at_dma_slave	*atslave;
+	unsigned long		flags;
+	int			i;
+	u32			cfg;
+	LIST_HEAD(tmp_list);
+
+	dev_vdbg(chan2dev(chan), "alloc_chan_resources\n");
+
+	/* ASSERT:  channel is idle */
+	if (atc_chan_is_enabled(atchan)) {
+		dev_dbg(chan2dev(chan), "DMA channel not idle?\n");
+		return -EIO;
+	}
+
+	cfg = ATC_DEFAULT_CFG;
+
+	atslave = chan->private;
+	if (atslave) {
+		/*
+		 * We need controller-specific data to set up slave
+		 * transfers.
+		 */
+		BUG_ON(!atslave->dma_dev || atslave->dma_dev != atdma->dma_common.dev);
+
+		/* if a cfg configuration is specified, take it instead of the default */
+		if (atslave->cfg)
+			cfg = atslave->cfg;
+	}
+
+	/* Have we already been set up? If so, just reconfigure the
+	 * channel; there is no need to reallocate descriptors. */
+	if (!list_empty(&atchan->free_list))
+		return atchan->descs_allocated;
+
+	/* Allocate initial pool of descriptors */
+	for (i = 0; i < init_nr_desc_per_channel; i++) {
+		desc = atc_alloc_descriptor(chan, GFP_KERNEL);
+		if (!desc) {
+			dev_err(atdma->dma_common.dev,
+				"Only %d initial descriptors\n", i);
+			break;
+		}
+		list_add_tail(&desc->desc_node, &tmp_list);
+	}
+
+	spin_lock_irqsave(&atchan->lock, flags);
+	atchan->descs_allocated = i;
+	list_splice(&tmp_list, &atchan->free_list);
+	dma_cookie_init(chan);
+	spin_unlock_irqrestore(&atchan->lock, flags);
+
+	/* channel parameters */
+	channel_writel(atchan, CFG, cfg);
+
+	dev_dbg(chan2dev(chan),
+		"alloc_chan_resources: allocated %d descriptors\n",
+		atchan->descs_allocated);
+
+	return atchan->descs_allocated;
+}
+
+/**
+ * atc_free_chan_resources - free all channel resources
+ * @chan: DMA channel
+ */
+static void atc_free_chan_resources(struct dma_chan *chan)
+{
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+	struct at_dma		*atdma = to_at_dma(chan->device);
+	struct at_desc		*desc, *_desc;
+	LIST_HEAD(list);
+
+	dev_dbg(chan2dev(chan), "free_chan_resources: (descs allocated=%u)\n",
+		atchan->descs_allocated);
+
+	/* ASSERT:  channel is idle */
+	BUG_ON(!list_empty(&atchan->active_list));
+	BUG_ON(!list_empty(&atchan->queue));
+	BUG_ON(atc_chan_is_enabled(atchan));
+
+	list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) {
+		dev_vdbg(chan2dev(chan), "  freeing descriptor %p\n", desc);
+		list_del(&desc->desc_node);
+		/* free link descriptor */
+		dma_pool_free(atdma->dma_desc_pool, desc, desc->txd.phys);
+	}
+	list_splice_init(&atchan->free_list, &list);
+	atchan->descs_allocated = 0;
+	atchan->status = 0;
+
+	/*
+	 * Free atslave allocated in at_dma_xlate()
+	 */
+	kfree(chan->private);
+	chan->private = NULL;
+
+	dev_vdbg(chan2dev(chan), "free_chan_resources: done\n");
+}
+
+#ifdef CONFIG_OF
+static bool at_dma_filter(struct dma_chan *chan, void *slave)
+{
+	struct at_dma_slave *atslave = slave;
+
+	if (atslave->dma_dev == chan->device->dev) {
+		chan->private = atslave;
+		return true;
+	} else {
+		return false;
+	}
+}
+
+static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec,
+				     struct of_dma *of_dma)
+{
+	struct dma_chan *chan;
+	struct at_dma_chan *atchan;
+	struct at_dma_slave *atslave;
+	dma_cap_mask_t mask;
+	unsigned int per_id;
+	struct platform_device *dmac_pdev;
+
+	if (dma_spec->args_count != 2)
+		return NULL;
+
+	dmac_pdev = of_find_device_by_node(dma_spec->np);
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	atslave = kzalloc(sizeof(*atslave), GFP_KERNEL);
+	if (!atslave)
+		return NULL;
+
+	atslave->cfg = ATC_DST_H2SEL_HW | ATC_SRC_H2SEL_HW;
+	/*
+	 * We can fill both SRC_PER and DST_PER, one of these fields will be
+	 * ignored depending on DMA transfer direction.
+	 */
+	per_id = dma_spec->args[1] & AT91_DMA_CFG_PER_ID_MASK;
+	atslave->cfg |= ATC_DST_PER_MSB(per_id) | ATC_DST_PER(per_id)
+		     | ATC_SRC_PER_MSB(per_id) | ATC_SRC_PER(per_id);
+	/*
+	 * We have to translate the value we get from the device tree since
+	 * the half FIFO configuration value had to be 0 to keep backward
+	 * compatibility.
+	 */
+	switch (dma_spec->args[1] & AT91_DMA_CFG_FIFOCFG_MASK) {
+	case AT91_DMA_CFG_FIFOCFG_ALAP:
+		atslave->cfg |= ATC_FIFOCFG_LARGESTBURST;
+		break;
+	case AT91_DMA_CFG_FIFOCFG_ASAP:
+		atslave->cfg |= ATC_FIFOCFG_ENOUGHSPACE;
+		break;
+	case AT91_DMA_CFG_FIFOCFG_HALF:
+	default:
+		atslave->cfg |= ATC_FIFOCFG_HALFFIFO;
+	}
+	atslave->dma_dev = &dmac_pdev->dev;
+
+	chan = dma_request_channel(mask, at_dma_filter, atslave);
+	if (!chan)
+		return NULL;
+
+	atchan = to_at_dma_chan(chan);
+	atchan->per_if = dma_spec->args[0] & 0xff;
+	atchan->mem_if = (dma_spec->args[0] >> 16) & 0xff;
+
+	return chan;
+}
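+
+/*
+ * Illustrative specifier layout (derived from the decoding above, not
+ * from a binding document): the two cells handed to at_dma_xlate() are
+ *
+ *	args[0] = (mem_if << 16) | per_if;
+ *	args[1] = (per_id & AT91_DMA_CFG_PER_ID_MASK) | fifo_cfg;
+ *
+ * where fifo_cfg is one of the AT91_DMA_CFG_FIFOCFG_* values handled
+ * in the switch statement above.
+ */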
+#else
+static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec,
+				     struct of_dma *of_dma)
+{
+	return NULL;
+}
+#endif
+
+/*--  Module Management  -----------------------------------------------*/
+
+/* cap_mask is a multi-u32 bitfield, it can only be filled at run time (see at_dma_probe()). */
+static struct at_dma_platform_data at91sam9rl_config = {
+	.nr_channels = 2,
+};
+static struct at_dma_platform_data at91sam9g45_config = {
+	.nr_channels = 8,
+};
+
+#if defined(CONFIG_OF)
+static const struct of_device_id atmel_dma_dt_ids[] = {
+	{
+		.compatible = "atmel,at91sam9rl-dma",
+		.data = &at91sam9rl_config,
+	}, {
+		.compatible = "atmel,at91sam9g45-dma",
+		.data = &at91sam9g45_config,
+	}, {
+		/* sentinel */
+	}
+};
+
+MODULE_DEVICE_TABLE(of, atmel_dma_dt_ids);
+#endif
+
+static const struct platform_device_id atdma_devtypes[] = {
+	{
+		.name = "at91sam9rl_dma",
+		.driver_data = (unsigned long) &at91sam9rl_config,
+	}, {
+		.name = "at91sam9g45_dma",
+		.driver_data = (unsigned long) &at91sam9g45_config,
+	}, {
+		/* sentinel */
+	}
+};
+
+static inline const struct at_dma_platform_data * __init at_dma_get_driver_data(
+						struct platform_device *pdev)
+{
+	if (pdev->dev.of_node) {
+		const struct of_device_id *match;
+		match = of_match_node(atmel_dma_dt_ids, pdev->dev.of_node);
+		if (match == NULL)
+			return NULL;
+		return match->data;
+	}
+	return (struct at_dma_platform_data *)
+			platform_get_device_id(pdev)->driver_data;
+}
+
+/**
+ * at_dma_off - disable DMA controller
+ * @atdma: the Atmel HDMAC device
+ */
+static void at_dma_off(struct at_dma *atdma)
+{
+	dma_writel(atdma, EN, 0);
+
+	/* disable all interrupts */
+	dma_writel(atdma, EBCIDR, -1L);
+
+	/* confirm that all channels are disabled */
+	while (dma_readl(atdma, CHSR) & atdma->all_chan_mask)
+		cpu_relax();
+}
+
+static int __init at_dma_probe(struct platform_device *pdev)
+{
+	struct resource		*io;
+	struct at_dma		*atdma;
+	size_t			size;
+	int			irq;
+	int			err;
+	int			i;
+	const struct at_dma_platform_data *plat_dat;
+
+	/* setup platform data for each SoC */
+	dma_cap_set(DMA_MEMCPY, at91sam9rl_config.cap_mask);
+	dma_cap_set(DMA_INTERLEAVE, at91sam9g45_config.cap_mask);
+	dma_cap_set(DMA_MEMCPY, at91sam9g45_config.cap_mask);
+	dma_cap_set(DMA_MEMSET, at91sam9g45_config.cap_mask);
+	dma_cap_set(DMA_MEMSET_SG, at91sam9g45_config.cap_mask);
+	dma_cap_set(DMA_PRIVATE, at91sam9g45_config.cap_mask);
+	dma_cap_set(DMA_SLAVE, at91sam9g45_config.cap_mask);
+
+	/* get DMA parameters from controller type */
+	plat_dat = at_dma_get_driver_data(pdev);
+	if (!plat_dat)
+		return -ENODEV;
+
+	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!io)
+		return -EINVAL;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	size = sizeof(struct at_dma);
+	size += plat_dat->nr_channels * sizeof(struct at_dma_chan);
+	atdma = kzalloc(size, GFP_KERNEL);
+	if (!atdma)
+		return -ENOMEM;
+
+	/* discover transaction capabilities */
+	atdma->dma_common.cap_mask = plat_dat->cap_mask;
+	atdma->all_chan_mask = (1 << plat_dat->nr_channels) - 1;
+
+	size = resource_size(io);
+	if (!request_mem_region(io->start, size, pdev->dev.driver->name)) {
+		err = -EBUSY;
+		goto err_kfree;
+	}
+
+	atdma->regs = ioremap(io->start, size);
+	if (!atdma->regs) {
+		err = -ENOMEM;
+		goto err_release_r;
+	}
+
+	atdma->clk = clk_get(&pdev->dev, "dma_clk");
+	if (IS_ERR(atdma->clk)) {
+		err = PTR_ERR(atdma->clk);
+		goto err_clk;
+	}
+	err = clk_prepare_enable(atdma->clk);
+	if (err)
+		goto err_clk_prepare;
+
+	/* force dma off, just in case */
+	at_dma_off(atdma);
+
+	err = request_irq(irq, at_dma_interrupt, 0, "at_hdmac", atdma);
+	if (err)
+		goto err_irq;
+
+	platform_set_drvdata(pdev, atdma);
+
+	/* create a pool of consistent memory blocks for hardware descriptors */
+	atdma->dma_desc_pool = dma_pool_create("at_hdmac_desc_pool",
+			&pdev->dev, sizeof(struct at_desc),
+			4 /* word alignment */, 0);
+	if (!atdma->dma_desc_pool) {
+		dev_err(&pdev->dev, "No memory for descriptors dma pool\n");
+		err = -ENOMEM;
+		goto err_desc_pool_create;
+	}
+
+	/* create a pool of consistent memory blocks for memset blocks */
+	atdma->memset_pool = dma_pool_create("at_hdmac_memset_pool",
+					     &pdev->dev, sizeof(int), 4, 0);
+	if (!atdma->memset_pool) {
+		dev_err(&pdev->dev, "No memory for memset dma pool\n");
+		err = -ENOMEM;
+		goto err_memset_pool_create;
+	}
+
+	/* clear any pending interrupt */
+	while (dma_readl(atdma, EBCISR))
+		cpu_relax();
+
+	/* initialize channel-related values */
+	INIT_LIST_HEAD(&atdma->dma_common.channels);
+	for (i = 0; i < plat_dat->nr_channels; i++) {
+		struct at_dma_chan	*atchan = &atdma->chan[i];
+
+		atchan->mem_if = AT_DMA_MEM_IF;
+		atchan->per_if = AT_DMA_PER_IF;
+		atchan->chan_common.device = &atdma->dma_common;
+		dma_cookie_init(&atchan->chan_common);
+		list_add_tail(&atchan->chan_common.device_node,
+				&atdma->dma_common.channels);
+
+		atchan->ch_regs = atdma->regs + ch_regs(i);
+		spin_lock_init(&atchan->lock);
+		atchan->mask = 1 << i;
+
+		INIT_LIST_HEAD(&atchan->active_list);
+		INIT_LIST_HEAD(&atchan->queue);
+		INIT_LIST_HEAD(&atchan->free_list);
+
+		tasklet_init(&atchan->tasklet, atc_tasklet,
+				(unsigned long)atchan);
+		atc_enable_chan_irq(atdma, i);
+	}
+
+	/* set base routines */
+	atdma->dma_common.device_alloc_chan_resources = atc_alloc_chan_resources;
+	atdma->dma_common.device_free_chan_resources = atc_free_chan_resources;
+	atdma->dma_common.device_tx_status = atc_tx_status;
+	atdma->dma_common.device_issue_pending = atc_issue_pending;
+	atdma->dma_common.dev = &pdev->dev;
+
+	/* set prep routines based on capability */
+	if (dma_has_cap(DMA_INTERLEAVE, atdma->dma_common.cap_mask))
+		atdma->dma_common.device_prep_interleaved_dma = atc_prep_dma_interleaved;
+
+	if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask))
+		atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy;
+
+	if (dma_has_cap(DMA_MEMSET, atdma->dma_common.cap_mask)) {
+		atdma->dma_common.device_prep_dma_memset = atc_prep_dma_memset;
+		atdma->dma_common.device_prep_dma_memset_sg = atc_prep_dma_memset_sg;
+		atdma->dma_common.fill_align = DMAENGINE_ALIGN_4_BYTES;
+	}
+
+	if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)) {
+		atdma->dma_common.device_prep_slave_sg = atc_prep_slave_sg;
+		/* controller can do slave DMA: can trigger cyclic transfers */
+		dma_cap_set(DMA_CYCLIC, atdma->dma_common.cap_mask);
+		atdma->dma_common.device_prep_dma_cyclic = atc_prep_dma_cyclic;
+		atdma->dma_common.device_config = atc_config;
+		atdma->dma_common.device_pause = atc_pause;
+		atdma->dma_common.device_resume = atc_resume;
+		atdma->dma_common.device_terminate_all = atc_terminate_all;
+		atdma->dma_common.src_addr_widths = ATC_DMA_BUSWIDTHS;
+		atdma->dma_common.dst_addr_widths = ATC_DMA_BUSWIDTHS;
+		atdma->dma_common.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+		atdma->dma_common.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+	}
+
+	dma_writel(atdma, EN, AT_DMA_ENABLE);
+
+	dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s%s), %d channels\n",
+	  dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask) ? "cpy " : "",
+	  dma_has_cap(DMA_MEMSET, atdma->dma_common.cap_mask) ? "set " : "",
+	  dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)  ? "slave " : "",
+	  plat_dat->nr_channels);
+
+	dma_async_device_register(&atdma->dma_common);
+
+	/*
+	 * Do not return an error if the dmac node is not present, so as
+	 * not to break the existing way of requesting a channel with
+	 * dma_request_channel().
+	 */
+	if (pdev->dev.of_node) {
+		err = of_dma_controller_register(pdev->dev.of_node,
+						 at_dma_xlate, atdma);
+		if (err) {
+			dev_err(&pdev->dev, "could not register of_dma_controller\n");
+			goto err_of_dma_controller_register;
+		}
+	}
+
+	return 0;
+
+err_of_dma_controller_register:
+	dma_async_device_unregister(&atdma->dma_common);
+	dma_pool_destroy(atdma->memset_pool);
+err_memset_pool_create:
+	dma_pool_destroy(atdma->dma_desc_pool);
+err_desc_pool_create:
+	free_irq(platform_get_irq(pdev, 0), atdma);
+err_irq:
+	clk_disable_unprepare(atdma->clk);
+err_clk_prepare:
+	clk_put(atdma->clk);
+err_clk:
+	iounmap(atdma->regs);
+	atdma->regs = NULL;
+err_release_r:
+	release_mem_region(io->start, size);
+err_kfree:
+	kfree(atdma);
+	return err;
+}
+
+static int at_dma_remove(struct platform_device *pdev)
+{
+	struct at_dma		*atdma = platform_get_drvdata(pdev);
+	struct dma_chan		*chan, *_chan;
+	struct resource		*io;
+
+	at_dma_off(atdma);
+	if (pdev->dev.of_node)
+		of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&atdma->dma_common);
+
+	dma_pool_destroy(atdma->memset_pool);
+	dma_pool_destroy(atdma->dma_desc_pool);
+	free_irq(platform_get_irq(pdev, 0), atdma);
+
+	list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels,
+			device_node) {
+		struct at_dma_chan	*atchan = to_at_dma_chan(chan);
+
+		/* Disable interrupts */
+		atc_disable_chan_irq(atdma, chan->chan_id);
+
+		tasklet_kill(&atchan->tasklet);
+		list_del(&chan->device_node);
+	}
+
+	clk_disable_unprepare(atdma->clk);
+	clk_put(atdma->clk);
+
+	iounmap(atdma->regs);
+	atdma->regs = NULL;
+
+	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	release_mem_region(io->start, resource_size(io));
+
+	kfree(atdma);
+
+	return 0;
+}
+
+static void at_dma_shutdown(struct platform_device *pdev)
+{
+	struct at_dma	*atdma = platform_get_drvdata(pdev);
+
+	at_dma_off(platform_get_drvdata(pdev));
+	clk_disable_unprepare(atdma->clk);
+}
+
+static int at_dma_prepare(struct device *dev)
+{
+	struct at_dma *atdma = dev_get_drvdata(dev);
+	struct dma_chan *chan, *_chan;
+
+	list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels,
+			device_node) {
+		struct at_dma_chan *atchan = to_at_dma_chan(chan);
+		/* wait for transaction completion (except in cyclic case) */
+		if (atc_chan_is_enabled(atchan) && !atc_chan_is_cyclic(atchan))
+			return -EAGAIN;
+	}
+	return 0;
+}
+
+static void atc_suspend_cyclic(struct at_dma_chan *atchan)
+{
+	struct dma_chan	*chan = &atchan->chan_common;
+
+	/* The channel should have been paused by its user, but pause it
+	 * here anyway if that has not been done already */
+	if (!atc_chan_is_paused(atchan)) {
+		dev_warn(chan2dev(chan),
+		"cyclic channel not paused, should be done by channel user\n");
+		atc_pause(chan);
+	}
+
+	/* now preserve additional data for cyclic operations */
+	/* next descriptor address in the cyclic list */
+	atchan->save_dscr = channel_readl(atchan, DSCR);
+
+	vdbg_dump_regs(atchan);
+}
+
+static int at_dma_suspend_noirq(struct device *dev)
+{
+	struct at_dma *atdma = dev_get_drvdata(dev);
+	struct dma_chan *chan, *_chan;
+
+	/* preserve data */
+	list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels,
+			device_node) {
+		struct at_dma_chan *atchan = to_at_dma_chan(chan);
+
+		if (atc_chan_is_cyclic(atchan))
+			atc_suspend_cyclic(atchan);
+		atchan->save_cfg = channel_readl(atchan, CFG);
+	}
+	atdma->save_imr = dma_readl(atdma, EBCIMR);
+
+	/* disable DMA controller */
+	at_dma_off(atdma);
+	clk_disable_unprepare(atdma->clk);
+	return 0;
+}
+
+static void atc_resume_cyclic(struct at_dma_chan *atchan)
+{
+	struct at_dma	*atdma = to_at_dma(atchan->chan_common.device);
+
+	/* restore channel status for cyclic descriptors list:
+	 * next descriptor in the cyclic list at the time of suspend */
+	channel_writel(atchan, SADDR, 0);
+	channel_writel(atchan, DADDR, 0);
+	channel_writel(atchan, CTRLA, 0);
+	channel_writel(atchan, CTRLB, 0);
+	channel_writel(atchan, DSCR, atchan->save_dscr);
+	dma_writel(atdma, CHER, atchan->mask);
+
+	/* The pause status should be cleared by the channel's user;
+	 * we cannot take the initiative to do it here */
+
+	vdbg_dump_regs(atchan);
+}
+
+static int at_dma_resume_noirq(struct device *dev)
+{
+	struct at_dma *atdma = dev_get_drvdata(dev);
+	struct dma_chan *chan, *_chan;
+
+	/* bring back DMA controller */
+	clk_prepare_enable(atdma->clk);
+	dma_writel(atdma, EN, AT_DMA_ENABLE);
+
+	/* clear any pending interrupt */
+	while (dma_readl(atdma, EBCISR))
+		cpu_relax();
+
+	/* restore saved data */
+	dma_writel(atdma, EBCIER, atdma->save_imr);
+	list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels,
+			device_node) {
+		struct at_dma_chan *atchan = to_at_dma_chan(chan);
+
+		channel_writel(atchan, CFG, atchan->save_cfg);
+		if (atc_chan_is_cyclic(atchan))
+			atc_resume_cyclic(atchan);
+	}
+	return 0;
+}
+
+static const struct dev_pm_ops at_dma_dev_pm_ops = {
+	.prepare = at_dma_prepare,
+	.suspend_noirq = at_dma_suspend_noirq,
+	.resume_noirq = at_dma_resume_noirq,
+};
+
+static struct platform_driver at_dma_driver = {
+	.remove		= at_dma_remove,
+	.shutdown	= at_dma_shutdown,
+	.id_table	= atdma_devtypes,
+	.driver = {
+		.name	= "at_hdmac",
+		.pm	= &at_dma_dev_pm_ops,
+		.of_match_table	= of_match_ptr(atmel_dma_dt_ids),
+	},
+};
+
+static int __init at_dma_init(void)
+{
+	return platform_driver_probe(&at_dma_driver, at_dma_probe);
+}
+subsys_initcall(at_dma_init);
+
+static void __exit at_dma_exit(void)
+{
+	platform_driver_unregister(&at_dma_driver);
+}
+module_exit(at_dma_exit);
+
+MODULE_DESCRIPTION("Atmel AHB DMA Controller driver");
+MODULE_AUTHOR("Nicolas Ferre <nicolas.ferre@atmel.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:at_hdmac");
diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h
new file mode 100644
index 0000000..ef3f227
--- /dev/null
+++ b/drivers/dma/at_hdmac_regs.h
@@ -0,0 +1,462 @@
+/*
+ * Header file for the Atmel AHB DMA Controller driver
+ *
+ * Copyright (C) 2008 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#ifndef AT_HDMAC_REGS_H
+#define	AT_HDMAC_REGS_H
+
+#include <linux/platform_data/dma-atmel.h>
+
+#define	AT_DMA_MAX_NR_CHANNELS	8
+
+
+#define	AT_DMA_GCFG	0x00	/* Global Configuration Register */
+#define		AT_DMA_IF_BIGEND(i)	(0x1 << (i))	/* AHB-Lite Interface i in Big-endian mode */
+#define		AT_DMA_ARB_CFG	(0x1 << 4)	/* Arbiter mode. */
+#define			AT_DMA_ARB_CFG_FIXED		(0x0 << 4)
+#define			AT_DMA_ARB_CFG_ROUND_ROBIN	(0x1 << 4)
+
+#define	AT_DMA_EN	0x04	/* Controller Enable Register */
+#define		AT_DMA_ENABLE	(0x1 << 0)
+
+#define	AT_DMA_SREQ	0x08	/* Software Single Request Register */
+#define		AT_DMA_SSREQ(x)	(0x1 << ((x) << 1))		/* Request a source single transfer on channel x */
+#define		AT_DMA_DSREQ(x)	(0x1 << (1 + ((x) << 1)))	/* Request a destination single transfer on channel x */
+
+#define	AT_DMA_CREQ	0x0C	/* Software Chunk Transfer Request Register */
+#define		AT_DMA_SCREQ(x)	(0x1 << ((x) << 1))		/* Request a source chunk transfer on channel x */
+#define		AT_DMA_DCREQ(x)	(0x1 << (1 + ((x) << 1)))	/* Request a destination chunk transfer on channel x */
+
+#define	AT_DMA_LAST	0x10	/* Software Last Transfer Flag Register */
+#define		AT_DMA_SLAST(x)	(0x1 << ((x) << 1))		/* This src rq is last tx of buffer on channel x */
+#define		AT_DMA_DLAST(x)	(0x1 << (1 + ((x) << 1)))	/* This dst rq is last tx of buffer on channel x */
+
+#define	AT_DMA_SYNC	0x14	/* Request Synchronization Register */
+#define		AT_DMA_SYR(h)	(0x1 << (h))			/* Synchronize handshake line h */
+
+/* Error, Chained Buffer transfer completed and Buffer transfer completed Interrupt registers */
+#define	AT_DMA_EBCIER	0x18	/* Enable register */
+#define	AT_DMA_EBCIDR	0x1C	/* Disable register */
+#define	AT_DMA_EBCIMR	0x20	/* Mask Register */
+#define	AT_DMA_EBCISR	0x24	/* Status Register */
+#define		AT_DMA_CBTC_OFFSET	8
+#define		AT_DMA_ERR_OFFSET	16
+#define		AT_DMA_BTC(x)	(0x1 << (x))
+#define		AT_DMA_CBTC(x)	(0x1 << (AT_DMA_CBTC_OFFSET + (x)))
+#define		AT_DMA_ERR(x)	(0x1 << (AT_DMA_ERR_OFFSET + (x)))
+
+#define	AT_DMA_CHER	0x28	/* Channel Handler Enable Register */
+#define		AT_DMA_ENA(x)	(0x1 << (x))
+#define		AT_DMA_SUSP(x)	(0x1 << ( 8 + (x)))
+#define		AT_DMA_KEEP(x)	(0x1 << (24 + (x)))
+
+#define	AT_DMA_CHDR	0x2C	/* Channel Handler Disable Register */
+#define		AT_DMA_DIS(x)	(0x1 << (x))
+#define		AT_DMA_RES(x)	(0x1 << ( 8 + (x)))
+
+#define	AT_DMA_CHSR	0x30	/* Channel Handler Status Register */
+#define		AT_DMA_EMPT(x)	(0x1 << (16 + (x)))
+#define		AT_DMA_STAL(x)	(0x1 << (24 + (x)))
+
+
+#define	AT_DMA_CH_REGS_BASE	0x3C	/* Channel registers base address */
+#define	ch_regs(x)	(AT_DMA_CH_REGS_BASE + (x) * 0x28) /* Channel x base addr */
+
+/* Hardware register offset for each channel */
+#define	ATC_SADDR_OFFSET	0x00	/* Source Address Register */
+#define	ATC_DADDR_OFFSET	0x04	/* Destination Address Register */
+#define	ATC_DSCR_OFFSET		0x08	/* Descriptor Address Register */
+#define	ATC_CTRLA_OFFSET	0x0C	/* Control A Register */
+#define	ATC_CTRLB_OFFSET	0x10	/* Control B Register */
+#define	ATC_CFG_OFFSET		0x14	/* Configuration Register */
+#define	ATC_SPIP_OFFSET		0x18	/* Src PIP Configuration Register */
+#define	ATC_DPIP_OFFSET		0x1C	/* Dst PIP Configuration Register */
+
+
+/* Bitfield definitions */
+
+/* Bitfields in DSCR */
+#define	ATC_DSCR_IF(i)		(0x3 & (i))	/* Descriptor fetched via AHB-Lite Interface i */
+
+/* Bitfields in CTRLA */
+#define	ATC_BTSIZE_MAX		0xFFFFUL	/* Maximum Buffer Transfer Size */
+#define	ATC_BTSIZE(x)		(ATC_BTSIZE_MAX & (x)) /* Buffer Transfer Size */
+#define	ATC_SCSIZE_MASK		(0x7 << 16)	/* Source Chunk Transfer Size */
+#define		ATC_SCSIZE(x)		(ATC_SCSIZE_MASK & ((x) << 16))
+#define		ATC_SCSIZE_1		(0x0 << 16)
+#define		ATC_SCSIZE_4		(0x1 << 16)
+#define		ATC_SCSIZE_8		(0x2 << 16)
+#define		ATC_SCSIZE_16		(0x3 << 16)
+#define		ATC_SCSIZE_32		(0x4 << 16)
+#define		ATC_SCSIZE_64		(0x5 << 16)
+#define		ATC_SCSIZE_128		(0x6 << 16)
+#define		ATC_SCSIZE_256		(0x7 << 16)
+#define	ATC_DCSIZE_MASK		(0x7 << 20)	/* Destination Chunk Transfer Size */
+#define		ATC_DCSIZE(x)		(ATC_DCSIZE_MASK & ((x) << 20))
+#define		ATC_DCSIZE_1		(0x0 << 20)
+#define		ATC_DCSIZE_4		(0x1 << 20)
+#define		ATC_DCSIZE_8		(0x2 << 20)
+#define		ATC_DCSIZE_16		(0x3 << 20)
+#define		ATC_DCSIZE_32		(0x4 << 20)
+#define		ATC_DCSIZE_64		(0x5 << 20)
+#define		ATC_DCSIZE_128		(0x6 << 20)
+#define		ATC_DCSIZE_256		(0x7 << 20)
+#define	ATC_SRC_WIDTH_MASK	(0x3 << 24)	/* Source Single Transfer Size */
+#define		ATC_SRC_WIDTH(x)	((x) << 24)
+#define		ATC_SRC_WIDTH_BYTE	(0x0 << 24)
+#define		ATC_SRC_WIDTH_HALFWORD	(0x1 << 24)
+#define		ATC_SRC_WIDTH_WORD	(0x2 << 24)
+#define		ATC_REG_TO_SRC_WIDTH(r)	(((r) >> 24) & 0x3)
+#define	ATC_DST_WIDTH_MASK	(0x3 << 28)	/* Destination Single Transfer Size */
+#define		ATC_DST_WIDTH(x)	((x) << 28)
+#define		ATC_DST_WIDTH_BYTE	(0x0 << 28)
+#define		ATC_DST_WIDTH_HALFWORD	(0x1 << 28)
+#define		ATC_DST_WIDTH_WORD	(0x2 << 28)
+#define	ATC_DONE		(0x1 << 31)	/* Tx Done (only written back in descriptor) */
+
+/* Bitfields in CTRLB */
+#define	ATC_SIF(i)		(0x3 & (i))	/* Src tx done via AHB-Lite Interface i */
+#define	ATC_DIF(i)		((0x3 & (i)) <<  4)	/* Dst tx done via AHB-Lite Interface i */
+				  /* Specify AHB interfaces */
+#define AT_DMA_MEM_IF		0 /* interface 0 as memory interface */
+#define AT_DMA_PER_IF		1 /* interface 1 as peripheral interface */
+
+#define	ATC_SRC_PIP		(0x1 <<  8)	/* Source Picture-in-Picture enabled */
+#define	ATC_DST_PIP		(0x1 << 12)	/* Destination Picture-in-Picture enabled */
+#define	ATC_SRC_DSCR_DIS	(0x1 << 16)	/* Src Descriptor fetch disable */
+#define	ATC_DST_DSCR_DIS	(0x1 << 20)	/* Dst Descriptor fetch disable */
+#define	ATC_FC_MASK		(0x7 << 21)	/* Choose Flow Controller */
+#define		ATC_FC_MEM2MEM		(0x0 << 21)	/* Mem-to-Mem (DMA) */
+#define		ATC_FC_MEM2PER		(0x1 << 21)	/* Mem-to-Periph (DMA) */
+#define		ATC_FC_PER2MEM		(0x2 << 21)	/* Periph-to-Mem (DMA) */
+#define		ATC_FC_PER2PER		(0x3 << 21)	/* Periph-to-Periph (DMA) */
+#define		ATC_FC_PER2MEM_PER	(0x4 << 21)	/* Periph-to-Mem (Peripheral) */
+#define		ATC_FC_MEM2PER_PER	(0x5 << 21)	/* Mem-to-Periph (Peripheral) */
+#define		ATC_FC_PER2PER_SRCPER	(0x6 << 21)	/* Periph-to-Periph (Src Peripheral) */
+#define		ATC_FC_PER2PER_DSTPER	(0x7 << 21)	/* Periph-to-Periph (Dst Peripheral) */
+#define	ATC_SRC_ADDR_MODE_MASK	(0x3 << 24)
+#define		ATC_SRC_ADDR_MODE_INCR	(0x0 << 24)	/* Incrementing Mode */
+#define		ATC_SRC_ADDR_MODE_DECR	(0x1 << 24)	/* Decrementing Mode */
+#define		ATC_SRC_ADDR_MODE_FIXED	(0x2 << 24)	/* Fixed Mode */
+#define	ATC_DST_ADDR_MODE_MASK	(0x3 << 28)
+#define		ATC_DST_ADDR_MODE_INCR	(0x0 << 28)	/* Incrementing Mode */
+#define		ATC_DST_ADDR_MODE_DECR	(0x1 << 28)	/* Decrementing Mode */
+#define		ATC_DST_ADDR_MODE_FIXED	(0x2 << 28)	/* Fixed Mode */
+#define	ATC_IEN			(0x1 << 30)	/* BTC interrupt enable (active low) */
+#define	ATC_AUTO		(0x1 << 31)	/* Auto multiple buffer tx enable */
+
+/* Bitfields in CFG */
+/* are in at_hdmac.h */
+
+/* Bitfields in SPIP */
+#define	ATC_SPIP_HOLE(x)	(0xFFFFU & (x))
+#define	ATC_SPIP_BOUNDARY(x)	((0x3FF & (x)) << 16)
+
+/* Bitfields in DPIP */
+#define	ATC_DPIP_HOLE(x)	(0xFFFFU & (x))
+#define	ATC_DPIP_BOUNDARY(x)	((0x3FF & (x)) << 16)
+
+
+/*--  descriptors  -----------------------------------------------------*/
+
+/* LLI == Linked List Item; aka DMA buffer descriptor */
+struct at_lli {
+	/* values that are not changed by hardware */
+	dma_addr_t	saddr;
+	dma_addr_t	daddr;
+	/* value that may get written back: */
+	u32		ctrla;
+	/* more values that are not changed by hardware */
+	u32		ctrlb;
+	dma_addr_t	dscr;	/* chain to next lli */
+};
+
+/**
+ * struct at_desc - software descriptor
+ * @lli: hardware lli structure
+ * @txd: support for the async_tx api
+ * @desc_node: node on the channel's descriptors list
+ * @len: descriptor byte count
+ * @total_len: total transaction byte count
+ */
+struct at_desc {
+	/* FIRST values the hardware uses */
+	struct at_lli			lli;
+
+	/* THEN values for driver housekeeping */
+	struct list_head		tx_list;
+	struct dma_async_tx_descriptor	txd;
+	struct list_head		desc_node;
+	size_t				len;
+	size_t				total_len;
+
+	/* Interleaved data */
+	size_t				boundary;
+	size_t				dst_hole;
+	size_t				src_hole;
+
+	/* Memset temporary buffer */
+	bool				memset_buffer;
+	dma_addr_t			memset_paddr;
+	int				*memset_vaddr;
+};
+
+static inline struct at_desc *
+txd_to_at_desc(struct dma_async_tx_descriptor *txd)
+{
+	return container_of(txd, struct at_desc, txd);
+}
+
+
+/*--  Channels  --------------------------------------------------------*/
+
+/**
+ * atc_status - information bits stored in channel status flag
+ *
+ * Manipulated with atomic operations.
+ */
+enum atc_status {
+	ATC_IS_ERROR = 0,
+	ATC_IS_PAUSED = 1,
+	ATC_IS_CYCLIC = 24,
+};
+
+/**
+ * struct at_dma_chan - internal representation of an Atmel HDMAC channel
+ * @chan_common: common dmaengine channel object members
+ * @device: parent device
+ * @ch_regs: memory mapped register base
+ * @mask: channel index in a mask
+ * @per_if: peripheral interface
+ * @mem_if: memory interface
+ * @status: transmit status information from irq/prep* functions
+ *                to tasklet (use atomic operations)
+ * @tasklet: bottom half to finish transaction work
+ * @save_cfg: configuration register that is saved on suspend/resume cycle
+ * @save_dscr: for cyclic operations, preserve next descriptor address in
+ *             the cyclic list on suspend/resume cycle
+ * @dma_sconfig: configuration for slave transfers, passed via
+ * .device_config
+ * @lock: serializes enqueue/dequeue operations to descriptors lists
+ * @active_list: list of descriptors the dmaengine is currently running
+ * @queue: list of descriptors ready to be submitted to engine
+ * @free_list: list of descriptors usable by the channel
+ * @descs_allocated: records the actual size of the descriptor pool
+ */
+struct at_dma_chan {
+	struct dma_chan		chan_common;
+	struct at_dma		*device;
+	void __iomem		*ch_regs;
+	u8			mask;
+	u8			per_if;
+	u8			mem_if;
+	unsigned long		status;
+	struct tasklet_struct	tasklet;
+	u32			save_cfg;
+	u32			save_dscr;
+	struct dma_slave_config dma_sconfig;
+
+	spinlock_t		lock;
+
+	/* these other elements are all protected by lock */
+	struct list_head	active_list;
+	struct list_head	queue;
+	struct list_head	free_list;
+	unsigned int		descs_allocated;
+};
+
+#define	channel_readl(atchan, name) \
+	__raw_readl((atchan)->ch_regs + ATC_##name##_OFFSET)
+
+#define	channel_writel(atchan, name, val) \
+	__raw_writel((val), (atchan)->ch_regs + ATC_##name##_OFFSET)
+
+static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan)
+{
+	return container_of(dchan, struct at_dma_chan, chan_common);
+}
+
+/*
+ * Fix sconfig's burst size according to at_hdmac. We need to convert them as:
+ * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3, 32 -> 4, 64 -> 5, 128 -> 6, 256 -> 7.
+ *
+ * This can be done by finding the most significant bit set.
+ */
+static inline void convert_burst(u32 *maxburst)
+{
+	if (*maxburst > 1)
+		*maxburst = fls(*maxburst) - 2;
+	else
+		*maxburst = 0;
+}
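+
+/*
+ * Worked example: a maxburst of 16 gives fls(16) - 2 = 5 - 2 = 3,
+ * which matches the ATC_SCSIZE_16/ATC_DCSIZE_16 encodings above, while
+ * a maxburst of 1 (or 0) maps to 0, i.e. single transfers.
+ */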
+
+/*
+ * Fix sconfig's bus width according to at_hdmac.
+ * 1 byte -> 0, 2 bytes -> 1, 4 bytes -> 2.
+ */
+static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width)
+{
+	switch (addr_width) {
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		return 1;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		return 2;
+	default:
+		/* For 1 byte width or fallback */
+		return 0;
+	}
+}
+
+/*--  Controller  ------------------------------------------------------*/
+
+/**
+ * struct at_dma - internal representation of an Atmel HDMA Controller
+ * @dma_common: common dmaengine dma_device object members
+ * @regs: memory mapped register base
+ * @clk: dma controller clock
+ * @save_imr: interrupt mask register that is saved on suspend/resume cycle
+ * @all_chan_mask: all channels available in a mask
+ * @dma_desc_pool: base of DMA descriptor region (DMA address)
+ * @memset_pool: base of the memset helper buffer pool
+ * @chan: channels table to store at_dma_chan structures
+ */
+struct at_dma {
+	struct dma_device	dma_common;
+	void __iomem		*regs;
+	struct clk		*clk;
+	u32			save_imr;
+
+	u8			all_chan_mask;
+
+	struct dma_pool		*dma_desc_pool;
+	struct dma_pool		*memset_pool;
+	/* AT THE END channels table */
+	struct at_dma_chan	chan[0];
+};
+
+#define	dma_readl(atdma, name) \
+	__raw_readl((atdma)->regs + AT_DMA_##name)
+#define	dma_writel(atdma, name, val) \
+	__raw_writel((val), (atdma)->regs + AT_DMA_##name)
+
+static inline struct at_dma *to_at_dma(struct dma_device *ddev)
+{
+	return container_of(ddev, struct at_dma, dma_common);
+}
+
+
+/*--  Helper functions  ------------------------------------------------*/
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+	return &chan->dev->device;
+}
+
+#if defined(VERBOSE_DEBUG)
+static void vdbg_dump_regs(struct at_dma_chan *atchan)
+{
+	struct at_dma	*atdma = to_at_dma(atchan->chan_common.device);
+
+	dev_err(chan2dev(&atchan->chan_common),
+		"  channel %d : imr = 0x%x, chsr = 0x%x\n",
+		atchan->chan_common.chan_id,
+		dma_readl(atdma, EBCIMR),
+		dma_readl(atdma, CHSR));
+
+	dev_err(chan2dev(&atchan->chan_common),
+		"  channel: s0x%x d0x%x ctrl0x%x:0x%x cfg0x%x l0x%x\n",
+		channel_readl(atchan, SADDR),
+		channel_readl(atchan, DADDR),
+		channel_readl(atchan, CTRLA),
+		channel_readl(atchan, CTRLB),
+		channel_readl(atchan, CFG),
+		channel_readl(atchan, DSCR));
+}
+#else
+static void vdbg_dump_regs(struct at_dma_chan *atchan) {}
+#endif
+
+static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli)
+{
+	dev_crit(chan2dev(&atchan->chan_common),
+		 "desc: s%pad d%pad ctrl0x%x:0x%x l%pad\n",
+		 &lli->saddr, &lli->daddr,
+		 lli->ctrla, lli->ctrlb, &lli->dscr);
+}
+
+
+static void atc_setup_irq(struct at_dma *atdma, int chan_id, int on)
+{
+	u32 ebci;
+
+	/* enable interrupts on buffer transfer completion & error */
+	ebci =    AT_DMA_BTC(chan_id)
+		| AT_DMA_ERR(chan_id);
+	if (on)
+		dma_writel(atdma, EBCIER, ebci);
+	else
+		dma_writel(atdma, EBCIDR, ebci);
+}
+
+static void atc_enable_chan_irq(struct at_dma *atdma, int chan_id)
+{
+	atc_setup_irq(atdma, chan_id, 1);
+}
+
+static void atc_disable_chan_irq(struct at_dma *atdma, int chan_id)
+{
+	atc_setup_irq(atdma, chan_id, 0);
+}
+
+
+/**
+ * atc_chan_is_enabled - test if given channel is enabled
+ * @atchan: channel we want to test status
+ */
+static inline int atc_chan_is_enabled(struct at_dma_chan *atchan)
+{
+	struct at_dma	*atdma = to_at_dma(atchan->chan_common.device);
+
+	return !!(dma_readl(atdma, CHSR) & atchan->mask);
+}
+
+/**
+ * atc_chan_is_paused - test channel pause/resume status
+ * @atchan: channel we want to test status
+ */
+static inline int atc_chan_is_paused(struct at_dma_chan *atchan)
+{
+	return test_bit(ATC_IS_PAUSED, &atchan->status);
+}
+
+/**
+ * atc_chan_is_cyclic - test if given channel has cyclic property set
+ * @atchan: channel we want to test status
+ */
+static inline int atc_chan_is_cyclic(struct at_dma_chan *atchan)
+{
+	return test_bit(ATC_IS_CYCLIC, &atchan->status);
+}
+
+/**
+ * set_desc_eol - set end-of-link to descriptor so it will end transfer
+ * @desc: descriptor, single or at the end of a chain, to end chain on
+ */
+static void set_desc_eol(struct at_desc *desc)
+{
+	u32 ctrlb = desc->lli.ctrlb;
+
+	ctrlb &= ~ATC_IEN;
+	ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS;
+
+	desc->lli.ctrlb = ctrlb;
+	desc->lli.dscr = 0;
+}
+
+#endif /* AT_HDMAC_REGS_H */
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
new file mode 100644
index 0000000..4bf7256
--- /dev/null
+++ b/drivers/dma/at_xdmac.c
@@ -0,0 +1,2125 @@
+/*
+ * Driver for the Atmel Extensible DMA Controller (aka XDMAC on AT91 systems)
+ *
+ * Copyright (C) 2014 Atmel Corporation
+ *
+ * Author: Ludovic Desroches <ludovic.desroches@atmel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <asm/barrier.h>
+#include <dt-bindings/dma/at91.h>
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+
+#include "dmaengine.h"
+
+/* Global registers */
+#define AT_XDMAC_GTYPE		0x00	/* Global Type Register */
+#define		AT_XDMAC_NB_CH(i)	(((i) & 0x1F) + 1)		/* Number of Channels Minus One */
+#define		AT_XDMAC_FIFO_SZ(i)	(((i) >> 5) & 0x7FF)		/* Number of Bytes */
+#define		AT_XDMAC_NB_REQ(i)	((((i) >> 16) & 0x3F) + 1)	/* Number of Peripheral Requests Minus One */
+#define AT_XDMAC_GCFG		0x04	/* Global Configuration Register */
+#define AT_XDMAC_GWAC		0x08	/* Global Weighted Arbiter Configuration Register */
+#define AT_XDMAC_GIE		0x0C	/* Global Interrupt Enable Register */
+#define AT_XDMAC_GID		0x10	/* Global Interrupt Disable Register */
+#define AT_XDMAC_GIM		0x14	/* Global Interrupt Mask Register */
+#define AT_XDMAC_GIS		0x18	/* Global Interrupt Status Register */
+#define AT_XDMAC_GE		0x1C	/* Global Channel Enable Register */
+#define AT_XDMAC_GD		0x20	/* Global Channel Disable Register */
+#define AT_XDMAC_GS		0x24	/* Global Channel Status Register */
+#define AT_XDMAC_GRS		0x28	/* Global Channel Read Suspend Register */
+#define AT_XDMAC_GWS		0x2C	/* Global Write Suspend Register */
+#define AT_XDMAC_GRWS		0x30	/* Global Channel Read Write Suspend Register */
+#define AT_XDMAC_GRWR		0x34	/* Global Channel Read Write Resume Register */
+#define AT_XDMAC_GSWR		0x38	/* Global Channel Software Request Register */
+#define AT_XDMAC_GSWS		0x3C	/* Global channel Software Request Status Register */
+#define AT_XDMAC_GSWF		0x40	/* Global Channel Software Flush Request Register */
+#define AT_XDMAC_VERSION	0xFFC	/* XDMAC Version Register */
+
+/* Channel relative registers offsets */
+#define AT_XDMAC_CIE		0x00	/* Channel Interrupt Enable Register */
+#define		AT_XDMAC_CIE_BIE	BIT(0)	/* End of Block Interrupt Enable Bit */
+#define		AT_XDMAC_CIE_LIE	BIT(1)	/* End of Linked List Interrupt Enable Bit */
+#define		AT_XDMAC_CIE_DIE	BIT(2)	/* End of Disable Interrupt Enable Bit */
+#define		AT_XDMAC_CIE_FIE	BIT(3)	/* End of Flush Interrupt Enable Bit */
+#define		AT_XDMAC_CIE_RBEIE	BIT(4)	/* Read Bus Error Interrupt Enable Bit */
+#define		AT_XDMAC_CIE_WBEIE	BIT(5)	/* Write Bus Error Interrupt Enable Bit */
+#define		AT_XDMAC_CIE_ROIE	BIT(6)	/* Request Overflow Interrupt Enable Bit */
+#define AT_XDMAC_CID		0x04	/* Channel Interrupt Disable Register */
+#define		AT_XDMAC_CID_BID	BIT(0)	/* End of Block Interrupt Disable Bit */
+#define		AT_XDMAC_CID_LID	BIT(1)	/* End of Linked List Interrupt Disable Bit */
+#define		AT_XDMAC_CID_DID	BIT(2)	/* End of Disable Interrupt Disable Bit */
+#define		AT_XDMAC_CID_FID	BIT(3)	/* End of Flush Interrupt Disable Bit */
+#define		AT_XDMAC_CID_RBEID	BIT(4)	/* Read Bus Error Interrupt Disable Bit */
+#define		AT_XDMAC_CID_WBEID	BIT(5)	/* Write Bus Error Interrupt Disable Bit */
+#define		AT_XDMAC_CID_ROID	BIT(6)	/* Request Overflow Interrupt Disable Bit */
+#define AT_XDMAC_CIM		0x08	/* Channel Interrupt Mask Register */
+#define		AT_XDMAC_CIM_BIM	BIT(0)	/* End of Block Interrupt Mask Bit */
+#define		AT_XDMAC_CIM_LIM	BIT(1)	/* End of Linked List Interrupt Mask Bit */
+#define		AT_XDMAC_CIM_DIM	BIT(2)	/* End of Disable Interrupt Mask Bit */
+#define		AT_XDMAC_CIM_FIM	BIT(3)	/* End of Flush Interrupt Mask Bit */
+#define		AT_XDMAC_CIM_RBEIM	BIT(4)	/* Read Bus Error Interrupt Mask Bit */
+#define		AT_XDMAC_CIM_WBEIM	BIT(5)	/* Write Bus Error Interrupt Mask Bit */
+#define		AT_XDMAC_CIM_ROIM	BIT(6)	/* Request Overflow Interrupt Mask Bit */
+#define AT_XDMAC_CIS		0x0C	/* Channel Interrupt Status Register */
+#define		AT_XDMAC_CIS_BIS	BIT(0)	/* End of Block Interrupt Status Bit */
+#define		AT_XDMAC_CIS_LIS	BIT(1)	/* End of Linked List Interrupt Status Bit */
+#define		AT_XDMAC_CIS_DIS	BIT(2)	/* End of Disable Interrupt Status Bit */
+#define		AT_XDMAC_CIS_FIS	BIT(3)	/* End of Flush Interrupt Status Bit */
+#define		AT_XDMAC_CIS_RBEIS	BIT(4)	/* Read Bus Error Interrupt Status Bit */
+#define		AT_XDMAC_CIS_WBEIS	BIT(5)	/* Write Bus Error Interrupt Status Bit */
+#define		AT_XDMAC_CIS_ROIS	BIT(6)	/* Request Overflow Interrupt Status Bit */
+#define AT_XDMAC_CSA		0x10	/* Channel Source Address Register */
+#define AT_XDMAC_CDA		0x14	/* Channel Destination Address Register */
+#define AT_XDMAC_CNDA		0x18	/* Channel Next Descriptor Address Register */
+#define		AT_XDMAC_CNDA_NDAIF(i)	((i) & 0x1)			/* Channel x Next Descriptor Interface */
+#define		AT_XDMAC_CNDA_NDA(i)	((i) & 0xfffffffc)		/* Channel x Next Descriptor Address */
+#define AT_XDMAC_CNDC		0x1C	/* Channel Next Descriptor Control Register */
+#define		AT_XDMAC_CNDC_NDE		(0x1 << 0)		/* Channel x Next Descriptor Enable */
+#define		AT_XDMAC_CNDC_NDSUP		(0x1 << 1)		/* Channel x Next Descriptor Source Update */
+#define		AT_XDMAC_CNDC_NDDUP		(0x1 << 2)		/* Channel x Next Descriptor Destination Update */
+#define		AT_XDMAC_CNDC_NDVIEW_NDV0	(0x0 << 3)		/* Channel x Next Descriptor View 0 */
+#define		AT_XDMAC_CNDC_NDVIEW_NDV1	(0x1 << 3)		/* Channel x Next Descriptor View 1 */
+#define		AT_XDMAC_CNDC_NDVIEW_NDV2	(0x2 << 3)		/* Channel x Next Descriptor View 2 */
+#define		AT_XDMAC_CNDC_NDVIEW_NDV3	(0x3 << 3)		/* Channel x Next Descriptor View 3 */
+#define AT_XDMAC_CUBC		0x20	/* Channel Microblock Control Register */
+#define AT_XDMAC_CBC		0x24	/* Channel Block Control Register */
+#define AT_XDMAC_CC		0x28	/* Channel Configuration Register */
+#define		AT_XDMAC_CC_TYPE	(0x1 << 0)	/* Channel Transfer Type */
+#define			AT_XDMAC_CC_TYPE_MEM_TRAN	(0x0 << 0)	/* Memory to Memory Transfer */
+#define			AT_XDMAC_CC_TYPE_PER_TRAN	(0x1 << 0)	/* Peripheral to Memory or Memory to Peripheral Transfer */
+#define		AT_XDMAC_CC_MBSIZE_MASK	(0x3 << 1)
+#define			AT_XDMAC_CC_MBSIZE_SINGLE	(0x0 << 1)
+#define			AT_XDMAC_CC_MBSIZE_FOUR		(0x1 << 1)
+#define			AT_XDMAC_CC_MBSIZE_EIGHT	(0x2 << 1)
+#define			AT_XDMAC_CC_MBSIZE_SIXTEEN	(0x3 << 1)
+#define		AT_XDMAC_CC_DSYNC	(0x1 << 4)	/* Channel Synchronization */
+#define			AT_XDMAC_CC_DSYNC_PER2MEM	(0x0 << 4)
+#define			AT_XDMAC_CC_DSYNC_MEM2PER	(0x1 << 4)
+#define		AT_XDMAC_CC_PROT	(0x1 << 5)	/* Channel Protection */
+#define			AT_XDMAC_CC_PROT_SEC		(0x0 << 5)
+#define			AT_XDMAC_CC_PROT_UNSEC		(0x1 << 5)
+#define		AT_XDMAC_CC_SWREQ	(0x1 << 6)	/* Channel Software Request Trigger */
+#define			AT_XDMAC_CC_SWREQ_HWR_CONNECTED	(0x0 << 6)
+#define			AT_XDMAC_CC_SWREQ_SWR_CONNECTED	(0x1 << 6)
+#define		AT_XDMAC_CC_MEMSET	(0x1 << 7)	/* Channel Fill Block of memory */
+#define			AT_XDMAC_CC_MEMSET_NORMAL_MODE	(0x0 << 7)
+#define			AT_XDMAC_CC_MEMSET_HW_MODE	(0x1 << 7)
+#define		AT_XDMAC_CC_CSIZE(i)	((0x7 & (i)) << 8)	/* Channel Chunk Size */
+#define		AT_XDMAC_CC_DWIDTH_OFFSET	11
+#define		AT_XDMAC_CC_DWIDTH_MASK	(0x3 << AT_XDMAC_CC_DWIDTH_OFFSET)
+#define		AT_XDMAC_CC_DWIDTH(i)	((0x3 & (i)) << AT_XDMAC_CC_DWIDTH_OFFSET)	/* Channel Data Width */
+#define			AT_XDMAC_CC_DWIDTH_BYTE		0x0
+#define			AT_XDMAC_CC_DWIDTH_HALFWORD	0x1
+#define			AT_XDMAC_CC_DWIDTH_WORD		0x2
+#define			AT_XDMAC_CC_DWIDTH_DWORD	0x3
+#define		AT_XDMAC_CC_SIF(i)	((0x1 & (i)) << 13)	/* Channel Source Interface Identifier */
+#define		AT_XDMAC_CC_DIF(i)	((0x1 & (i)) << 14)	/* Channel Destination Interface Identifier */
+#define		AT_XDMAC_CC_SAM_MASK	(0x3 << 16)	/* Channel Source Addressing Mode */
+#define			AT_XDMAC_CC_SAM_FIXED_AM	(0x0 << 16)
+#define			AT_XDMAC_CC_SAM_INCREMENTED_AM	(0x1 << 16)
+#define			AT_XDMAC_CC_SAM_UBS_AM		(0x2 << 16)
+#define			AT_XDMAC_CC_SAM_UBS_DS_AM	(0x3 << 16)
+#define		AT_XDMAC_CC_DAM_MASK	(0x3 << 18)	/* Channel Destination Addressing Mode */
+#define			AT_XDMAC_CC_DAM_FIXED_AM	(0x0 << 18)
+#define			AT_XDMAC_CC_DAM_INCREMENTED_AM	(0x1 << 18)
+#define			AT_XDMAC_CC_DAM_UBS_AM		(0x2 << 18)
+#define			AT_XDMAC_CC_DAM_UBS_DS_AM	(0x3 << 18)
+#define		AT_XDMAC_CC_INITD	(0x1 << 21)	/* Channel Initialization Terminated (read only) */
+#define			AT_XDMAC_CC_INITD_TERMINATED	(0x0 << 21)
+#define			AT_XDMAC_CC_INITD_IN_PROGRESS	(0x1 << 21)
+#define		AT_XDMAC_CC_RDIP	(0x1 << 22)	/* Read in Progress (read only) */
+#define			AT_XDMAC_CC_RDIP_DONE		(0x0 << 22)
+#define			AT_XDMAC_CC_RDIP_IN_PROGRESS	(0x1 << 22)
+#define		AT_XDMAC_CC_WRIP	(0x1 << 23)	/* Write in Progress (read only) */
+#define			AT_XDMAC_CC_WRIP_DONE		(0x0 << 23)
+#define			AT_XDMAC_CC_WRIP_IN_PROGRESS	(0x1 << 23)
+#define		AT_XDMAC_CC_PERID(i)	((0x7f & (i)) << 24)	/* Channel Peripheral Identifier */
+#define AT_XDMAC_CDS_MSP	0x2C	/* Channel Data Stride Memory Set Pattern */
+#define AT_XDMAC_CSUS		0x30	/* Channel Source Microblock Stride */
+#define AT_XDMAC_CDUS		0x34	/* Channel Destination Microblock Stride */
+
+#define AT_XDMAC_CHAN_REG_BASE	0x50	/* Channel registers base address */
+
+/* Microblock control members */
+#define AT_XDMAC_MBR_UBC_UBLEN_MAX	0xFFFFFFUL	/* Maximum Microblock Length */
+#define AT_XDMAC_MBR_UBC_NDE		(0x1 << 24)	/* Next Descriptor Enable */
+#define AT_XDMAC_MBR_UBC_NSEN		(0x1 << 25)	/* Next Descriptor Source Update */
+#define AT_XDMAC_MBR_UBC_NDEN		(0x1 << 26)	/* Next Descriptor Destination Update */
+#define AT_XDMAC_MBR_UBC_NDV0		(0x0 << 27)	/* Next Descriptor View 0 */
+#define AT_XDMAC_MBR_UBC_NDV1		(0x1 << 27)	/* Next Descriptor View 1 */
+#define AT_XDMAC_MBR_UBC_NDV2		(0x2 << 27)	/* Next Descriptor View 2 */
+#define AT_XDMAC_MBR_UBC_NDV3		(0x3 << 27)	/* Next Descriptor View 3 */
+
+#define AT_XDMAC_MAX_CHAN	0x20
+#define AT_XDMAC_MAX_CSIZE	16	/* 16 data */
+#define AT_XDMAC_MAX_DWIDTH	8	/* 64 bits */
+#define AT_XDMAC_RESIDUE_MAX_RETRIES	5
+
+#define AT_XDMAC_DMA_BUSWIDTHS\
+	(BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) |\
+	BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |\
+	BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |\
+	BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |\
+	BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
+
+enum atc_status {
+	AT_XDMAC_CHAN_IS_CYCLIC = 0,
+	AT_XDMAC_CHAN_IS_PAUSED,
+};
+
+/* ----- Channels ----- */
+struct at_xdmac_chan {
+	struct dma_chan			chan;
+	void __iomem			*ch_regs;
+	u32				mask;		/* Channel Mask */
+	u32				cfg;		/* Channel Configuration Register */
+	u8				perid;		/* Peripheral ID */
+	u8				perif;		/* Peripheral Interface */
+	u8				memif;		/* Memory Interface */
+	u32				save_cc;
+	u32				save_cim;
+	u32				save_cnda;
+	u32				save_cndc;
+	unsigned long			status;
+	struct tasklet_struct		tasklet;
+	struct dma_slave_config		sconfig;
+
+	spinlock_t			lock;
+
+	struct list_head		xfers_list;
+	struct list_head		free_descs_list;
+};
+
+
+/* ----- Controller ----- */
+struct at_xdmac {
+	struct dma_device	dma;
+	void __iomem		*regs;
+	int			irq;
+	struct clk		*clk;
+	u32			save_gim;
+	struct dma_pool		*at_xdmac_desc_pool;
+	struct at_xdmac_chan	chan[0];
+};
+
+
+/* ----- Descriptors ----- */
+
+/* Linked List Descriptor */
+struct at_xdmac_lld {
+	dma_addr_t	mbr_nda;	/* Next Descriptor Member */
+	u32		mbr_ubc;	/* Microblock Control Member */
+	dma_addr_t	mbr_sa;		/* Source Address Member */
+	dma_addr_t	mbr_da;		/* Destination Address Member */
+	u32		mbr_cfg;	/* Configuration Register */
+	u32		mbr_bc;		/* Block Control Register */
+	u32		mbr_ds;		/* Data Stride Register */
+	u32		mbr_sus;	/* Source Microblock Stride Register */
+	u32		mbr_dus;	/* Destination Microblock Stride Register */
+};
+
+/* 64-bit alignment needed to update CNDA and CUBC registers in an atomic way. */
+struct at_xdmac_desc {
+	struct at_xdmac_lld		lld;
+	enum dma_transfer_direction	direction;
+	struct dma_async_tx_descriptor	tx_dma_desc;
+	struct list_head		desc_node;
+	/* Following members are only used by the first descriptor */
+	bool				active_xfer;
+	unsigned int			xfer_size;
+	struct list_head		descs_list;
+	struct list_head		xfer_node;
+} __aligned(sizeof(u64));
+
+static inline void __iomem *at_xdmac_chan_reg_base(struct at_xdmac *atxdmac, unsigned int chan_nb)
+{
+	return atxdmac->regs + (AT_XDMAC_CHAN_REG_BASE + chan_nb * 0x40);
+}
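+
+/*
+ * For example, channel 2 has its per-channel registers at
+ * 0x50 + 2 * 0x40 = 0xd0 from the controller base.
+ */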
+
+#define at_xdmac_read(atxdmac, reg) readl_relaxed((atxdmac)->regs + (reg))
+#define at_xdmac_write(atxdmac, reg, value) \
+	writel_relaxed((value), (atxdmac)->regs + (reg))
+
+#define at_xdmac_chan_read(atchan, reg) readl_relaxed((atchan)->ch_regs + (reg))
+#define at_xdmac_chan_write(atchan, reg, value) writel_relaxed((value), (atchan)->ch_regs + (reg))
+
+static inline struct at_xdmac_chan *to_at_xdmac_chan(struct dma_chan *dchan)
+{
+	return container_of(dchan, struct at_xdmac_chan, chan);
+}
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+	return &chan->dev->device;
+}
+
+static inline struct at_xdmac *to_at_xdmac(struct dma_device *ddev)
+{
+	return container_of(ddev, struct at_xdmac, dma);
+}
+
+static inline struct at_xdmac_desc *txd_to_at_desc(struct dma_async_tx_descriptor *txd)
+{
+	return container_of(txd, struct at_xdmac_desc, tx_dma_desc);
+}
+
+static inline int at_xdmac_chan_is_cyclic(struct at_xdmac_chan *atchan)
+{
+	return test_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status);
+}
+
+static inline int at_xdmac_chan_is_paused(struct at_xdmac_chan *atchan)
+{
+	return test_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
+}
+
+static inline int at_xdmac_csize(u32 maxburst)
+{
+	int csize;
+
+	csize = ffs(maxburst) - 1;
+	if (csize > 4)
+		csize = -EINVAL;
+
+	return csize;
+};
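+
+/*
+ * For example, a maxburst of 1 encodes to a chunk size of 0 (one data per
+ * chunk), 4 encodes to 2 and 16 encodes to 4, the maximum supported by the
+ * controller; a maxburst of 32 is rejected with -EINVAL.
+ */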
+
+static inline u8 at_xdmac_get_dwidth(u32 cfg)
+{
+	return (cfg & AT_XDMAC_CC_DWIDTH_MASK) >> AT_XDMAC_CC_DWIDTH_OFFSET;
+};
+
+static unsigned int init_nr_desc_per_channel = 64;
+module_param(init_nr_desc_per_channel, uint, 0644);
+MODULE_PARM_DESC(init_nr_desc_per_channel,
+		 "initial descriptors per channel (default: 64)");
+
+
+static bool at_xdmac_chan_is_enabled(struct at_xdmac_chan *atchan)
+{
+	struct at_xdmac	*atxdmac = to_at_xdmac(atchan->chan.device);
+
+	return at_xdmac_read(atxdmac, AT_XDMAC_GS) & atchan->mask;
+}
+
+static void at_xdmac_off(struct at_xdmac *atxdmac)
+{
+	at_xdmac_write(atxdmac, AT_XDMAC_GD, -1L);
+
+	/* Wait until all channels are disabled. */
+	while (at_xdmac_read(atxdmac, AT_XDMAC_GS))
+		cpu_relax();
+
+	at_xdmac_write(atxdmac, AT_XDMAC_GID, -1L);
+}
+
+/* Call with the lock held. */
+static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan,
+				struct at_xdmac_desc *first)
+{
+	struct at_xdmac	*atxdmac = to_at_xdmac(atchan->chan.device);
+	u32		reg;
+
+	dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, first);
+
+	if (at_xdmac_chan_is_enabled(atchan))
+		return;
+
+	/* Set transfer as active to not try to start it again. */
+	first->active_xfer = true;
+
+	/* Tell xdmac where to get the first descriptor. */
+	reg = AT_XDMAC_CNDA_NDA(first->tx_dma_desc.phys)
+	      | AT_XDMAC_CNDA_NDAIF(atchan->memif);
+	at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, reg);
+
+	/*
+	 * When doing a non-cyclic transfer, we need to use next descriptor
+	 * view 2 (or higher) since some fields of the configuration register
+	 * depend on the transfer size and the src/dest addresses.
+	 */
+	if (at_xdmac_chan_is_cyclic(atchan))
+		reg = AT_XDMAC_CNDC_NDVIEW_NDV1;
+	else if (first->lld.mbr_ubc & AT_XDMAC_MBR_UBC_NDV3)
+		reg = AT_XDMAC_CNDC_NDVIEW_NDV3;
+	else
+		reg = AT_XDMAC_CNDC_NDVIEW_NDV2;
+	/*
+	 * Even though the register is updated from the configuration in the
+	 * descriptor when using view 2 or higher, the PROT bit won't be set
+	 * properly. This bit can be modified only by using the channel
+	 * configuration register.
+	 */
+	at_xdmac_chan_write(atchan, AT_XDMAC_CC, first->lld.mbr_cfg);
+
+	reg |= AT_XDMAC_CNDC_NDDUP
+	       | AT_XDMAC_CNDC_NDSUP
+	       | AT_XDMAC_CNDC_NDE;
+	at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, reg);
+
+	dev_vdbg(chan2dev(&atchan->chan),
+		 "%s: CC=0x%08x CNDA=0x%08x, CNDC=0x%08x, CSA=0x%08x, CDA=0x%08x, CUBC=0x%08x\n",
+		 __func__, at_xdmac_chan_read(atchan, AT_XDMAC_CC),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CNDA),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CNDC),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CSA),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CDA),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CUBC));
+
+	at_xdmac_chan_write(atchan, AT_XDMAC_CID, 0xffffffff);
+	reg = AT_XDMAC_CIE_RBEIE | AT_XDMAC_CIE_WBEIE | AT_XDMAC_CIE_ROIE;
+	/*
+	 * There is no end of list when doing a cyclic DMA transfer, so we
+	 * need to get an interrupt after each period.
+	 */
+	if (at_xdmac_chan_is_cyclic(atchan))
+		at_xdmac_chan_write(atchan, AT_XDMAC_CIE,
+				    reg | AT_XDMAC_CIE_BIE);
+	else
+		at_xdmac_chan_write(atchan, AT_XDMAC_CIE,
+				    reg | AT_XDMAC_CIE_LIE);
+	at_xdmac_write(atxdmac, AT_XDMAC_GIE, atchan->mask);
+	dev_vdbg(chan2dev(&atchan->chan),
+		 "%s: enable channel (0x%08x)\n", __func__, atchan->mask);
+	wmb();
+	at_xdmac_write(atxdmac, AT_XDMAC_GE, atchan->mask);
+
+	dev_vdbg(chan2dev(&atchan->chan),
+		 "%s: CC=0x%08x CNDA=0x%08x, CNDC=0x%08x, CSA=0x%08x, CDA=0x%08x, CUBC=0x%08x\n",
+		 __func__, at_xdmac_chan_read(atchan, AT_XDMAC_CC),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CNDA),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CNDC),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CSA),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CDA),
+		 at_xdmac_chan_read(atchan, AT_XDMAC_CUBC));
+
+}
+
+static dma_cookie_t at_xdmac_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct at_xdmac_desc	*desc = txd_to_at_desc(tx);
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(tx->chan);
+	dma_cookie_t		cookie;
+	unsigned long		irqflags;
+
+	spin_lock_irqsave(&atchan->lock, irqflags);
+	cookie = dma_cookie_assign(tx);
+
+	dev_vdbg(chan2dev(tx->chan), "%s: atchan 0x%p, add desc 0x%p to xfers_list\n",
+		 __func__, atchan, desc);
+	list_add_tail(&desc->xfer_node, &atchan->xfers_list);
+	if (list_is_singular(&atchan->xfers_list))
+		at_xdmac_start_xfer(atchan, desc);
+
+	spin_unlock_irqrestore(&atchan->lock, irqflags);
+	return cookie;
+}
+
+static struct at_xdmac_desc *at_xdmac_alloc_desc(struct dma_chan *chan,
+						 gfp_t gfp_flags)
+{
+	struct at_xdmac_desc	*desc;
+	struct at_xdmac		*atxdmac = to_at_xdmac(chan->device);
+	dma_addr_t		phys;
+
+	desc = dma_pool_zalloc(atxdmac->at_xdmac_desc_pool, gfp_flags, &phys);
+	if (desc) {
+		INIT_LIST_HEAD(&desc->descs_list);
+		dma_async_tx_descriptor_init(&desc->tx_dma_desc, chan);
+		desc->tx_dma_desc.tx_submit = at_xdmac_tx_submit;
+		desc->tx_dma_desc.phys = phys;
+	}
+
+	return desc;
+}
+
+static void at_xdmac_init_used_desc(struct at_xdmac_desc *desc)
+{
+	memset(&desc->lld, 0, sizeof(desc->lld));
+	INIT_LIST_HEAD(&desc->descs_list);
+	desc->direction = DMA_TRANS_NONE;
+	desc->xfer_size = 0;
+	desc->active_xfer = false;
+}
+
+/* Call must be protected by lock. */
+static struct at_xdmac_desc *at_xdmac_get_desc(struct at_xdmac_chan *atchan)
+{
+	struct at_xdmac_desc *desc;
+
+	if (list_empty(&atchan->free_descs_list)) {
+		desc = at_xdmac_alloc_desc(&atchan->chan, GFP_NOWAIT);
+	} else {
+		desc = list_first_entry(&atchan->free_descs_list,
+					struct at_xdmac_desc, desc_node);
+		list_del(&desc->desc_node);
+		at_xdmac_init_used_desc(desc);
+	}
+
+	return desc;
+}
+
+static void at_xdmac_queue_desc(struct dma_chan *chan,
+				struct at_xdmac_desc *prev,
+				struct at_xdmac_desc *desc)
+{
+	if (!prev || !desc)
+		return;
+
+	prev->lld.mbr_nda = desc->tx_dma_desc.phys;
+	prev->lld.mbr_ubc |= AT_XDMAC_MBR_UBC_NDE;
+
+	dev_dbg(chan2dev(chan),	"%s: chain lld: prev=0x%p, mbr_nda=%pad\n",
+		__func__, prev, &prev->lld.mbr_nda);
+}
+
+static inline void at_xdmac_increment_block_count(struct dma_chan *chan,
+						  struct at_xdmac_desc *desc)
+{
+	if (!desc)
+		return;
+
+	desc->lld.mbr_bc++;
+
+	dev_dbg(chan2dev(chan),
+		"%s: incrementing the block count of the desc 0x%p\n",
+		__func__, desc);
+}
+
+static struct dma_chan *at_xdmac_xlate(struct of_phandle_args *dma_spec,
+				       struct of_dma *of_dma)
+{
+	struct at_xdmac		*atxdmac = of_dma->of_dma_data;
+	struct at_xdmac_chan	*atchan;
+	struct dma_chan		*chan;
+	struct device		*dev = atxdmac->dma.dev;
+
+	if (dma_spec->args_count != 1) {
+		dev_err(dev, "dma phandler args: bad number of args\n");
+		return NULL;
+	}
+
+	chan = dma_get_any_slave_channel(&atxdmac->dma);
+	if (!chan) {
+		dev_err(dev, "can't get a dma channel\n");
+		return NULL;
+	}
+
+	atchan = to_at_xdmac_chan(chan);
+	atchan->memif = AT91_XDMAC_DT_GET_MEM_IF(dma_spec->args[0]);
+	atchan->perif = AT91_XDMAC_DT_GET_PER_IF(dma_spec->args[0]);
+	atchan->perid = AT91_XDMAC_DT_GET_PERID(dma_spec->args[0]);
+	dev_dbg(dev, "chan dt cfg: memif=%u perif=%u perid=%u\n",
+		 atchan->memif, atchan->perif, atchan->perid);
+
+	return chan;
+}
+
+static int at_xdmac_compute_chan_conf(struct dma_chan *chan,
+				      enum dma_transfer_direction direction)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	int			csize, dwidth;
+
+	if (direction == DMA_DEV_TO_MEM) {
+		atchan->cfg =
+			AT91_XDMAC_DT_PERID(atchan->perid)
+			| AT_XDMAC_CC_DAM_INCREMENTED_AM
+			| AT_XDMAC_CC_SAM_FIXED_AM
+			| AT_XDMAC_CC_DIF(atchan->memif)
+			| AT_XDMAC_CC_SIF(atchan->perif)
+			| AT_XDMAC_CC_SWREQ_HWR_CONNECTED
+			| AT_XDMAC_CC_DSYNC_PER2MEM
+			| AT_XDMAC_CC_MBSIZE_SIXTEEN
+			| AT_XDMAC_CC_TYPE_PER_TRAN;
+		csize = ffs(atchan->sconfig.src_maxburst) - 1;
+		if (csize < 0) {
+			dev_err(chan2dev(chan), "invalid src maxburst value\n");
+			return -EINVAL;
+		}
+		atchan->cfg |= AT_XDMAC_CC_CSIZE(csize);
+		dwidth = ffs(atchan->sconfig.src_addr_width) - 1;
+		if (dwidth < 0) {
+			dev_err(chan2dev(chan), "invalid src addr width value\n");
+			return -EINVAL;
+		}
+		atchan->cfg |= AT_XDMAC_CC_DWIDTH(dwidth);
+	} else if (direction == DMA_MEM_TO_DEV) {
+		atchan->cfg =
+			AT91_XDMAC_DT_PERID(atchan->perid)
+			| AT_XDMAC_CC_DAM_FIXED_AM
+			| AT_XDMAC_CC_SAM_INCREMENTED_AM
+			| AT_XDMAC_CC_DIF(atchan->perif)
+			| AT_XDMAC_CC_SIF(atchan->memif)
+			| AT_XDMAC_CC_SWREQ_HWR_CONNECTED
+			| AT_XDMAC_CC_DSYNC_MEM2PER
+			| AT_XDMAC_CC_MBSIZE_SIXTEEN
+			| AT_XDMAC_CC_TYPE_PER_TRAN;
+		csize = ffs(atchan->sconfig.dst_maxburst) - 1;
+		if (csize < 0) {
+			dev_err(chan2dev(chan), "invalid src maxburst value\n");
+			return -EINVAL;
+		}
+		atchan->cfg |= AT_XDMAC_CC_CSIZE(csize);
+		dwidth = ffs(atchan->sconfig.dst_addr_width) - 1;
+		if (dwidth < 0) {
+			dev_err(chan2dev(chan), "invalid dst addr width value\n");
+			return -EINVAL;
+		}
+		atchan->cfg |= AT_XDMAC_CC_DWIDTH(dwidth);
+	}
+
+	dev_dbg(chan2dev(chan),	"%s: cfg=0x%08x\n", __func__, atchan->cfg);
+
+	return 0;
+}
+
+/*
+ * Only check that the maxburst and addr width values are supported by the
+ * controller, not that the configuration is suitable for the transfer,
+ * since we don't know the direction at this stage.
+ */
+static int at_xdmac_check_slave_config(struct dma_slave_config *sconfig)
+{
+	if ((sconfig->src_maxburst > AT_XDMAC_MAX_CSIZE)
+	    || (sconfig->dst_maxburst > AT_XDMAC_MAX_CSIZE))
+		return -EINVAL;
+
+	if ((sconfig->src_addr_width > AT_XDMAC_MAX_DWIDTH)
+	    || (sconfig->dst_addr_width > AT_XDMAC_MAX_DWIDTH))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int at_xdmac_set_slave_config(struct dma_chan *chan,
+				      struct dma_slave_config *sconfig)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+
+	if (at_xdmac_check_slave_config(sconfig)) {
+		dev_err(chan2dev(chan), "invalid slave configuration\n");
+		return -EINVAL;
+	}
+
+	memcpy(&atchan->sconfig, sconfig, sizeof(atchan->sconfig));
+
+	return 0;
+}
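+
+/*
+ * Informal sketch of how a slave client is expected to use this
+ * configuration path; the channel handle and the peripheral FIFO address
+ * below are placeholders, not taken from a real driver:
+ *
+ *	struct dma_slave_config cfg = {
+ *		.src_addr	= periph_rx_fifo_phys,
+ *		.src_addr_width	= DMA_SLAVE_BUSWIDTH_1_BYTE,
+ *		.src_maxburst	= 1,
+ *	};
+ *
+ *	dmaengine_slave_config(chan, &cfg);
+ *	desc = dmaengine_prep_slave_sg(chan, sgl, nents, DMA_DEV_TO_MEM,
+ *				       DMA_PREP_INTERRUPT);
+ *	cookie = dmaengine_submit(desc);
+ *	dma_async_issue_pending(chan);
+ *
+ * dmaengine_slave_config() lands in at_xdmac_device_config(), the prep call
+ * in at_xdmac_prep_slave_sg() just below, and dma_async_issue_pending() in
+ * at_xdmac_issue_pending().
+ */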
+
+static struct dma_async_tx_descriptor *
+at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		       unsigned int sg_len, enum dma_transfer_direction direction,
+		       unsigned long flags, void *context)
+{
+	struct at_xdmac_chan		*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac_desc		*first = NULL, *prev = NULL;
+	struct scatterlist		*sg;
+	int				i;
+	unsigned int			xfer_size = 0;
+	unsigned long			irqflags;
+	struct dma_async_tx_descriptor	*ret = NULL;
+
+	if (!sgl)
+		return NULL;
+
+	if (!is_slave_direction(direction)) {
+		dev_err(chan2dev(chan), "invalid DMA direction\n");
+		return NULL;
+	}
+
+	dev_dbg(chan2dev(chan), "%s: sg_len=%d, dir=%s, flags=0x%lx\n",
+		 __func__, sg_len,
+		 direction == DMA_MEM_TO_DEV ? "to device" : "from device",
+		 flags);
+
+	/* Protect the sconfig field, modified by at_xdmac_set_slave_config(). */
+	spin_lock_irqsave(&atchan->lock, irqflags);
+
+	if (at_xdmac_compute_chan_conf(chan, direction))
+		goto spin_unlock;
+
+	/* Prepare descriptors. */
+	for_each_sg(sgl, sg, sg_len, i) {
+		struct at_xdmac_desc	*desc = NULL;
+		u32			len, mem, dwidth, fixed_dwidth;
+
+		len = sg_dma_len(sg);
+		mem = sg_dma_address(sg);
+		if (unlikely(!len)) {
+			dev_err(chan2dev(chan), "sg data length is zero\n");
+			goto spin_unlock;
+		}
+		dev_dbg(chan2dev(chan), "%s: * sg%d len=%u, mem=0x%08x\n",
+			 __func__, i, len, mem);
+
+		desc = at_xdmac_get_desc(atchan);
+		if (!desc) {
+			dev_err(chan2dev(chan), "can't get descriptor\n");
+			if (first)
+				list_splice_init(&first->descs_list, &atchan->free_descs_list);
+			goto spin_unlock;
+		}
+
+		/* Linked list descriptor setup. */
+		if (direction == DMA_DEV_TO_MEM) {
+			desc->lld.mbr_sa = atchan->sconfig.src_addr;
+			desc->lld.mbr_da = mem;
+		} else {
+			desc->lld.mbr_sa = mem;
+			desc->lld.mbr_da = atchan->sconfig.dst_addr;
+		}
+		dwidth = at_xdmac_get_dwidth(atchan->cfg);
+		fixed_dwidth = IS_ALIGNED(len, 1 << dwidth)
+			       ? dwidth
+			       : AT_XDMAC_CC_DWIDTH_BYTE;
+		desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2			/* next descriptor view */
+			| AT_XDMAC_MBR_UBC_NDEN					/* next descriptor dst parameter update */
+			| AT_XDMAC_MBR_UBC_NSEN					/* next descriptor src parameter update */
+			| (len >> fixed_dwidth);				/* microblock length */
+		desc->lld.mbr_cfg = (atchan->cfg & ~AT_XDMAC_CC_DWIDTH_MASK) |
+				    AT_XDMAC_CC_DWIDTH(fixed_dwidth);
+		dev_dbg(chan2dev(chan),
+			 "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n",
+			 __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc);
+
+		/* Chain lld. */
+		if (prev)
+			at_xdmac_queue_desc(chan, prev, desc);
+
+		prev = desc;
+		if (!first)
+			first = desc;
+
+		dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
+			 __func__, desc, first);
+		list_add_tail(&desc->desc_node, &first->descs_list);
+		xfer_size += len;
+	}
+
+
+	first->tx_dma_desc.flags = flags;
+	first->xfer_size = xfer_size;
+	first->direction = direction;
+	ret = &first->tx_dma_desc;
+
+spin_unlock:
+	spin_unlock_irqrestore(&atchan->lock, irqflags);
+	return ret;
+}
+
+static struct dma_async_tx_descriptor *
+at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
+			 size_t buf_len, size_t period_len,
+			 enum dma_transfer_direction direction,
+			 unsigned long flags)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac_desc	*first = NULL, *prev = NULL;
+	unsigned int		periods = buf_len / period_len;
+	int			i;
+	unsigned long		irqflags;
+
+	dev_dbg(chan2dev(chan), "%s: buf_addr=%pad, buf_len=%zd, period_len=%zd, dir=%s, flags=0x%lx\n",
+		__func__, &buf_addr, buf_len, period_len,
+		direction == DMA_MEM_TO_DEV ? "mem2per" : "per2mem", flags);
+
+	if (!is_slave_direction(direction)) {
+		dev_err(chan2dev(chan), "invalid DMA direction\n");
+		return NULL;
+	}
+
+	if (test_and_set_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status)) {
+		dev_err(chan2dev(chan), "channel currently used\n");
+		return NULL;
+	}
+
+	if (at_xdmac_compute_chan_conf(chan, direction))
+		return NULL;
+
+	for (i = 0; i < periods; i++) {
+		struct at_xdmac_desc	*desc = NULL;
+
+		spin_lock_irqsave(&atchan->lock, irqflags);
+		desc = at_xdmac_get_desc(atchan);
+		if (!desc) {
+			dev_err(chan2dev(chan), "can't get descriptor\n");
+			if (first)
+				list_splice_init(&first->descs_list, &atchan->free_descs_list);
+			spin_unlock_irqrestore(&atchan->lock, irqflags);
+			return NULL;
+		}
+		spin_unlock_irqrestore(&atchan->lock, irqflags);
+		dev_dbg(chan2dev(chan),
+			"%s: desc=0x%p, tx_dma_desc.phys=%pad\n",
+			__func__, desc, &desc->tx_dma_desc.phys);
+
+		if (direction == DMA_DEV_TO_MEM) {
+			desc->lld.mbr_sa = atchan->sconfig.src_addr;
+			desc->lld.mbr_da = buf_addr + i * period_len;
+		} else {
+			desc->lld.mbr_sa = buf_addr + i * period_len;
+			desc->lld.mbr_da = atchan->sconfig.dst_addr;
+		}
+		desc->lld.mbr_cfg = atchan->cfg;
+		desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV1
+			| AT_XDMAC_MBR_UBC_NDEN
+			| AT_XDMAC_MBR_UBC_NSEN
+			| period_len >> at_xdmac_get_dwidth(desc->lld.mbr_cfg);
+
+		dev_dbg(chan2dev(chan),
+			 "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x\n",
+			 __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc);
+
+		/* Chain lld. */
+		if (prev)
+			at_xdmac_queue_desc(chan, prev, desc);
+
+		prev = desc;
+		if (!first)
+			first = desc;
+
+		dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
+			 __func__, desc, first);
+		list_add_tail(&desc->desc_node, &first->descs_list);
+	}
+
+	at_xdmac_queue_desc(chan, prev, first);
+	first->tx_dma_desc.flags = flags;
+	first->xfer_size = buf_len;
+	first->direction = direction;
+
+	return &first->tx_dma_desc;
+}
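+
+/*
+ * A cyclic transfer is typically requested with dmaengine_prep_dma_cyclic()
+ * (for instance for a ring buffer split into period_len chunks). The
+ * descriptor chain built above loops back on itself and, as set up in
+ * at_xdmac_start_xfer(), a block interrupt is raised after each period so
+ * the client callback can run.
+ */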
+
+static inline u32 at_xdmac_align_width(struct dma_chan *chan, dma_addr_t addr)
+{
+	u32 width;
+
+	/*
+	 * Check the address alignment to select the greatest data width we
+	 * can use.
+	 *
+	 * Some XDMAC implementations don't provide dword transfers; in that
+	 * case selecting dword has the same behavior as selecting word
+	 * transfers.
+	 */
+	if (!(addr & 7)) {
+		width = AT_XDMAC_CC_DWIDTH_DWORD;
+		dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__);
+	} else if (!(addr & 3)) {
+		width = AT_XDMAC_CC_DWIDTH_WORD;
+		dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__);
+	} else if (!(addr & 1)) {
+		width = AT_XDMAC_CC_DWIDTH_HALFWORD;
+		dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__);
+	} else {
+		width = AT_XDMAC_CC_DWIDTH_BYTE;
+		dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__);
+	}
+
+	return width;
+}
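+
+/*
+ * For example, an address of 0x20001004 is word aligned but not dword
+ * aligned, so at_xdmac_align_width() returns AT_XDMAC_CC_DWIDTH_WORD.
+ * Callers OR source, destination and length together before the call so
+ * that the selected width fits the least aligned of the three.
+ */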
+
+static struct at_xdmac_desc *
+at_xdmac_interleaved_queue_desc(struct dma_chan *chan,
+				struct at_xdmac_chan *atchan,
+				struct at_xdmac_desc *prev,
+				dma_addr_t src, dma_addr_t dst,
+				struct dma_interleaved_template *xt,
+				struct data_chunk *chunk)
+{
+	struct at_xdmac_desc	*desc;
+	u32			dwidth;
+	unsigned long		flags;
+	size_t			ublen;
+	/*
+	 * WARNING: The channel configuration is set here since there is no
+	 * dmaengine_slave_config call in this case. Moreover, we don't know
+	 * the direction, which means we can't dynamically set the source and
+	 * dest interfaces, so we have to use the same one for both. Only
+	 * interface 0 allows EBI access. Fortunately, we can access DDR
+	 * through both ports (at least on SAMA5D4x), so using the same
+	 * interface for source and dest works around the fact that we don't
+	 * know the direction.
+	 * ERRATA: Even though it is useless for memory transfers, the PERID
+	 * must not match the one of another channel; otherwise it could lead
+	 * to spurious flag status.
+	 */
+	u32			chan_cc = AT_XDMAC_CC_PERID(0x3f)
+					| AT_XDMAC_CC_DIF(0)
+					| AT_XDMAC_CC_SIF(0)
+					| AT_XDMAC_CC_MBSIZE_SIXTEEN
+					| AT_XDMAC_CC_TYPE_MEM_TRAN;
+
+	dwidth = at_xdmac_align_width(chan, src | dst | chunk->size);
+	if (chunk->size >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) {
+		dev_dbg(chan2dev(chan),
+			"%s: chunk too big (%zu, max size %lu)...\n",
+			__func__, chunk->size,
+			AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth);
+		return NULL;
+	}
+
+	if (prev)
+		dev_dbg(chan2dev(chan),
+			"Adding items at the end of desc 0x%p\n", prev);
+
+	if (xt->src_inc) {
+		if (xt->src_sgl)
+			chan_cc |=  AT_XDMAC_CC_SAM_UBS_AM;
+		else
+			chan_cc |=  AT_XDMAC_CC_SAM_INCREMENTED_AM;
+	}
+
+	if (xt->dst_inc) {
+		if (xt->dst_sgl)
+			chan_cc |=  AT_XDMAC_CC_DAM_UBS_AM;
+		else
+			chan_cc |=  AT_XDMAC_CC_DAM_INCREMENTED_AM;
+	}
+
+	spin_lock_irqsave(&atchan->lock, flags);
+	desc = at_xdmac_get_desc(atchan);
+	spin_unlock_irqrestore(&atchan->lock, flags);
+	if (!desc) {
+		dev_err(chan2dev(chan), "can't get descriptor\n");
+		return NULL;
+	}
+
+	chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth);
+
+	ublen = chunk->size >> dwidth;
+
+	desc->lld.mbr_sa = src;
+	desc->lld.mbr_da = dst;
+	desc->lld.mbr_sus = dmaengine_get_src_icg(xt, chunk);
+	desc->lld.mbr_dus = dmaengine_get_dst_icg(xt, chunk);
+
+	desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV3
+		| AT_XDMAC_MBR_UBC_NDEN
+		| AT_XDMAC_MBR_UBC_NSEN
+		| ublen;
+	desc->lld.mbr_cfg = chan_cc;
+
+	dev_dbg(chan2dev(chan),
+		"%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
+		__func__, &desc->lld.mbr_sa, &desc->lld.mbr_da,
+		desc->lld.mbr_ubc, desc->lld.mbr_cfg);
+
+	/* Chain lld. */
+	if (prev)
+		at_xdmac_queue_desc(chan, prev, desc);
+
+	return desc;
+}
+
+static struct dma_async_tx_descriptor *
+at_xdmac_prep_interleaved(struct dma_chan *chan,
+			  struct dma_interleaved_template *xt,
+			  unsigned long flags)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac_desc	*prev = NULL, *first = NULL;
+	dma_addr_t		dst_addr, src_addr;
+	size_t			src_skip = 0, dst_skip = 0, len = 0;
+	struct data_chunk	*chunk;
+	int			i;
+
+	if (!xt || !xt->numf || (xt->dir != DMA_MEM_TO_MEM))
+		return NULL;
+
+	/*
+	 * TODO: Handle the case where we have to repeat a chain of
+	 * descriptors...
+	 */
+	if ((xt->numf > 1) && (xt->frame_size > 1))
+		return NULL;
+
+	dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, numf=%zu, frame_size=%zu, flags=0x%lx\n",
+		__func__, &xt->src_start, &xt->dst_start,	xt->numf,
+		xt->frame_size, flags);
+
+	src_addr = xt->src_start;
+	dst_addr = xt->dst_start;
+
+	if (xt->numf > 1) {
+		first = at_xdmac_interleaved_queue_desc(chan, atchan,
+							NULL,
+							src_addr, dst_addr,
+							xt, xt->sgl);
+
+		/* Length of the block is (BLEN+1) microblocks. */
+		for (i = 0; i < xt->numf - 1; i++)
+			at_xdmac_increment_block_count(chan, first);
+
+		dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
+			__func__, first, first);
+		list_add_tail(&first->desc_node, &first->descs_list);
+	} else {
+		for (i = 0; i < xt->frame_size; i++) {
+			size_t src_icg = 0, dst_icg = 0;
+			struct at_xdmac_desc *desc;
+
+			chunk = xt->sgl + i;
+
+			dst_icg = dmaengine_get_dst_icg(xt, chunk);
+			src_icg = dmaengine_get_src_icg(xt, chunk);
+
+			src_skip = chunk->size + src_icg;
+			dst_skip = chunk->size + dst_icg;
+
+			dev_dbg(chan2dev(chan),
+				"%s: chunk size=%zu, src icg=%zu, dst icg=%zu\n",
+				__func__, chunk->size, src_icg, dst_icg);
+
+			desc = at_xdmac_interleaved_queue_desc(chan, atchan,
+							       prev,
+							       src_addr, dst_addr,
+							       xt, chunk);
+			if (!desc) {
+				if (first)
+					list_splice_init(&first->descs_list,
+							 &atchan->free_descs_list);
+				return NULL;
+			}
+
+			if (!first)
+				first = desc;
+
+			dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
+				__func__, desc, first);
+			list_add_tail(&desc->desc_node, &first->descs_list);
+
+			if (xt->src_sgl)
+				src_addr += src_skip;
+
+			if (xt->dst_sgl)
+				dst_addr += dst_skip;
+
+			len += chunk->size;
+			prev = desc;
+		}
+	}
+
+	first->tx_dma_desc.cookie = -EBUSY;
+	first->tx_dma_desc.flags = flags;
+	first->xfer_size = len;
+
+	return &first->tx_dma_desc;
+}
+
+static struct dma_async_tx_descriptor *
+at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+			 size_t len, unsigned long flags)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac_desc	*first = NULL, *prev = NULL;
+	size_t			remaining_size = len, xfer_size = 0, ublen;
+	dma_addr_t		src_addr = src, dst_addr = dest;
+	u32			dwidth;
+	/*
+	 * WARNING: We don't know the direction, which means we can't
+	 * dynamically set the source and dest interfaces, so we have to use
+	 * the same one for both. Only interface 0 allows EBI access.
+	 * Fortunately, we can access DDR through both ports (at least on
+	 * SAMA5D4x), so using the same interface for source and dest works
+	 * around the fact that we don't know the direction.
+	 * ERRATA: Even though it is useless for memory transfers, the PERID
+	 * must not match the one of another channel; otherwise it could lead
+	 * to spurious flag status.
+	 */
+	u32			chan_cc = AT_XDMAC_CC_PERID(0x3f)
+					| AT_XDMAC_CC_DAM_INCREMENTED_AM
+					| AT_XDMAC_CC_SAM_INCREMENTED_AM
+					| AT_XDMAC_CC_DIF(0)
+					| AT_XDMAC_CC_SIF(0)
+					| AT_XDMAC_CC_MBSIZE_SIXTEEN
+					| AT_XDMAC_CC_TYPE_MEM_TRAN;
+	unsigned long		irqflags;
+
+	dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, len=%zd, flags=0x%lx\n",
+		__func__, &src, &dest, len, flags);
+
+	if (unlikely(!len))
+		return NULL;
+
+	dwidth = at_xdmac_align_width(chan, src_addr | dst_addr);
+
+	/* Prepare descriptors. */
+	while (remaining_size) {
+		struct at_xdmac_desc	*desc = NULL;
+
+		dev_dbg(chan2dev(chan), "%s: remaining_size=%zu\n", __func__, remaining_size);
+
+		spin_lock_irqsave(&atchan->lock, irqflags);
+		desc = at_xdmac_get_desc(atchan);
+		spin_unlock_irqrestore(&atchan->lock, irqflags);
+		if (!desc) {
+			dev_err(chan2dev(chan), "can't get descriptor\n");
+			if (first)
+				list_splice_init(&first->descs_list, &atchan->free_descs_list);
+			return NULL;
+		}
+
+		/* Update src and dest addresses. */
+		src_addr += xfer_size;
+		dst_addr += xfer_size;
+
+		if (remaining_size >= AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)
+			xfer_size = AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth;
+		else
+			xfer_size = remaining_size;
+
+		dev_dbg(chan2dev(chan), "%s: xfer_size=%zu\n", __func__, xfer_size);
+
+		/* Check remaining length and change data width if needed. */
+		dwidth = at_xdmac_align_width(chan,
+					      src_addr | dst_addr | xfer_size);
+		chan_cc &= ~AT_XDMAC_CC_DWIDTH_MASK;
+		chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth);
+
+		ublen = xfer_size >> dwidth;
+		remaining_size -= xfer_size;
+
+		desc->lld.mbr_sa = src_addr;
+		desc->lld.mbr_da = dst_addr;
+		desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2
+			| AT_XDMAC_MBR_UBC_NDEN
+			| AT_XDMAC_MBR_UBC_NSEN
+			| ublen;
+		desc->lld.mbr_cfg = chan_cc;
+
+		dev_dbg(chan2dev(chan),
+			 "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
+			 __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc, desc->lld.mbr_cfg);
+
+		/* Chain lld. */
+		if (prev)
+			at_xdmac_queue_desc(chan, prev, desc);
+
+		prev = desc;
+		if (!first)
+			first = desc;
+
+		dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n",
+			 __func__, desc, first);
+		list_add_tail(&desc->desc_node, &first->descs_list);
+	}
+
+	first->tx_dma_desc.flags = flags;
+	first->xfer_size = len;
+
+	return &first->tx_dma_desc;
+}
+
+static struct at_xdmac_desc *at_xdmac_memset_create_desc(struct dma_chan *chan,
+							 struct at_xdmac_chan *atchan,
+							 dma_addr_t dst_addr,
+							 size_t len,
+							 int value)
+{
+	struct at_xdmac_desc	*desc;
+	unsigned long		flags;
+	size_t			ublen;
+	u32			dwidth;
+	/*
+	 * WARNING: The channel configuration is set here since there is no
+	 * dmaengine_slave_config call in this case. Moreover, we don't know
+	 * the direction, which means we can't dynamically set the source and
+	 * dest interfaces, so we have to use the same one for both. Only
+	 * interface 0 allows EBI access. Fortunately, we can access DDR
+	 * through both ports (at least on SAMA5D4x), so using the same
+	 * interface for source and dest works around the fact that we don't
+	 * know the direction.
+	 * ERRATA: Even though it is useless for memory transfers, the PERID
+	 * must not match the one of another channel; otherwise it could lead
+	 * to spurious flag status.
+	 */
+	u32			chan_cc = AT_XDMAC_CC_PERID(0x3f)
+					| AT_XDMAC_CC_DAM_UBS_AM
+					| AT_XDMAC_CC_SAM_INCREMENTED_AM
+					| AT_XDMAC_CC_DIF(0)
+					| AT_XDMAC_CC_SIF(0)
+					| AT_XDMAC_CC_MBSIZE_SIXTEEN
+					| AT_XDMAC_CC_MEMSET_HW_MODE
+					| AT_XDMAC_CC_TYPE_MEM_TRAN;
+
+	dwidth = at_xdmac_align_width(chan, dst_addr);
+
+	if (len >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) {
+		dev_err(chan2dev(chan),
+			"%s: Transfer too large, aborting...\n",
+			__func__);
+		return NULL;
+	}
+
+	spin_lock_irqsave(&atchan->lock, flags);
+	desc = at_xdmac_get_desc(atchan);
+	spin_unlock_irqrestore(&atchan->lock, flags);
+	if (!desc) {
+		dev_err(chan2dev(chan), "can't get descriptor\n");
+		return NULL;
+	}
+
+	chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth);
+
+	ublen = len >> dwidth;
+
+	desc->lld.mbr_da = dst_addr;
+	desc->lld.mbr_ds = value;
+	desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV3
+		| AT_XDMAC_MBR_UBC_NDEN
+		| AT_XDMAC_MBR_UBC_NSEN
+		| ublen;
+	desc->lld.mbr_cfg = chan_cc;
+
+	dev_dbg(chan2dev(chan),
+		"%s: lld: mbr_da=%pad, mbr_ds=0x%08x, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n",
+		__func__, &desc->lld.mbr_da, desc->lld.mbr_ds, desc->lld.mbr_ubc,
+		desc->lld.mbr_cfg);
+
+	return desc;
+}
+
+static struct dma_async_tx_descriptor *
+at_xdmac_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
+			 size_t len, unsigned long flags)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac_desc	*desc;
+
+	dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%zu, pattern=0x%x, flags=0x%lx\n",
+		__func__, &dest, len, value, flags);
+
+	if (unlikely(!len))
+		return NULL;
+
+	desc = at_xdmac_memset_create_desc(chan, atchan, dest, len, value);
+	if (!desc)
+		return NULL;
+
+	list_add_tail(&desc->desc_node, &desc->descs_list);
+
+	desc->tx_dma_desc.cookie = -EBUSY;
+	desc->tx_dma_desc.flags = flags;
+	desc->xfer_size = len;
+
+	return &desc->tx_dma_desc;
+}
+
+static struct dma_async_tx_descriptor *
+at_xdmac_prep_dma_memset_sg(struct dma_chan *chan, struct scatterlist *sgl,
+			    unsigned int sg_len, int value,
+			    unsigned long flags)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac_desc	*desc, *pdesc = NULL,
+				*ppdesc = NULL, *first = NULL;
+	struct scatterlist	*sg, *psg = NULL, *ppsg = NULL;
+	size_t			stride = 0, pstride = 0, len = 0;
+	int			i;
+
+	if (!sgl)
+		return NULL;
+
+	dev_dbg(chan2dev(chan), "%s: sg_len=%d, value=0x%x, flags=0x%lx\n",
+		__func__, sg_len, value, flags);
+
+	/* Prepare descriptors. */
+	for_each_sg(sgl, sg, sg_len, i) {
+		dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%d, pattern=0x%x, flags=0x%lx\n",
+			__func__, &sg_dma_address(sg), sg_dma_len(sg),
+			value, flags);
+		desc = at_xdmac_memset_create_desc(chan, atchan,
+						   sg_dma_address(sg),
+						   sg_dma_len(sg),
+						   value);
+		if (!desc) {
+			if (first)
+				list_splice_init(&first->descs_list,
+						 &atchan->free_descs_list);
+			return NULL;
+		}
+
+		if (!first)
+			first = desc;
+
+		/* Update our strides */
+		pstride = stride;
+		if (psg)
+			stride = sg_dma_address(sg) -
+				(sg_dma_address(psg) + sg_dma_len(psg));
+
+		/*
+		 * The scatterlist API gives us only the address and
+		 * length of each element.
+		 *
+		 * Unfortunately, we don't have the stride, which we
+		 * will need to compute.
+		 *
+		 * That puts us in a situation like this one:
+		 *    len    stride    len    stride    len
+		 * +-------+        +-------+        +-------+
+		 * |  N-2  |        |  N-1  |        |   N   |
+		 * +-------+        +-------+        +-------+
+		 *
+		 * We need all three of these elements (N-2, N-1 and N)
+		 * to actually decide whether we need to queue N-1 or
+		 * reuse N-2.
+		 *
+		 * We will only consider N if it is the last element.
+		 */
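+		/*
+		 * Worked example: three 0x100-byte segments at 0x1000,
+		 * 0x1200 and 0x1400 give a stride of 0x100 between both
+		 * pairs and equal lengths, so the middle descriptor is
+		 * merged into its predecessor by bumping the block count
+		 * instead of being queued.
+		 */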
+		if (ppdesc && pdesc) {
+			if ((stride == pstride) &&
+			    (sg_dma_len(ppsg) == sg_dma_len(psg))) {
+				dev_dbg(chan2dev(chan),
+					"%s: desc 0x%p can be merged with desc 0x%p\n",
+					__func__, pdesc, ppdesc);
+
+				/*
+				 * Increment the block count of the
+				 * N-2 descriptor
+				 */
+				at_xdmac_increment_block_count(chan, ppdesc);
+				ppdesc->lld.mbr_dus = stride;
+
+				/*
+				 * Put back the N-1 descriptor in the
+				 * free descriptor list
+				 */
+				list_add_tail(&pdesc->desc_node,
+					      &atchan->free_descs_list);
+
+				/*
+				 * Make our N-1 descriptor pointer
+				 * point to the N-2 since they were
+				 * actually merged.
+				 */
+				pdesc = ppdesc;
+
+			/*
+			 * Rule out the case where we don't have
+			 * pstride computed yet (our second sg
+			 * element).
+			 *
+			 * We also want to catch the case where the
+			 * stride would be negative.
+			 */
+			} else if (pstride ||
+				   sg_dma_address(sg) < sg_dma_address(psg)) {
+				/*
+				 * Queue the N-1 descriptor after the
+				 * N-2
+				 */
+				at_xdmac_queue_desc(chan, ppdesc, pdesc);
+
+				/*
+				 * Add the N-1 descriptor to the list
+				 * of the descriptors used for this
+				 * transfer
+				 */
+				list_add_tail(&desc->desc_node,
+					      &first->descs_list);
+				dev_dbg(chan2dev(chan),
+					"%s: add desc 0x%p to descs_list 0x%p\n",
+					__func__, desc, first);
+			}
+		}
+
+		/*
+		 * If we are at the last element, just check whether it
+		 * has the same size as the previous element.
+		 *
+		 * If so, we can merge it with the previous descriptor
+		 * since we don't care about the stride anymore.
+		 */
+		if ((i == (sg_len - 1)) &&
+		    sg_dma_len(psg) == sg_dma_len(sg)) {
+			dev_dbg(chan2dev(chan),
+				"%s: desc 0x%p can be merged with desc 0x%p\n",
+				__func__, desc, pdesc);
+
+			/*
+			 * Increment the block count of the N-1
+			 * descriptor
+			 */
+			at_xdmac_increment_block_count(chan, pdesc);
+			pdesc->lld.mbr_dus = stride;
+
+			/*
+			 * Put back the N descriptor in the free
+			 * descriptor list
+			 */
+			list_add_tail(&desc->desc_node,
+				      &atchan->free_descs_list);
+		}
+
+		/* Update our descriptors */
+		ppdesc = pdesc;
+		pdesc = desc;
+
+		/* Update our scatter pointers */
+		ppsg = psg;
+		psg = sg;
+
+		len += sg_dma_len(sg);
+	}
+
+	first->tx_dma_desc.cookie = -EBUSY;
+	first->tx_dma_desc.flags = flags;
+	first->xfer_size = len;
+
+	return &first->tx_dma_desc;
+}
+
+static enum dma_status
+at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+		struct dma_tx_state *txstate)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac		*atxdmac = to_at_xdmac(atchan->chan.device);
+	struct at_xdmac_desc	*desc, *_desc;
+	struct list_head	*descs_list;
+	enum dma_status		ret;
+	int			residue, retry;
+	u32			cur_nda, check_nda, cur_ubc, mask, value;
+	u8			dwidth = 0;
+	unsigned long		flags;
+	bool			initd;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	if (!txstate)
+		return ret;
+
+	spin_lock_irqsave(&atchan->lock, flags);
+
+	desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, xfer_node);
+
+	/*
+	 * If the transfer has not been started yet, we don't need to compute
+	 * the residue: it is simply the transfer length.
+	 */
+	if (!desc->active_xfer) {
+		dma_set_residue(txstate, desc->xfer_size);
+		goto spin_unlock;
+	}
+
+	residue = desc->xfer_size;
+	/*
+	 * Flush FIFO: only relevant when the transfer is source peripheral
+	 * synchronized. Flush is needed before reading CUBC because data in
+	 * the FIFO are not reported by CUBC. Reporting a residue of the
+	 * transfer length while we still have data in the FIFO can cause
+	 * issues.
+	 * Usecase: the Atmel USART has a timeout which means characters have
+	 * been received but no new character has arrived for a while. On
+	 * timeout, it requests the residue. If the data are still in the DMA
+	 * FIFO, we would return a residue equal to the transfer length,
+	 * meaning no data were received. If an application is waiting for
+	 * these data, it will hang since we won't get another USART timeout
+	 * without receiving new data.
+	 */
+	mask = AT_XDMAC_CC_TYPE | AT_XDMAC_CC_DSYNC;
+	value = AT_XDMAC_CC_TYPE_PER_TRAN | AT_XDMAC_CC_DSYNC_PER2MEM;
+	if ((desc->lld.mbr_cfg & mask) == value) {
+		at_xdmac_write(atxdmac, AT_XDMAC_GSWF, atchan->mask);
+		while (!(at_xdmac_chan_read(atchan, AT_XDMAC_CIS) & AT_XDMAC_CIS_FIS))
+			cpu_relax();
+	}
+
+	/*
+	 * The easiest way to compute the residue should be to pause the DMA
+	 * but doing this can lead to missing some data as some devices don't
+	 * have a FIFO.
+	 * We need to read several registers because:
+	 * - the DMA is running, therefore a descriptor change is possible
+	 * while reading these registers
+	 * - when the block transfer is done, the value of the CUBC register
+	 * is set to its initial value until the fetch of the next descriptor.
+	 * This value would corrupt the residue calculation so we have to
+	 * skip it.
+	 *
+	 * INITD --------                    ------------
+	 *              |____________________|
+	 *       _______________________  _______________
+	 * NDA       @desc2             \/   @desc3
+	 *       _______________________/\_______________
+	 *       __________  ___________  _______________
+	 * CUBC       0    \/ MAX desc1 \/  MAX desc2
+	 *       __________/\___________/\_______________
+	 *
+	 * Since descriptors are aligned on 64 bits, we can assume that
+	 * the update of NDA and CUBC is atomic.
+	 * Memory barriers are used to ensure the read order of the registers.
+	 * A maximum number of retries is set because, although unlikely, the
+	 * loop could otherwise never end.
+	 */
+	for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) {
+		check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
+		rmb();
+		cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC);
+		rmb();
+		initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD);
+		rmb();
+		cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
+		rmb();
+
+		if ((check_nda == cur_nda) && initd)
+			break;
+	}
+
+	if (unlikely(retry >= AT_XDMAC_RESIDUE_MAX_RETRIES)) {
+		ret = DMA_ERROR;
+		goto spin_unlock;
+	}
+
+	/*
+	 * Flush FIFO: only relevant when the transfer is source peripheral
+	 * synchronized. Another flush is needed here because CUBC is updated
+	 * when the controller sends the data write command. Without the
+	 * flush, we could report data that have not actually been written to
+	 * memory or to the device. The FIFO flush ensures that data are
+	 * really written.
+	 */
+	if ((desc->lld.mbr_cfg & mask) == value) {
+		at_xdmac_write(atxdmac, AT_XDMAC_GSWF, atchan->mask);
+		while (!(at_xdmac_chan_read(atchan, AT_XDMAC_CIS) & AT_XDMAC_CIS_FIS))
+			cpu_relax();
+	}
+
+	/*
+	 * Subtract the size of all microblocks already transferred, including
+	 * the current one, then add back the remaining size of the current
+	 * microblock.
+	 */
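+	/*
+	 * In other words, once a coherent snapshot has been obtained:
+	 *
+	 *   residue = xfer_size
+	 *           - sum of ((mbr_ubc & 0xffffff) << dwidth) over the
+	 *             descriptors up to and including the one cur_nda
+	 *             points to
+	 *           + (cur_ubc << dwidth of that current descriptor)
+	 */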
+	descs_list = &desc->descs_list;
+	list_for_each_entry_safe(desc, _desc, descs_list, desc_node) {
+		dwidth = at_xdmac_get_dwidth(desc->lld.mbr_cfg);
+		residue -= (desc->lld.mbr_ubc & 0xffffff) << dwidth;
+		if ((desc->lld.mbr_nda & 0xfffffffc) == cur_nda)
+			break;
+	}
+	residue += cur_ubc << dwidth;
+
+	dma_set_residue(txstate, residue);
+
+	dev_dbg(chan2dev(chan),
+		 "%s: desc=0x%p, tx_dma_desc.phys=%pad, tx_status=%d, cookie=%d, residue=%d\n",
+		 __func__, desc, &desc->tx_dma_desc.phys, ret, cookie, residue);
+
+spin_unlock:
+	spin_unlock_irqrestore(&atchan->lock, flags);
+	return ret;
+}
+
+/* Call must be protected by lock. */
+static void at_xdmac_remove_xfer(struct at_xdmac_chan *atchan,
+				    struct at_xdmac_desc *desc)
+{
+	dev_dbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc);
+
+	/*
+	 * Remove the transfer from the transfer list then move the transfer
+	 * descriptors into the free descriptors list.
+	 */
+	list_del(&desc->xfer_node);
+	list_splice_init(&desc->descs_list, &atchan->free_descs_list);
+}
+
+static void at_xdmac_advance_work(struct at_xdmac_chan *atchan)
+{
+	struct at_xdmac_desc	*desc;
+	unsigned long		flags;
+
+	spin_lock_irqsave(&atchan->lock, flags);
+
+	/*
+	 * If the channel is enabled, do nothing: advance_work will be
+	 * triggered again after the interrupt.
+	 */
+	if (!at_xdmac_chan_is_enabled(atchan) && !list_empty(&atchan->xfers_list)) {
+		desc = list_first_entry(&atchan->xfers_list,
+					struct at_xdmac_desc,
+					xfer_node);
+		dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc);
+		if (!desc->active_xfer)
+			at_xdmac_start_xfer(atchan, desc);
+	}
+
+	spin_unlock_irqrestore(&atchan->lock, flags);
+}
+
+static void at_xdmac_handle_cyclic(struct at_xdmac_chan *atchan)
+{
+	struct at_xdmac_desc		*desc;
+	struct dma_async_tx_descriptor	*txd;
+
+	desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, xfer_node);
+	txd = &desc->tx_dma_desc;
+
+	if (txd->flags & DMA_PREP_INTERRUPT)
+		dmaengine_desc_get_callback_invoke(txd, NULL);
+}
+
+static void at_xdmac_tasklet(unsigned long data)
+{
+	struct at_xdmac_chan	*atchan = (struct at_xdmac_chan *)data;
+	struct at_xdmac_desc	*desc;
+	u32			error_mask;
+
+	dev_dbg(chan2dev(&atchan->chan), "%s: status=0x%08lx\n",
+		 __func__, atchan->status);
+
+	error_mask = AT_XDMAC_CIS_RBEIS
+		     | AT_XDMAC_CIS_WBEIS
+		     | AT_XDMAC_CIS_ROIS;
+
+	if (at_xdmac_chan_is_cyclic(atchan)) {
+		at_xdmac_handle_cyclic(atchan);
+	} else if ((atchan->status & AT_XDMAC_CIS_LIS)
+		   || (atchan->status & error_mask)) {
+		struct dma_async_tx_descriptor  *txd;
+
+		if (atchan->status & AT_XDMAC_CIS_RBEIS)
+			dev_err(chan2dev(&atchan->chan), "read bus error!!!");
+		if (atchan->status & AT_XDMAC_CIS_WBEIS)
+			dev_err(chan2dev(&atchan->chan), "write bus error!!!");
+		if (atchan->status & AT_XDMAC_CIS_ROIS)
+			dev_err(chan2dev(&atchan->chan), "request overflow error!!!");
+
+		spin_lock_bh(&atchan->lock);
+		desc = list_first_entry(&atchan->xfers_list,
+					struct at_xdmac_desc,
+					xfer_node);
+		dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, desc);
+		BUG_ON(!desc->active_xfer);
+
+		txd = &desc->tx_dma_desc;
+
+		at_xdmac_remove_xfer(atchan, desc);
+		spin_unlock_bh(&atchan->lock);
+
+		if (!at_xdmac_chan_is_cyclic(atchan)) {
+			dma_cookie_complete(txd);
+			if (txd->flags & DMA_PREP_INTERRUPT)
+				dmaengine_desc_get_callback_invoke(txd, NULL);
+		}
+
+		dma_run_dependencies(txd);
+
+		at_xdmac_advance_work(atchan);
+	}
+}
+
+static irqreturn_t at_xdmac_interrupt(int irq, void *dev_id)
+{
+	struct at_xdmac		*atxdmac = (struct at_xdmac *)dev_id;
+	struct at_xdmac_chan	*atchan;
+	u32			imr, status, pending;
+	u32			chan_imr, chan_status;
+	int			i, ret = IRQ_NONE;
+
+	do {
+		imr = at_xdmac_read(atxdmac, AT_XDMAC_GIM);
+		status = at_xdmac_read(atxdmac, AT_XDMAC_GIS);
+		pending = status & imr;
+
+		dev_vdbg(atxdmac->dma.dev,
+			 "%s: status=0x%08x, imr=0x%08x, pending=0x%08x\n",
+			 __func__, status, imr, pending);
+
+		if (!pending)
+			break;
+
+		/* We have to find which channel has generated the interrupt. */
+		for (i = 0; i < atxdmac->dma.chancnt; i++) {
+			if (!((1 << i) & pending))
+				continue;
+
+			atchan = &atxdmac->chan[i];
+			chan_imr = at_xdmac_chan_read(atchan, AT_XDMAC_CIM);
+			chan_status = at_xdmac_chan_read(atchan, AT_XDMAC_CIS);
+			atchan->status = chan_status & chan_imr;
+			dev_vdbg(atxdmac->dma.dev,
+				 "%s: chan%d: imr=0x%x, status=0x%x\n",
+				 __func__, i, chan_imr, chan_status);
+			dev_vdbg(chan2dev(&atchan->chan),
+				 "%s: CC=0x%08x CNDA=0x%08x, CNDC=0x%08x, CSA=0x%08x, CDA=0x%08x, CUBC=0x%08x\n",
+				 __func__,
+				 at_xdmac_chan_read(atchan, AT_XDMAC_CC),
+				 at_xdmac_chan_read(atchan, AT_XDMAC_CNDA),
+				 at_xdmac_chan_read(atchan, AT_XDMAC_CNDC),
+				 at_xdmac_chan_read(atchan, AT_XDMAC_CSA),
+				 at_xdmac_chan_read(atchan, AT_XDMAC_CDA),
+				 at_xdmac_chan_read(atchan, AT_XDMAC_CUBC));
+
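+			/*
+			 * On a bus error, disable the channel here; the
+			 * tasklet reports the error and completes the
+			 * descriptor.
+			 */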
+			if (atchan->status & (AT_XDMAC_CIS_RBEIS | AT_XDMAC_CIS_WBEIS))
+				at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask);
+
+			tasklet_schedule(&atchan->tasklet);
+			ret = IRQ_HANDLED;
+		}
+
+	} while (pending);
+
+	return ret;
+}
+
+static void at_xdmac_issue_pending(struct dma_chan *chan)
+{
+	struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan);
+
+	dev_dbg(chan2dev(&atchan->chan), "%s\n", __func__);
+
+	if (!at_xdmac_chan_is_cyclic(atchan))
+		at_xdmac_advance_work(atchan);
+
+	return;
+}
+
+static int at_xdmac_device_config(struct dma_chan *chan,
+				  struct dma_slave_config *config)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	int ret;
+	unsigned long		flags;
+
+	dev_dbg(chan2dev(chan), "%s\n", __func__);
+
+	spin_lock_irqsave(&atchan->lock, flags);
+	ret = at_xdmac_set_slave_config(chan, config);
+	spin_unlock_irqrestore(&atchan->lock, flags);
+
+	return ret;
+}
+
+static int at_xdmac_device_pause(struct dma_chan *chan)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac		*atxdmac = to_at_xdmac(atchan->chan.device);
+	unsigned long		flags;
+
+	dev_dbg(chan2dev(chan), "%s\n", __func__);
+
+	if (test_and_set_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status))
+		return 0;
+
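+	/*
+	 * Request a read/write suspend and wait until any in-progress
+	 * read/write transaction has drained.
+	 */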
+	spin_lock_irqsave(&atchan->lock, flags);
+	at_xdmac_write(atxdmac, AT_XDMAC_GRWS, atchan->mask);
+	while (at_xdmac_chan_read(atchan, AT_XDMAC_CC)
+	       & (AT_XDMAC_CC_WRIP | AT_XDMAC_CC_RDIP))
+		cpu_relax();
+	spin_unlock_irqrestore(&atchan->lock, flags);
+
+	return 0;
+}
+
+static int at_xdmac_device_resume(struct dma_chan *chan)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac		*atxdmac = to_at_xdmac(atchan->chan.device);
+	unsigned long		flags;
+
+	dev_dbg(chan2dev(chan), "%s\n", __func__);
+
+	spin_lock_irqsave(&atchan->lock, flags);
+	if (!at_xdmac_chan_is_paused(atchan)) {
+		spin_unlock_irqrestore(&atchan->lock, flags);
+		return 0;
+	}
+
+	at_xdmac_write(atxdmac, AT_XDMAC_GRWR, atchan->mask);
+	clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
+	spin_unlock_irqrestore(&atchan->lock, flags);
+
+	return 0;
+}
+
+static int at_xdmac_device_terminate_all(struct dma_chan *chan)
+{
+	struct at_xdmac_desc	*desc, *_desc;
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac		*atxdmac = to_at_xdmac(atchan->chan.device);
+	unsigned long		flags;
+
+	dev_dbg(chan2dev(chan), "%s\n", __func__);
+
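+	/*
+	 * Disable the channel and wait for it to disappear from the global
+	 * status register before reclaiming its descriptors.
+	 */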
+	spin_lock_irqsave(&atchan->lock, flags);
+	at_xdmac_write(atxdmac, AT_XDMAC_GD, atchan->mask);
+	while (at_xdmac_read(atxdmac, AT_XDMAC_GS) & atchan->mask)
+		cpu_relax();
+
+	/* Cancel all pending transfers. */
+	list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node)
+		at_xdmac_remove_xfer(atchan, desc);
+
+	clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
+	clear_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status);
+	spin_unlock_irqrestore(&atchan->lock, flags);
+
+	return 0;
+}
+
+static int at_xdmac_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac_desc	*desc;
+	int			i;
+	unsigned long		flags;
+
+	spin_lock_irqsave(&atchan->lock, flags);
+
+	if (at_xdmac_chan_is_enabled(atchan)) {
+		dev_err(chan2dev(chan),
+			"can't allocate channel resources (channel enabled)\n");
+		i = -EIO;
+		goto spin_unlock;
+	}
+
+	if (!list_empty(&atchan->free_descs_list)) {
+		dev_err(chan2dev(chan),
+			"can't allocate channel resources (channel not free from a previous use)\n");
+		i = -EIO;
+		goto spin_unlock;
+	}
+
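+	/* GFP_ATOMIC is used here because the channel spinlock is held. */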
+	for (i = 0; i < init_nr_desc_per_channel; i++) {
+		desc = at_xdmac_alloc_desc(chan, GFP_ATOMIC);
+		if (!desc) {
+			dev_warn(chan2dev(chan),
+				"only %d descriptors have been allocated\n", i);
+			break;
+		}
+		list_add_tail(&desc->desc_node, &atchan->free_descs_list);
+	}
+
+	dma_cookie_init(chan);
+
+	dev_dbg(chan2dev(chan), "%s: allocated %d descriptors\n", __func__, i);
+
+spin_unlock:
+	spin_unlock_irqrestore(&atchan->lock, flags);
+	return i;
+}
+
+static void at_xdmac_free_chan_resources(struct dma_chan *chan)
+{
+	struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+	struct at_xdmac		*atxdmac = to_at_xdmac(chan->device);
+	struct at_xdmac_desc	*desc, *_desc;
+
+	list_for_each_entry_safe(desc, _desc, &atchan->free_descs_list, desc_node) {
+		dev_dbg(chan2dev(chan), "%s: freeing descriptor %p\n", __func__, desc);
+		list_del(&desc->desc_node);
+		dma_pool_free(atxdmac->at_xdmac_desc_pool, desc, desc->tx_dma_desc.phys);
+	}
+
+	return;
+}
+
+#ifdef CONFIG_PM
+static int atmel_xdmac_prepare(struct device *dev)
+{
+	struct at_xdmac		*atxdmac = dev_get_drvdata(dev);
+	struct dma_chan		*chan, *_chan;
+
+	list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) {
+		struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+
+		/* Wait for transfer completion, except in cyclic case. */
+		if (at_xdmac_chan_is_enabled(atchan) && !at_xdmac_chan_is_cyclic(atchan))
+			return -EAGAIN;
+	}
+	return 0;
+}
+#else
+#	define atmel_xdmac_prepare NULL
+#endif
+
+#ifdef CONFIG_PM_SLEEP
+static int atmel_xdmac_suspend(struct device *dev)
+{
+	struct at_xdmac		*atxdmac = dev_get_drvdata(dev);
+	struct dma_chan		*chan, *_chan;
+
+	list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) {
+		struct at_xdmac_chan	*atchan = to_at_xdmac_chan(chan);
+
+		atchan->save_cc = at_xdmac_chan_read(atchan, AT_XDMAC_CC);
+		if (at_xdmac_chan_is_cyclic(atchan)) {
+			if (!at_xdmac_chan_is_paused(atchan))
+				at_xdmac_device_pause(chan);
+			atchan->save_cim = at_xdmac_chan_read(atchan, AT_XDMAC_CIM);
+			atchan->save_cnda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA);
+			atchan->save_cndc = at_xdmac_chan_read(atchan, AT_XDMAC_CNDC);
+		}
+	}
+	atxdmac->save_gim = at_xdmac_read(atxdmac, AT_XDMAC_GIM);
+
+	at_xdmac_off(atxdmac);
+	clk_disable_unprepare(atxdmac->clk);
+	return 0;
+}
+
+static int atmel_xdmac_resume(struct device *dev)
+{
+	struct at_xdmac		*atxdmac = dev_get_drvdata(dev);
+	struct at_xdmac_chan	*atchan;
+	struct dma_chan		*chan, *_chan;
+	int			i;
+	int ret;
+
+	ret = clk_prepare_enable(atxdmac->clk);
+	if (ret)
+		return ret;
+
+	/* Clear pending interrupts. */
+	for (i = 0; i < atxdmac->dma.chancnt; i++) {
+		atchan = &atxdmac->chan[i];
+		while (at_xdmac_chan_read(atchan, AT_XDMAC_CIS))
+			cpu_relax();
+	}
+
+	at_xdmac_write(atxdmac, AT_XDMAC_GIE, atxdmac->save_gim);
+	list_for_each_entry_safe(chan, _chan, &atxdmac->dma.channels, device_node) {
+		atchan = to_at_xdmac_chan(chan);
+		at_xdmac_chan_write(atchan, AT_XDMAC_CC, atchan->save_cc);
+		if (at_xdmac_chan_is_cyclic(atchan)) {
+			if (at_xdmac_chan_is_paused(atchan))
+				at_xdmac_device_resume(chan);
+			at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, atchan->save_cnda);
+			at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, atchan->save_cndc);
+			at_xdmac_chan_write(atchan, AT_XDMAC_CIE, atchan->save_cim);
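+			/*
+			 * Ensure the restored channel registers are written
+			 * before the channel is re-enabled.
+			 */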
+			wmb();
+			at_xdmac_write(atxdmac, AT_XDMAC_GE, atchan->mask);
+		}
+	}
+	return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static int at_xdmac_probe(struct platform_device *pdev)
+{
+	struct resource	*res;
+	struct at_xdmac	*atxdmac;
+	int		irq, size, nr_channels, i, ret;
+	void __iomem	*base;
+	u32		reg;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -EINVAL;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	/*
+	 * Read the number of xdmac channels. The read helper can't be used
+	 * here since atxdmac is not allocated yet and we need the channel
+	 * count to size that allocation.
+	 */
+	reg = readl_relaxed(base + AT_XDMAC_GTYPE);
+	nr_channels = AT_XDMAC_NB_CH(reg);
+	if (nr_channels > AT_XDMAC_MAX_CHAN) {
+		dev_err(&pdev->dev, "invalid number of channels (%u)\n",
+			nr_channels);
+		return -EINVAL;
+	}
+
+	size = sizeof(*atxdmac);
+	size += nr_channels * sizeof(struct at_xdmac_chan);
+	atxdmac = devm_kzalloc(&pdev->dev, size, GFP_KERNEL);
+	if (!atxdmac) {
+		dev_err(&pdev->dev, "can't allocate at_xdmac structure\n");
+		return -ENOMEM;
+	}
+
+	atxdmac->regs = base;
+	atxdmac->irq = irq;
+
+	atxdmac->clk = devm_clk_get(&pdev->dev, "dma_clk");
+	if (IS_ERR(atxdmac->clk)) {
+		dev_err(&pdev->dev, "can't get dma_clk\n");
+		return PTR_ERR(atxdmac->clk);
+	}
+
+	/* Do not use devres for the IRQ to avoid races with the tasklet */
+	ret = request_irq(atxdmac->irq, at_xdmac_interrupt, 0, "at_xdmac", atxdmac);
+	if (ret) {
+		dev_err(&pdev->dev, "can't request irq\n");
+		return ret;
+	}
+
+	ret = clk_prepare_enable(atxdmac->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "can't prepare or enable clock\n");
+		goto err_free_irq;
+	}
+
+	atxdmac->at_xdmac_desc_pool =
+		dmam_pool_create(dev_name(&pdev->dev), &pdev->dev,
+				sizeof(struct at_xdmac_desc), 4, 0);
+	if (!atxdmac->at_xdmac_desc_pool) {
+		dev_err(&pdev->dev, "no memory for descriptors dma pool\n");
+		ret = -ENOMEM;
+		goto err_clk_disable;
+	}
+
+	dma_cap_set(DMA_CYCLIC, atxdmac->dma.cap_mask);
+	dma_cap_set(DMA_INTERLEAVE, atxdmac->dma.cap_mask);
+	dma_cap_set(DMA_MEMCPY, atxdmac->dma.cap_mask);
+	dma_cap_set(DMA_MEMSET, atxdmac->dma.cap_mask);
+	dma_cap_set(DMA_MEMSET_SG, atxdmac->dma.cap_mask);
+	dma_cap_set(DMA_SLAVE, atxdmac->dma.cap_mask);
+	/*
+	 * Without DMA_PRIVATE the driver is not able to allocate more than
+	 * one channel, second allocation fails in private_candidate.
+	 */
+	dma_cap_set(DMA_PRIVATE, atxdmac->dma.cap_mask);
+	atxdmac->dma.dev				= &pdev->dev;
+	atxdmac->dma.device_alloc_chan_resources	= at_xdmac_alloc_chan_resources;
+	atxdmac->dma.device_free_chan_resources		= at_xdmac_free_chan_resources;
+	atxdmac->dma.device_tx_status			= at_xdmac_tx_status;
+	atxdmac->dma.device_issue_pending		= at_xdmac_issue_pending;
+	atxdmac->dma.device_prep_dma_cyclic		= at_xdmac_prep_dma_cyclic;
+	atxdmac->dma.device_prep_interleaved_dma	= at_xdmac_prep_interleaved;
+	atxdmac->dma.device_prep_dma_memcpy		= at_xdmac_prep_dma_memcpy;
+	atxdmac->dma.device_prep_dma_memset		= at_xdmac_prep_dma_memset;
+	atxdmac->dma.device_prep_dma_memset_sg		= at_xdmac_prep_dma_memset_sg;
+	atxdmac->dma.device_prep_slave_sg		= at_xdmac_prep_slave_sg;
+	atxdmac->dma.device_config			= at_xdmac_device_config;
+	atxdmac->dma.device_pause			= at_xdmac_device_pause;
+	atxdmac->dma.device_resume			= at_xdmac_device_resume;
+	atxdmac->dma.device_terminate_all		= at_xdmac_device_terminate_all;
+	atxdmac->dma.src_addr_widths = AT_XDMAC_DMA_BUSWIDTHS;
+	atxdmac->dma.dst_addr_widths = AT_XDMAC_DMA_BUSWIDTHS;
+	atxdmac->dma.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	atxdmac->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+	/* Disable all chans and interrupts. */
+	at_xdmac_off(atxdmac);
+
+	/* Init channels. */
+	INIT_LIST_HEAD(&atxdmac->dma.channels);
+	for (i = 0; i < nr_channels; i++) {
+		struct at_xdmac_chan *atchan = &atxdmac->chan[i];
+
+		atchan->chan.device = &atxdmac->dma;
+		list_add_tail(&atchan->chan.device_node,
+			      &atxdmac->dma.channels);
+
+		atchan->ch_regs = at_xdmac_chan_reg_base(atxdmac, i);
+		atchan->mask = 1 << i;
+
+		spin_lock_init(&atchan->lock);
+		INIT_LIST_HEAD(&atchan->xfers_list);
+		INIT_LIST_HEAD(&atchan->free_descs_list);
+		tasklet_init(&atchan->tasklet, at_xdmac_tasklet,
+			     (unsigned long)atchan);
+
+		/* Clear pending interrupts. */
+		while (at_xdmac_chan_read(atchan, AT_XDMAC_CIS))
+			cpu_relax();
+	}
+	platform_set_drvdata(pdev, atxdmac);
+
+	ret = dma_async_device_register(&atxdmac->dma);
+	if (ret) {
+		dev_err(&pdev->dev, "fail to register DMA engine device\n");
+		goto err_clk_disable;
+	}
+
+	ret = of_dma_controller_register(pdev->dev.of_node,
+					 at_xdmac_xlate, atxdmac);
+	if (ret) {
+		dev_err(&pdev->dev, "could not register of dma controller\n");
+		goto err_dma_unregister;
+	}
+
+	dev_info(&pdev->dev, "%d channels, mapped at 0x%p\n",
+		 nr_channels, atxdmac->regs);
+
+	return 0;
+
+err_dma_unregister:
+	dma_async_device_unregister(&atxdmac->dma);
+err_clk_disable:
+	clk_disable_unprepare(atxdmac->clk);
+err_free_irq:
+	free_irq(atxdmac->irq, atxdmac);
+	return ret;
+}
+
+static int at_xdmac_remove(struct platform_device *pdev)
+{
+	struct at_xdmac	*atxdmac = (struct at_xdmac *)platform_get_drvdata(pdev);
+	int		i;
+
+	at_xdmac_off(atxdmac);
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&atxdmac->dma);
+	clk_disable_unprepare(atxdmac->clk);
+
+	free_irq(atxdmac->irq, atxdmac);
+
+	for (i = 0; i < atxdmac->dma.chancnt; i++) {
+		struct at_xdmac_chan *atchan = &atxdmac->chan[i];
+
+		tasklet_kill(&atchan->tasklet);
+		at_xdmac_free_chan_resources(&atchan->chan);
+	}
+
+	return 0;
+}
+
+static const struct dev_pm_ops atmel_xdmac_dev_pm_ops = {
+	.prepare	= atmel_xdmac_prepare,
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(atmel_xdmac_suspend, atmel_xdmac_resume)
+};
+
+static const struct of_device_id atmel_xdmac_dt_ids[] = {
+	{
+		.compatible = "atmel,sama5d4-dma",
+	}, {
+		/* sentinel */
+	}
+};
+MODULE_DEVICE_TABLE(of, atmel_xdmac_dt_ids);
+
+static struct platform_driver at_xdmac_driver = {
+	.probe		= at_xdmac_probe,
+	.remove		= at_xdmac_remove,
+	.driver = {
+		.name		= "at_xdmac",
+		.of_match_table	= of_match_ptr(atmel_xdmac_dt_ids),
+		.pm		= &atmel_xdmac_dev_pm_ops,
+	}
+};
+
+static int __init at_xdmac_init(void)
+{
+	return platform_driver_probe(&at_xdmac_driver, at_xdmac_probe);
+}
+subsys_initcall(at_xdmac_init);
+
+MODULE_DESCRIPTION("Atmel Extended DMA Controller driver");
+MODULE_AUTHOR("Ludovic Desroches <ludovic.desroches@atmel.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/bcm-sba-raid.c b/drivers/dma/bcm-sba-raid.c
new file mode 100644
index 0000000..72878ac
--- /dev/null
+++ b/drivers/dma/bcm-sba-raid.c
@@ -0,0 +1,1787 @@
+/*
+ * Copyright (C) 2017 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Broadcom SBA RAID Driver
+ *
+ * The Broadcom stream buffer accelerator (SBA) provides offloading
+ * capabilities for RAID operations. The SBA offload engine is accessible
+ * via the Broadcom SoC specific ring manager. Because two or more offload
+ * engines can share the same ring manager, the ring manager driver is
+ * implemented as a mailbox controller driver and the offload engine
+ * drivers are implemented as mailbox clients.
+ *
+ * Typically, the Broadcom SoC specific ring manager exposes a large
+ * number of hardware rings over one or more SBA hardware devices. By
+ * design, the internal buffer size of an SBA hardware device is limited,
+ * but all offload operations supported by SBA can be broken down into
+ * multiple small requests and executed in parallel on multiple SBA
+ * hardware devices to achieve high throughput.
+ *
+ * The Broadcom SBA RAID driver does not require any register programming
+ * except submitting requests to the SBA hardware device via mailbox
+ * channels. This driver implements a DMA device with one DMA channel
+ * using a single mailbox channel provided by the Broadcom SoC specific
+ * ring manager driver. For more SBA DMA channels, more SBA device nodes
+ * can be created in the Broadcom SoC specific DTS based on the number of
+ * hardware rings supported by the Broadcom SoC ring manager.
+ */
+
+#include <linux/bitops.h>
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/list.h>
+#include <linux/mailbox_client.h>
+#include <linux/mailbox/brcm-message.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/slab.h>
+#include <linux/raid/pq.h>
+
+#include "dmaengine.h"
+
+/* ====== Driver macros and defines ===== */
+
+#define SBA_TYPE_SHIFT					48
+#define SBA_TYPE_MASK					GENMASK(1, 0)
+#define SBA_TYPE_A					0x0
+#define SBA_TYPE_B					0x2
+#define SBA_TYPE_C					0x3
+#define SBA_USER_DEF_SHIFT				32
+#define SBA_USER_DEF_MASK				GENMASK(15, 0)
+#define SBA_R_MDATA_SHIFT				24
+#define SBA_R_MDATA_MASK				GENMASK(7, 0)
+#define SBA_C_MDATA_MS_SHIFT				18
+#define SBA_C_MDATA_MS_MASK				GENMASK(1, 0)
+#define SBA_INT_SHIFT					17
+#define SBA_INT_MASK					BIT(0)
+#define SBA_RESP_SHIFT					16
+#define SBA_RESP_MASK					BIT(0)
+#define SBA_C_MDATA_SHIFT				8
+#define SBA_C_MDATA_MASK				GENMASK(7, 0)
+#define SBA_C_MDATA_BNUMx_SHIFT(__bnum)			(2 * (__bnum))
+#define SBA_C_MDATA_BNUMx_MASK				GENMASK(1, 0)
+#define SBA_C_MDATA_DNUM_SHIFT				5
+#define SBA_C_MDATA_DNUM_MASK				GENMASK(4, 0)
+#define SBA_C_MDATA_LS(__v)				((__v) & 0xff)
+#define SBA_C_MDATA_MS(__v)				(((__v) >> 8) & 0x3)
+#define SBA_CMD_SHIFT					0
+#define SBA_CMD_MASK					GENMASK(3, 0)
+#define SBA_CMD_ZERO_BUFFER				0x4
+#define SBA_CMD_ZERO_ALL_BUFFERS			0x8
+#define SBA_CMD_LOAD_BUFFER				0x9
+#define SBA_CMD_XOR					0xa
+#define SBA_CMD_GALOIS_XOR				0xb
+#define SBA_CMD_WRITE_BUFFER				0xc
+#define SBA_CMD_GALOIS					0xe
+
+#define SBA_MAX_REQ_PER_MBOX_CHANNEL			8192
+#define SBA_MAX_MSG_SEND_PER_MBOX_CHANNEL		8
+
+/* Driver helper macros */
+#define to_sba_request(tx)		\
+	container_of(tx, struct sba_request, tx)
+#define to_sba_device(dchan)		\
+	container_of(dchan, struct sba_device, dma_chan)
+
+/* ===== Driver data structures ===== */
+
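+/*
+ * A request moves through FREE -> ALLOCED -> PENDING -> ACTIVE and back to
+ * FREE on completion; requests caught mid-flight during channel cleanup are
+ * parked in ABORTED until their mailbox completion arrives.
+ * SBA_REQUEST_FENCE is a separate flag ORed on top of the state bits.
+ */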
+enum sba_request_flags {
+	SBA_REQUEST_STATE_FREE		= 0x001,
+	SBA_REQUEST_STATE_ALLOCED	= 0x002,
+	SBA_REQUEST_STATE_PENDING	= 0x004,
+	SBA_REQUEST_STATE_ACTIVE	= 0x008,
+	SBA_REQUEST_STATE_ABORTED	= 0x010,
+	SBA_REQUEST_STATE_MASK		= 0x0ff,
+	SBA_REQUEST_FENCE		= 0x100,
+};
+
+struct sba_request {
+	/* Global state */
+	struct list_head node;
+	struct sba_device *sba;
+	u32 flags;
+	/* Chained requests management */
+	struct sba_request *first;
+	struct list_head next;
+	atomic_t next_pending_count;
+	/* BRCM message data */
+	struct brcm_message msg;
+	struct dma_async_tx_descriptor tx;
+	/* SBA commands */
+	struct brcm_sba_command cmds[0];
+};
+
+enum sba_version {
+	SBA_VER_1 = 0,
+	SBA_VER_2
+};
+
+struct sba_device {
+	/* Underlying device */
+	struct device *dev;
+	/* DT configuration parameters */
+	enum sba_version ver;
+	/* Derived configuration parameters */
+	u32 max_req;
+	u32 hw_buf_size;
+	u32 hw_resp_size;
+	u32 max_pq_coefs;
+	u32 max_pq_srcs;
+	u32 max_cmd_per_req;
+	u32 max_xor_srcs;
+	u32 max_resp_pool_size;
+	u32 max_cmds_pool_size;
+	/* Mailbox client and mailbox channels */
+	struct mbox_client client;
+	struct mbox_chan *mchan;
+	struct device *mbox_dev;
+	/* DMA device and DMA channel */
+	struct dma_device dma_dev;
+	struct dma_chan dma_chan;
+	/* DMA channel resources */
+	void *resp_base;
+	dma_addr_t resp_dma_base;
+	void *cmds_base;
+	dma_addr_t cmds_dma_base;
+	spinlock_t reqs_lock;
+	bool reqs_fence;
+	struct list_head reqs_alloc_list;
+	struct list_head reqs_pending_list;
+	struct list_head reqs_active_list;
+	struct list_head reqs_aborted_list;
+	struct list_head reqs_free_list;
+	/* DebugFS directory entries */
+	struct dentry *root;
+	struct dentry *stats;
+};
+
+/* ====== Command helper routines ===== */
+
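+/*
+ * Insert a bit-field into a 64-bit SBA command word. For example,
+ * sba_cmd_enc(0x0, SBA_TYPE_B, SBA_TYPE_SHIFT, SBA_TYPE_MASK) clears and
+ * then sets the two-bit type field at bits 49:48 of the command.
+ */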
+static inline u64 __pure sba_cmd_enc(u64 cmd, u32 val, u32 shift, u32 mask)
+{
+	cmd &= ~((u64)mask << shift);
+	cmd |= ((u64)(val & mask) << shift);
+	return cmd;
+}
+
+static inline u32 __pure sba_cmd_load_c_mdata(u32 b0)
+{
+	return b0 & SBA_C_MDATA_BNUMx_MASK;
+}
+
+static inline u32 __pure sba_cmd_write_c_mdata(u32 b0)
+{
+	return b0 & SBA_C_MDATA_BNUMx_MASK;
+}
+
+static inline u32 __pure sba_cmd_xor_c_mdata(u32 b1, u32 b0)
+{
+	return (b0 & SBA_C_MDATA_BNUMx_MASK) |
+	       ((b1 & SBA_C_MDATA_BNUMx_MASK) << SBA_C_MDATA_BNUMx_SHIFT(1));
+}
+
+static inline u32 __pure sba_cmd_pq_c_mdata(u32 d, u32 b1, u32 b0)
+{
+	return (b0 & SBA_C_MDATA_BNUMx_MASK) |
+	       ((b1 & SBA_C_MDATA_BNUMx_MASK) << SBA_C_MDATA_BNUMx_SHIFT(1)) |
+	       ((d & SBA_C_MDATA_DNUM_MASK) << SBA_C_MDATA_DNUM_SHIFT);
+}
+
+/* ====== General helper routines ===== */
+
+static struct sba_request *sba_alloc_request(struct sba_device *sba)
+{
+	bool found = false;
+	unsigned long flags;
+	struct sba_request *req = NULL;
+
+	spin_lock_irqsave(&sba->reqs_lock, flags);
+	list_for_each_entry(req, &sba->reqs_free_list, node) {
+		if (async_tx_test_ack(&req->tx)) {
+			list_move_tail(&req->node, &sba->reqs_alloc_list);
+			found = true;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&sba->reqs_lock, flags);
+
+	if (!found) {
+		/*
+		 * We have no more free requests, so peek the mailbox
+		 * channel hoping that a few active requests have
+		 * completed, which would free up room for new ones.
+		 */
+		mbox_client_peek_data(sba->mchan);
+		return NULL;
+	}
+
+	req->flags = SBA_REQUEST_STATE_ALLOCED;
+	req->first = req;
+	INIT_LIST_HEAD(&req->next);
+	atomic_set(&req->next_pending_count, 1);
+
+	dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan);
+	async_tx_ack(&req->tx);
+
+	return req;
+}
+
+/* Note: Must be called with sba->reqs_lock held */
+static void _sba_pending_request(struct sba_device *sba,
+				 struct sba_request *req)
+{
+	lockdep_assert_held(&sba->reqs_lock);
+	req->flags &= ~SBA_REQUEST_STATE_MASK;
+	req->flags |= SBA_REQUEST_STATE_PENDING;
+	list_move_tail(&req->node, &sba->reqs_pending_list);
+	if (list_empty(&sba->reqs_active_list))
+		sba->reqs_fence = false;
+}
+
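+/*
+ * Once a fenced request (SBA_REQUEST_FENCE) is made active, no further
+ * requests are activated until the active list drains; this preserves
+ * DMA_PREP_FENCE ordering across chained requests.
+ */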
+/* Note: Must be called with sba->reqs_lock held */
+static bool _sba_active_request(struct sba_device *sba,
+				struct sba_request *req)
+{
+	lockdep_assert_held(&sba->reqs_lock);
+	if (list_empty(&sba->reqs_active_list))
+		sba->reqs_fence = false;
+	if (sba->reqs_fence)
+		return false;
+	req->flags &= ~SBA_REQUEST_STATE_MASK;
+	req->flags |= SBA_REQUEST_STATE_ACTIVE;
+	list_move_tail(&req->node, &sba->reqs_active_list);
+	if (req->flags & SBA_REQUEST_FENCE)
+		sba->reqs_fence = true;
+	return true;
+}
+
+/* Note: Must be called with sba->reqs_lock held */
+static void _sba_abort_request(struct sba_device *sba,
+			       struct sba_request *req)
+{
+	lockdep_assert_held(&sba->reqs_lock);
+	req->flags &= ~SBA_REQUEST_STATE_MASK;
+	req->flags |= SBA_REQUEST_STATE_ABORTED;
+	list_move_tail(&req->node, &sba->reqs_aborted_list);
+	if (list_empty(&sba->reqs_active_list))
+		sba->reqs_fence = false;
+}
+
+/* Note: Must be called with sba->reqs_lock held */
+static void _sba_free_request(struct sba_device *sba,
+			      struct sba_request *req)
+{
+	lockdep_assert_held(&sba->reqs_lock);
+	req->flags &= ~SBA_REQUEST_STATE_MASK;
+	req->flags |= SBA_REQUEST_STATE_FREE;
+	list_move_tail(&req->node, &sba->reqs_free_list);
+	if (list_empty(&sba->reqs_active_list))
+		sba->reqs_fence = false;
+}
+
+static void sba_free_chained_requests(struct sba_request *req)
+{
+	unsigned long flags;
+	struct sba_request *nreq;
+	struct sba_device *sba = req->sba;
+
+	spin_lock_irqsave(&sba->reqs_lock, flags);
+
+	_sba_free_request(sba, req);
+	list_for_each_entry(nreq, &req->next, next)
+		_sba_free_request(sba, nreq);
+
+	spin_unlock_irqrestore(&sba->reqs_lock, flags);
+}
+
+static void sba_chain_request(struct sba_request *first,
+			      struct sba_request *req)
+{
+	unsigned long flags;
+	struct sba_device *sba = req->sba;
+
+	spin_lock_irqsave(&sba->reqs_lock, flags);
+
+	list_add_tail(&req->next, &first->next);
+	req->first = first;
+	atomic_inc(&first->next_pending_count);
+
+	spin_unlock_irqrestore(&sba->reqs_lock, flags);
+}
+
+static void sba_cleanup_nonpending_requests(struct sba_device *sba)
+{
+	unsigned long flags;
+	struct sba_request *req, *req1;
+
+	spin_lock_irqsave(&sba->reqs_lock, flags);
+
+	/* Free up all allocated requests */
+	list_for_each_entry_safe(req, req1, &sba->reqs_alloc_list, node)
+		_sba_free_request(sba, req);
+
+	/* Set all active requests as aborted */
+	list_for_each_entry_safe(req, req1, &sba->reqs_active_list, node)
+		_sba_abort_request(sba, req);
+
+	/*
+	 * Note: We expect that aborted requests will eventually be
+	 * freed by sba_receive_message().
+	 */
+
+	spin_unlock_irqrestore(&sba->reqs_lock, flags);
+}
+
+static void sba_cleanup_pending_requests(struct sba_device *sba)
+{
+	unsigned long flags;
+	struct sba_request *req, *req1;
+
+	spin_lock_irqsave(&sba->reqs_lock, flags);
+
+	/* Free up all pending requests */
+	list_for_each_entry_safe(req, req1, &sba->reqs_pending_list, node)
+		_sba_free_request(sba, req);
+
+	spin_unlock_irqrestore(&sba->reqs_lock, flags);
+}
+
+static int sba_send_mbox_request(struct sba_device *sba,
+				 struct sba_request *req)
+{
+	int ret = 0;
+
+	/* Send message for the request */
+	req->msg.error = 0;
+	ret = mbox_send_message(sba->mchan, &req->msg);
+	if (ret < 0) {
+		dev_err(sba->dev, "send message failed with error %d", ret);
+		return ret;
+	}
+
+	/* Check error returned by mailbox controller */
+	ret = req->msg.error;
+	if (ret < 0)
+		dev_err(sba->dev, "message error %d", ret);
+
+	/* Signal txdone for mailbox channel */
+	mbox_client_txdone(sba->mchan, ret);
+
+	return ret;
+}
+
+/* Note: Must be called with sba->reqs_lock held */
+static void _sba_process_pending_requests(struct sba_device *sba)
+{
+	int ret;
+	u32 count;
+	struct sba_request *req;
+
+	/* Process up to SBA_MAX_MSG_SEND_PER_MBOX_CHANNEL pending requests */
+	count = SBA_MAX_MSG_SEND_PER_MBOX_CHANNEL;
+	while (!list_empty(&sba->reqs_pending_list) && count) {
+		/* Get the first pending request */
+		req = list_first_entry(&sba->reqs_pending_list,
+				       struct sba_request, node);
+
+		/* Try to make request active */
+		if (!_sba_active_request(sba, req))
+			break;
+
+		/* Send request to mailbox channel */
+		ret = sba_send_mbox_request(sba, req);
+		if (ret < 0) {
+			_sba_pending_request(sba, req);
+			break;
+		}
+
+		count--;
+	}
+}
+
+static void sba_process_received_request(struct sba_device *sba,
+					 struct sba_request *req)
+{
+	unsigned long flags;
+	struct dma_async_tx_descriptor *tx;
+	struct sba_request *nreq, *first = req->first;
+
+	/* Process only after all chained requests are received */
+	if (!atomic_dec_return(&first->next_pending_count)) {
+		tx = &first->tx;
+
+		WARN_ON(tx->cookie < 0);
+		if (tx->cookie > 0) {
+			spin_lock_irqsave(&sba->reqs_lock, flags);
+			dma_cookie_complete(tx);
+			spin_unlock_irqrestore(&sba->reqs_lock, flags);
+			dmaengine_desc_get_callback_invoke(tx, NULL);
+			dma_descriptor_unmap(tx);
+			tx->callback = NULL;
+			tx->callback_result = NULL;
+		}
+
+		dma_run_dependencies(tx);
+
+		spin_lock_irqsave(&sba->reqs_lock, flags);
+
+		/* Free all requests chained to first request */
+		list_for_each_entry(nreq, &first->next, next)
+			_sba_free_request(sba, nreq);
+		INIT_LIST_HEAD(&first->next);
+
+		/* Free the first request */
+		_sba_free_request(sba, first);
+
+		/* Process pending requests */
+		_sba_process_pending_requests(sba);
+
+		spin_unlock_irqrestore(&sba->reqs_lock, flags);
+	}
+}
+
+static void sba_write_stats_in_seqfile(struct sba_device *sba,
+				       struct seq_file *file)
+{
+	unsigned long flags;
+	struct sba_request *req;
+	u32 free_count = 0, alloced_count = 0;
+	u32 pending_count = 0, active_count = 0, aborted_count = 0;
+
+	spin_lock_irqsave(&sba->reqs_lock, flags);
+
+	list_for_each_entry(req, &sba->reqs_free_list, node)
+		if (async_tx_test_ack(&req->tx))
+			free_count++;
+
+	list_for_each_entry(req, &sba->reqs_alloc_list, node)
+		alloced_count++;
+
+	list_for_each_entry(req, &sba->reqs_pending_list, node)
+		pending_count++;
+
+	list_for_each_entry(req, &sba->reqs_active_list, node)
+		active_count++;
+
+	list_for_each_entry(req, &sba->reqs_aborted_list, node)
+		aborted_count++;
+
+	spin_unlock_irqrestore(&sba->reqs_lock, flags);
+
+	seq_printf(file, "maximum requests   = %d\n", sba->max_req);
+	seq_printf(file, "free requests      = %d\n", free_count);
+	seq_printf(file, "alloced requests   = %d\n", alloced_count);
+	seq_printf(file, "pending requests   = %d\n", pending_count);
+	seq_printf(file, "active requests    = %d\n", active_count);
+	seq_printf(file, "aborted requests   = %d\n", aborted_count);
+}
+
+/* ====== DMAENGINE callbacks ===== */
+
+static void sba_free_chan_resources(struct dma_chan *dchan)
+{
+	/*
+	 * Channel resources are pre-allocated, so just free up
+	 * whatever we can and re-use the pre-allocated resources
+	 * next time.
+	 */
+	sba_cleanup_nonpending_requests(to_sba_device(dchan));
+}
+
+static int sba_device_terminate_all(struct dma_chan *dchan)
+{
+	/* Cleanup all pending requests */
+	sba_cleanup_pending_requests(to_sba_device(dchan));
+
+	return 0;
+}
+
+static void sba_issue_pending(struct dma_chan *dchan)
+{
+	unsigned long flags;
+	struct sba_device *sba = to_sba_device(dchan);
+
+	/* Process pending requests */
+	spin_lock_irqsave(&sba->reqs_lock, flags);
+	_sba_process_pending_requests(sba);
+	spin_unlock_irqrestore(&sba->reqs_lock, flags);
+}
+
+static dma_cookie_t sba_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	unsigned long flags;
+	dma_cookie_t cookie;
+	struct sba_device *sba;
+	struct sba_request *req, *nreq;
+
+	if (unlikely(!tx))
+		return -EINVAL;
+
+	sba = to_sba_device(tx->chan);
+	req = to_sba_request(tx);
+
+	/* Assign cookie and mark all chained requests pending */
+	spin_lock_irqsave(&sba->reqs_lock, flags);
+	cookie = dma_cookie_assign(tx);
+	_sba_pending_request(sba, req);
+	list_for_each_entry(nreq, &req->next, next)
+		_sba_pending_request(sba, nreq);
+	spin_unlock_irqrestore(&sba->reqs_lock, flags);
+
+	return cookie;
+}
+
+static enum dma_status sba_tx_status(struct dma_chan *dchan,
+				     dma_cookie_t cookie,
+				     struct dma_tx_state *txstate)
+{
+	enum dma_status ret;
+	struct sba_device *sba = to_sba_device(dchan);
+
+	ret = dma_cookie_status(dchan, cookie, txstate);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	mbox_client_peek_data(sba->mchan);
+
+	return dma_cookie_status(dchan, cookie, txstate);
+}
+
+static void sba_fillup_interrupt_msg(struct sba_request *req,
+				     struct brcm_sba_command *cmds,
+				     struct brcm_message *msg)
+{
+	u64 cmd;
+	u32 c_mdata;
+	dma_addr_t resp_dma = req->tx.phys;
+	struct brcm_sba_command *cmdsp = cmds;
+
+	/* Type-B command to load dummy data into buf0 */
+	cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+			  SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+	cmd = sba_cmd_enc(cmd, req->sba->hw_resp_size,
+			  SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+	c_mdata = sba_cmd_load_c_mdata(0);
+	cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+			  SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+	cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER,
+			  SBA_CMD_SHIFT, SBA_CMD_MASK);
+	cmdsp->cmd = cmd;
+	*cmdsp->cmd_dma = cpu_to_le64(cmd);
+	cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+	cmdsp->data = resp_dma;
+	cmdsp->data_len = req->sba->hw_resp_size;
+	cmdsp++;
+
+	/* Type-A command to write buf0 to dummy location */
+	cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+			  SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+	cmd = sba_cmd_enc(cmd, req->sba->hw_resp_size,
+			  SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+	cmd = sba_cmd_enc(cmd, 0x1,
+			  SBA_RESP_SHIFT, SBA_RESP_MASK);
+	c_mdata = sba_cmd_write_c_mdata(0);
+	cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+			  SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+	cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER,
+			  SBA_CMD_SHIFT, SBA_CMD_MASK);
+	cmdsp->cmd = cmd;
+	*cmdsp->cmd_dma = cpu_to_le64(cmd);
+	cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+	if (req->sba->hw_resp_size) {
+		cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
+		cmdsp->resp = resp_dma;
+		cmdsp->resp_len = req->sba->hw_resp_size;
+	}
+	cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
+	cmdsp->data = resp_dma;
+	cmdsp->data_len = req->sba->hw_resp_size;
+	cmdsp++;
+
+	/* Fillup brcm_message */
+	msg->type = BRCM_MESSAGE_SBA;
+	msg->sba.cmds = cmds;
+	msg->sba.cmds_count = cmdsp - cmds;
+	msg->ctx = req;
+	msg->error = 0;
+}
+
+static struct dma_async_tx_descriptor *
+sba_prep_dma_interrupt(struct dma_chan *dchan, unsigned long flags)
+{
+	struct sba_request *req = NULL;
+	struct sba_device *sba = to_sba_device(dchan);
+
+	/* Alloc new request */
+	req = sba_alloc_request(sba);
+	if (!req)
+		return NULL;
+
+	/*
+	 * Force fence so that no requests are submitted
+	 * until DMA callback for this request is invoked.
+	 */
+	req->flags |= SBA_REQUEST_FENCE;
+
+	/* Fillup request message */
+	sba_fillup_interrupt_msg(req, req->cmds, &req->msg);
+
+	/* Init async_tx descriptor */
+	req->tx.flags = flags;
+	req->tx.cookie = -EBUSY;
+
+	return &req->tx;
+}
+
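+/*
+ * A memcpy is expressed as two SBA commands: a Type-B command that loads
+ * the source data into internal buffer 0, followed by a Type-A command
+ * that writes buffer 0 out to the destination (optionally requesting a
+ * hardware response).
+ */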
+static void sba_fillup_memcpy_msg(struct sba_request *req,
+				  struct brcm_sba_command *cmds,
+				  struct brcm_message *msg,
+				  dma_addr_t msg_offset, size_t msg_len,
+				  dma_addr_t dst, dma_addr_t src)
+{
+	u64 cmd;
+	u32 c_mdata;
+	dma_addr_t resp_dma = req->tx.phys;
+	struct brcm_sba_command *cmdsp = cmds;
+
+	/* Type-B command to load data into buf0 */
+	cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+			  SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+	cmd = sba_cmd_enc(cmd, msg_len,
+			  SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+	c_mdata = sba_cmd_load_c_mdata(0);
+	cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+			  SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+	cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER,
+			  SBA_CMD_SHIFT, SBA_CMD_MASK);
+	cmdsp->cmd = cmd;
+	*cmdsp->cmd_dma = cpu_to_le64(cmd);
+	cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+	cmdsp->data = src + msg_offset;
+	cmdsp->data_len = msg_len;
+	cmdsp++;
+
+	/* Type-A command to write buf0 */
+	cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+			  SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+	cmd = sba_cmd_enc(cmd, msg_len,
+			  SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+	cmd = sba_cmd_enc(cmd, 0x1,
+			  SBA_RESP_SHIFT, SBA_RESP_MASK);
+	c_mdata = sba_cmd_write_c_mdata(0);
+	cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+			  SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+	cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER,
+			  SBA_CMD_SHIFT, SBA_CMD_MASK);
+	cmdsp->cmd = cmd;
+	*cmdsp->cmd_dma = cpu_to_le64(cmd);
+	cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+	if (req->sba->hw_resp_size) {
+		cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
+		cmdsp->resp = resp_dma;
+		cmdsp->resp_len = req->sba->hw_resp_size;
+	}
+	cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
+	cmdsp->data = dst + msg_offset;
+	cmdsp->data_len = msg_len;
+	cmdsp++;
+
+	/* Fillup brcm_message */
+	msg->type = BRCM_MESSAGE_SBA;
+	msg->sba.cmds = cmds;
+	msg->sba.cmds_count = cmdsp - cmds;
+	msg->ctx = req;
+	msg->error = 0;
+}
+
+static struct sba_request *
+sba_prep_dma_memcpy_req(struct sba_device *sba,
+			dma_addr_t off, dma_addr_t dst, dma_addr_t src,
+			size_t len, unsigned long flags)
+{
+	struct sba_request *req = NULL;
+
+	/* Alloc new request */
+	req = sba_alloc_request(sba);
+	if (!req)
+		return NULL;
+	if (flags & DMA_PREP_FENCE)
+		req->flags |= SBA_REQUEST_FENCE;
+
+	/* Fillup request message */
+	sba_fillup_memcpy_msg(req, req->cmds, &req->msg,
+			      off, len, dst, src);
+
+	/* Init async_tx descriptor */
+	req->tx.flags = flags;
+	req->tx.cookie = -EBUSY;
+
+	return req;
+}
+
+static struct dma_async_tx_descriptor *
+sba_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dst, dma_addr_t src,
+		    size_t len, unsigned long flags)
+{
+	size_t req_len;
+	dma_addr_t off = 0;
+	struct sba_device *sba = to_sba_device(dchan);
+	struct sba_request *first = NULL, *req;
+
+	/* Create chained requests, each handling up to hw_buf_size bytes */
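+	/*
+	 * e.g. a copy spanning three times hw_buf_size becomes three chained
+	 * requests that complete together as a single transaction.
+	 */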
+	while (len) {
+		req_len = (len < sba->hw_buf_size) ? len : sba->hw_buf_size;
+
+		req = sba_prep_dma_memcpy_req(sba, off, dst, src,
+					      req_len, flags);
+		if (!req) {
+			if (first)
+				sba_free_chained_requests(first);
+			return NULL;
+		}
+
+		if (first)
+			sba_chain_request(first, req);
+		else
+			first = req;
+
+		off += req_len;
+		len -= req_len;
+	}
+
+	return (first) ? &first->tx : NULL;
+}
+
+static void sba_fillup_xor_msg(struct sba_request *req,
+				struct brcm_sba_command *cmds,
+				struct brcm_message *msg,
+				dma_addr_t msg_offset, size_t msg_len,
+				dma_addr_t dst, dma_addr_t *src, u32 src_cnt)
+{
+	u64 cmd;
+	u32 c_mdata;
+	unsigned int i;
+	dma_addr_t resp_dma = req->tx.phys;
+	struct brcm_sba_command *cmdsp = cmds;
+
+	/* Type-B command to load data into buf0 */
+	cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+			  SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+	cmd = sba_cmd_enc(cmd, msg_len,
+			  SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+	c_mdata = sba_cmd_load_c_mdata(0);
+	cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+			  SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+	cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER,
+			  SBA_CMD_SHIFT, SBA_CMD_MASK);
+	cmdsp->cmd = cmd;
+	*cmdsp->cmd_dma = cpu_to_le64(cmd);
+	cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+	cmdsp->data = src[0] + msg_offset;
+	cmdsp->data_len = msg_len;
+	cmdsp++;
+
+	/* Type-B commands to xor data with buf0 and put it back in buf0 */
+	for (i = 1; i < src_cnt; i++) {
+		cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+				  SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+		cmd = sba_cmd_enc(cmd, msg_len,
+				  SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+		c_mdata = sba_cmd_xor_c_mdata(0, 0);
+		cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+				  SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+		cmd = sba_cmd_enc(cmd, SBA_CMD_XOR,
+				  SBA_CMD_SHIFT, SBA_CMD_MASK);
+		cmdsp->cmd = cmd;
+		*cmdsp->cmd_dma = cpu_to_le64(cmd);
+		cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+		cmdsp->data = src[i] + msg_offset;
+		cmdsp->data_len = msg_len;
+		cmdsp++;
+	}
+
+	/* Type-A command to write buf0 */
+	cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+			  SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+	cmd = sba_cmd_enc(cmd, msg_len,
+			  SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+	cmd = sba_cmd_enc(cmd, 0x1,
+			  SBA_RESP_SHIFT, SBA_RESP_MASK);
+	c_mdata = sba_cmd_write_c_mdata(0);
+	cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+			  SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+	cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER,
+			  SBA_CMD_SHIFT, SBA_CMD_MASK);
+	cmdsp->cmd = cmd;
+	*cmdsp->cmd_dma = cpu_to_le64(cmd);
+	cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+	if (req->sba->hw_resp_size) {
+		cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
+		cmdsp->resp = resp_dma;
+		cmdsp->resp_len = req->sba->hw_resp_size;
+	}
+	cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
+	cmdsp->data = dst + msg_offset;
+	cmdsp->data_len = msg_len;
+	cmdsp++;
+
+	/* Fillup brcm_message */
+	msg->type = BRCM_MESSAGE_SBA;
+	msg->sba.cmds = cmds;
+	msg->sba.cmds_count = cmdsp - cmds;
+	msg->ctx = req;
+	msg->error = 0;
+}
+
+static struct sba_request *
+sba_prep_dma_xor_req(struct sba_device *sba,
+		     dma_addr_t off, dma_addr_t dst, dma_addr_t *src,
+		     u32 src_cnt, size_t len, unsigned long flags)
+{
+	struct sba_request *req = NULL;
+
+	/* Alloc new request */
+	req = sba_alloc_request(sba);
+	if (!req)
+		return NULL;
+	if (flags & DMA_PREP_FENCE)
+		req->flags |= SBA_REQUEST_FENCE;
+
+	/* Fillup request message */
+	sba_fillup_xor_msg(req, req->cmds, &req->msg,
+			   off, len, dst, src, src_cnt);
+
+	/* Init async_tx descriptor */
+	req->tx.flags = flags;
+	req->tx.cookie = -EBUSY;
+
+	return req;
+}
+
+static struct dma_async_tx_descriptor *
+sba_prep_dma_xor(struct dma_chan *dchan, dma_addr_t dst, dma_addr_t *src,
+		 u32 src_cnt, size_t len, unsigned long flags)
+{
+	size_t req_len;
+	dma_addr_t off = 0;
+	struct sba_device *sba = to_sba_device(dchan);
+	struct sba_request *first = NULL, *req;
+
+	/* Sanity checks */
+	if (unlikely(src_cnt > sba->max_xor_srcs))
+		return NULL;
+
+	/* Create chained requests, each handling up to hw_buf_size bytes */
+	while (len) {
+		req_len = (len < sba->hw_buf_size) ? len : sba->hw_buf_size;
+
+		req = sba_prep_dma_xor_req(sba, off, dst, src, src_cnt,
+					   req_len, flags);
+		if (!req) {
+			if (first)
+				sba_free_chained_requests(first);
+			return NULL;
+		}
+
+		if (first)
+			sba_chain_request(first, req);
+		else
+			first = req;
+
+		off += req_len;
+		len -= req_len;
+	}
+
+	return (first) ? &first->tx : NULL;
+}
+
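+/*
+ * P/Q generation: when continuing a previous computation, the old P and Q
+ * are first loaded into buffers 0 and 1; otherwise all buffers are zeroed.
+ * Each source then contributes via a GALOIS_XOR command carrying the log
+ * of its coefficient (raid6_gflog[scf[i]]), and finally buffer 0 (P) and
+ * buffer 1 (Q) are written out.
+ */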
+static void sba_fillup_pq_msg(struct sba_request *req,
+				bool pq_continue,
+				struct brcm_sba_command *cmds,
+				struct brcm_message *msg,
+				dma_addr_t msg_offset, size_t msg_len,
+				dma_addr_t *dst_p, dma_addr_t *dst_q,
+				const u8 *scf, dma_addr_t *src, u32 src_cnt)
+{
+	u64 cmd;
+	u32 c_mdata;
+	unsigned int i;
+	dma_addr_t resp_dma = req->tx.phys;
+	struct brcm_sba_command *cmdsp = cmds;
+
+	if (pq_continue) {
+		/* Type-B command to load old P into buf0 */
+		if (dst_p) {
+			cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+				SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+			cmd = sba_cmd_enc(cmd, msg_len,
+				SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+			c_mdata = sba_cmd_load_c_mdata(0);
+			cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+				SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+			cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER,
+				SBA_CMD_SHIFT, SBA_CMD_MASK);
+			cmdsp->cmd = cmd;
+			*cmdsp->cmd_dma = cpu_to_le64(cmd);
+			cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+			cmdsp->data = *dst_p + msg_offset;
+			cmdsp->data_len = msg_len;
+			cmdsp++;
+		}
+
+		/* Type-B command to load old Q into buf1 */
+		if (dst_q) {
+			cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+				SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+			cmd = sba_cmd_enc(cmd, msg_len,
+				SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+			c_mdata = sba_cmd_load_c_mdata(1);
+			cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+				SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+			cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER,
+				SBA_CMD_SHIFT, SBA_CMD_MASK);
+			cmdsp->cmd = cmd;
+			*cmdsp->cmd_dma = cpu_to_le64(cmd);
+			cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+			cmdsp->data = *dst_q + msg_offset;
+			cmdsp->data_len = msg_len;
+			cmdsp++;
+		}
+	} else {
+		/* Type-A command to zero all buffers */
+		cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+				  SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+		cmd = sba_cmd_enc(cmd, msg_len,
+				  SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+		cmd = sba_cmd_enc(cmd, SBA_CMD_ZERO_ALL_BUFFERS,
+				  SBA_CMD_SHIFT, SBA_CMD_MASK);
+		cmdsp->cmd = cmd;
+		*cmdsp->cmd_dma = cpu_to_le64(cmd);
+		cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+		cmdsp++;
+	}
+
+	/* Type-B commands to generate P into buf0 and Q into buf1 */
+	for (i = 0; i < src_cnt; i++) {
+		cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+				  SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+		cmd = sba_cmd_enc(cmd, msg_len,
+				  SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+		c_mdata = sba_cmd_pq_c_mdata(raid6_gflog[scf[i]], 1, 0);
+		cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+				  SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+		cmd = sba_cmd_enc(cmd, SBA_C_MDATA_MS(c_mdata),
+				  SBA_C_MDATA_MS_SHIFT, SBA_C_MDATA_MS_MASK);
+		cmd = sba_cmd_enc(cmd, SBA_CMD_GALOIS_XOR,
+				  SBA_CMD_SHIFT, SBA_CMD_MASK);
+		cmdsp->cmd = cmd;
+		*cmdsp->cmd_dma = cpu_to_le64(cmd);
+		cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+		cmdsp->data = src[i] + msg_offset;
+		cmdsp->data_len = msg_len;
+		cmdsp++;
+	}
+
+	/* Type-A command to write buf0 */
+	if (dst_p) {
+		cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+				  SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+		cmd = sba_cmd_enc(cmd, msg_len,
+				  SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+		cmd = sba_cmd_enc(cmd, 0x1,
+				  SBA_RESP_SHIFT, SBA_RESP_MASK);
+		c_mdata = sba_cmd_write_c_mdata(0);
+		cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+				  SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+		cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER,
+				  SBA_CMD_SHIFT, SBA_CMD_MASK);
+		cmdsp->cmd = cmd;
+		*cmdsp->cmd_dma = cpu_to_le64(cmd);
+		cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+		if (req->sba->hw_resp_size) {
+			cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
+			cmdsp->resp = resp_dma;
+			cmdsp->resp_len = req->sba->hw_resp_size;
+		}
+		cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
+		cmdsp->data = *dst_p + msg_offset;
+		cmdsp->data_len = msg_len;
+		cmdsp++;
+	}
+
+	/* Type-A command to write buf1 */
+	if (dst_q) {
+		cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+				  SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+		cmd = sba_cmd_enc(cmd, msg_len,
+				  SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+		cmd = sba_cmd_enc(cmd, 0x1,
+				  SBA_RESP_SHIFT, SBA_RESP_MASK);
+		c_mdata = sba_cmd_write_c_mdata(1);
+		cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+				  SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+		cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER,
+				  SBA_CMD_SHIFT, SBA_CMD_MASK);
+		cmdsp->cmd = cmd;
+		*cmdsp->cmd_dma = cpu_to_le64(cmd);
+		cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+		if (req->sba->hw_resp_size) {
+			cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
+			cmdsp->resp = resp_dma;
+			cmdsp->resp_len = req->sba->hw_resp_size;
+		}
+		cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
+		cmdsp->data = *dst_q + msg_offset;
+		cmdsp->data_len = msg_len;
+		cmdsp++;
+	}
+
+	/* Fillup brcm_message */
+	msg->type = BRCM_MESSAGE_SBA;
+	msg->sba.cmds = cmds;
+	msg->sba.cmds_count = cmdsp - cmds;
+	msg->ctx = req;
+	msg->error = 0;
+}
+
+static struct sba_request *
+sba_prep_dma_pq_req(struct sba_device *sba, dma_addr_t off,
+		    dma_addr_t *dst_p, dma_addr_t *dst_q, dma_addr_t *src,
+		    u32 src_cnt, const u8 *scf, size_t len, unsigned long flags)
+{
+	struct sba_request *req = NULL;
+
+	/* Alloc new request */
+	req = sba_alloc_request(sba);
+	if (!req)
+		return NULL;
+	if (flags & DMA_PREP_FENCE)
+		req->flags |= SBA_REQUEST_FENCE;
+
+	/* Fillup request messages */
+	sba_fillup_pq_msg(req, dmaf_continue(flags),
+			  req->cmds, &req->msg,
+			  off, len, dst_p, dst_q, scf, src, src_cnt);
+
+	/* Init async_tx descriptor */
+	req->tx.flags = flags;
+	req->tx.cookie = -EBUSY;
+
+	return req;
+}
+
+static void sba_fillup_pq_single_msg(struct sba_request *req,
+				bool pq_continue,
+				struct brcm_sba_command *cmds,
+				struct brcm_message *msg,
+				dma_addr_t msg_offset, size_t msg_len,
+				dma_addr_t *dst_p, dma_addr_t *dst_q,
+				dma_addr_t src, u8 scf)
+{
+	u64 cmd;
+	u32 c_mdata;
+	u8 pos, dpos = raid6_gflog[scf];
+	dma_addr_t resp_dma = req->tx.phys;
+	struct brcm_sba_command *cmdsp = cmds;
+
+	if (!dst_p)
+		goto skip_p;
+
+	if (pq_continue) {
+		/* Type-B command to load old P into buf0 */
+		cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+				  SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+		cmd = sba_cmd_enc(cmd, msg_len,
+				  SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+		c_mdata = sba_cmd_load_c_mdata(0);
+		cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+				  SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+		cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER,
+				  SBA_CMD_SHIFT, SBA_CMD_MASK);
+		cmdsp->cmd = cmd;
+		*cmdsp->cmd_dma = cpu_to_le64(cmd);
+		cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+		cmdsp->data = *dst_p + msg_offset;
+		cmdsp->data_len = msg_len;
+		cmdsp++;
+
+		/*
+		 * Type-B command to xor data with buf0 and put it
+		 * back in buf0
+		 */
+		cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+				  SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+		cmd = sba_cmd_enc(cmd, msg_len,
+				  SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+		c_mdata = sba_cmd_xor_c_mdata(0, 0);
+		cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+				  SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+		cmd = sba_cmd_enc(cmd, SBA_CMD_XOR,
+				  SBA_CMD_SHIFT, SBA_CMD_MASK);
+		cmdsp->cmd = cmd;
+		*cmdsp->cmd_dma = cpu_to_le64(cmd);
+		cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+		cmdsp->data = src + msg_offset;
+		cmdsp->data_len = msg_len;
+		cmdsp++;
+	} else {
+		/* Type-B command to load data into buf0 */
+		cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+				  SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+		cmd = sba_cmd_enc(cmd, msg_len,
+				  SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+		c_mdata = sba_cmd_load_c_mdata(0);
+		cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+				  SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+		cmd = sba_cmd_enc(cmd, SBA_CMD_LOAD_BUFFER,
+				  SBA_CMD_SHIFT, SBA_CMD_MASK);
+		cmdsp->cmd = cmd;
+		*cmdsp->cmd_dma = cpu_to_le64(cmd);
+		cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+		cmdsp->data = src + msg_offset;
+		cmdsp->data_len = msg_len;
+		cmdsp++;
+	}
+
+	/* Type-A command to write buf0 */
+	cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+			  SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+	cmd = sba_cmd_enc(cmd, msg_len,
+			  SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+	cmd = sba_cmd_enc(cmd, 0x1,
+			  SBA_RESP_SHIFT, SBA_RESP_MASK);
+	c_mdata = sba_cmd_write_c_mdata(0);
+	cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+			  SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+	cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER,
+			  SBA_CMD_SHIFT, SBA_CMD_MASK);
+	cmdsp->cmd = cmd;
+	*cmdsp->cmd_dma = cpu_to_le64(cmd);
+	cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+	if (req->sba->hw_resp_size) {
+		cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
+		cmdsp->resp = resp_dma;
+		cmdsp->resp_len = req->sba->hw_resp_size;
+	}
+	cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
+	cmdsp->data = *dst_p + msg_offset;
+	cmdsp->data_len = msg_len;
+	cmdsp++;
+
+skip_p:
+	if (!dst_q)
+		goto skip_q;
+
+	/* Type-A command to zero all buffers */
+	cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+			  SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+	cmd = sba_cmd_enc(cmd, msg_len,
+			  SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+	cmd = sba_cmd_enc(cmd, SBA_CMD_ZERO_ALL_BUFFERS,
+			  SBA_CMD_SHIFT, SBA_CMD_MASK);
+	cmdsp->cmd = cmd;
+	*cmdsp->cmd_dma = cpu_to_le64(cmd);
+	cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+	cmdsp++;
+
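+	/*
+	 * raid6_gflog[0] is 255, i.e. the coefficient is zero and this
+	 * source contributes nothing to Q, so skip the Galois computation.
+	 */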
+	if (dpos == 255)
+		goto skip_q_computation;
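+	/*
+	 * A single GALOIS pass can apply an exponent of at most
+	 * max_pq_coefs - 1, so clamp it here and make up the remainder
+	 * with repeated passes in the loop below.
+	 */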
+	pos = (dpos < req->sba->max_pq_coefs) ?
+		dpos : (req->sba->max_pq_coefs - 1);
+
+	/*
+	 * Type-B command to generate initial Q from data
+	 * and store output into buf0
+	 */
+	cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+			  SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+	cmd = sba_cmd_enc(cmd, msg_len,
+			  SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+	c_mdata = sba_cmd_pq_c_mdata(pos, 0, 0);
+	cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+			  SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+	cmd = sba_cmd_enc(cmd, SBA_C_MDATA_MS(c_mdata),
+			  SBA_C_MDATA_MS_SHIFT, SBA_C_MDATA_MS_MASK);
+	cmd = sba_cmd_enc(cmd, SBA_CMD_GALOIS,
+			  SBA_CMD_SHIFT, SBA_CMD_MASK);
+	cmdsp->cmd = cmd;
+	*cmdsp->cmd_dma = cpu_to_le64(cmd);
+	cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+	cmdsp->data = src + msg_offset;
+	cmdsp->data_len = msg_len;
+	cmdsp++;
+
+	dpos -= pos;
+
+	/* Multiple Type-A commands to generate the final Q */
+	while (dpos) {
+		pos = (dpos < req->sba->max_pq_coefs) ?
+			dpos : (req->sba->max_pq_coefs - 1);
+
+		/*
+		 * Type-A command to generate Q from buf0 and buf1,
+		 * storing the result in buf0
+		 */
+		cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+				  SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+		cmd = sba_cmd_enc(cmd, msg_len,
+				  SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+		c_mdata = sba_cmd_pq_c_mdata(pos, 0, 1);
+		cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+				  SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+		cmd = sba_cmd_enc(cmd, SBA_C_MDATA_MS(c_mdata),
+				  SBA_C_MDATA_MS_SHIFT, SBA_C_MDATA_MS_MASK);
+		cmd = sba_cmd_enc(cmd, SBA_CMD_GALOIS,
+				  SBA_CMD_SHIFT, SBA_CMD_MASK);
+		cmdsp->cmd = cmd;
+		*cmdsp->cmd_dma = cpu_to_le64(cmd);
+		cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+		cmdsp++;
+
+		dpos -= pos;
+	}
+
+skip_q_computation:
+	if (pq_continue) {
+		/*
+		 * Type-B command to XOR previous output with
+		 * buf0 and write it into buf0
+		 */
+		cmd = sba_cmd_enc(0x0, SBA_TYPE_B,
+				  SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+		cmd = sba_cmd_enc(cmd, msg_len,
+				  SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+		c_mdata = sba_cmd_xor_c_mdata(0, 0);
+		cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+				  SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+		cmd = sba_cmd_enc(cmd, SBA_CMD_XOR,
+				  SBA_CMD_SHIFT, SBA_CMD_MASK);
+		cmdsp->cmd = cmd;
+		*cmdsp->cmd_dma = cpu_to_le64(cmd);
+		cmdsp->flags = BRCM_SBA_CMD_TYPE_B;
+		cmdsp->data = *dst_q + msg_offset;
+		cmdsp->data_len = msg_len;
+		cmdsp++;
+	}
+
+	/* Type-A command to write buf0 */
+	cmd = sba_cmd_enc(0x0, SBA_TYPE_A,
+			  SBA_TYPE_SHIFT, SBA_TYPE_MASK);
+	cmd = sba_cmd_enc(cmd, msg_len,
+			  SBA_USER_DEF_SHIFT, SBA_USER_DEF_MASK);
+	cmd = sba_cmd_enc(cmd, 0x1,
+			  SBA_RESP_SHIFT, SBA_RESP_MASK);
+	c_mdata = sba_cmd_write_c_mdata(0);
+	cmd = sba_cmd_enc(cmd, SBA_C_MDATA_LS(c_mdata),
+			  SBA_C_MDATA_SHIFT, SBA_C_MDATA_MASK);
+	cmd = sba_cmd_enc(cmd, SBA_CMD_WRITE_BUFFER,
+			  SBA_CMD_SHIFT, SBA_CMD_MASK);
+	cmdsp->cmd = cmd;
+	*cmdsp->cmd_dma = cpu_to_le64(cmd);
+	cmdsp->flags = BRCM_SBA_CMD_TYPE_A;
+	if (req->sba->hw_resp_size) {
+		cmdsp->flags |= BRCM_SBA_CMD_HAS_RESP;
+		cmdsp->resp = resp_dma;
+		cmdsp->resp_len = req->sba->hw_resp_size;
+	}
+	cmdsp->flags |= BRCM_SBA_CMD_HAS_OUTPUT;
+	cmdsp->data = *dst_q + msg_offset;
+	cmdsp->data_len = msg_len;
+	cmdsp++;
+
+skip_q:
+	/* Fillup brcm_message */
+	msg->type = BRCM_MESSAGE_SBA;
+	msg->sba.cmds = cmds;
+	msg->sba.cmds_count = cmdsp - cmds;
+	msg->ctx = req;
+	msg->error = 0;
+}
+
+static struct sba_request *
+sba_prep_dma_pq_single_req(struct sba_device *sba, dma_addr_t off,
+			   dma_addr_t *dst_p, dma_addr_t *dst_q,
+			   dma_addr_t src, u8 scf, size_t len,
+			   unsigned long flags)
+{
+	struct sba_request *req = NULL;
+
+	/* Alloc new request */
+	req = sba_alloc_request(sba);
+	if (!req)
+		return NULL;
+	if (flags & DMA_PREP_FENCE)
+		req->flags |= SBA_REQUEST_FENCE;
+
+	/* Fillup request messages */
+	sba_fillup_pq_single_msg(req, dmaf_continue(flags),
+				 req->cmds, &req->msg, off, len,
+				 dst_p, dst_q, src, scf);
+
+	/* Init async_tx descriptor */
+	req->tx.flags = flags;
+	req->tx.cookie = -EBUSY;
+
+	return req;
+}
+
+static struct dma_async_tx_descriptor *
+sba_prep_dma_pq(struct dma_chan *dchan, dma_addr_t *dst, dma_addr_t *src,
+		u32 src_cnt, const u8 *scf, size_t len, unsigned long flags)
+{
+	u32 i, dst_q_index;
+	size_t req_len;
+	bool slow = false;
+	dma_addr_t off = 0;
+	dma_addr_t *dst_p = NULL, *dst_q = NULL;
+	struct sba_device *sba = to_sba_device(dchan);
+	struct sba_request *first = NULL, *req;
+
+	/* Sanity checks */
+	if (unlikely(src_cnt > sba->max_pq_srcs))
+		return NULL;
+	for (i = 0; i < src_cnt; i++)
+		if (sba->max_pq_coefs <= raid6_gflog[scf[i]])
+			slow = true;
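+	/*
+	 * If any coefficient exponent is beyond what a single hardware pass
+	 * can apply, fall back to building Q one source at a time with
+	 * sba_prep_dma_pq_single_req() in the loop below.
+	 */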
+
+	/* Figure-out P and Q destination addresses */
+	if (!(flags & DMA_PREP_PQ_DISABLE_P))
+		dst_p = &dst[0];
+	if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+		dst_q = &dst[1];
+
+	/* Create chained requests, each handling up to hw_buf_size bytes */
+	while (len) {
+		req_len = (len < sba->hw_buf_size) ? len : sba->hw_buf_size;
+
+		if (slow) {
+			dst_q_index = src_cnt;
+
+			if (dst_q) {
+				for (i = 0; i < src_cnt; i++) {
+					if (*dst_q == src[i]) {
+						dst_q_index = i;
+						break;
+					}
+				}
+			}
+
+			if (dst_q_index < src_cnt) {
+				i = dst_q_index;
+				req = sba_prep_dma_pq_single_req(sba,
+					off, dst_p, dst_q, src[i], scf[i],
+					req_len, flags | DMA_PREP_FENCE);
+				if (!req)
+					goto fail;
+
+				if (first)
+					sba_chain_request(first, req);
+				else
+					first = req;
+
+				flags |= DMA_PREP_CONTINUE;
+			}
+
+			for (i = 0; i < src_cnt; i++) {
+				if (dst_q_index == i)
+					continue;
+
+				req = sba_prep_dma_pq_single_req(sba,
+					off, dst_p, dst_q, src[i], scf[i],
+					req_len, flags | DMA_PREP_FENCE);
+				if (!req)
+					goto fail;
+
+				if (first)
+					sba_chain_request(first, req);
+				else
+					first = req;
+
+				flags |= DMA_PREP_CONTINUE;
+			}
+		} else {
+			req = sba_prep_dma_pq_req(sba, off,
+						  dst_p, dst_q, src, src_cnt,
+						  scf, req_len, flags);
+			if (!req)
+				goto fail;
+
+			if (first)
+				sba_chain_request(first, req);
+			else
+				first = req;
+		}
+
+		off += req_len;
+		len -= req_len;
+	}
+
+	return (first) ? &first->tx : NULL;
+
+fail:
+	if (first)
+		sba_free_chained_requests(first);
+	return NULL;
+}
+
+/* ====== Mailbox callbacks ===== */
+
+static void sba_receive_message(struct mbox_client *cl, void *msg)
+{
+	struct brcm_message *m = msg;
+	struct sba_request *req = m->ctx;
+	struct sba_device *sba = req->sba;
+
+	/* Report the error if the message carries one */
+	if (m->error < 0)
+		dev_err(sba->dev, "%s got message with error %d",
+			dma_chan_name(&sba->dma_chan), m->error);
+
+	/* Process received request */
+	sba_process_received_request(sba, req);
+}
+
+/* ====== Debugfs callbacks ====== */
+
+static int sba_debugfs_stats_show(struct seq_file *file, void *offset)
+{
+	struct platform_device *pdev = to_platform_device(file->private);
+	struct sba_device *sba = platform_get_drvdata(pdev);
+
+	/* Write stats in file */
+	sba_write_stats_in_seqfile(sba, file);
+
+	return 0;
+}
+
+/* ====== Platform driver routines ===== */
+
+static int sba_prealloc_channel_resources(struct sba_device *sba)
+{
+	int i, j, ret = 0;
+	struct sba_request *req = NULL;
+
+	sba->resp_base = dma_alloc_coherent(sba->mbox_dev,
+					    sba->max_resp_pool_size,
+					    &sba->resp_dma_base, GFP_KERNEL);
+	if (!sba->resp_base)
+		return -ENOMEM;
+
+	sba->cmds_base = dma_alloc_coherent(sba->mbox_dev,
+					    sba->max_cmds_pool_size,
+					    &sba->cmds_dma_base, GFP_KERNEL);
+	if (!sba->cmds_base) {
+		ret = -ENOMEM;
+		goto fail_free_resp_pool;
+	}
+
+	spin_lock_init(&sba->reqs_lock);
+	sba->reqs_fence = false;
+	INIT_LIST_HEAD(&sba->reqs_alloc_list);
+	INIT_LIST_HEAD(&sba->reqs_pending_list);
+	INIT_LIST_HEAD(&sba->reqs_active_list);
+	INIT_LIST_HEAD(&sba->reqs_aborted_list);
+	INIT_LIST_HEAD(&sba->reqs_free_list);
+
+	for (i = 0; i < sba->max_req; i++) {
+		req = devm_kzalloc(sba->dev,
+				   struct_size(req, cmds, sba->max_cmd_per_req),
+				   GFP_KERNEL);
+		if (!req) {
+			ret = -ENOMEM;
+			goto fail_free_cmds_pool;
+		}
+		INIT_LIST_HEAD(&req->node);
+		req->sba = sba;
+		req->flags = SBA_REQUEST_STATE_FREE;
+		INIT_LIST_HEAD(&req->next);
+		atomic_set(&req->next_pending_count, 0);
+		for (j = 0; j < sba->max_cmd_per_req; j++) {
+			req->cmds[j].cmd = 0;
+			req->cmds[j].cmd_dma = sba->cmds_base +
+				(i * sba->max_cmd_per_req + j) * sizeof(u64);
+			req->cmds[j].cmd_dma_addr = sba->cmds_dma_base +
+				(i * sba->max_cmd_per_req + j) * sizeof(u64);
+			req->cmds[j].flags = 0;
+		}
+		memset(&req->msg, 0, sizeof(req->msg));
+		dma_async_tx_descriptor_init(&req->tx, &sba->dma_chan);
+		async_tx_ack(&req->tx);
+		req->tx.tx_submit = sba_tx_submit;
+		req->tx.phys = sba->resp_dma_base + i * sba->hw_resp_size;
+		list_add_tail(&req->node, &sba->reqs_free_list);
+	}
+
+	return 0;
+
+fail_free_cmds_pool:
+	dma_free_coherent(sba->mbox_dev,
+			  sba->max_cmds_pool_size,
+			  sba->cmds_base, sba->cmds_dma_base);
+fail_free_resp_pool:
+	dma_free_coherent(sba->mbox_dev,
+			  sba->max_resp_pool_size,
+			  sba->resp_base, sba->resp_dma_base);
+	return ret;
+}
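+
+/*
+ * For illustration: the command pool set up above is one flat array of
+ * u64 slots carved up per request.  Request i owns slots
+ * [i * max_cmd_per_req, (i + 1) * max_cmd_per_req), so cmds[j] of
+ * request i sits at cmds_base + (i * max_cmd_per_req + j) * sizeof(u64),
+ * with its bus address taken from cmds_dma_base at the same offset.
+ */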
+
+static void sba_freeup_channel_resources(struct sba_device *sba)
+{
+	dmaengine_terminate_all(&sba->dma_chan);
+	dma_free_coherent(sba->mbox_dev, sba->max_cmds_pool_size,
+			  sba->cmds_base, sba->cmds_dma_base);
+	dma_free_coherent(sba->mbox_dev, sba->max_resp_pool_size,
+			  sba->resp_base, sba->resp_dma_base);
+	sba->resp_base = NULL;
+	sba->resp_dma_base = 0;
+}
+
+static int sba_async_register(struct sba_device *sba)
+{
+	int ret;
+	struct dma_device *dma_dev = &sba->dma_dev;
+
+	/* Initialize DMA channel cookie */
+	sba->dma_chan.device = dma_dev;
+	dma_cookie_init(&sba->dma_chan);
+
+	/* Initialize DMA device capability mask */
+	dma_cap_zero(dma_dev->cap_mask);
+	dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask);
+	dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+	dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+	dma_cap_set(DMA_PQ, dma_dev->cap_mask);
+
+	/*
+	 * Set mailbox channel device as the base device of
+	 * our dma_device because the actual memory accesses
+	 * will be done by mailbox controller
+	 */
+	dma_dev->dev = sba->mbox_dev;
+
+	/* Set base prep routines */
+	dma_dev->device_free_chan_resources = sba_free_chan_resources;
+	dma_dev->device_terminate_all = sba_device_terminate_all;
+	dma_dev->device_issue_pending = sba_issue_pending;
+	dma_dev->device_tx_status = sba_tx_status;
+
+	/* Set interrupt routine */
+	if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_interrupt = sba_prep_dma_interrupt;
+
+	/* Set memcpy routine */
+	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_memcpy = sba_prep_dma_memcpy;
+
+	/* Set xor routine and capability */
+	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+		dma_dev->device_prep_dma_xor = sba_prep_dma_xor;
+		dma_dev->max_xor = sba->max_xor_srcs;
+	}
+
+	/* Set pq routine and capability */
+	if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+		dma_dev->device_prep_dma_pq = sba_prep_dma_pq;
+		dma_set_maxpq(dma_dev, sba->max_pq_srcs, 0);
+	}
+
+	/* Initialize DMA device channel list */
+	INIT_LIST_HEAD(&dma_dev->channels);
+	list_add_tail(&sba->dma_chan.device_node, &dma_dev->channels);
+
+	/* Register with Linux async DMA framework */
+	ret = dma_async_device_register(dma_dev);
+	if (ret) {
+		dev_err(sba->dev, "async device register error %d", ret);
+		return ret;
+	}
+
+	dev_info(sba->dev, "%s capabilities: %s%s%s%s\n",
+		 dma_chan_name(&sba->dma_chan),
+		 dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "interrupt " : "",
+		 dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "memcpy " : "",
+		 dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
+		 dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "");
+
+	return 0;
+}
+
+static int sba_probe(struct platform_device *pdev)
+{
+	int ret = 0;
+	struct sba_device *sba;
+	struct platform_device *mbox_pdev;
+	struct of_phandle_args args;
+
+	/* Allocate main SBA struct */
+	sba = devm_kzalloc(&pdev->dev, sizeof(*sba), GFP_KERNEL);
+	if (!sba)
+		return -ENOMEM;
+
+	sba->dev = &pdev->dev;
+	platform_set_drvdata(pdev, sba);
+
+	/* Number of mailbox channels should be at least 1 */
+	ret = of_count_phandle_with_args(pdev->dev.of_node,
+					 "mboxes", "#mbox-cells");
+	if (ret <= 0)
+		return -ENODEV;
+
+	/* Determine SBA version from DT compatible string */
+	if (of_device_is_compatible(sba->dev->of_node, "brcm,iproc-sba"))
+		sba->ver = SBA_VER_1;
+	else if (of_device_is_compatible(sba->dev->of_node,
+					 "brcm,iproc-sba-v2"))
+		sba->ver = SBA_VER_2;
+	else
+		return -ENODEV;
+
+	/* Derived configuration parameters */
+	switch (sba->ver) {
+	case SBA_VER_1:
+		sba->hw_buf_size = 4096;
+		sba->hw_resp_size = 8;
+		sba->max_pq_coefs = 6;
+		sba->max_pq_srcs = 6;
+		break;
+	case SBA_VER_2:
+		sba->hw_buf_size = 4096;
+		sba->hw_resp_size = 8;
+		sba->max_pq_coefs = 30;
+		/*
+		 * We cannot support max_pq_srcs == max_pq_coefs because
+		 * we are limited by the number of SBA commands that we
+		 * can fit in one message for the underlying ring manager
+		 * HW.
+		 */
+		sba->max_pq_srcs = 12;
+		break;
+	default:
+		return -EINVAL;
+	}
+	sba->max_req = SBA_MAX_REQ_PER_MBOX_CHANNEL;
+	sba->max_cmd_per_req = sba->max_pq_srcs + 3;
+	sba->max_xor_srcs = sba->max_cmd_per_req - 1;
+	sba->max_resp_pool_size = sba->max_req * sba->hw_resp_size;
+	sba->max_cmds_pool_size = sba->max_req *
+				  sba->max_cmd_per_req * sizeof(u64);
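+	/*
+	 * Worked example (illustration only): for "brcm,iproc-sba-v2" the
+	 * values above give max_cmd_per_req = 12 + 3 = 15 and
+	 * max_xor_srcs = 14, i.e. each request carries at most 15 SBA
+	 * commands (15 * sizeof(u64) = 120 bytes of the command pool).
+	 */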
+
+	/* Setup mailbox client */
+	sba->client.dev			= &pdev->dev;
+	sba->client.rx_callback		= sba_receive_message;
+	sba->client.tx_block		= false;
+	sba->client.knows_txdone	= true;
+	sba->client.tx_tout		= 0;
+
+	/* Request mailbox channel */
+	sba->mchan = mbox_request_channel(&sba->client, 0);
+	if (IS_ERR(sba->mchan)) {
+		ret = PTR_ERR(sba->mchan);
+		goto fail_free_mchan;
+	}
+
+	/* Find out the underlying mailbox device */
+	ret = of_parse_phandle_with_args(pdev->dev.of_node,
+					 "mboxes", "#mbox-cells", 0, &args);
+	if (ret)
+		goto fail_free_mchan;
+	mbox_pdev = of_find_device_by_node(args.np);
+	of_node_put(args.np);
+	if (!mbox_pdev) {
+		ret = -ENODEV;
+		goto fail_free_mchan;
+	}
+	sba->mbox_dev = &mbox_pdev->dev;
+
+	/* Preallocate channel resources */
+	ret = sba_prealloc_channel_resources(sba);
+	if (ret)
+		goto fail_free_mchan;
+
+	/* Check availability of debugfs */
+	if (!debugfs_initialized())
+		goto skip_debugfs;
+
+	/* Create debugfs root entry */
+	sba->root = debugfs_create_dir(dev_name(sba->dev), NULL);
+	if (IS_ERR_OR_NULL(sba->root)) {
+		dev_err(sba->dev, "failed to create debugfs root entry\n");
+		sba->root = NULL;
+		goto skip_debugfs;
+	}
+
+	/* Create debugfs stats entry */
+	sba->stats = debugfs_create_devm_seqfile(sba->dev, "stats", sba->root,
+						 sba_debugfs_stats_show);
+	if (IS_ERR_OR_NULL(sba->stats))
+		dev_err(sba->dev, "failed to create debugfs stats file\n");
+skip_debugfs:
+
+	/* Register DMA device with Linux async framework */
+	ret = sba_async_register(sba);
+	if (ret)
+		goto fail_free_resources;
+
+	/* Print device info */
+	dev_info(sba->dev, "%s using SBAv%d mailbox channel from %s",
+		 dma_chan_name(&sba->dma_chan), sba->ver + 1,
+		 dev_name(sba->mbox_dev));
+
+	return 0;
+
+fail_free_resources:
+	debugfs_remove_recursive(sba->root);
+	sba_freeup_channel_resources(sba);
+fail_free_mchan:
+	mbox_free_channel(sba->mchan);
+	return ret;
+}
+
+static int sba_remove(struct platform_device *pdev)
+{
+	struct sba_device *sba = platform_get_drvdata(pdev);
+
+	dma_async_device_unregister(&sba->dma_dev);
+
+	debugfs_remove_recursive(sba->root);
+
+	sba_freeup_channel_resources(sba);
+
+	mbox_free_channel(sba->mchan);
+
+	return 0;
+}
+
+static const struct of_device_id sba_of_match[] = {
+	{ .compatible = "brcm,iproc-sba", },
+	{ .compatible = "brcm,iproc-sba-v2", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, sba_of_match);
+
+static struct platform_driver sba_driver = {
+	.probe = sba_probe,
+	.remove = sba_remove,
+	.driver = {
+		.name = "bcm-sba-raid",
+		.of_match_table = sba_of_match,
+	},
+};
+module_platform_driver(sba_driver);
+
+MODULE_DESCRIPTION("Broadcom SBA RAID driver");
+MODULE_AUTHOR("Anup Patel <anup.patel@broadcom.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c
new file mode 100644
index 0000000..847f84a
--- /dev/null
+++ b/drivers/dma/bcm2835-dma.c
@@ -0,0 +1,1067 @@
+/*
+ * BCM2835 DMA engine support
+ *
+ * This driver only supports cyclic DMA transfers
+ * as needed for the I2S module.
+ *
+ * Author:      Florian Meier <florian.meier@koalo.de>
+ *              Copyright 2013
+ *
+ * Based on
+ *	OMAP DMAengine support by Russell King
+ *
+ *	BCM2708 DMA Driver
+ *	Copyright (C) 2010 Broadcom
+ *
+ *	Raspberry Pi PCM I2S ALSA Driver
+ *	Copyright (c) by Phil Poole 2013
+ *
+ *	MARVELL MMP Peripheral DMA Driver
+ *	Copyright 2012 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+
+#include "virt-dma.h"
+
+#define BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED 14
+#define BCM2835_DMA_CHAN_NAME_SIZE 8
+
+struct bcm2835_dmadev {
+	struct dma_device ddev;
+	spinlock_t lock;
+	void __iomem *base;
+	struct device_dma_parameters dma_parms;
+};
+
+struct bcm2835_dma_cb {
+	uint32_t info;
+	uint32_t src;
+	uint32_t dst;
+	uint32_t length;
+	uint32_t stride;
+	uint32_t next;
+	uint32_t pad[2];
+};
+
+struct bcm2835_cb_entry {
+	struct bcm2835_dma_cb *cb;
+	dma_addr_t paddr;
+};
+
+struct bcm2835_chan {
+	struct virt_dma_chan vc;
+	struct list_head node;
+
+	struct dma_slave_config	cfg;
+	unsigned int dreq;
+
+	int ch;
+	struct bcm2835_desc *desc;
+	struct dma_pool *cb_pool;
+
+	void __iomem *chan_base;
+	int irq_number;
+	unsigned int irq_flags;
+
+	bool is_lite_channel;
+};
+
+struct bcm2835_desc {
+	struct bcm2835_chan *c;
+	struct virt_dma_desc vd;
+	enum dma_transfer_direction dir;
+
+	unsigned int frames;
+	size_t size;
+
+	bool cyclic;
+
+	struct bcm2835_cb_entry cb_list[];
+};
+
+#define BCM2835_DMA_CS		0x00
+#define BCM2835_DMA_ADDR	0x04
+#define BCM2835_DMA_TI		0x08
+#define BCM2835_DMA_SOURCE_AD	0x0c
+#define BCM2835_DMA_DEST_AD	0x10
+#define BCM2835_DMA_LEN		0x14
+#define BCM2835_DMA_STRIDE	0x18
+#define BCM2835_DMA_NEXTCB	0x1c
+#define BCM2835_DMA_DEBUG	0x20
+
+/* DMA CS Control and Status bits */
+#define BCM2835_DMA_ACTIVE	BIT(0)  /* activate the DMA */
+#define BCM2835_DMA_END		BIT(1)  /* current CB has ended */
+#define BCM2835_DMA_INT		BIT(2)  /* interrupt status */
+#define BCM2835_DMA_DREQ	BIT(3)  /* DREQ state */
+#define BCM2835_DMA_ISPAUSED	BIT(4)  /* Pause requested or not active */
+#define BCM2835_DMA_ISHELD	BIT(5)  /* Is held by DREQ flow control */
+#define BCM2835_DMA_WAITING_FOR_WRITES BIT(6) /* waiting for last
+					       * AXI-write to ack
+					       */
+#define BCM2835_DMA_ERR		BIT(8)
+#define BCM2835_DMA_PRIORITY(x) (((x) & 15) << 16) /* AXI priority */
+#define BCM2835_DMA_PANIC_PRIORITY(x) (((x) & 15) << 20) /* panic priority */
+/* current value of TI.BCM2835_DMA_WAIT_RESP */
+#define BCM2835_DMA_WAIT_FOR_WRITES BIT(28)
+#define BCM2835_DMA_DIS_DEBUG	BIT(29) /* disable debug pause signal */
+#define BCM2835_DMA_ABORT	BIT(30) /* Stop current CB, go to next, WO */
+#define BCM2835_DMA_RESET	BIT(31) /* WO, self clearing */
+
+/* Transfer information bits - also bcm2835_cb.info field */
+#define BCM2835_DMA_INT_EN	BIT(0)
+#define BCM2835_DMA_TDMODE	BIT(1) /* 2D-Mode */
+#define BCM2835_DMA_WAIT_RESP	BIT(3) /* wait for AXI-write to be acked */
+#define BCM2835_DMA_D_INC	BIT(4)
+#define BCM2835_DMA_D_WIDTH	BIT(5) /* 128bit writes if set */
+#define BCM2835_DMA_D_DREQ	BIT(6) /* enable DREQ for destination */
+#define BCM2835_DMA_D_IGNORE	BIT(7) /* ignore destination writes */
+#define BCM2835_DMA_S_INC	BIT(8)
+#define BCM2835_DMA_S_WIDTH	BIT(9) /* 128bit writes if set */
+#define BCM2835_DMA_S_DREQ	BIT(10) /* enable SREQ for source */
+#define BCM2835_DMA_S_IGNORE	BIT(11) /* ignore source reads - read 0 */
+#define BCM2835_DMA_BURST_LENGTH(x) (((x) & 15) << 12)
+#define BCM2835_DMA_PER_MAP(x)	(((x) & 31) << 16) /* REQ source */
+#define BCM2835_DMA_WAIT(x)	(((x) & 31) << 21) /* add DMA-wait cycles */
+#define BCM2835_DMA_NO_WIDE_BURSTS BIT(26) /* no 2 beat write bursts */
+
+/* debug register bits */
+#define BCM2835_DMA_DEBUG_LAST_NOT_SET_ERR	BIT(0)
+#define BCM2835_DMA_DEBUG_FIFO_ERR		BIT(1)
+#define BCM2835_DMA_DEBUG_READ_ERR		BIT(2)
+#define BCM2835_DMA_DEBUG_OUTSTANDING_WRITES_SHIFT 4
+#define BCM2835_DMA_DEBUG_OUTSTANDING_WRITES_BITS 4
+#define BCM2835_DMA_DEBUG_ID_SHIFT		16
+#define BCM2835_DMA_DEBUG_ID_BITS		9
+#define BCM2835_DMA_DEBUG_STATE_SHIFT		16
+#define BCM2835_DMA_DEBUG_STATE_BITS		9
+#define BCM2835_DMA_DEBUG_VERSION_SHIFT		25
+#define BCM2835_DMA_DEBUG_VERSION_BITS		3
+#define BCM2835_DMA_DEBUG_LITE			BIT(28)
+
+/* shared registers for all dma channels */
+#define BCM2835_DMA_INT_STATUS         0xfe0
+#define BCM2835_DMA_ENABLE             0xff0
+
+#define BCM2835_DMA_DATA_TYPE_S8	1
+#define BCM2835_DMA_DATA_TYPE_S16	2
+#define BCM2835_DMA_DATA_TYPE_S32	4
+#define BCM2835_DMA_DATA_TYPE_S128	16
+
+/* Valid only for channels 0 - 14, 15 has its own base address */
+#define BCM2835_DMA_CHAN(n)	((n) << 8) /* Base address */
+#define BCM2835_DMA_CHANIO(base, n) ((base) + BCM2835_DMA_CHAN(n))
+
+/* the max dma length for different channels */
+#define MAX_DMA_LEN SZ_1G
+#define MAX_LITE_DMA_LEN (SZ_64K - 4)
+
+static inline size_t bcm2835_dma_max_frame_length(struct bcm2835_chan *c)
+{
+	/* lite and normal channels have different max frame length */
+	return c->is_lite_channel ? MAX_LITE_DMA_LEN : MAX_DMA_LEN;
+}
+
+/* how many frames of max_len size do we need to transfer len bytes */
+static inline size_t bcm2835_dma_frames_for_length(size_t len,
+						   size_t max_len)
+{
+	return DIV_ROUND_UP(len, max_len);
+}
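+
+/*
+ * For illustration: a 100 KiB transfer needs
+ * DIV_ROUND_UP(102400, 65532) = 2 frames on a lite channel
+ * (MAX_LITE_DMA_LEN = 64 KiB - 4) but only a single frame on a normal
+ * channel (MAX_DMA_LEN = 1 GiB).
+ */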
+
+static inline struct bcm2835_dmadev *to_bcm2835_dma_dev(struct dma_device *d)
+{
+	return container_of(d, struct bcm2835_dmadev, ddev);
+}
+
+static inline struct bcm2835_chan *to_bcm2835_dma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct bcm2835_chan, vc.chan);
+}
+
+static inline struct bcm2835_desc *to_bcm2835_dma_desc(
+		struct dma_async_tx_descriptor *t)
+{
+	return container_of(t, struct bcm2835_desc, vd.tx);
+}
+
+static void bcm2835_dma_free_cb_chain(struct bcm2835_desc *desc)
+{
+	size_t i;
+
+	for (i = 0; i < desc->frames; i++)
+		dma_pool_free(desc->c->cb_pool, desc->cb_list[i].cb,
+			      desc->cb_list[i].paddr);
+
+	kfree(desc);
+}
+
+static void bcm2835_dma_desc_free(struct virt_dma_desc *vd)
+{
+	bcm2835_dma_free_cb_chain(
+		container_of(vd, struct bcm2835_desc, vd));
+}
+
+static void bcm2835_dma_create_cb_set_length(
+	struct bcm2835_chan *chan,
+	struct bcm2835_dma_cb *control_block,
+	size_t len,
+	size_t period_len,
+	size_t *total_len,
+	u32 finalextrainfo)
+{
+	size_t max_len = bcm2835_dma_max_frame_length(chan);
+
+	/* set the length taking lite-channel limitations into account */
+	control_block->length = min_t(u32, len, max_len);
+
+	/* finished if we have no period_length */
+	if (!period_len)
+		return;
+
+	/*
+	 * A non-zero period_len means that we need to generate
+	 * transfers that terminate at every multiple of period_len -
+	 * this is typically used to set the interrupt flag in info,
+	 * which is required during cyclic transfers.
+	 */
+
+	/* have we filled in period_length yet? */
+	if (*total_len + control_block->length < period_len) {
+		/* update number of bytes in this period so far */
+		*total_len += control_block->length;
+		return;
+	}
+
+	/* calculate the length that remains to reach period_length */
+	control_block->length = period_len - *total_len;
+
+	/* reset total_length for next period */
+	*total_len = 0;
+
+	/* add extrainfo bits in info */
+	control_block->info |= finalextrainfo;
+}
+
+static inline size_t bcm2835_dma_count_frames_for_sg(
+	struct bcm2835_chan *c,
+	struct scatterlist *sgl,
+	unsigned int sg_len)
+{
+	size_t frames = 0;
+	struct scatterlist *sgent;
+	unsigned int i;
+	size_t plength = bcm2835_dma_max_frame_length(c);
+
+	for_each_sg(sgl, sgent, sg_len, i)
+		frames += bcm2835_dma_frames_for_length(
+			sg_dma_len(sgent), plength);
+
+	return frames;
+}
+
+/**
+ * bcm2835_dma_create_cb_chain - create a chain of control blocks and fill in the data
+ *
+ * @chan:           the @dma_chan for which we run this
+ * @direction:      the direction in which we transfer
+ * @cyclic:         whether this is a cyclic transfer
+ * @info:           the default info bits to apply per controlblock
+ * @frames:         number of controlblocks to allocate
+ * @src:            the src address to assign (if the S_INC bit is set
+ *                  in @info, then it gets incremented)
+ * @dst:            the dst address to assign (if the D_INC bit is set
+ *                  in @info, then it gets incremented)
+ * @buf_len:        the full buffer length (may also be 0)
+ * @period_len:     the period length at which to apply @finalextrainfo,
+ *                  in addition to the last transfer; this will also
+ *                  end some control blocks early
+ * @finalextrainfo: additional bits in last controlblock
+ *                  (or when period_len is reached in case of cyclic)
+ * @gfp:            the GFP flag to use for allocation
+ */
+static struct bcm2835_desc *bcm2835_dma_create_cb_chain(
+	struct dma_chan *chan, enum dma_transfer_direction direction,
+	bool cyclic, u32 info, u32 finalextrainfo, size_t frames,
+	dma_addr_t src, dma_addr_t dst, size_t buf_len,
+	size_t period_len, gfp_t gfp)
+{
+	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+	size_t len = buf_len, total_len;
+	size_t frame;
+	struct bcm2835_desc *d;
+	struct bcm2835_cb_entry *cb_entry;
+	struct bcm2835_dma_cb *control_block;
+
+	if (!frames)
+		return NULL;
+
+	/* allocate and setup the descriptor. */
+	d = kzalloc(sizeof(*d) + frames * sizeof(struct bcm2835_cb_entry),
+		    gfp);
+	if (!d)
+		return NULL;
+
+	d->c = c;
+	d->dir = direction;
+	d->cyclic = cyclic;
+
+	/*
+	 * Iterate over all frames, create a control block
+	 * for each frame and link them together.
+	 */
+	for (frame = 0, total_len = 0; frame < frames; d->frames++, frame++) {
+		cb_entry = &d->cb_list[frame];
+		cb_entry->cb = dma_pool_alloc(c->cb_pool, gfp,
+					      &cb_entry->paddr);
+		if (!cb_entry->cb)
+			goto error_cb;
+
+		/* fill in the control block */
+		control_block = cb_entry->cb;
+		control_block->info = info;
+		control_block->src = src;
+		control_block->dst = dst;
+		control_block->stride = 0;
+		control_block->next = 0;
+		/* set up length in control_block if requested */
+		if (buf_len) {
+			/* calculate length honoring period_length */
+			bcm2835_dma_create_cb_set_length(
+				c, control_block,
+				len, period_len, &total_len,
+				cyclic ? finalextrainfo : 0);
+
+			/* calculate new remaining length */
+			len -= control_block->length;
+		}
+
+		/* link this to the last control block */
+		if (frame)
+			d->cb_list[frame - 1].cb->next = cb_entry->paddr;
+
+		/* update src and dst and length */
+		if (src && (info & BCM2835_DMA_S_INC))
+			src += control_block->length;
+		if (dst && (info & BCM2835_DMA_D_INC))
+			dst += control_block->length;
+
+		/* Length of total transfer */
+		d->size += control_block->length;
+	}
+
+	/* the last frame requires extra flags */
+	d->cb_list[d->frames - 1].cb->info |= finalextrainfo;
+
+	/* detect a size mismatch */
+	if (buf_len && (d->size != buf_len))
+		goto error_cb;
+
+	return d;
+error_cb:
+	bcm2835_dma_free_cb_chain(d);
+
+	return NULL;
+}
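+
+/*
+ * For illustration (not part of the driver logic): a 128 KiB memcpy on
+ * a lite channel becomes a chain of three control blocks of 65532,
+ * 65532 and 8 bytes; only the last one receives @finalextrainfo (for
+ * memcpy that is BCM2835_DMA_INT_EN | BCM2835_DMA_WAIT_RESP), so a
+ * single interrupt fires once the whole chain has completed.
+ */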
+
+static void bcm2835_dma_fill_cb_chain_with_sg(
+	struct dma_chan *chan,
+	enum dma_transfer_direction direction,
+	struct bcm2835_cb_entry *cb,
+	struct scatterlist *sgl,
+	unsigned int sg_len)
+{
+	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+	size_t len, max_len;
+	unsigned int i;
+	dma_addr_t addr;
+	struct scatterlist *sgent;
+
+	max_len = bcm2835_dma_max_frame_length(c);
+	for_each_sg(sgl, sgent, sg_len, i) {
+		for (addr = sg_dma_address(sgent), len = sg_dma_len(sgent);
+		     len > 0;
+		     addr += cb->cb->length, len -= cb->cb->length, cb++) {
+			if (direction == DMA_DEV_TO_MEM)
+				cb->cb->dst = addr;
+			else
+				cb->cb->src = addr;
+			cb->cb->length = min(len, max_len);
+		}
+	}
+}
+
+static int bcm2835_dma_abort(void __iomem *chan_base)
+{
+	unsigned long cs;
+	long int timeout = 10000;
+
+	cs = readl(chan_base + BCM2835_DMA_CS);
+	if (!(cs & BCM2835_DMA_ACTIVE))
+		return 0;
+
+	/* Write 0 to the active bit - Pause the DMA */
+	writel(0, chan_base + BCM2835_DMA_CS);
+
+	/* Wait for any current AXI transfer to complete */
+	while ((cs & BCM2835_DMA_ISPAUSED) && --timeout) {
+		cpu_relax();
+		cs = readl(chan_base + BCM2835_DMA_CS);
+	}
+
+	/* We'll un-pause when we set off our next DMA */
+	if (!timeout)
+		return -ETIMEDOUT;
+
+	if (!(cs & BCM2835_DMA_ACTIVE))
+		return 0;
+
+	/* Terminate the control block chain */
+	writel(0, chan_base + BCM2835_DMA_NEXTCB);
+
+	/* Abort the whole DMA */
+	writel(BCM2835_DMA_ABORT | BCM2835_DMA_ACTIVE,
+	       chan_base + BCM2835_DMA_CS);
+
+	return 0;
+}
+
+static void bcm2835_dma_start_desc(struct bcm2835_chan *c)
+{
+	struct virt_dma_desc *vd = vchan_next_desc(&c->vc);
+	struct bcm2835_desc *d;
+
+	if (!vd) {
+		c->desc = NULL;
+		return;
+	}
+
+	list_del(&vd->node);
+
+	c->desc = d = to_bcm2835_dma_desc(&vd->tx);
+
+	writel(d->cb_list[0].paddr, c->chan_base + BCM2835_DMA_ADDR);
+	writel(BCM2835_DMA_ACTIVE, c->chan_base + BCM2835_DMA_CS);
+}
+
+static irqreturn_t bcm2835_dma_callback(int irq, void *data)
+{
+	struct bcm2835_chan *c = data;
+	struct bcm2835_desc *d;
+	unsigned long flags;
+
+	/* check the shared interrupt */
+	if (c->irq_flags & IRQF_SHARED) {
+		/* check if the interrupt is enabled */
+		flags = readl(c->chan_base + BCM2835_DMA_CS);
+		/* if not set then we are not the reason for the irq */
+		if (!(flags & BCM2835_DMA_INT))
+			return IRQ_NONE;
+	}
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+
+	/* Acknowledge interrupt */
+	writel(BCM2835_DMA_INT, c->chan_base + BCM2835_DMA_CS);
+
+	d = c->desc;
+
+	if (d) {
+		if (d->cyclic) {
+			/* call the cyclic callback */
+			vchan_cyclic_callback(&d->vd);
+
+			/* Keep the DMA engine running */
+			writel(BCM2835_DMA_ACTIVE,
+			       c->chan_base + BCM2835_DMA_CS);
+		} else {
+			vchan_cookie_complete(&c->desc->vd);
+			bcm2835_dma_start_desc(c);
+		}
+	}
+
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+
+	return IRQ_HANDLED;
+}
+
+static int bcm2835_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+	struct device *dev = c->vc.chan.device->dev;
+
+	dev_dbg(dev, "Allocating DMA channel %d\n", c->ch);
+
+	c->cb_pool = dma_pool_create(dev_name(dev), dev,
+				     sizeof(struct bcm2835_dma_cb), 0, 0);
+	if (!c->cb_pool) {
+		dev_err(dev, "unable to allocate descriptor pool\n");
+		return -ENOMEM;
+	}
+
+	return request_irq(c->irq_number, bcm2835_dma_callback,
+			   c->irq_flags, "DMA IRQ", c);
+}
+
+static void bcm2835_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+
+	vchan_free_chan_resources(&c->vc);
+	free_irq(c->irq_number, c);
+	dma_pool_destroy(c->cb_pool);
+
+	dev_dbg(c->vc.chan.device->dev, "Freeing DMA channel %u\n", c->ch);
+}
+
+static size_t bcm2835_dma_desc_size(struct bcm2835_desc *d)
+{
+	return d->size;
+}
+
+static size_t bcm2835_dma_desc_size_pos(struct bcm2835_desc *d, dma_addr_t addr)
+{
+	unsigned int i;
+	size_t size;
+
+	for (size = i = 0; i < d->frames; i++) {
+		struct bcm2835_dma_cb *control_block = d->cb_list[i].cb;
+		size_t this_size = control_block->length;
+		dma_addr_t dma;
+
+		if (d->dir == DMA_DEV_TO_MEM)
+			dma = control_block->dst;
+		else
+			dma = control_block->src;
+
+		if (size)
+			size += this_size;
+		else if (addr >= dma && addr < dma + this_size)
+			size += dma + this_size - addr;
+	}
+
+	return size;
+}
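+
+/*
+ * For illustration: for a DMA_MEM_TO_DEV descriptor with two 4096-byte
+ * frames, if the hardware source address currently points 1000 bytes
+ * into the first frame, the residue reported by bcm2835_dma_tx_status()
+ * below is (4096 - 1000) + 4096 = 7192 bytes.
+ */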
+
+static enum dma_status bcm2835_dma_tx_status(struct dma_chan *chan,
+	dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+	struct virt_dma_desc *vd;
+	enum dma_status ret;
+	unsigned long flags;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE || !txstate)
+		return ret;
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+	vd = vchan_find_desc(&c->vc, cookie);
+	if (vd) {
+		txstate->residue =
+			bcm2835_dma_desc_size(to_bcm2835_dma_desc(&vd->tx));
+	} else if (c->desc && c->desc->vd.tx.cookie == cookie) {
+		struct bcm2835_desc *d = c->desc;
+		dma_addr_t pos;
+
+		if (d->dir == DMA_MEM_TO_DEV)
+			pos = readl(c->chan_base + BCM2835_DMA_SOURCE_AD);
+		else if (d->dir == DMA_DEV_TO_MEM)
+			pos = readl(c->chan_base + BCM2835_DMA_DEST_AD);
+		else
+			pos = 0;
+
+		txstate->residue = bcm2835_dma_desc_size_pos(d, pos);
+	} else {
+		txstate->residue = 0;
+	}
+
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+
+	return ret;
+}
+
+static void bcm2835_dma_issue_pending(struct dma_chan *chan)
+{
+	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+	if (vchan_issue_pending(&c->vc) && !c->desc)
+		bcm2835_dma_start_desc(c);
+
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+}
+
+static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_memcpy(
+	struct dma_chan *chan, dma_addr_t dst, dma_addr_t src,
+	size_t len, unsigned long flags)
+{
+	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+	struct bcm2835_desc *d;
+	u32 info = BCM2835_DMA_D_INC | BCM2835_DMA_S_INC;
+	u32 extra = BCM2835_DMA_INT_EN | BCM2835_DMA_WAIT_RESP;
+	size_t max_len = bcm2835_dma_max_frame_length(c);
+	size_t frames;
+
+	/* if src, dst or len is not given return with an error */
+	if (!src || !dst || !len)
+		return NULL;
+
+	/* calculate number of frames */
+	frames = bcm2835_dma_frames_for_length(len, max_len);
+
+	/* allocate the CB chain - this also fills in the pointers */
+	d = bcm2835_dma_create_cb_chain(chan, DMA_MEM_TO_MEM, false,
+					info, extra, frames,
+					src, dst, len, 0, GFP_KERNEL);
+	if (!d)
+		return NULL;
+
+	return vchan_tx_prep(&c->vc, &d->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *bcm2835_dma_prep_slave_sg(
+	struct dma_chan *chan,
+	struct scatterlist *sgl, unsigned int sg_len,
+	enum dma_transfer_direction direction,
+	unsigned long flags, void *context)
+{
+	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+	struct bcm2835_desc *d;
+	dma_addr_t src = 0, dst = 0;
+	u32 info = BCM2835_DMA_WAIT_RESP;
+	u32 extra = BCM2835_DMA_INT_EN;
+	size_t frames;
+
+	if (!is_slave_direction(direction)) {
+		dev_err(chan->device->dev,
+			"%s: bad direction?\n", __func__);
+		return NULL;
+	}
+
+	if (c->dreq != 0)
+		info |= BCM2835_DMA_PER_MAP(c->dreq);
+
+	if (direction == DMA_DEV_TO_MEM) {
+		if (c->cfg.src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
+			return NULL;
+		src = c->cfg.src_addr;
+		info |= BCM2835_DMA_S_DREQ | BCM2835_DMA_D_INC;
+	} else {
+		if (c->cfg.dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
+			return NULL;
+		dst = c->cfg.dst_addr;
+		info |= BCM2835_DMA_D_DREQ | BCM2835_DMA_S_INC;
+	}
+
+	/* count frames in sg list */
+	frames = bcm2835_dma_count_frames_for_sg(c, sgl, sg_len);
+
+	/* allocate the CB chain */
+	d = bcm2835_dma_create_cb_chain(chan, direction, false,
+					info, extra,
+					frames, src, dst, 0, 0,
+					GFP_KERNEL);
+	if (!d)
+		return NULL;
+
+	/* fill in frames with scatterlist pointers */
+	bcm2835_dma_fill_cb_chain_with_sg(chan, direction, d->cb_list,
+					  sgl, sg_len);
+
+	return vchan_tx_prep(&c->vc, &d->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic(
+	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+	size_t period_len, enum dma_transfer_direction direction,
+	unsigned long flags)
+{
+	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+	struct bcm2835_desc *d;
+	dma_addr_t src, dst;
+	u32 info = BCM2835_DMA_WAIT_RESP;
+	u32 extra = BCM2835_DMA_INT_EN;
+	size_t max_len = bcm2835_dma_max_frame_length(c);
+	size_t frames;
+
+	/* Grab configuration */
+	if (!is_slave_direction(direction)) {
+		dev_err(chan->device->dev, "%s: bad direction?\n", __func__);
+		return NULL;
+	}
+
+	if (!buf_len) {
+		dev_err(chan->device->dev,
+			"%s: bad buffer length (= 0)\n", __func__);
+		return NULL;
+	}
+
+	/*
+	 * warn if buf_len is not a multiple of period_len - this may lead
+	 * to unexpected latencies for interrupts and thus audible clicks
+	 */
+	if (buf_len % period_len)
+		dev_warn_once(chan->device->dev,
+			      "%s: buffer_length (%zd) is not a multiple of period_len (%zd)\n",
+			      __func__, buf_len, period_len);
+
+	/* Setup DREQ channel */
+	if (c->dreq != 0)
+		info |= BCM2835_DMA_PER_MAP(c->dreq);
+
+	if (direction == DMA_DEV_TO_MEM) {
+		if (c->cfg.src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
+			return NULL;
+		src = c->cfg.src_addr;
+		dst = buf_addr;
+		info |= BCM2835_DMA_S_DREQ | BCM2835_DMA_D_INC;
+	} else {
+		if (c->cfg.dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES)
+			return NULL;
+		dst = c->cfg.dst_addr;
+		src = buf_addr;
+		info |= BCM2835_DMA_D_DREQ | BCM2835_DMA_S_INC;
+	}
+
+	/* calculate number of frames */
+	frames = /* number of periods */
+		 DIV_ROUND_UP(buf_len, period_len) *
+		 /* number of frames per period */
+		 bcm2835_dma_frames_for_length(period_len, max_len);
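+	/*
+	 * For illustration: a typical ALSA setup of buf_len = 4 * 4096
+	 * bytes with period_len = 4096 on a normal channel gives
+	 * 4 periods * 1 frame per period = 4 control blocks, each ending
+	 * exactly on a period boundary and raising an interrupt there.
+	 */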
+
+	/*
+	 * allocate the CB chain
+	 * note that we need to use GFP_NOWAIT, as the ALSA i2s dmaengine
+	 * implementation calls prep_dma_cyclic with interrupts disabled.
+	 */
+	d = bcm2835_dma_create_cb_chain(chan, direction, true,
+					info, extra,
+					frames, src, dst, buf_len,
+					period_len, GFP_NOWAIT);
+	if (!d)
+		return NULL;
+
+	/* wrap around into a loop */
+	d->cb_list[d->frames - 1].cb->next = d->cb_list[0].paddr;
+
+	return vchan_tx_prep(&c->vc, &d->vd, flags);
+}
+
+static int bcm2835_dma_slave_config(struct dma_chan *chan,
+				    struct dma_slave_config *cfg)
+{
+	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+
+	if ((cfg->direction == DMA_DEV_TO_MEM &&
+	     cfg->src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) ||
+	    (cfg->direction == DMA_MEM_TO_DEV &&
+	     cfg->dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) ||
+	    !is_slave_direction(cfg->direction)) {
+		return -EINVAL;
+	}
+
+	c->cfg = *cfg;
+
+	return 0;
+}
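+
+/*
+ * Minimal client-side sketch (illustrative, not part of this driver):
+ * a peripheral driver would typically configure the channel roughly
+ * like this before preparing slave transfers; only 32-bit bus widths
+ * pass the check above.  fifo_phys_addr is a placeholder for the
+ * device's FIFO bus address.
+ *
+ *	struct dma_slave_config cfg = {
+ *		.direction = DMA_DEV_TO_MEM,
+ *		.src_addr = fifo_phys_addr,
+ *		.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+ *	};
+ *
+ *	ret = dmaengine_slave_config(chan, &cfg);
+ */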
+
+static int bcm2835_dma_terminate_all(struct dma_chan *chan)
+{
+	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+	struct bcm2835_dmadev *d = to_bcm2835_dma_dev(c->vc.chan.device);
+	unsigned long flags;
+	int timeout = 10000;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+
+	/* Prevent this channel being scheduled */
+	spin_lock(&d->lock);
+	list_del_init(&c->node);
+	spin_unlock(&d->lock);
+
+	/*
+	 * Stop DMA activity: we assume the callback will not be called
+	 * after bcm2835_dma_abort() returns (even if it does, it will
+	 * see c->desc is NULL and exit).
+	 */
+	if (c->desc) {
+		vchan_terminate_vdesc(&c->desc->vd);
+		c->desc = NULL;
+		bcm2835_dma_abort(c->chan_base);
+
+		/* Wait for stopping */
+		while (--timeout) {
+			if (!(readl(c->chan_base + BCM2835_DMA_CS) &
+						BCM2835_DMA_ACTIVE))
+				break;
+
+			cpu_relax();
+		}
+
+		if (!timeout)
+			dev_err(d->ddev.dev, "DMA transfer could not be terminated\n");
+	}
+
+	vchan_get_all_descriptors(&c->vc, &head);
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+	vchan_dma_desc_free_list(&c->vc, &head);
+
+	return 0;
+}
+
+static void bcm2835_dma_synchronize(struct dma_chan *chan)
+{
+	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
+
+	vchan_synchronize(&c->vc);
+}
+
+static int bcm2835_dma_chan_init(struct bcm2835_dmadev *d, int chan_id,
+				 int irq, unsigned int irq_flags)
+{
+	struct bcm2835_chan *c;
+
+	c = devm_kzalloc(d->ddev.dev, sizeof(*c), GFP_KERNEL);
+	if (!c)
+		return -ENOMEM;
+
+	c->vc.desc_free = bcm2835_dma_desc_free;
+	vchan_init(&c->vc, &d->ddev);
+	INIT_LIST_HEAD(&c->node);
+
+	c->chan_base = BCM2835_DMA_CHANIO(d->base, chan_id);
+	c->ch = chan_id;
+	c->irq_number = irq;
+	c->irq_flags = irq_flags;
+
+	/* check in DEBUG register if this is a LITE channel */
+	if (readl(c->chan_base + BCM2835_DMA_DEBUG) &
+		BCM2835_DMA_DEBUG_LITE)
+		c->is_lite_channel = true;
+
+	return 0;
+}
+
+static void bcm2835_dma_free(struct bcm2835_dmadev *od)
+{
+	struct bcm2835_chan *c, *next;
+
+	list_for_each_entry_safe(c, next, &od->ddev.channels,
+				 vc.chan.device_node) {
+		list_del(&c->vc.chan.device_node);
+		tasklet_kill(&c->vc.task);
+	}
+}
+
+static const struct of_device_id bcm2835_dma_of_match[] = {
+	{ .compatible = "brcm,bcm2835-dma", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, bcm2835_dma_of_match);
+
+static struct dma_chan *bcm2835_dma_xlate(struct of_phandle_args *spec,
+					   struct of_dma *ofdma)
+{
+	struct bcm2835_dmadev *d = ofdma->of_dma_data;
+	struct dma_chan *chan;
+
+	chan = dma_get_any_slave_channel(&d->ddev);
+	if (!chan)
+		return NULL;
+
+	/* Set DREQ from param */
+	to_bcm2835_dma_chan(chan)->dreq = spec->args[0];
+
+	return chan;
+}
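+
+/*
+ * Device-tree sketch (illustrative): the single DMA cell carries the
+ * DREQ number that bcm2835_dma_xlate() stores in the channel, so a
+ * client node might contain something like
+ *
+ *	dmas = <&dma 2>;
+ *	dma-names = "tx";
+ *
+ * The actual DREQ assignments come from the SoC documentation and the
+ * dts, not from this driver.
+ */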
+
+static int bcm2835_dma_probe(struct platform_device *pdev)
+{
+	struct bcm2835_dmadev *od;
+	struct resource *res;
+	void __iomem *base;
+	int rc;
+	int i, j;
+	int irq[BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED + 1];
+	int irq_flags;
+	uint32_t chans_available;
+	char chan_name[BCM2835_DMA_CHAN_NAME_SIZE];
+
+	if (!pdev->dev.dma_mask)
+		pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask;
+
+	rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+	if (rc)
+		return rc;
+
+	od = devm_kzalloc(&pdev->dev, sizeof(*od), GFP_KERNEL);
+	if (!od)
+		return -ENOMEM;
+
+	pdev->dev.dma_parms = &od->dma_parms;
+	dma_set_max_seg_size(&pdev->dev, 0x3FFFFFFF);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	od->base = base;
+
+	dma_cap_set(DMA_SLAVE, od->ddev.cap_mask);
+	dma_cap_set(DMA_PRIVATE, od->ddev.cap_mask);
+	dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask);
+	dma_cap_set(DMA_MEMCPY, od->ddev.cap_mask);
+	od->ddev.device_alloc_chan_resources = bcm2835_dma_alloc_chan_resources;
+	od->ddev.device_free_chan_resources = bcm2835_dma_free_chan_resources;
+	od->ddev.device_tx_status = bcm2835_dma_tx_status;
+	od->ddev.device_issue_pending = bcm2835_dma_issue_pending;
+	od->ddev.device_prep_dma_cyclic = bcm2835_dma_prep_dma_cyclic;
+	od->ddev.device_prep_slave_sg = bcm2835_dma_prep_slave_sg;
+	od->ddev.device_prep_dma_memcpy = bcm2835_dma_prep_dma_memcpy;
+	od->ddev.device_config = bcm2835_dma_slave_config;
+	od->ddev.device_terminate_all = bcm2835_dma_terminate_all;
+	od->ddev.device_synchronize = bcm2835_dma_synchronize;
+	od->ddev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	od->ddev.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	od->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV) |
+			      BIT(DMA_MEM_TO_MEM);
+	od->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+	od->ddev.dev = &pdev->dev;
+	INIT_LIST_HEAD(&od->ddev.channels);
+	spin_lock_init(&od->lock);
+
+	platform_set_drvdata(pdev, od);
+
+	/* Request DMA channel mask from device tree */
+	if (of_property_read_u32(pdev->dev.of_node,
+			"brcm,dma-channel-mask",
+			&chans_available)) {
+		dev_err(&pdev->dev, "Failed to get channel mask\n");
+		rc = -EINVAL;
+		goto err_no_dma;
+	}
+
+	/* get irqs for each channel that we support */
+	for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) {
+		/* skip masked out channels */
+		if (!(chans_available & (1 << i))) {
+			irq[i] = -1;
+			continue;
+		}
+
+		/* get the named irq */
+		snprintf(chan_name, sizeof(chan_name), "dma%i", i);
+		irq[i] = platform_get_irq_byname(pdev, chan_name);
+		if (irq[i] >= 0)
+			continue;
+
+		/* legacy device tree case handling */
+		dev_warn_once(&pdev->dev,
+			      "missing interrupt-names property in device tree - legacy interpretation is used\n");
+		/*
+		 * for channels >= 11 use interrupt 11,
+		 * which is shared between them
+		 */
+		irq[i] = platform_get_irq(pdev, i < 11 ? i : 11);
+	}
+
+	/* get irqs for each channel */
+	for (i = 0; i <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; i++) {
+		/* skip channels without irq */
+		if (irq[i] < 0)
+			continue;
+
+		/* check if there are other channels that also use this irq */
+		irq_flags = 0;
+		for (j = 0; j <= BCM2835_DMA_MAX_DMA_CHAN_SUPPORTED; j++)
+			if ((i != j) && (irq[j] == irq[i])) {
+				irq_flags = IRQF_SHARED;
+				break;
+			}
+
+		/* initialize the channel */
+		rc = bcm2835_dma_chan_init(od, i, irq[i], irq_flags);
+		if (rc)
+			goto err_no_dma;
+	}
+
+	dev_dbg(&pdev->dev, "Initialized %i DMA channels\n", i);
+
+	/* Device-tree DMA controller registration */
+	rc = of_dma_controller_register(pdev->dev.of_node,
+			bcm2835_dma_xlate, od);
+	if (rc) {
+		dev_err(&pdev->dev, "Failed to register DMA controller\n");
+		goto err_no_dma;
+	}
+
+	rc = dma_async_device_register(&od->ddev);
+	if (rc) {
+		dev_err(&pdev->dev,
+			"Failed to register slave DMA engine device: %d\n", rc);
+		goto err_no_dma;
+	}
+
+	dev_dbg(&pdev->dev, "Load BCM2835 DMA engine driver\n");
+
+	return 0;
+
+err_no_dma:
+	bcm2835_dma_free(od);
+	return rc;
+}
+
+static int bcm2835_dma_remove(struct platform_device *pdev)
+{
+	struct bcm2835_dmadev *od = platform_get_drvdata(pdev);
+
+	dma_async_device_unregister(&od->ddev);
+	bcm2835_dma_free(od);
+
+	return 0;
+}
+
+static struct platform_driver bcm2835_dma_driver = {
+	.probe	= bcm2835_dma_probe,
+	.remove	= bcm2835_dma_remove,
+	.driver = {
+		.name = "bcm2835-dma",
+		.of_match_table = of_match_ptr(bcm2835_dma_of_match),
+	},
+};
+
+module_platform_driver(bcm2835_dma_driver);
+
+MODULE_ALIAS("platform:bcm2835-dma");
+MODULE_DESCRIPTION("BCM2835 DMA engine driver");
+MODULE_AUTHOR("Florian Meier <florian.meier@koalo.de>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/bestcomm/Kconfig b/drivers/dma/bestcomm/Kconfig
new file mode 100644
index 0000000..29e4270
--- /dev/null
+++ b/drivers/dma/bestcomm/Kconfig
@@ -0,0 +1,36 @@
+#
+# Kconfig options for Bestcomm
+#
+
+config PPC_BESTCOMM
+	tristate "Bestcomm DMA engine support"
+	depends on PPC_MPC52xx
+	default n
+	select PPC_LIB_RHEAP
+	help
+	  BestComm is the name of the communication coprocessor found
+	  on the Freescale MPC5200 family of processors.  Its usage is
+	  optional for some drivers (like ATA), but required for
+	  others (like FEC).
+
+	  If you want to use drivers that require DMA operations,
+	  answer Y or M. Otherwise say N.
+
+config PPC_BESTCOMM_ATA
+	tristate
+	depends on PPC_BESTCOMM
+	help
+	  This option enables the support for the ATA task.
+
+config PPC_BESTCOMM_FEC
+	tristate
+	depends on PPC_BESTCOMM
+	help
+	  This option enables the support for the FEC tasks.
+
+config PPC_BESTCOMM_GEN_BD
+	tristate
+	depends on PPC_BESTCOMM
+	help
+	  This option enables the support for the GenBD tasks.
+
diff --git a/drivers/dma/bestcomm/Makefile b/drivers/dma/bestcomm/Makefile
new file mode 100644
index 0000000..8d1b33a
--- /dev/null
+++ b/drivers/dma/bestcomm/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for BestComm & co
+#
+
+bestcomm-core-objs	:= bestcomm.o sram.o
+bestcomm-ata-objs	:= ata.o bcom_ata_task.o
+bestcomm-fec-objs	:= fec.o bcom_fec_rx_task.o bcom_fec_tx_task.o
+bestcomm-gen-bd-objs	:= gen_bd.o bcom_gen_bd_rx_task.o bcom_gen_bd_tx_task.o
+
+obj-$(CONFIG_PPC_BESTCOMM)		+= bestcomm-core.o
+obj-$(CONFIG_PPC_BESTCOMM_ATA)		+= bestcomm-ata.o
+obj-$(CONFIG_PPC_BESTCOMM_FEC)		+= bestcomm-fec.o
+obj-$(CONFIG_PPC_BESTCOMM_GEN_BD)	+= bestcomm-gen-bd.o
+
diff --git a/drivers/dma/bestcomm/ata.c b/drivers/dma/bestcomm/ata.c
new file mode 100644
index 0000000..2fd87f8
--- /dev/null
+++ b/drivers/dma/bestcomm/ata.c
@@ -0,0 +1,157 @@
+/*
+ * Bestcomm ATA task driver
+ *
+ *
+ * Patterned after bestcomm/fec.c by Dale Farnsworth <dfarnsworth@mvista.com>
+ *                                   2003-2004 (c) MontaVista, Software, Inc.
+ *
+ * Copyright (C) 2006-2007 Sylvain Munaut <tnt@246tNt.com>
+ * Copyright (C) 2006      Freescale - John Rigby
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <asm/io.h>
+
+#include <linux/fsl/bestcomm/bestcomm.h>
+#include <linux/fsl/bestcomm/bestcomm_priv.h>
+#include <linux/fsl/bestcomm/ata.h>
+
+
+/* ======================================================================== */
+/* Task image/var/inc                                                       */
+/* ======================================================================== */
+
+/* ata task image */
+extern u32 bcom_ata_task[];
+
+/* ata task vars that need to be set before enabling the task */
+struct bcom_ata_var {
+	u32 enable;		/* (u16*) address of task's control register */
+	u32 bd_base;		/* (struct bcom_bd*) beginning of ring buffer */
+	u32 bd_last;		/* (struct bcom_bd*) end of ring buffer */
+	u32 bd_start;		/* (struct bcom_bd*) current bd */
+	u32 buffer_size;	/* size of receive buffer */
+};
+
+/* ata task incs that need to be set before enabling the task */
+struct bcom_ata_inc {
+	u16 pad0;
+	s16 incr_bytes;
+	u16 pad1;
+	s16 incr_dst;
+	u16 pad2;
+	s16 incr_src;
+};
+
+
+/* ======================================================================== */
+/* Task support code                                                        */
+/* ======================================================================== */
+
+struct bcom_task *
+bcom_ata_init(int queue_len, int maxbufsize)
+{
+	struct bcom_task *tsk;
+	struct bcom_ata_var *var;
+	struct bcom_ata_inc *inc;
+
+	/* Prefetch breaks ATA DMA.  Turn it off for ATA DMA */
+	bcom_disable_prefetch();
+
+	tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_ata_bd), 0);
+	if (!tsk)
+		return NULL;
+
+	tsk->flags = BCOM_FLAGS_NONE;
+
+	bcom_ata_reset_bd(tsk);
+
+	var = (struct bcom_ata_var *) bcom_task_var(tsk->tasknum);
+	inc = (struct bcom_ata_inc *) bcom_task_inc(tsk->tasknum);
+
+	if (bcom_load_image(tsk->tasknum, bcom_ata_task)) {
+		bcom_task_free(tsk);
+		return NULL;
+	}
+
+	var->enable	= bcom_eng->regs_base +
+				offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]);
+	var->bd_base	= tsk->bd_pa;
+	var->bd_last	= tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size);
+	var->bd_start	= tsk->bd_pa;
+	var->buffer_size = maxbufsize;
+
+	/* Set the task pragma and enable auto-start */
+	bcom_set_task_pragma(tsk->tasknum, BCOM_ATA_PRAGMA);
+	bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum);
+
+	out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_ATA_RX], BCOM_IPR_ATA_RX);
+	out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_ATA_TX], BCOM_IPR_ATA_TX);
+
+	out_be32(&bcom_eng->regs->IntPend, 1<<tsk->tasknum); /* Clear ints */
+
+	return tsk;
+}
+EXPORT_SYMBOL_GPL(bcom_ata_init);
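+
+/*
+ * Usage sketch (illustrative only; the constants are the caller's own):
+ * an ATA host driver would typically do something like
+ *
+ *	struct bcom_task *tsk = bcom_ata_init(queue_len, max_buf_size);
+ *
+ *	bcom_ata_rx_prepare(tsk);	before a read transfer
+ *	bcom_ata_tx_prepare(tsk);	before a write transfer
+ *
+ *	bcom_ata_release(tsk);		when shutting down
+ */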
+
+void bcom_ata_rx_prepare(struct bcom_task *tsk)
+{
+	struct bcom_ata_inc *inc;
+
+	inc = (struct bcom_ata_inc *) bcom_task_inc(tsk->tasknum);
+
+	inc->incr_bytes	= -(s16)sizeof(u32);
+	inc->incr_src	= 0;
+	inc->incr_dst	= sizeof(u32);
+
+	bcom_set_initiator(tsk->tasknum, BCOM_INITIATOR_ATA_RX);
+}
+EXPORT_SYMBOL_GPL(bcom_ata_rx_prepare);
+
+void bcom_ata_tx_prepare(struct bcom_task *tsk)
+{
+	struct bcom_ata_inc *inc;
+
+	inc = (struct bcom_ata_inc *) bcom_task_inc(tsk->tasknum);
+
+	inc->incr_bytes	= -(s16)sizeof(u32);
+	inc->incr_src	= sizeof(u32);
+	inc->incr_dst	= 0;
+
+	bcom_set_initiator(tsk->tasknum, BCOM_INITIATOR_ATA_TX);
+}
+EXPORT_SYMBOL_GPL(bcom_ata_tx_prepare);
+
+void bcom_ata_reset_bd(struct bcom_task *tsk)
+{
+	struct bcom_ata_var *var;
+
+	/* Reset all BD */
+	memset(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
+
+	tsk->index = 0;
+	tsk->outdex = 0;
+
+	var = (struct bcom_ata_var *) bcom_task_var(tsk->tasknum);
+	var->bd_start = var->bd_base;
+}
+EXPORT_SYMBOL_GPL(bcom_ata_reset_bd);
+
+void bcom_ata_release(struct bcom_task *tsk)
+{
+	/* Nothing special for the ATA tasks */
+	bcom_task_free(tsk);
+}
+EXPORT_SYMBOL_GPL(bcom_ata_release);
+
+
+MODULE_DESCRIPTION("BestComm ATA task driver");
+MODULE_AUTHOR("John Rigby");
+MODULE_LICENSE("GPL v2");
+
diff --git a/drivers/dma/bestcomm/bcom_ata_task.c b/drivers/dma/bestcomm/bcom_ata_task.c
new file mode 100644
index 0000000..cc6049a
--- /dev/null
+++ b/drivers/dma/bestcomm/bcom_ata_task.c
@@ -0,0 +1,67 @@
+/*
+ * Bestcomm ATA task microcode
+ *
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Created based on bestcom/code_dma/image_rtos1/dma_image.hex
+ */
+
+#include <asm/types.h>
+
+/*
+ * The header consists of the following fields:
+ *	u32	magic;
+ *	u8	desc_size;
+ *	u8	var_size;
+ *	u8	inc_size;
+ *	u8	first_var;
+ *	u8	reserved[8];
+ *
+ * The size fields contain the number of 32-bit words.
+ */
+
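+/*
+ * Purely as an illustration of the layout described above (the code
+ * itself only indexes the raw u32 words), the header can be read as
+ *
+ *	struct {
+ *		u32 magic;		"BCTK" = 0x4243544b
+ *		u8  desc_size;		32-bit words of task descriptors
+ *		u8  var_size;		32-bit words of variables
+ *		u8  inc_size;		32-bit words of increments
+ *		u8  first_var;		index of the first variable used
+ *		u8  reserved[8];
+ *	};
+ *
+ * so the second word 0x0e060709 of this image packs desc_size = 0x0e,
+ * var_size = 0x06, inc_size = 0x07 and first_var = 0x09.
+ */
+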
+u32 bcom_ata_task[] = {
+	/* header */
+	0x4243544b,
+	0x0e060709,
+	0x00000000,
+	0x00000000,
+
+	/* Task descriptors */
+	0x8198009b, /* LCD: idx0 = var3; idx0 <= var2; idx0 += inc3 */
+	0x13e00c08, /*   DRD1A: var3 = var1; FN=0 MORE init=31 WS=0 RS=0 */
+	0xb8000264, /*   LCD: idx1 = *idx0, idx2 = var0; idx1 < var9; idx1 += inc4, idx2 += inc4 */
+	0x10000f00, /*     DRD1A: var3 = idx0; FN=0 MORE init=0 WS=0 RS=0 */
+	0x60140002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */
+	0x0c8cfc8a, /*     DRD2B1: *idx2 = EU3(); EU3(*idx2,var10)  */
+	0xd8988240, /*   LCDEXT: idx1 = idx1; idx1 > var9; idx1 += inc0 */
+	0xf845e011, /*   LCDEXT: idx2 = *(idx0 + var00000015); ; idx2 += inc2 */
+	0xb845e00a, /*   LCD: idx3 = *(idx0 + var00000019); ; idx3 += inc1 */
+	0x0bfecf90, /*     DRD1A: *idx3 = *idx2; FN=0 TFD init=31 WS=3 RS=3 */
+	0x9898802d, /*   LCD: idx1 = idx1; idx1 once var0; idx1 += inc5 */
+	0x64000005, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 INT EXT init=0 WS=0 RS=0 */
+	0x0c0cf849, /*     DRD2B1: *idx0 = EU3(); EU3(idx1,var9)  */
+	0x000001f8, /* NOP */
+
+	/* VAR[9]-VAR[14] */
+	0x40000000,
+	0x7fff7fff,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+
+	/* INC[0]-INC[6] */
+	0x40000000,
+	0xe0000000,
+	0xe0000000,
+	0xa000000c,
+	0x20000000,
+	0x00000000,
+	0x00000000,
+};
+
diff --git a/drivers/dma/bestcomm/bcom_fec_rx_task.c b/drivers/dma/bestcomm/bcom_fec_rx_task.c
new file mode 100644
index 0000000..a1ad6a0
--- /dev/null
+++ b/drivers/dma/bestcomm/bcom_fec_rx_task.c
@@ -0,0 +1,78 @@
+/*
+ * Bestcomm FEC RX task microcode
+ *
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Automatically created based on BestCommAPI-2.2/code_dma/image_rtos1/dma_image.hex
+ * on Tue Mar 22 11:19:38 2005 GMT
+ */
+
+#include <asm/types.h>
+
+/*
+ * The header consists of the following fields:
+ *	u32	magic;
+ *	u8	desc_size;
+ *	u8	var_size;
+ *	u8	inc_size;
+ *	u8	first_var;
+ *	u8	reserved[8];
+ *
+ * The size fields contain the number of 32-bit words.
+ */
+
+u32 bcom_fec_rx_task[] = {
+	/* header */
+	0x4243544b,
+	0x18060709,
+	0x00000000,
+	0x00000000,
+
+	/* Task descriptors */
+	0x808220e3, /* LCD: idx0 = var1, idx1 = var4; idx1 <= var3; idx0 += inc4, idx1 += inc3 */
+	0x10601010, /*   DRD1A: var4 = var2; FN=0 MORE init=3 WS=0 RS=0 */
+	0xb8800264, /*   LCD: idx2 = *idx1, idx3 = var0; idx2 < var9; idx2 += inc4, idx3 += inc4 */
+	0x10001308, /*     DRD1A: var4 = idx1; FN=0 MORE init=0 WS=0 RS=0 */
+	0x60140002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */
+	0x0cccfcca, /*     DRD2B1: *idx3 = EU3(); EU3(*idx3,var10)  */
+	0x80004000, /*   LCDEXT: idx2 = 0x00000000; ; */
+	0xb8c58029, /*   LCD: idx3 = *(idx1 + var00000015); idx3 once var0; idx3 += inc5 */
+	0x60000002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=0 RS=0 */
+	0x088cf8cc, /*     DRD2B1: idx2 = EU3(); EU3(idx3,var12)  */
+	0x991982f2, /*   LCD: idx2 = idx2, idx3 = idx3; idx2 > var11; idx2 += inc6, idx3 += inc2 */
+	0x006acf80, /*     DRD1A: *idx3 = *idx0; FN=0 init=3 WS=1 RS=1 */
+	0x80004000, /*   LCDEXT: idx2 = 0x00000000; ; */
+	0x9999802d, /*   LCD: idx3 = idx3; idx3 once var0; idx3 += inc5 */
+	0x70000002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT MORE init=0 WS=0 RS=0 */
+	0x034cfc4e, /*     DRD2B1: var13 = EU3(); EU3(*idx1,var14)  */
+	0x00008868, /*     DRD1A: idx2 = var13; FN=0 init=0 WS=0 RS=0 */
+	0x99198341, /*   LCD: idx2 = idx2, idx3 = idx3; idx2 > var13; idx2 += inc0, idx3 += inc1 */
+	0x007ecf80, /*     DRD1A: *idx3 = *idx0; FN=0 init=3 WS=3 RS=3 */
+	0x99198272, /*   LCD: idx2 = idx2, idx3 = idx3; idx2 > var9; idx2 += inc6, idx3 += inc2 */
+	0x046acf80, /*     DRD1A: *idx3 = *idx0; FN=0 INT init=3 WS=1 RS=1 */
+	0x9819002d, /*   LCD: idx2 = idx0; idx2 once var0; idx2 += inc5 */
+	0x0060c790, /*     DRD1A: *idx1 = *idx2; FN=0 init=3 WS=0 RS=0 */
+	0x000001f8, /*   NOP */
+
+	/* VAR[9]-VAR[14] */
+	0x40000000,
+	0x7fff7fff,
+	0x00000000,
+	0x00000003,
+	0x40000008,
+	0x43ffffff,
+
+	/* INC[0]-INC[6] */
+	0x40000000,
+	0xe0000000,
+	0xe0000000,
+	0xa0000008,
+	0x20000000,
+	0x00000000,
+	0x4000ffff,
+};
+
diff --git a/drivers/dma/bestcomm/bcom_fec_tx_task.c b/drivers/dma/bestcomm/bcom_fec_tx_task.c
new file mode 100644
index 0000000..b1c495c
--- /dev/null
+++ b/drivers/dma/bestcomm/bcom_fec_tx_task.c
@@ -0,0 +1,91 @@
+/*
+ * Bestcomm FEC TX task microcode
+ *
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Automatically created based on BestCommAPI-2.2/code_dma/image_rtos1/dma_image.hex
+ * on Tue Mar 22 11:19:29 2005 GMT
+ */
+
+#include <asm/types.h>
+
+/*
+ * The header consists of the following fields:
+ *	u32	magic;
+ *	u8	desc_size;
+ *	u8	var_size;
+ *	u8	inc_size;
+ *	u8	first_var;
+ *	u8	reserved[8];
+ *
+ * The size fields contain the number of 32-bit words.
+ */
+
+u32 bcom_fec_tx_task[] = {
+	/* header */
+	0x4243544b,
+	0x2407070d,
+	0x00000000,
+	0x00000000,
+
+	/* Task descriptors */
+	0x8018001b, /* LCD: idx0 = var0; idx0 <= var0; idx0 += inc3 */
+	0x60000005, /*   DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 EXT init=0 WS=0 RS=0 */
+	0x01ccfc0d, /*   DRD2B1: var7 = EU3(); EU3(*idx0,var13)  */
+	0x8082a123, /* LCD: idx0 = var1, idx1 = var5; idx1 <= var4; idx0 += inc4, idx1 += inc3 */
+	0x10801418, /*   DRD1A: var5 = var3; FN=0 MORE init=4 WS=0 RS=0 */
+	0xf88103a4, /*   LCDEXT: idx2 = *idx1, idx3 = var2; idx2 < var14; idx2 += inc4, idx3 += inc4 */
+	0x801a6024, /*   LCD: idx4 = var0; ; idx4 += inc4 */
+	0x10001708, /*     DRD1A: var5 = idx1; FN=0 MORE init=0 WS=0 RS=0 */
+	0x60140002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */
+	0x0cccfccf, /*     DRD2B1: *idx3 = EU3(); EU3(*idx3,var15)  */
+	0x991a002c, /*   LCD: idx2 = idx2, idx3 = idx4; idx2 once var0; idx2 += inc5, idx3 += inc4 */
+	0x70000002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT MORE init=0 WS=0 RS=0 */
+	0x024cfc4d, /*     DRD2B1: var9 = EU3(); EU3(*idx1,var13)  */
+	0x60000003, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=3 EXT init=0 WS=0 RS=0 */
+	0x0cccf247, /*     DRD2B1: *idx3 = EU3(); EU3(var9,var7)  */
+	0x80004000, /*   LCDEXT: idx2 = 0x00000000; ; */
+	0xb8c80029, /*   LCD: idx3 = *(idx1 + var0000001a); idx3 once var0; idx3 += inc5 */
+	0x70000002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT MORE init=0 WS=0 RS=0 */
+	0x088cf8d1, /*     DRD2B1: idx2 = EU3(); EU3(idx3,var17)  */
+	0x00002f10, /*     DRD1A: var11 = idx2; FN=0 init=0 WS=0 RS=0 */
+	0x99198432, /*   LCD: idx2 = idx2, idx3 = idx3; idx2 > var16; idx2 += inc6, idx3 += inc2 */
+	0x008ac398, /*     DRD1A: *idx0 = *idx3; FN=0 init=4 WS=1 RS=1 */
+	0x80004000, /*   LCDEXT: idx2 = 0x00000000; ; */
+	0x9999802d, /*   LCD: idx3 = idx3; idx3 once var0; idx3 += inc5 */
+	0x70000002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT MORE init=0 WS=0 RS=0 */
+	0x048cfc53, /*     DRD2B1: var18 = EU3(); EU3(*idx1,var19)  */
+	0x60000008, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=8 EXT init=0 WS=0 RS=0 */
+	0x088cf48b, /*     DRD2B1: idx2 = EU3(); EU3(var18,var11)  */
+	0x99198481, /*   LCD: idx2 = idx2, idx3 = idx3; idx2 > var18; idx2 += inc0, idx3 += inc1 */
+	0x009ec398, /*     DRD1A: *idx0 = *idx3; FN=0 init=4 WS=3 RS=3 */
+	0x991983b2, /*   LCD: idx2 = idx2, idx3 = idx3; idx2 > var14; idx2 += inc6, idx3 += inc2 */
+	0x088ac398, /*     DRD1A: *idx0 = *idx3; FN=0 TFD init=4 WS=1 RS=1 */
+	0x9919002d, /*   LCD: idx2 = idx2; idx2 once var0; idx2 += inc5 */
+	0x60000005, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 EXT init=0 WS=0 RS=0 */
+	0x0c4cf88e, /*     DRD2B1: *idx1 = EU3(); EU3(idx2,var14)  */
+	0x000001f8, /*   NOP */
+
+	/* VAR[13]-VAR[19] */
+	0x0c000000,
+	0x40000000,
+	0x7fff7fff,
+	0x00000000,
+	0x00000003,
+	0x40000004,
+	0x43ffffff,
+
+	/* INC[0]-INC[6] */
+	0x40000000,
+	0xe0000000,
+	0xe0000000,
+	0xa0000008,
+	0x20000000,
+	0x00000000,
+	0x4000ffff,
+};
+
diff --git a/drivers/dma/bestcomm/bcom_gen_bd_rx_task.c b/drivers/dma/bestcomm/bcom_gen_bd_rx_task.c
new file mode 100644
index 0000000..efee022
--- /dev/null
+++ b/drivers/dma/bestcomm/bcom_gen_bd_rx_task.c
@@ -0,0 +1,63 @@
+/*
+ * Bestcomm GenBD RX task microcode
+ *
+ * Copyright (C) 2006 AppSpec Computer Technologies Corp.
+ *                    Jeff Gibbons <jeff.gibbons@appspec.com>
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Based on BestCommAPI-2.2/code_dma/image_rtos1/dma_image.hex
+ * on Tue Mar 4 10:14:12 2006 GMT
+ *
+ */
+
+#include <asm/types.h>
+
+/*
+ * The header consists of the following fields:
+ *	u32	magic;
+ *	u8	desc_size;
+ *	u8	var_size;
+ *	u8	inc_size;
+ *	u8	first_var;
+ *	u8	reserved[8];
+ *
+ * The size fields contain the number of 32-bit words.
+ */
+
+u32 bcom_gen_bd_rx_task[] = {
+	/* header */
+	0x4243544b,
+	0x0d020409,
+	0x00000000,
+	0x00000000,
+
+	/* Task descriptors */
+	0x808220da, /* LCD: idx0 = var1, idx1 = var4; idx1 <= var3; idx0 += inc3, idx1 += inc2 */
+	0x13e01010, /*   DRD1A: var4 = var2; FN=0 MORE init=31 WS=0 RS=0 */
+	0xb880025b, /*   LCD: idx2 = *idx1, idx3 = var0; idx2 < var9; idx2 += inc3, idx3 += inc3 */
+	0x10001308, /*     DRD1A: var4 = idx1; FN=0 MORE init=0 WS=0 RS=0 */
+	0x60140002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */
+	0x0cccfcca, /*     DRD2B1: *idx3 = EU3(); EU3(*idx3,var10)  */
+	0xd9190240, /*   LCDEXT: idx2 = idx2; idx2 > var9; idx2 += inc0 */
+	0xb8c5e009, /*   LCD: idx3 = *(idx1 + var00000015); ; idx3 += inc1 */
+	0x07fecf80, /*     DRD1A: *idx3 = *idx0; FN=0 INT init=31 WS=3 RS=3 */
+	0x99190024, /*   LCD: idx2 = idx2; idx2 once var0; idx2 += inc4 */
+	0x60000005, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 EXT init=0 WS=0 RS=0 */
+	0x0c4cf889, /*     DRD2B1: *idx1 = EU3(); EU3(idx2,var9)  */
+	0x000001f8, /*   NOP */
+
+	/* VAR[9]-VAR[10] */
+	0x40000000,
+	0x7fff7fff,
+
+	/* INC[0]-INC[3] */
+	0x40000000,
+	0xe0000000,
+	0xa0000008,
+	0x20000000,
+};
+
diff --git a/drivers/dma/bestcomm/bcom_gen_bd_tx_task.c b/drivers/dma/bestcomm/bcom_gen_bd_tx_task.c
new file mode 100644
index 0000000..c605aa4
--- /dev/null
+++ b/drivers/dma/bestcomm/bcom_gen_bd_tx_task.c
@@ -0,0 +1,69 @@
+/*
+ * Bestcomm GenBD TX task microcode
+ *
+ * Copyright (C) 2006 AppSpec Computer Technologies Corp.
+ *                    Jeff Gibbons <jeff.gibbons@appspec.com>
+ * Copyright (c) 2004 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Based on BestCommAPI-2.2/code_dma/image_rtos1/dma_image.hex
+ * on Tue Mar 4 10:14:12 2006 GMT
+ *
+ */
+
+#include <asm/types.h>
+
+/*
+ * The header consists of the following fields:
+ *	u32	magic;
+ *	u8	desc_size;
+ *	u8	var_size;
+ *	u8	inc_size;
+ *	u8	first_var;
+ *	u8	reserved[8];
+ *
+ * The size fields contain the number of 32-bit words.
+ */
+
+u32 bcom_gen_bd_tx_task[] = {
+	/* header */
+	0x4243544b,
+	0x0f040609,
+	0x00000000,
+	0x00000000,
+
+	/* Task descriptors */
+	0x800220e3, /* LCD: idx0 = var0, idx1 = var4; idx1 <= var3; idx0 += inc4, idx1 += inc3 */
+	0x13e01010, /*   DRD1A: var4 = var2; FN=0 MORE init=31 WS=0 RS=0 */
+	0xb8808264, /*   LCD: idx2 = *idx1, idx3 = var1; idx2 < var9; idx2 += inc4, idx3 += inc4 */
+	0x10001308, /*     DRD1A: var4 = idx1; FN=0 MORE init=0 WS=0 RS=0 */
+	0x60140002, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=2 EXT init=0 WS=2 RS=2 */
+	0x0cccfcca, /*     DRD2B1: *idx3 = EU3(); EU3(*idx3,var10)  */
+	0xd9190300, /*   LCDEXT: idx2 = idx2; idx2 > var12; idx2 += inc0 */
+	0xb8c5e009, /*   LCD: idx3 = *(idx1 + var00000015); ; idx3 += inc1 */
+	0x03fec398, /*     DRD1A: *idx0 = *idx3; FN=0 init=31 WS=3 RS=3 */
+	0x9919826a, /*   LCD: idx2 = idx2, idx3 = idx3; idx2 > var9; idx2 += inc5, idx3 += inc2 */
+	0x0feac398, /*     DRD1A: *idx0 = *idx3; FN=0 TFD INT init=31 WS=1 RS=1 */
+	0x99190036, /*   LCD: idx2 = idx2; idx2 once var0; idx2 += inc6 */
+	0x60000005, /*     DRD2A: EU0=0 EU1=0 EU2=0 EU3=5 EXT init=0 WS=0 RS=0 */
+	0x0c4cf889, /*     DRD2B1: *idx1 = EU3(); EU3(idx2,var9)  */
+	0x000001f8, /*   NOP */
+
+	/* VAR[9]-VAR[12] */
+	0x40000000,
+	0x7fff7fff,
+	0x00000000,
+	0x40000004,
+
+	/* INC[0]-INC[5] */
+	0x40000000,
+	0xe0000000,
+	0xe0000000,
+	0xa0000008,
+	0x20000000,
+	0x4000ffff,
+};
+
diff --git a/drivers/dma/bestcomm/bestcomm.c b/drivers/dma/bestcomm/bestcomm.c
new file mode 100644
index 0000000..d91cbbe
--- /dev/null
+++ b/drivers/dma/bestcomm/bestcomm.c
@@ -0,0 +1,529 @@
+/*
+ * Driver for MPC52xx processor BestComm peripheral controller
+ *
+ *
+ * Copyright (C) 2006-2007 Sylvain Munaut <tnt@246tNt.com>
+ * Copyright (C) 2005      Varma Electronics Oy,
+ *                         ( by Andrey Volkov <avolkov@varma-el.com> )
+ * Copyright (C) 2003-2004 MontaVista, Software, Inc.
+ *                         ( by Dale Farnsworth <dfarnsworth@mvista.com> )
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/mpc52xx.h>
+
+#include <linux/fsl/bestcomm/sram.h>
+#include <linux/fsl/bestcomm/bestcomm_priv.h>
+#include "linux/fsl/bestcomm/bestcomm.h"
+
+#define DRIVER_NAME "bestcomm-core"
+
+/* MPC5200 device tree match tables */
+static const struct of_device_id mpc52xx_sram_ids[] = {
+	{ .compatible = "fsl,mpc5200-sram", },
+	{ .compatible = "mpc5200-sram", },
+	{}
+};
+
+
+struct bcom_engine *bcom_eng = NULL;
+EXPORT_SYMBOL_GPL(bcom_eng);	/* needed for inline functions */
+
+/* ======================================================================== */
+/* Public and private API                                                   */
+/* ======================================================================== */
+
+/* Private API */
+
+struct bcom_task *
+bcom_task_alloc(int bd_count, int bd_size, int priv_size)
+{
+	int i, tasknum = -1;
+	struct bcom_task *tsk;
+
+	/* Don't try to do anything if bestcomm init failed */
+	if (!bcom_eng)
+		return NULL;
+
+	/* Get and reserve a task num */
+	spin_lock(&bcom_eng->lock);
+
+	for (i=0; i<BCOM_MAX_TASKS; i++)
+		if (!bcom_eng->tdt[i].stop) {	/* we use stop as a marker */
+			bcom_eng->tdt[i].stop = 0xfffffffful; /* dummy addr */
+			tasknum = i;
+			break;
+		}
+
+	spin_unlock(&bcom_eng->lock);
+
+	if (tasknum < 0)
+		return NULL;
+
+	/* Allocate our structure */
+	tsk = kzalloc(sizeof(struct bcom_task) + priv_size, GFP_KERNEL);
+	if (!tsk)
+		goto error;
+
+	tsk->tasknum = tasknum;
+	if (priv_size)
+		tsk->priv = (void*)tsk + sizeof(struct bcom_task);
+
+	/* Get IRQ of that task */
+	tsk->irq = irq_of_parse_and_map(bcom_eng->ofnode, tsk->tasknum);
+	if (!tsk->irq)
+		goto error;
+
+	/* Init the BDs, if needed */
+	if (bd_count) {
+		tsk->cookie = kmalloc_array(bd_count, sizeof(void *),
+					    GFP_KERNEL);
+		if (!tsk->cookie)
+			goto error;
+
+		tsk->bd = bcom_sram_alloc(bd_count * bd_size, 4, &tsk->bd_pa);
+		if (!tsk->bd)
+			goto error;
+		memset(tsk->bd, 0x00, bd_count * bd_size);
+
+		tsk->num_bd = bd_count;
+		tsk->bd_size = bd_size;
+	}
+
+	return tsk;
+
+error:
+	if (tsk) {
+		if (tsk->irq)
+			irq_dispose_mapping(tsk->irq);
+		bcom_sram_free(tsk->bd);
+		kfree(tsk->cookie);
+		kfree(tsk);
+	}
+
+	bcom_eng->tdt[tasknum].stop = 0;
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(bcom_task_alloc);
+
+void
+bcom_task_free(struct bcom_task *tsk)
+{
+	/* Stop the task */
+	bcom_disable_task(tsk->tasknum);
+
+	/* Clear TDT */
+	bcom_eng->tdt[tsk->tasknum].start = 0;
+	bcom_eng->tdt[tsk->tasknum].stop  = 0;
+
+	/* Free everything */
+	irq_dispose_mapping(tsk->irq);
+	bcom_sram_free(tsk->bd);
+	kfree(tsk->cookie);
+	kfree(tsk);
+}
+EXPORT_SYMBOL_GPL(bcom_task_free);
+
+int
+bcom_load_image(int task, u32 *task_image)
+{
+	struct bcom_task_header *hdr = (struct bcom_task_header *)task_image;
+	struct bcom_tdt *tdt;
+	u32 *desc, *var, *inc;
+	u32 *desc_src, *var_src, *inc_src;
+
+	/* Safety checks */
+	if (hdr->magic != BCOM_TASK_MAGIC) {
+		printk(KERN_ERR DRIVER_NAME
+			": Trying to load invalid microcode\n");
+		return -EINVAL;
+	}
+
+	if ((task < 0) || (task >= BCOM_MAX_TASKS)) {
+		printk(KERN_ERR DRIVER_NAME
+			": Trying to load invalid task %d\n", task);
+		return -EINVAL;
+	}
+
+	/* Initial load or reload */
+	tdt = &bcom_eng->tdt[task];
+
+	if (tdt->start) {
+		desc = bcom_task_desc(task);
+		if (hdr->desc_size != bcom_task_num_descs(task)) {
+			printk(KERN_ERR DRIVER_NAME
+				": Trying to reload wrong task image "
+				"(%d size %d/%d)!\n",
+				task,
+				hdr->desc_size,
+				bcom_task_num_descs(task));
+			return -EINVAL;
+		}
+	} else {
+		phys_addr_t start_pa;
+
+		desc = bcom_sram_alloc(hdr->desc_size * sizeof(u32), 4, &start_pa);
+		if (!desc)
+			return -ENOMEM;
+
+		tdt->start = start_pa;
+		tdt->stop = start_pa + ((hdr->desc_size-1) * sizeof(u32));
+	}
+
+	var = bcom_task_var(task);
+	inc = bcom_task_inc(task);
+
+	/* Clear & copy */
+	memset(var, 0x00, BCOM_VAR_SIZE);
+	memset(inc, 0x00, BCOM_INC_SIZE);
+
+	desc_src = (u32 *)(hdr + 1);
+	var_src = desc_src + hdr->desc_size;
+	inc_src = var_src + hdr->var_size;
+
+	memcpy(desc, desc_src, hdr->desc_size * sizeof(u32));
+	memcpy(var + hdr->first_var, var_src, hdr->var_size * sizeof(u32));
+	memcpy(inc, inc_src, hdr->inc_size * sizeof(u32));
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bcom_load_image);
+
+void
+bcom_set_initiator(int task, int initiator)
+{
+	int i;
+	int num_descs;
+	u32 *desc;
+	int next_drd_has_initiator;
+
+	bcom_set_tcr_initiator(task, initiator);
+
+	/*
+	 * Just setting the TCR is apparently not enough due to some problem
+	 * with it, so we also walk the whole microcode and patch the
+	 * initiator directly in each DRD.
+	 */
+
+	desc = bcom_task_desc(task);
+	next_drd_has_initiator = 1;
+	num_descs = bcom_task_num_descs(task);
+
+	for (i=0; i<num_descs; i++, desc++) {
+		if (!bcom_desc_is_drd(*desc))
+			continue;
+		if (next_drd_has_initiator)
+			if (bcom_desc_initiator(*desc) != BCOM_INITIATOR_ALWAYS)
+				bcom_set_desc_initiator(desc, initiator);
+		next_drd_has_initiator = !bcom_drd_is_extended(*desc);
+	}
+}
+EXPORT_SYMBOL_GPL(bcom_set_initiator);
+
+
+/* Public API */
+
+void
+bcom_enable(struct bcom_task *tsk)
+{
+	bcom_enable_task(tsk->tasknum);
+}
+EXPORT_SYMBOL_GPL(bcom_enable);
+
+void
+bcom_disable(struct bcom_task *tsk)
+{
+	bcom_disable_task(tsk->tasknum);
+}
+EXPORT_SYMBOL_GPL(bcom_disable);
+
+
+/* ======================================================================== */
+/* Engine init/cleanup                                                      */
+/* ======================================================================== */
+
+/*
+ * Function Descriptor table.
+ * This will need to be updated if Freescale changes their task code FDT.
+ */
+static u32 fdt_ops[] = {
+	0xa0045670,	/* FDT[48] - load_acc()	  */
+	0x80045670,	/* FDT[49] - unload_acc() */
+	0x21800000,	/* FDT[50] - and()        */
+	0x21e00000,	/* FDT[51] - or()         */
+	0x21500000,	/* FDT[52] - xor()        */
+	0x21400000,	/* FDT[53] - andn()       */
+	0x21500000,	/* FDT[54] - not()        */
+	0x20400000,	/* FDT[55] - add()        */
+	0x20500000,	/* FDT[56] - sub()        */
+	0x20800000,	/* FDT[57] - lsh()        */
+	0x20a00000,	/* FDT[58] - rsh()        */
+	0xc0170000,	/* FDT[59] - crc8()       */
+	0xc0145670,	/* FDT[60] - crc16()      */
+	0xc0345670,	/* FDT[61] - crc32()      */
+	0xa0076540,	/* FDT[62] - endian32()   */
+	0xa0000760,	/* FDT[63] - endian16()   */
+};
+
+
+static int bcom_engine_init(void)
+{
+	int task;
+	phys_addr_t tdt_pa, ctx_pa, var_pa, fdt_pa;
+	unsigned int tdt_size, ctx_size, var_size, fdt_size;
+
+	/* Allocate & clear SRAM zones for FDT, TDTs, contexts and vars/incs */
+	tdt_size = BCOM_MAX_TASKS * sizeof(struct bcom_tdt);
+	ctx_size = BCOM_MAX_TASKS * BCOM_CTX_SIZE;
+	var_size = BCOM_MAX_TASKS * (BCOM_VAR_SIZE + BCOM_INC_SIZE);
+	fdt_size = BCOM_FDT_SIZE;
+
+	bcom_eng->tdt = bcom_sram_alloc(tdt_size, sizeof(u32), &tdt_pa);
+	bcom_eng->ctx = bcom_sram_alloc(ctx_size, BCOM_CTX_ALIGN, &ctx_pa);
+	bcom_eng->var = bcom_sram_alloc(var_size, BCOM_VAR_ALIGN, &var_pa);
+	bcom_eng->fdt = bcom_sram_alloc(fdt_size, BCOM_FDT_ALIGN, &fdt_pa);
+
+	if (!bcom_eng->tdt || !bcom_eng->ctx || !bcom_eng->var || !bcom_eng->fdt) {
+		printk(KERN_ERR "DMA: SRAM alloc failed in engine init !\n");
+
+		bcom_sram_free(bcom_eng->tdt);
+		bcom_sram_free(bcom_eng->ctx);
+		bcom_sram_free(bcom_eng->var);
+		bcom_sram_free(bcom_eng->fdt);
+
+		return -ENOMEM;
+	}
+
+	memset(bcom_eng->tdt, 0x00, tdt_size);
+	memset(bcom_eng->ctx, 0x00, ctx_size);
+	memset(bcom_eng->var, 0x00, var_size);
+	memset(bcom_eng->fdt, 0x00, fdt_size);
+
+	/* Copy the FDT for the EU#3 */
+	memcpy(&bcom_eng->fdt[48], fdt_ops, sizeof(fdt_ops));
+
+	/* Initialize Task base structure */
+	for (task=0; task<BCOM_MAX_TASKS; task++)
+	{
+		out_be16(&bcom_eng->regs->tcr[task], 0);
+		out_8(&bcom_eng->regs->ipr[task], 0);
+
+		bcom_eng->tdt[task].context	= ctx_pa;
+		bcom_eng->tdt[task].var	= var_pa;
+		bcom_eng->tdt[task].fdt	= fdt_pa;
+
+		var_pa += BCOM_VAR_SIZE + BCOM_INC_SIZE;
+		ctx_pa += BCOM_CTX_SIZE;
+	}
+
+	out_be32(&bcom_eng->regs->taskBar, tdt_pa);
+
+	/* Init 'always' initiator */
+	out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_ALWAYS], BCOM_IPR_ALWAYS);
+
+	/* Disable COMM Bus Prefetch on the original 5200; it's broken */
+	if ((mfspr(SPRN_SVR) & MPC5200_SVR_MASK) == MPC5200_SVR)
+		bcom_disable_prefetch();
+
+	/* Init lock */
+	spin_lock_init(&bcom_eng->lock);
+
+	return 0;
+}
+
+static void
+bcom_engine_cleanup(void)
+{
+	int task;
+
+	/* Stop all tasks */
+	for (task=0; task<BCOM_MAX_TASKS; task++)
+	{
+		out_be16(&bcom_eng->regs->tcr[task], 0);
+		out_8(&bcom_eng->regs->ipr[task], 0);
+	}
+
+	out_be32(&bcom_eng->regs->taskBar, 0ul);
+
+	/* Release the SRAM zones */
+	bcom_sram_free(bcom_eng->tdt);
+	bcom_sram_free(bcom_eng->ctx);
+	bcom_sram_free(bcom_eng->var);
+	bcom_sram_free(bcom_eng->fdt);
+}
+
+
+/* ======================================================================== */
+/* OF platform driver                                                       */
+/* ======================================================================== */
+
+static int mpc52xx_bcom_probe(struct platform_device *op)
+{
+	struct device_node *ofn_sram;
+	struct resource res_bcom;
+
+	int rv;
+
+	/* Inform user we're ok so far */
+	printk(KERN_INFO "DMA: MPC52xx BestComm driver\n");
+
+	/* Get the bestcomm node */
+	of_node_get(op->dev.of_node);
+
+	/* Prepare SRAM */
+	ofn_sram = of_find_matching_node(NULL, mpc52xx_sram_ids);
+	if (!ofn_sram) {
+		printk(KERN_ERR DRIVER_NAME ": "
+			"No SRAM found in device tree\n");
+		rv = -ENODEV;
+		goto error_ofput;
+	}
+	rv = bcom_sram_init(ofn_sram, DRIVER_NAME);
+	of_node_put(ofn_sram);
+
+	if (rv) {
+		printk(KERN_ERR DRIVER_NAME ": "
+			"Error in SRAM init\n");
+		goto error_ofput;
+	}
+
+	/* Get a clean struct */
+	bcom_eng = kzalloc(sizeof(struct bcom_engine), GFP_KERNEL);
+	if (!bcom_eng) {
+		rv = -ENOMEM;
+		goto error_sramclean;
+	}
+
+	/* Save the node */
+	bcom_eng->ofnode = op->dev.of_node;
+
+	/* Get, reserve & map io */
+	if (of_address_to_resource(op->dev.of_node, 0, &res_bcom)) {
+		printk(KERN_ERR DRIVER_NAME ": "
+			"Can't get resource\n");
+		rv = -EINVAL;
+		goto error_sramclean;
+	}
+
+	if (!request_mem_region(res_bcom.start, resource_size(&res_bcom),
+				DRIVER_NAME)) {
+		printk(KERN_ERR DRIVER_NAME ": "
+			"Can't request registers region\n");
+		rv = -EBUSY;
+		goto error_sramclean;
+	}
+
+	bcom_eng->regs_base = res_bcom.start;
+	bcom_eng->regs = ioremap(res_bcom.start, sizeof(struct mpc52xx_sdma));
+	if (!bcom_eng->regs) {
+		printk(KERN_ERR DRIVER_NAME ": "
+			"Can't map registers\n");
+		rv = -ENOMEM;
+		goto error_release;
+	}
+
+	/* Now, do the real init */
+	rv = bcom_engine_init();
+	if (rv)
+		goto error_unmap;
+
+	/* Done ! */
+	printk(KERN_INFO "DMA: MPC52xx BestComm engine @%08lx ok !\n",
+		(long)bcom_eng->regs_base);
+
+	return 0;
+
+	/* Error path */
+error_unmap:
+	iounmap(bcom_eng->regs);
+error_release:
+	release_mem_region(res_bcom.start, sizeof(struct mpc52xx_sdma));
+error_sramclean:
+	kfree(bcom_eng);
+	bcom_sram_cleanup();
+error_ofput:
+	of_node_put(op->dev.of_node);
+
+	printk(KERN_ERR "DMA: MPC52xx BestComm init failed !\n");
+
+	return rv;
+}
+
+
+static int mpc52xx_bcom_remove(struct platform_device *op)
+{
+	/* Clean up the engine */
+	bcom_engine_cleanup();
+
+	/* Cleanup SRAM */
+	bcom_sram_cleanup();
+
+	/* Release regs */
+	iounmap(bcom_eng->regs);
+	release_mem_region(bcom_eng->regs_base, sizeof(struct mpc52xx_sdma));
+
+	/* Release the node */
+	of_node_put(bcom_eng->ofnode);
+
+	/* Release memory */
+	kfree(bcom_eng);
+	bcom_eng = NULL;
+
+	return 0;
+}
+
+static const struct of_device_id mpc52xx_bcom_of_match[] = {
+	{ .compatible = "fsl,mpc5200-bestcomm", },
+	{ .compatible = "mpc5200-bestcomm", },
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, mpc52xx_bcom_of_match);
+
+
+static struct platform_driver mpc52xx_bcom_of_platform_driver = {
+	.probe		= mpc52xx_bcom_probe,
+	.remove		= mpc52xx_bcom_remove,
+	.driver = {
+		.name = DRIVER_NAME,
+		.of_match_table = mpc52xx_bcom_of_match,
+	},
+};
+
+
+/* ======================================================================== */
+/* Module                                                                   */
+/* ======================================================================== */
+
+static int __init
+mpc52xx_bcom_init(void)
+{
+	return platform_driver_register(&mpc52xx_bcom_of_platform_driver);
+}
+
+static void __exit
+mpc52xx_bcom_exit(void)
+{
+	platform_driver_unregister(&mpc52xx_bcom_of_platform_driver);
+}
+
+/*
+ * If we're not a module, we must make sure everything is set up before
+ * anyone tries to use us ... that's why we use subsys_initcall instead
+ * of module_init.
+ */
+subsys_initcall(mpc52xx_bcom_init);
+module_exit(mpc52xx_bcom_exit);
+
+MODULE_DESCRIPTION("Freescale MPC52xx BestComm DMA");
+MODULE_AUTHOR("Sylvain Munaut <tnt@246tNt.com>");
+MODULE_AUTHOR("Andrey Volkov <avolkov@varma-el.com>");
+MODULE_AUTHOR("Dale Farnsworth <dfarnsworth@mvista.com>");
+MODULE_LICENSE("GPL v2");
+
diff --git a/drivers/dma/bestcomm/fec.c b/drivers/dma/bestcomm/fec.c
new file mode 100644
index 0000000..7f1fb1c
--- /dev/null
+++ b/drivers/dma/bestcomm/fec.c
@@ -0,0 +1,270 @@
+/*
+ * Bestcomm FEC tasks driver
+ *
+ *
+ * Copyright (C) 2006-2007 Sylvain Munaut <tnt@246tNt.com>
+ * Copyright (C) 2003-2004 MontaVista, Software, Inc.
+ *                         ( by Dale Farnsworth <dfarnsworth@mvista.com> )
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <asm/io.h>
+
+#include <linux/fsl/bestcomm/bestcomm.h>
+#include <linux/fsl/bestcomm/bestcomm_priv.h>
+#include <linux/fsl/bestcomm/fec.h>
+
+
+/* ======================================================================== */
+/* Task image/var/inc                                                       */
+/* ======================================================================== */
+
+/* fec tasks images */
+extern u32 bcom_fec_rx_task[];
+extern u32 bcom_fec_tx_task[];
+
+/* rx task vars that need to be set before enabling the task */
+struct bcom_fec_rx_var {
+	u32 enable;		/* (u16*) address of task's control register */
+	u32 fifo;		/* (u32*) address of fec's fifo */
+	u32 bd_base;		/* (struct bcom_bd*) beginning of ring buffer */
+	u32 bd_last;		/* (struct bcom_bd*) end of ring buffer */
+	u32 bd_start;		/* (struct bcom_bd*) current bd */
+	u32 buffer_size;	/* size of receive buffer */
+};
+
+/* rx task incs that need to be set before enabling the task */
+struct bcom_fec_rx_inc {
+	u16 pad0;
+	s16 incr_bytes;
+	u16 pad1;
+	s16 incr_dst;
+	u16 pad2;
+	s16 incr_dst_ma;
+};
+
+/* tx task vars that need to be set before enabling the task */
+struct bcom_fec_tx_var {
+	u32 DRD;		/* (u32*) address of self-modified DRD */
+	u32 fifo;		/* (u32*) address of fec's fifo */
+	u32 enable;		/* (u16*) address of task's control register */
+	u32 bd_base;		/* (struct bcom_bd*) beginning of ring buffer */
+	u32 bd_last;		/* (struct bcom_bd*) end of ring buffer */
+	u32 bd_start;		/* (struct bcom_bd*) current bd */
+	u32 buffer_size;	/* set by uCode for each packet */
+};
+
+/* tx task incs that need to be set before enabling the task */
+struct bcom_fec_tx_inc {
+	u16 pad0;
+	s16 incr_bytes;
+	u16 pad1;
+	s16 incr_src;
+	u16 pad2;
+	s16 incr_src_ma;
+};
+
+/* private structure in the task */
+struct bcom_fec_priv {
+	phys_addr_t	fifo;
+	int		maxbufsize;
+};
+
+
+/* ======================================================================== */
+/* Task support code                                                        */
+/* ======================================================================== */
+
+struct bcom_task *
+bcom_fec_rx_init(int queue_len, phys_addr_t fifo, int maxbufsize)
+{
+	struct bcom_task *tsk;
+	struct bcom_fec_priv *priv;
+
+	tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_fec_bd),
+				sizeof(struct bcom_fec_priv));
+	if (!tsk)
+		return NULL;
+
+	tsk->flags = BCOM_FLAGS_NONE;
+
+	priv = tsk->priv;
+	priv->fifo = fifo;
+	priv->maxbufsize = maxbufsize;
+
+	if (bcom_fec_rx_reset(tsk)) {
+		bcom_task_free(tsk);
+		return NULL;
+	}
+
+	return tsk;
+}
+EXPORT_SYMBOL_GPL(bcom_fec_rx_init);
+
+int
+bcom_fec_rx_reset(struct bcom_task *tsk)
+{
+	struct bcom_fec_priv *priv = tsk->priv;
+	struct bcom_fec_rx_var *var;
+	struct bcom_fec_rx_inc *inc;
+
+	/* Shutdown the task */
+	bcom_disable_task(tsk->tasknum);
+
+	/* Reset the microcode */
+	var = (struct bcom_fec_rx_var *) bcom_task_var(tsk->tasknum);
+	inc = (struct bcom_fec_rx_inc *) bcom_task_inc(tsk->tasknum);
+
+	if (bcom_load_image(tsk->tasknum, bcom_fec_rx_task))
+		return -1;
+
+	var->enable	= bcom_eng->regs_base +
+				offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]);
+	var->fifo	= (u32) priv->fifo;
+	var->bd_base	= tsk->bd_pa;
+	var->bd_last	= tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size);
+	var->bd_start	= tsk->bd_pa;
+	var->buffer_size = priv->maxbufsize;
+
+	inc->incr_bytes	= -(s16)sizeof(u32);	/* These should be in the   */
+	inc->incr_dst	= sizeof(u32);		/* task image, but we stick */
+	inc->incr_dst_ma = sizeof(u8);	/* to the official ones     */
+
+	/* Reset the BDs */
+	tsk->index = 0;
+	tsk->outdex = 0;
+
+	memset(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
+
+	/* Configure some stuff */
+	bcom_set_task_pragma(tsk->tasknum, BCOM_FEC_RX_BD_PRAGMA);
+	bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum);
+
+	out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_FEC_RX], BCOM_IPR_FEC_RX);
+
+	out_be32(&bcom_eng->regs->IntPend, 1<<tsk->tasknum);	/* Clear ints */
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bcom_fec_rx_reset);
+
+void
+bcom_fec_rx_release(struct bcom_task *tsk)
+{
+	/* Nothing special for the FEC tasks */
+	bcom_task_free(tsk);
+}
+EXPORT_SYMBOL_GPL(bcom_fec_rx_release);
+
+
+
+/*
+ * Return the 2nd to last DRD.
+ * This is an ugly hack, but at least it's only done once at initialization.
+ */
+static u32 *self_modified_drd(int tasknum)
+{
+	u32 *desc;
+	int num_descs;
+	int drd_count;
+	int i;
+
+	num_descs = bcom_task_num_descs(tasknum);
+	desc = bcom_task_desc(tasknum) + num_descs - 1;
+	drd_count = 0;
+	for (i=0; i<num_descs; i++, desc--)
+		if (bcom_desc_is_drd(*desc) && ++drd_count == 3)
+			break;
+	return desc;
+}
+
+struct bcom_task *
+bcom_fec_tx_init(int queue_len, phys_addr_t fifo)
+{
+	struct bcom_task *tsk;
+	struct bcom_fec_priv *priv;
+
+	tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_fec_bd),
+				sizeof(struct bcom_fec_priv));
+	if (!tsk)
+		return NULL;
+
+	tsk->flags = BCOM_FLAGS_ENABLE_TASK;
+
+	priv = tsk->priv;
+	priv->fifo = fifo;
+
+	if (bcom_fec_tx_reset(tsk)) {
+		bcom_task_free(tsk);
+		return NULL;
+	}
+
+	return tsk;
+}
+EXPORT_SYMBOL_GPL(bcom_fec_tx_init);
+
+int
+bcom_fec_tx_reset(struct bcom_task *tsk)
+{
+	struct bcom_fec_priv *priv = tsk->priv;
+	struct bcom_fec_tx_var *var;
+	struct bcom_fec_tx_inc *inc;
+
+	/* Shutdown the task */
+	bcom_disable_task(tsk->tasknum);
+
+	/* Reset the microcode */
+	var = (struct bcom_fec_tx_var *) bcom_task_var(tsk->tasknum);
+	inc = (struct bcom_fec_tx_inc *) bcom_task_inc(tsk->tasknum);
+
+	if (bcom_load_image(tsk->tasknum, bcom_fec_tx_task))
+		return -1;
+
+	var->enable	= bcom_eng->regs_base +
+				offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]);
+	var->fifo	= (u32) priv->fifo;
+	var->DRD	= bcom_sram_va2pa(self_modified_drd(tsk->tasknum));
+	var->bd_base	= tsk->bd_pa;
+	var->bd_last	= tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size);
+	var->bd_start	= tsk->bd_pa;
+
+	inc->incr_bytes	= -(s16)sizeof(u32);	/* These should be in the   */
+	inc->incr_src	= sizeof(u32);		/* task image, but we stick */
+	inc->incr_src_ma = sizeof(u8);	/* to the official ones     */
+
+	/* Reset the BDs */
+	tsk->index = 0;
+	tsk->outdex = 0;
+
+	memset(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
+
+	/* Configure some stuff */
+	bcom_set_task_pragma(tsk->tasknum, BCOM_FEC_TX_BD_PRAGMA);
+	bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum);
+
+	out_8(&bcom_eng->regs->ipr[BCOM_INITIATOR_FEC_TX], BCOM_IPR_FEC_TX);
+
+	out_be32(&bcom_eng->regs->IntPend, 1<<tsk->tasknum);	/* Clear ints */
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bcom_fec_tx_reset);
+
+void
+bcom_fec_tx_release(struct bcom_task *tsk)
+{
+	/* Nothing special for the FEC tasks */
+	bcom_task_free(tsk);
+}
+EXPORT_SYMBOL_GPL(bcom_fec_tx_release);
+
+
+MODULE_DESCRIPTION("BestComm FEC tasks driver");
+MODULE_AUTHOR("Dale Farnsworth <dfarnsworth@mvista.com>");
+MODULE_LICENSE("GPL v2");
+
diff --git a/drivers/dma/bestcomm/gen_bd.c b/drivers/dma/bestcomm/gen_bd.c
new file mode 100644
index 0000000..1a5b22d
--- /dev/null
+++ b/drivers/dma/bestcomm/gen_bd.c
@@ -0,0 +1,354 @@
+/*
+ * Driver for MPC52xx processor BestComm General Buffer Descriptor
+ *
+ * Copyright (C) 2007 Sylvain Munaut <tnt@246tNt.com>
+ * Copyright (C) 2006 AppSpec Computer Technologies Corp.
+ *                    Jeff Gibbons <jeff.gibbons@appspec.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <asm/errno.h>
+#include <asm/io.h>
+
+#include <asm/mpc52xx.h>
+#include <asm/mpc52xx_psc.h>
+
+#include <linux/fsl/bestcomm/bestcomm.h>
+#include <linux/fsl/bestcomm/bestcomm_priv.h>
+#include <linux/fsl/bestcomm/gen_bd.h>
+
+
+/* ======================================================================== */
+/* Task image/var/inc                                                       */
+/* ======================================================================== */
+
+/* gen_bd tasks images */
+extern u32 bcom_gen_bd_rx_task[];
+extern u32 bcom_gen_bd_tx_task[];
+
+/* rx task vars that need to be set before enabling the task */
+struct bcom_gen_bd_rx_var {
+	u32 enable;		/* (u16*) address of task's control register */
+	u32 fifo;		/* (u32*) address of gen_bd's fifo */
+	u32 bd_base;		/* (struct bcom_bd*) beginning of ring buffer */
+	u32 bd_last;		/* (struct bcom_bd*) end of ring buffer */
+	u32 bd_start;		/* (struct bcom_bd*) current bd */
+	u32 buffer_size;	/* size of receive buffer */
+};
+
+/* rx task incs that need to be set before enabling the task */
+struct bcom_gen_bd_rx_inc {
+	u16 pad0;
+	s16 incr_bytes;
+	u16 pad1;
+	s16 incr_dst;
+};
+
+/* tx task vars that need to be set before enabling the task */
+struct bcom_gen_bd_tx_var {
+	u32 fifo;		/* (u32*) address of gen_bd's fifo */
+	u32 enable;		/* (u16*) address of task's control register */
+	u32 bd_base;		/* (struct bcom_bd*) beginning of ring buffer */
+	u32 bd_last;		/* (struct bcom_bd*) end of ring buffer */
+	u32 bd_start;		/* (struct bcom_bd*) current bd */
+	u32 buffer_size;	/* set by uCode for each packet */
+};
+
+/* tx task incs that need to be set before enabling the task */
+struct bcom_gen_bd_tx_inc {
+	u16 pad0;
+	s16 incr_bytes;
+	u16 pad1;
+	s16 incr_src;
+	u16 pad2;
+	s16 incr_src_ma;
+};
+
+/* private structure */
+struct bcom_gen_bd_priv {
+	phys_addr_t	fifo;
+	int		initiator;
+	int		ipr;
+	int		maxbufsize;
+};
+
+
+/* ======================================================================== */
+/* Task support code                                                        */
+/* ======================================================================== */
+
+struct bcom_task *
+bcom_gen_bd_rx_init(int queue_len, phys_addr_t fifo,
+			int initiator, int ipr, int maxbufsize)
+{
+	struct bcom_task *tsk;
+	struct bcom_gen_bd_priv *priv;
+
+	tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_gen_bd),
+			sizeof(struct bcom_gen_bd_priv));
+	if (!tsk)
+		return NULL;
+
+	tsk->flags = BCOM_FLAGS_NONE;
+
+	priv = tsk->priv;
+	priv->fifo	= fifo;
+	priv->initiator	= initiator;
+	priv->ipr	= ipr;
+	priv->maxbufsize = maxbufsize;
+
+	if (bcom_gen_bd_rx_reset(tsk)) {
+		bcom_task_free(tsk);
+		return NULL;
+	}
+
+	return tsk;
+}
+EXPORT_SYMBOL_GPL(bcom_gen_bd_rx_init);
+
+int
+bcom_gen_bd_rx_reset(struct bcom_task *tsk)
+{
+	struct bcom_gen_bd_priv *priv = tsk->priv;
+	struct bcom_gen_bd_rx_var *var;
+	struct bcom_gen_bd_rx_inc *inc;
+
+	/* Shutdown the task */
+	bcom_disable_task(tsk->tasknum);
+
+	/* Reset the microcode */
+	var = (struct bcom_gen_bd_rx_var *) bcom_task_var(tsk->tasknum);
+	inc = (struct bcom_gen_bd_rx_inc *) bcom_task_inc(tsk->tasknum);
+
+	if (bcom_load_image(tsk->tasknum, bcom_gen_bd_rx_task))
+		return -1;
+
+	var->enable	= bcom_eng->regs_base +
+				offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]);
+	var->fifo	= (u32) priv->fifo;
+	var->bd_base	= tsk->bd_pa;
+	var->bd_last	= tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size);
+	var->bd_start	= tsk->bd_pa;
+	var->buffer_size = priv->maxbufsize;
+
+	inc->incr_bytes	= -(s16)sizeof(u32);
+	inc->incr_dst	= sizeof(u32);
+
+	/* Reset the BDs */
+	tsk->index = 0;
+	tsk->outdex = 0;
+
+	memset(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
+
+	/* Configure some stuff */
+	bcom_set_task_pragma(tsk->tasknum, BCOM_GEN_RX_BD_PRAGMA);
+	bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum);
+
+	out_8(&bcom_eng->regs->ipr[priv->initiator], priv->ipr);
+	bcom_set_initiator(tsk->tasknum, priv->initiator);
+
+	out_be32(&bcom_eng->regs->IntPend, 1<<tsk->tasknum);	/* Clear ints */
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bcom_gen_bd_rx_reset);
+
+void
+bcom_gen_bd_rx_release(struct bcom_task *tsk)
+{
+	/* Nothing special for the GenBD tasks */
+	bcom_task_free(tsk);
+}
+EXPORT_SYMBOL_GPL(bcom_gen_bd_rx_release);
+
+
+struct bcom_task *
+bcom_gen_bd_tx_init(int queue_len, phys_addr_t fifo,
+			int initiator, int ipr)
+{
+	struct bcom_task *tsk;
+	struct bcom_gen_bd_priv *priv;
+
+	tsk = bcom_task_alloc(queue_len, sizeof(struct bcom_gen_bd),
+			sizeof(struct bcom_gen_bd_priv));
+	if (!tsk)
+		return NULL;
+
+	tsk->flags = BCOM_FLAGS_NONE;
+
+	priv = tsk->priv;
+	priv->fifo	= fifo;
+	priv->initiator	= initiator;
+	priv->ipr	= ipr;
+
+	if (bcom_gen_bd_tx_reset(tsk)) {
+		bcom_task_free(tsk);
+		return NULL;
+	}
+
+	return tsk;
+}
+EXPORT_SYMBOL_GPL(bcom_gen_bd_tx_init);
+
+int
+bcom_gen_bd_tx_reset(struct bcom_task *tsk)
+{
+	struct bcom_gen_bd_priv *priv = tsk->priv;
+	struct bcom_gen_bd_tx_var *var;
+	struct bcom_gen_bd_tx_inc *inc;
+
+	/* Shutdown the task */
+	bcom_disable_task(tsk->tasknum);
+
+	/* Reset the microcode */
+	var = (struct bcom_gen_bd_tx_var *) bcom_task_var(tsk->tasknum);
+	inc = (struct bcom_gen_bd_tx_inc *) bcom_task_inc(tsk->tasknum);
+
+	if (bcom_load_image(tsk->tasknum, bcom_gen_bd_tx_task))
+		return -1;
+
+	var->enable	= bcom_eng->regs_base +
+				offsetof(struct mpc52xx_sdma, tcr[tsk->tasknum]);
+	var->fifo	= (u32) priv->fifo;
+	var->bd_base	= tsk->bd_pa;
+	var->bd_last	= tsk->bd_pa + ((tsk->num_bd-1) * tsk->bd_size);
+	var->bd_start	= tsk->bd_pa;
+
+	inc->incr_bytes	= -(s16)sizeof(u32);
+	inc->incr_src	= sizeof(u32);
+	inc->incr_src_ma = sizeof(u8);
+
+	/* Reset the BDs */
+	tsk->index = 0;
+	tsk->outdex = 0;
+
+	memset(tsk->bd, 0x00, tsk->num_bd * tsk->bd_size);
+
+	/* Configure some stuff */
+	bcom_set_task_pragma(tsk->tasknum, BCOM_GEN_TX_BD_PRAGMA);
+	bcom_set_task_auto_start(tsk->tasknum, tsk->tasknum);
+
+	out_8(&bcom_eng->regs->ipr[priv->initiator], priv->ipr);
+	bcom_set_initiator(tsk->tasknum, priv->initiator);
+
+	out_be32(&bcom_eng->regs->IntPend, 1<<tsk->tasknum);	/* Clear ints */
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bcom_gen_bd_tx_reset);
+
+void
+bcom_gen_bd_tx_release(struct bcom_task *tsk)
+{
+	/* Nothing special for the GenBD tasks */
+	bcom_task_free(tsk);
+}
+EXPORT_SYMBOL_GPL(bcom_gen_bd_tx_release);
+
+/* ---------------------------------------------------------------------
+ * PSC support code
+ */
+
+/**
+ * struct bcom_psc_params - Bestcomm initialization value table for PSC devices
+ *
+ * This structure is only used internally.  It is a lookup table for PSC
+ * specific parameters to bestcomm tasks.
+ */
+static struct bcom_psc_params {
+	int rx_initiator;
+	int rx_ipr;
+	int tx_initiator;
+	int tx_ipr;
+} bcom_psc_params[] = {
+	[0] = {
+		.rx_initiator = BCOM_INITIATOR_PSC1_RX,
+		.rx_ipr = BCOM_IPR_PSC1_RX,
+		.tx_initiator = BCOM_INITIATOR_PSC1_TX,
+		.tx_ipr = BCOM_IPR_PSC1_TX,
+	},
+	[1] = {
+		.rx_initiator = BCOM_INITIATOR_PSC2_RX,
+		.rx_ipr = BCOM_IPR_PSC2_RX,
+		.tx_initiator = BCOM_INITIATOR_PSC2_TX,
+		.tx_ipr = BCOM_IPR_PSC2_TX,
+	},
+	[2] = {
+		.rx_initiator = BCOM_INITIATOR_PSC3_RX,
+		.rx_ipr = BCOM_IPR_PSC3_RX,
+		.tx_initiator = BCOM_INITIATOR_PSC3_TX,
+		.tx_ipr = BCOM_IPR_PSC3_TX,
+	},
+	[3] = {
+		.rx_initiator = BCOM_INITIATOR_PSC4_RX,
+		.rx_ipr = BCOM_IPR_PSC4_RX,
+		.tx_initiator = BCOM_INITIATOR_PSC4_TX,
+		.tx_ipr = BCOM_IPR_PSC4_TX,
+	},
+	[4] = {
+		.rx_initiator = BCOM_INITIATOR_PSC5_RX,
+		.rx_ipr = BCOM_IPR_PSC5_RX,
+		.tx_initiator = BCOM_INITIATOR_PSC5_TX,
+		.tx_ipr = BCOM_IPR_PSC5_TX,
+	},
+	[5] = {
+		.rx_initiator = BCOM_INITIATOR_PSC6_RX,
+		.rx_ipr = BCOM_IPR_PSC6_RX,
+		.tx_initiator = BCOM_INITIATOR_PSC6_TX,
+		.tx_ipr = BCOM_IPR_PSC6_TX,
+	},
+};
+
+/**
+ * bcom_psc_gen_bd_rx_init - Allocate a receive bcom_task for a PSC port
+ * @psc_num:	Number of the PSC to allocate a task for
+ * @queue_len:	number of buffer descriptors to allocate for the task
+ * @fifo:	physical address of FIFO register
+ * @maxbufsize:	Maximum receive data size in bytes.
+ *
+ * Allocate a bestcomm task structure for receiving data from a PSC.
+ */
+struct bcom_task * bcom_psc_gen_bd_rx_init(unsigned psc_num, int queue_len,
+					   phys_addr_t fifo, int maxbufsize)
+{
+	if (psc_num >= MPC52xx_PSC_MAXNUM)
+		return NULL;
+
+	return bcom_gen_bd_rx_init(queue_len, fifo,
+				   bcom_psc_params[psc_num].rx_initiator,
+				   bcom_psc_params[psc_num].rx_ipr,
+				   maxbufsize);
+}
+EXPORT_SYMBOL_GPL(bcom_psc_gen_bd_rx_init);
+
+/**
+ * bcom_psc_gen_bd_tx_init - Allocate a transmit bcom_task for a PSC port
+ * @psc_num:	Number of the PSC to allocate a task for
+ * @queue_len:	number of buffer descriptors to allocate for the task
+ * @fifo:	physical address of FIFO register
+ *
+ * Allocate a bestcomm task structure for transmitting data to a PSC.
+ */
+struct bcom_task *
+bcom_psc_gen_bd_tx_init(unsigned psc_num, int queue_len, phys_addr_t fifo)
+{
+	if (psc_num >= MPC52xx_PSC_MAXNUM)
+		return NULL;
+
+	return bcom_gen_bd_tx_init(queue_len, fifo,
+				   bcom_psc_params[psc_num].tx_initiator,
+				   bcom_psc_params[psc_num].tx_ipr);
+}
+EXPORT_SYMBOL_GPL(bcom_psc_gen_bd_tx_init);
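+
+/*
+ * Typical usage from a PSC client driver (illustrative sketch only: the PSC
+ * number, queue length and buffer size below are made-up examples, and a
+ * real caller still has to queue buffer descriptors and handle tsk->irq):
+ */
+#if 0	/* example, not compiled */
+static int example_psc_dma_setup(phys_addr_t psc_fifo_pa)
+{
+	struct bcom_task *rx, *tx;
+
+	rx = bcom_psc_gen_bd_rx_init(0, 32, psc_fifo_pa, 512);
+	if (!rx)
+		return -ENOMEM;
+
+	tx = bcom_psc_gen_bd_tx_init(0, 32, psc_fifo_pa);
+	if (!tx) {
+		bcom_gen_bd_rx_release(rx);
+		return -ENOMEM;
+	}
+
+	bcom_enable(rx);
+	bcom_enable(tx);
+	/* ... submit BDs, wait for tsk->irq, then release both tasks ... */
+	return 0;
+}
+#endif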
+
+
+MODULE_DESCRIPTION("BestComm General Buffer Descriptor tasks driver");
+MODULE_AUTHOR("Jeff Gibbons <jeff.gibbons@appspec.com>");
+MODULE_LICENSE("GPL v2");
+
diff --git a/drivers/dma/bestcomm/sram.c b/drivers/dma/bestcomm/sram.c
new file mode 100644
index 0000000..2074e0e
--- /dev/null
+++ b/drivers/dma/bestcomm/sram.c
@@ -0,0 +1,179 @@
+/*
+ * Simple memory allocator for on-board SRAM
+ *
+ *
+ * Maintainer : Sylvain Munaut <tnt@246tNt.com>
+ *
+ * Copyright (C) 2005 Sylvain Munaut <tnt@246tNt.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/ioport.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/io.h>
+#include <asm/mmu.h>
+
+#include <linux/fsl/bestcomm/sram.h>
+
+
+/* Struct keeping our 'state' */
+struct bcom_sram *bcom_sram = NULL;
+EXPORT_SYMBOL_GPL(bcom_sram);	/* needed for inline functions */
+
+
+/* ======================================================================== */
+/* Public API                                                               */
+/* ======================================================================== */
+/*
+ * DO NOT USE from interrupt context; if this is ever needed from an IRQ
+ * handler, the spin_locks below must be switched to their _irqsave variants.
+ */
+
+int bcom_sram_init(struct device_node *sram_node, char *owner)
+{
+	int rv;
+	const u32 *regaddr_p;
+	u64 regaddr64, size64;
+	unsigned int psize;
+
+	/* Create our state struct */
+	if (bcom_sram) {
+		printk(KERN_ERR "%s: bcom_sram_init: "
+			"Already initialized !\n", owner);
+		return -EBUSY;
+	}
+
+	bcom_sram = kmalloc(sizeof(struct bcom_sram), GFP_KERNEL);
+	if (!bcom_sram) {
+		printk(KERN_ERR "%s: bcom_sram_init: "
+			"Couldn't allocate internal state !\n", owner);
+		return -ENOMEM;
+	}
+
+	/* Get address and size of the sram */
+	regaddr_p = of_get_address(sram_node, 0, &size64, NULL);
+	if (!regaddr_p) {
+		printk(KERN_ERR "%s: bcom_sram_init: "
+			"Invalid device node !\n", owner);
+		rv = -EINVAL;
+		goto error_free;
+	}
+
+	regaddr64 = of_translate_address(sram_node, regaddr_p);
+
+	bcom_sram->base_phys = (phys_addr_t) regaddr64;
+	bcom_sram->size = (unsigned int) size64;
+
+	/* Request region */
+	if (!request_mem_region(bcom_sram->base_phys, bcom_sram->size, owner)) {
+		printk(KERN_ERR "%s: bcom_sram_init: "
+			"Couldn't request region !\n", owner);
+		rv = -EBUSY;
+		goto error_free;
+	}
+
+	/* Map SRAM (it is not really __iomem, so drop the annotation) */
+	bcom_sram->base_virt = (void *)ioremap(bcom_sram->base_phys, bcom_sram->size);
+
+	if (!bcom_sram->base_virt) {
+		printk(KERN_ERR "%s: bcom_sram_init: "
+			"Map error SRAM zone 0x%08lx (0x%0x)!\n",
+			owner, (long)bcom_sram->base_phys, bcom_sram->size );
+		rv = -ENOMEM;
+		goto error_release;
+	}
+
+	/* Create an rheap (defaults to 32 bits word alignment) */
+	bcom_sram->rh = rh_create(4);
+
+	/* Attach the free zones */
+#if 0
+	/* Currently disabled ... for future use only */
+	regaddr_p = of_get_property(sram_node, "available", &psize);
+#else
+	regaddr_p = NULL;
+	psize = 0;
+#endif
+
+	if (!regaddr_p || !psize) {
+		/* Attach the whole zone */
+		rh_attach_region(bcom_sram->rh, 0, bcom_sram->size);
+	} else {
+		/* Attach each zone independently */
+		while (psize >= 2 * sizeof(u32)) {
+			phys_addr_t zbase = of_translate_address(sram_node, regaddr_p);
+			rh_attach_region(bcom_sram->rh, zbase - bcom_sram->base_phys, regaddr_p[1]);
+			regaddr_p += 2;
+			psize -= 2 * sizeof(u32);
+		}
+	}
+
+	/* Init our spinlock */
+	spin_lock_init(&bcom_sram->lock);
+
+	return 0;
+
+error_release:
+	release_mem_region(bcom_sram->base_phys, bcom_sram->size);
+error_free:
+	kfree(bcom_sram);
+	bcom_sram = NULL;
+
+	return rv;
+}
+EXPORT_SYMBOL_GPL(bcom_sram_init);
+
+void bcom_sram_cleanup(void)
+{
+	/* Free resources */
+	if (bcom_sram) {
+		rh_destroy(bcom_sram->rh);
+		iounmap((void __iomem *)bcom_sram->base_virt);
+		release_mem_region(bcom_sram->base_phys, bcom_sram->size);
+		kfree(bcom_sram);
+		bcom_sram = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(bcom_sram_cleanup);
+
+void* bcom_sram_alloc(int size, int align, phys_addr_t *phys)
+{
+	unsigned long offset;
+
+	spin_lock(&bcom_sram->lock);
+	offset = rh_alloc_align(bcom_sram->rh, size, align, NULL);
+	spin_unlock(&bcom_sram->lock);
+
+	if (IS_ERR_VALUE(offset))
+		return NULL;
+
+	*phys = bcom_sram->base_phys + offset;
+	return bcom_sram->base_virt + offset;
+}
+EXPORT_SYMBOL_GPL(bcom_sram_alloc);
+
+void bcom_sram_free(void *ptr)
+{
+	unsigned long offset;
+
+	if (!ptr)
+		return;
+
+	offset = ptr - bcom_sram->base_virt;
+
+	spin_lock(&bcom_sram->lock);
+	rh_free(bcom_sram->rh, offset);
+	spin_unlock(&bcom_sram->lock);
+}
+EXPORT_SYMBOL_GPL(bcom_sram_free);
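+
+/*
+ * Typical allocation pattern (illustrative sketch only; the size and
+ * alignment below are made-up examples):
+ */
+#if 0	/* example, not compiled */
+static void *example_sram_buffer(phys_addr_t *pa)
+{
+	/* 64 bytes, 32-bit aligned; *pa receives the SRAM physical address */
+	void *va = bcom_sram_alloc(64, 4, pa);
+
+	if (va)
+		memset(va, 0, 64);
+	return va;	/* released later with bcom_sram_free(va) */
+}
+#endif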
+
diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c
new file mode 100644
index 0000000..da74fd7
--- /dev/null
+++ b/drivers/dma/coh901318.c
@@ -0,0 +1,2802 @@
+/*
+ * drivers/dma/coh901318.c
+ *
+ * Copyright (C) 2007-2009 ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ * DMA driver for COH 901 318
+ * Author: Per Friden <per.friden@stericsson.com>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h> /* printk() */
+#include <linux/fs.h> /* everything... */
+#include <linux/scatterlist.h>
+#include <linux/slab.h> /* kmalloc() */
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/irqreturn.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <linux/platform_data/dma-coh901318.h>
+#include <linux/of_dma.h>
+
+#include "coh901318.h"
+#include "dmaengine.h"
+
+#define COH901318_MOD32_MASK					(0x1F)
+#define COH901318_WORD_MASK					(0xFFFFFFFF)
+/* INT_STATUS - Interrupt Status Registers 32bit (R/-) */
+#define COH901318_INT_STATUS1					(0x0000)
+#define COH901318_INT_STATUS2					(0x0004)
+/* TC_INT_STATUS - Terminal Count Interrupt Status Registers 32bit (R/-) */
+#define COH901318_TC_INT_STATUS1				(0x0008)
+#define COH901318_TC_INT_STATUS2				(0x000C)
+/* TC_INT_CLEAR - Terminal Count Interrupt Clear Registers 32bit (-/W) */
+#define COH901318_TC_INT_CLEAR1					(0x0010)
+#define COH901318_TC_INT_CLEAR2					(0x0014)
+/* RAW_TC_INT_STATUS - Raw Term Count Interrupt Status Registers 32bit (R/-) */
+#define COH901318_RAW_TC_INT_STATUS1				(0x0018)
+#define COH901318_RAW_TC_INT_STATUS2				(0x001C)
+/* BE_INT_STATUS - Bus Error Interrupt Status Registers 32bit (R/-) */
+#define COH901318_BE_INT_STATUS1				(0x0020)
+#define COH901318_BE_INT_STATUS2				(0x0024)
+/* BE_INT_CLEAR - Bus Error Interrupt Clear Registers 32bit (-/W) */
+#define COH901318_BE_INT_CLEAR1					(0x0028)
+#define COH901318_BE_INT_CLEAR2					(0x002C)
+/* RAW_BE_INT_STATUS - Raw Term Count Interrupt Status Registers 32bit (R/-) */
+#define COH901318_RAW_BE_INT_STATUS1				(0x0030)
+#define COH901318_RAW_BE_INT_STATUS2				(0x0034)
+
+/*
+ * CX_CFG - Channel Configuration Registers 32bit (R/W)
+ */
+#define COH901318_CX_CFG					(0x0100)
+#define COH901318_CX_CFG_SPACING				(0x04)
+/* Channel enable activates the DMA job */
+#define COH901318_CX_CFG_CH_ENABLE				(0x00000001)
+#define COH901318_CX_CFG_CH_DISABLE				(0x00000000)
+/* Request Mode */
+#define COH901318_CX_CFG_RM_MASK				(0x00000006)
+#define COH901318_CX_CFG_RM_MEMORY_TO_MEMORY			(0x0 << 1)
+#define COH901318_CX_CFG_RM_PRIMARY_TO_MEMORY			(0x1 << 1)
+#define COH901318_CX_CFG_RM_MEMORY_TO_PRIMARY			(0x1 << 1)
+#define COH901318_CX_CFG_RM_PRIMARY_TO_SECONDARY		(0x3 << 1)
+#define COH901318_CX_CFG_RM_SECONDARY_TO_PRIMARY		(0x3 << 1)
+/* Linked channel request field. RM must be 0x3 (binary 11) */
+#define COH901318_CX_CFG_LCRF_SHIFT				3
+#define COH901318_CX_CFG_LCRF_MASK				(0x000001F8)
+#define COH901318_CX_CFG_LCR_DISABLE				(0x00000000)
+/* Terminal Counter Interrupt Request Mask */
+#define COH901318_CX_CFG_TC_IRQ_ENABLE				(0x00000200)
+#define COH901318_CX_CFG_TC_IRQ_DISABLE				(0x00000000)
+/* Bus Error interrupt Mask */
+#define COH901318_CX_CFG_BE_IRQ_ENABLE				(0x00000400)
+#define COH901318_CX_CFG_BE_IRQ_DISABLE				(0x00000000)
+
+/*
+ * CX_STAT - Channel Status Registers 32bit (R/-)
+ */
+#define COH901318_CX_STAT					(0x0200)
+#define COH901318_CX_STAT_SPACING				(0x04)
+#define COH901318_CX_STAT_RBE_IRQ_IND				(0x00000008)
+#define COH901318_CX_STAT_RTC_IRQ_IND				(0x00000004)
+#define COH901318_CX_STAT_ACTIVE				(0x00000002)
+#define COH901318_CX_STAT_ENABLED				(0x00000001)
+
+/*
+ * CX_CTRL - Channel Control Registers 32bit (R/W)
+ */
+#define COH901318_CX_CTRL					(0x0400)
+#define COH901318_CX_CTRL_SPACING				(0x10)
+/* Transfer Count Enable */
+#define COH901318_CX_CTRL_TC_ENABLE				(0x00001000)
+#define COH901318_CX_CTRL_TC_DISABLE				(0x00000000)
+/* Transfer Count Value 0 - 4095 */
+#define COH901318_CX_CTRL_TC_VALUE_MASK				(0x00000FFF)
+/* Burst count */
+#define COH901318_CX_CTRL_BURST_COUNT_MASK			(0x0000E000)
+#define COH901318_CX_CTRL_BURST_COUNT_64_BYTES			(0x7 << 13)
+#define COH901318_CX_CTRL_BURST_COUNT_48_BYTES			(0x6 << 13)
+#define COH901318_CX_CTRL_BURST_COUNT_32_BYTES			(0x5 << 13)
+#define COH901318_CX_CTRL_BURST_COUNT_16_BYTES			(0x4 << 13)
+#define COH901318_CX_CTRL_BURST_COUNT_8_BYTES			(0x3 << 13)
+#define COH901318_CX_CTRL_BURST_COUNT_4_BYTES			(0x2 << 13)
+#define COH901318_CX_CTRL_BURST_COUNT_2_BYTES			(0x1 << 13)
+#define COH901318_CX_CTRL_BURST_COUNT_1_BYTE			(0x0 << 13)
+/* Source bus size  */
+#define COH901318_CX_CTRL_SRC_BUS_SIZE_MASK			(0x00030000)
+#define COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS			(0x2 << 16)
+#define COH901318_CX_CTRL_SRC_BUS_SIZE_16_BITS			(0x1 << 16)
+#define COH901318_CX_CTRL_SRC_BUS_SIZE_8_BITS			(0x0 << 16)
+/* Source address increment */
+#define COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE			(0x00040000)
+#define COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE			(0x00000000)
+/* Destination Bus Size */
+#define COH901318_CX_CTRL_DST_BUS_SIZE_MASK			(0x00180000)
+#define COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS			(0x2 << 19)
+#define COH901318_CX_CTRL_DST_BUS_SIZE_16_BITS			(0x1 << 19)
+#define COH901318_CX_CTRL_DST_BUS_SIZE_8_BITS			(0x0 << 19)
+/* Destination address increment */
+#define COH901318_CX_CTRL_DST_ADDR_INC_ENABLE			(0x00200000)
+#define COH901318_CX_CTRL_DST_ADDR_INC_DISABLE			(0x00000000)
+/* Master Mode (Master2 is only connected to MSL) */
+#define COH901318_CX_CTRL_MASTER_MODE_MASK			(0x00C00000)
+#define COH901318_CX_CTRL_MASTER_MODE_M2R_M1W			(0x3 << 22)
+#define COH901318_CX_CTRL_MASTER_MODE_M1R_M2W			(0x2 << 22)
+#define COH901318_CX_CTRL_MASTER_MODE_M2RW			(0x1 << 22)
+#define COH901318_CX_CTRL_MASTER_MODE_M1RW			(0x0 << 22)
+/* Terminal Count flag to PER enable */
+#define COH901318_CX_CTRL_TCP_ENABLE				(0x01000000)
+#define COH901318_CX_CTRL_TCP_DISABLE				(0x00000000)
+/* Terminal Count flags to CPU enable */
+#define COH901318_CX_CTRL_TC_IRQ_ENABLE				(0x02000000)
+#define COH901318_CX_CTRL_TC_IRQ_DISABLE			(0x00000000)
+/* Hand shake to peripheral */
+#define COH901318_CX_CTRL_HSP_ENABLE				(0x04000000)
+#define COH901318_CX_CTRL_HSP_DISABLE				(0x00000000)
+#define COH901318_CX_CTRL_HSS_ENABLE				(0x08000000)
+#define COH901318_CX_CTRL_HSS_DISABLE				(0x00000000)
+/* DMA mode */
+#define COH901318_CX_CTRL_DDMA_MASK				(0x30000000)
+#define COH901318_CX_CTRL_DDMA_LEGACY				(0x0 << 28)
+#define COH901318_CX_CTRL_DDMA_DEMAND_DMA1			(0x1 << 28)
+#define COH901318_CX_CTRL_DDMA_DEMAND_DMA2			(0x2 << 28)
+/* Primary Request Data Destination */
+#define COH901318_CX_CTRL_PRDD_MASK				(0x40000000)
+#define COH901318_CX_CTRL_PRDD_DEST				(0x1 << 30)
+#define COH901318_CX_CTRL_PRDD_SOURCE				(0x0 << 30)
+
+/*
+ * CX_SRC_ADDR - Channel Source Address Registers 32bit (R/W)
+ */
+#define COH901318_CX_SRC_ADDR					(0x0404)
+#define COH901318_CX_SRC_ADDR_SPACING				(0x10)
+
+/*
+ * CX_DST_ADDR - Channel Destination Address Registers 32bit R/W
+ */
+#define COH901318_CX_DST_ADDR					(0x0408)
+#define COH901318_CX_DST_ADDR_SPACING				(0x10)
+
+/*
+ * CX_LNK_ADDR - Channel Link Address Registers 32bit (R/W)
+ */
+#define COH901318_CX_LNK_ADDR					(0x040C)
+#define COH901318_CX_LNK_ADDR_SPACING				(0x10)
+#define COH901318_CX_LNK_LINK_IMMEDIATE				(0x00000001)
+
+/**
+ * struct coh901318_params - parameters for DMAC configuration
+ * @config: DMA config register
+ * @ctrl_lli_last: DMA control register for the last lli in the list
+ * @ctrl_lli: DMA control register for an lli
+ * @ctrl_lli_chained: DMA control register for a chained lli
+ */
+struct coh901318_params {
+	u32 config;
+	u32 ctrl_lli_last;
+	u32 ctrl_lli;
+	u32 ctrl_lli_chained;
+};
+
+/**
+ * struct coh_dma_channel - dma channel base
+ * @name: ascii name of dma channel
+ * @number: channel id number
+ * @desc_nbr_max: number of preallocated descriptors
+ * @priority_high: channel priority; 0 means low priority, any other value high.
+ * @param: configuration parameters
+ */
+struct coh_dma_channel {
+	const char name[32];
+	const int number;
+	const int desc_nbr_max;
+	const int priority_high;
+	const struct coh901318_params param;
+};
+
+/**
+ * struct powersave - DMA power save structure
+ * @lock: lock protecting data in this struct
+ * @started_channels: bit mask indicating active dma channels
+ */
+struct powersave {
+	spinlock_t lock;
+	u64 started_channels;
+};
+
+/* Lists all DMA slave channels.
+ * Syntax is [A1, B1, A2, B2, ..., -1, -1]: each (A, B) pair selects all
+ * channels from A to B inclusive, and the end of the list is marked by
+ * -1, -1 (see the membership sketch below the memcpy list).
+ */
+static int dma_slave_channels[] = {
+	U300_DMA_MSL_TX_0, U300_DMA_SPI_RX,
+	U300_DMA_UART1_TX, U300_DMA_UART1_RX, -1, -1};
+
+/* Lists all DMA memcpy channels, using the same syntax as above. */
+static int dma_memcpy_channels[] = {
+	U300_DMA_GENERAL_PURPOSE_0, U300_DMA_GENERAL_PURPOSE_8, -1, -1};
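+
+/*
+ * Membership test sketch for the [A1, B1, A2, B2, ..., -1, -1] encoding used
+ * by the two channel lists above (illustrative only, not driver code):
+ */
+#if 0	/* example, not compiled */
+static bool example_channel_in_list(const int *list, int ch)
+{
+	int i;
+
+	for (i = 0; list[i] != -1; i += 2)
+		if (ch >= list[i] && ch <= list[i + 1])
+			return true;
+	return false;
+}
+#endif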
+
+#define flags_memcpy_config (COH901318_CX_CFG_CH_DISABLE | \
+			COH901318_CX_CFG_RM_MEMORY_TO_MEMORY | \
+			COH901318_CX_CFG_LCR_DISABLE | \
+			COH901318_CX_CFG_TC_IRQ_ENABLE | \
+			COH901318_CX_CFG_BE_IRQ_ENABLE)
+#define flags_memcpy_lli_chained (COH901318_CX_CTRL_TC_ENABLE | \
+			COH901318_CX_CTRL_BURST_COUNT_32_BYTES | \
+			COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS | \
+			COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE | \
+			COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS | \
+			COH901318_CX_CTRL_DST_ADDR_INC_ENABLE | \
+			COH901318_CX_CTRL_MASTER_MODE_M1RW | \
+			COH901318_CX_CTRL_TCP_DISABLE | \
+			COH901318_CX_CTRL_TC_IRQ_DISABLE | \
+			COH901318_CX_CTRL_HSP_DISABLE | \
+			COH901318_CX_CTRL_HSS_DISABLE | \
+			COH901318_CX_CTRL_DDMA_LEGACY | \
+			COH901318_CX_CTRL_PRDD_SOURCE)
+#define flags_memcpy_lli (COH901318_CX_CTRL_TC_ENABLE | \
+			COH901318_CX_CTRL_BURST_COUNT_32_BYTES | \
+			COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS | \
+			COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE | \
+			COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS | \
+			COH901318_CX_CTRL_DST_ADDR_INC_ENABLE | \
+			COH901318_CX_CTRL_MASTER_MODE_M1RW | \
+			COH901318_CX_CTRL_TCP_DISABLE | \
+			COH901318_CX_CTRL_TC_IRQ_DISABLE | \
+			COH901318_CX_CTRL_HSP_DISABLE | \
+			COH901318_CX_CTRL_HSS_DISABLE | \
+			COH901318_CX_CTRL_DDMA_LEGACY | \
+			COH901318_CX_CTRL_PRDD_SOURCE)
+#define flags_memcpy_lli_last (COH901318_CX_CTRL_TC_ENABLE | \
+			COH901318_CX_CTRL_BURST_COUNT_32_BYTES | \
+			COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS | \
+			COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE | \
+			COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS | \
+			COH901318_CX_CTRL_DST_ADDR_INC_ENABLE | \
+			COH901318_CX_CTRL_MASTER_MODE_M1RW | \
+			COH901318_CX_CTRL_TCP_DISABLE | \
+			COH901318_CX_CTRL_TC_IRQ_ENABLE | \
+			COH901318_CX_CTRL_HSP_DISABLE | \
+			COH901318_CX_CTRL_HSS_DISABLE | \
+			COH901318_CX_CTRL_DDMA_LEGACY | \
+			COH901318_CX_CTRL_PRDD_SOURCE)
+
+static const struct coh_dma_channel chan_config[U300_DMA_CHANNELS] = {
+	{
+		.number = U300_DMA_MSL_TX_0,
+		.name = "MSL TX 0",
+		.priority_high = 0,
+	},
+	{
+		.number = U300_DMA_MSL_TX_1,
+		.name = "MSL TX 1",
+		.priority_high = 0,
+		.param.config = COH901318_CX_CFG_CH_DISABLE |
+				COH901318_CX_CFG_LCR_DISABLE |
+				COH901318_CX_CFG_TC_IRQ_ENABLE |
+				COH901318_CX_CFG_BE_IRQ_ENABLE,
+		.param.ctrl_lli_chained = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_SOURCE,
+		.param.ctrl_lli = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_SOURCE,
+		.param.ctrl_lli_last = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_SOURCE,
+	},
+	{
+		.number = U300_DMA_MSL_TX_2,
+		.name = "MSL TX 2",
+		.priority_high = 0,
+		.param.config = COH901318_CX_CFG_CH_DISABLE |
+				COH901318_CX_CFG_LCR_DISABLE |
+				COH901318_CX_CFG_TC_IRQ_ENABLE |
+				COH901318_CX_CFG_BE_IRQ_ENABLE,
+		.param.ctrl_lli_chained = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_SOURCE,
+		.param.ctrl_lli = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_SOURCE,
+		.param.ctrl_lli_last = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_SOURCE,
+		.desc_nbr_max = 10,
+	},
+	{
+		.number = U300_DMA_MSL_TX_3,
+		.name = "MSL TX 3",
+		.priority_high = 0,
+		.param.config = COH901318_CX_CFG_CH_DISABLE |
+				COH901318_CX_CFG_LCR_DISABLE |
+				COH901318_CX_CFG_TC_IRQ_ENABLE |
+				COH901318_CX_CFG_BE_IRQ_ENABLE,
+		.param.ctrl_lli_chained = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_SOURCE,
+		.param.ctrl_lli = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_SOURCE,
+		.param.ctrl_lli_last = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_SOURCE,
+	},
+	{
+		.number = U300_DMA_MSL_TX_4,
+		.name = "MSL TX 4",
+		.priority_high = 0,
+		.param.config = COH901318_CX_CFG_CH_DISABLE |
+				COH901318_CX_CFG_LCR_DISABLE |
+				COH901318_CX_CFG_TC_IRQ_ENABLE |
+				COH901318_CX_CFG_BE_IRQ_ENABLE,
+		.param.ctrl_lli_chained = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_SOURCE,
+		.param.ctrl_lli = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_SOURCE,
+		.param.ctrl_lli_last = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_SOURCE,
+	},
+	{
+		.number = U300_DMA_MSL_TX_5,
+		.name = "MSL TX 5",
+		.priority_high = 0,
+	},
+	{
+		.number = U300_DMA_MSL_TX_6,
+		.name = "MSL TX 6",
+		.priority_high = 0,
+	},
+	{
+		.number = U300_DMA_MSL_RX_0,
+		.name = "MSL RX 0",
+		.priority_high = 0,
+	},
+	{
+		.number = U300_DMA_MSL_RX_1,
+		.name = "MSL RX 1",
+		.priority_high = 0,
+		.param.config = COH901318_CX_CFG_CH_DISABLE |
+				COH901318_CX_CFG_LCR_DISABLE |
+				COH901318_CX_CFG_TC_IRQ_ENABLE |
+				COH901318_CX_CFG_BE_IRQ_ENABLE,
+		.param.ctrl_lli_chained = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
+				COH901318_CX_CTRL_PRDD_DEST,
+		.param.ctrl_lli = 0,
+		.param.ctrl_lli_last = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
+				COH901318_CX_CTRL_PRDD_DEST,
+	},
+	{
+		.number = U300_DMA_MSL_RX_2,
+		.name = "MSL RX 2",
+		.priority_high = 0,
+		.param.config = COH901318_CX_CFG_CH_DISABLE |
+				COH901318_CX_CFG_LCR_DISABLE |
+				COH901318_CX_CFG_TC_IRQ_ENABLE |
+				COH901318_CX_CFG_BE_IRQ_ENABLE,
+		.param.ctrl_lli_chained = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
+				COH901318_CX_CTRL_PRDD_DEST,
+		.param.ctrl_lli = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
+				COH901318_CX_CTRL_PRDD_DEST,
+		.param.ctrl_lli_last = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
+				COH901318_CX_CTRL_PRDD_DEST,
+	},
+	{
+		.number = U300_DMA_MSL_RX_3,
+		.name = "MSL RX 3",
+		.priority_high = 0,
+		.param.config = COH901318_CX_CFG_CH_DISABLE |
+				COH901318_CX_CFG_LCR_DISABLE |
+				COH901318_CX_CFG_TC_IRQ_ENABLE |
+				COH901318_CX_CFG_BE_IRQ_ENABLE,
+		.param.ctrl_lli_chained = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
+				COH901318_CX_CTRL_PRDD_DEST,
+		.param.ctrl_lli = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
+				COH901318_CX_CTRL_PRDD_DEST,
+		.param.ctrl_lli_last = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
+				COH901318_CX_CTRL_PRDD_DEST,
+	},
+	{
+		.number = U300_DMA_MSL_RX_4,
+		.name = "MSL RX 4",
+		.priority_high = 0,
+		.param.config = COH901318_CX_CFG_CH_DISABLE |
+				COH901318_CX_CFG_LCR_DISABLE |
+				COH901318_CX_CFG_TC_IRQ_ENABLE |
+				COH901318_CX_CFG_BE_IRQ_ENABLE,
+		.param.ctrl_lli_chained = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
+				COH901318_CX_CTRL_PRDD_DEST,
+		.param.ctrl_lli = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
+				COH901318_CX_CTRL_PRDD_DEST,
+		.param.ctrl_lli_last = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
+				COH901318_CX_CTRL_PRDD_DEST,
+	},
+	{
+		.number = U300_DMA_MSL_RX_5,
+		.name = "MSL RX 5",
+		.priority_high = 0,
+		.param.config = COH901318_CX_CFG_CH_DISABLE |
+				COH901318_CX_CFG_LCR_DISABLE |
+				COH901318_CX_CFG_TC_IRQ_ENABLE |
+				COH901318_CX_CFG_BE_IRQ_ENABLE,
+		.param.ctrl_lli_chained = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
+				COH901318_CX_CTRL_PRDD_DEST,
+		.param.ctrl_lli = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
+				COH901318_CX_CTRL_PRDD_DEST,
+		.param.ctrl_lli_last = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
+				COH901318_CX_CTRL_PRDD_DEST,
+	},
+	{
+		.number = U300_DMA_MSL_RX_6,
+		.name = "MSL RX 6",
+		.priority_high = 0,
+	},
+	/*
+	 * Don't set up device address, burst count or size of src
+	 * or dst bus for this peripheral - handled by PrimeCell
+	 * DMA extension.
+	 */
+	{
+		.number = U300_DMA_MMCSD_RX_TX,
+		.name = "MMCSD RX TX",
+		.priority_high = 0,
+		.param.config = COH901318_CX_CFG_CH_DISABLE |
+				COH901318_CX_CFG_LCR_DISABLE |
+				COH901318_CX_CFG_TC_IRQ_ENABLE |
+				COH901318_CX_CFG_BE_IRQ_ENABLE,
+		.param.ctrl_lli_chained = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY,
+		.param.ctrl_lli = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY,
+		.param.ctrl_lli_last = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY,
+
+	},
+	{
+		.number = U300_DMA_MSPRO_TX,
+		.name = "MSPRO TX",
+		.priority_high = 0,
+	},
+	{
+		.number = U300_DMA_MSPRO_RX,
+		.name = "MSPRO RX",
+		.priority_high = 0,
+	},
+	/*
+	 * Don't set up device address, burst count or size of src
+	 * or dst bus for this peripheral - handled by PrimeCell
+	 * DMA extension.
+	 */
+	{
+		.number = U300_DMA_UART0_TX,
+		.name = "UART0 TX",
+		.priority_high = 0,
+		.param.config = COH901318_CX_CFG_CH_DISABLE |
+				COH901318_CX_CFG_LCR_DISABLE |
+				COH901318_CX_CFG_TC_IRQ_ENABLE |
+				COH901318_CX_CFG_BE_IRQ_ENABLE,
+		.param.ctrl_lli_chained = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY,
+		.param.ctrl_lli = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY,
+		.param.ctrl_lli_last = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY,
+	},
+	{
+		.number = U300_DMA_UART0_RX,
+		.name = "UART0 RX",
+		.priority_high = 0,
+		.param.config = COH901318_CX_CFG_CH_DISABLE |
+				COH901318_CX_CFG_LCR_DISABLE |
+				COH901318_CX_CFG_TC_IRQ_ENABLE |
+				COH901318_CX_CFG_BE_IRQ_ENABLE,
+		.param.ctrl_lli_chained = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY,
+		.param.ctrl_lli = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY,
+		.param.ctrl_lli_last = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY,
+	},
+	{
+		.number = U300_DMA_APEX_TX,
+		.name = "APEX TX",
+		.priority_high = 0,
+	},
+	{
+		.number = U300_DMA_APEX_RX,
+		.name = "APEX RX",
+		.priority_high = 0,
+	},
+	{
+		.number = U300_DMA_PCM_I2S0_TX,
+		.name = "PCM I2S0 TX",
+		.priority_high = 1,
+		.param.config = COH901318_CX_CFG_CH_DISABLE |
+				COH901318_CX_CFG_LCR_DISABLE |
+				COH901318_CX_CFG_TC_IRQ_ENABLE |
+				COH901318_CX_CFG_BE_IRQ_ENABLE,
+		.param.ctrl_lli_chained = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_SOURCE,
+		.param.ctrl_lli = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_SOURCE,
+		.param.ctrl_lli_last = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_SOURCE,
+	},
+	{
+		.number = U300_DMA_PCM_I2S0_RX,
+		.name = "PCM I2S0 RX",
+		.priority_high = 1,
+		.param.config = COH901318_CX_CFG_CH_DISABLE |
+				COH901318_CX_CFG_LCR_DISABLE |
+				COH901318_CX_CFG_TC_IRQ_ENABLE |
+				COH901318_CX_CFG_BE_IRQ_ENABLE,
+		.param.ctrl_lli_chained = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_DEST,
+		.param.ctrl_lli = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_DEST,
+		.param.ctrl_lli_last = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_DEST,
+	},
+	{
+		.number = U300_DMA_PCM_I2S1_TX,
+		.name = "PCM I2S1 TX",
+		.priority_high = 1,
+		.param.config = COH901318_CX_CFG_CH_DISABLE |
+				COH901318_CX_CFG_LCR_DISABLE |
+				COH901318_CX_CFG_TC_IRQ_ENABLE |
+				COH901318_CX_CFG_BE_IRQ_ENABLE,
+		.param.ctrl_lli_chained = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_SOURCE,
+		.param.ctrl_lli = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_SOURCE,
+		.param.ctrl_lli_last = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_SOURCE,
+	},
+	{
+		.number = U300_DMA_PCM_I2S1_RX,
+		.name = "PCM I2S1 RX",
+		.priority_high = 1,
+		.param.config = COH901318_CX_CFG_CH_DISABLE |
+				COH901318_CX_CFG_LCR_DISABLE |
+				COH901318_CX_CFG_TC_IRQ_ENABLE |
+				COH901318_CX_CFG_BE_IRQ_ENABLE,
+		.param.ctrl_lli_chained = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_DEST,
+		.param.ctrl_lli = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_DEST,
+		.param.ctrl_lli_last = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
+				COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
+				COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
+				COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_ENABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY |
+				COH901318_CX_CTRL_PRDD_DEST,
+	},
+	{
+		.number = U300_DMA_XGAM_CDI,
+		.name = "XGAM CDI",
+		.priority_high = 0,
+	},
+	{
+		.number = U300_DMA_XGAM_PDI,
+		.name = "XGAM PDI",
+		.priority_high = 0,
+	},
+	/*
+	 * Don't set up device address, burst count or size of src
+	 * or dst bus for this peripheral - handled by PrimeCell
+	 * DMA extension.
+	 */
+	{
+		.number = U300_DMA_SPI_TX,
+		.name = "SPI TX",
+		.priority_high = 0,
+		.param.config = COH901318_CX_CFG_CH_DISABLE |
+				COH901318_CX_CFG_LCR_DISABLE |
+				COH901318_CX_CFG_TC_IRQ_ENABLE |
+				COH901318_CX_CFG_BE_IRQ_ENABLE,
+		.param.ctrl_lli_chained = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY,
+		.param.ctrl_lli = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY,
+		.param.ctrl_lli_last = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY,
+	},
+	{
+		.number = U300_DMA_SPI_RX,
+		.name = "SPI RX",
+		.priority_high = 0,
+		.param.config = COH901318_CX_CFG_CH_DISABLE |
+				COH901318_CX_CFG_LCR_DISABLE |
+				COH901318_CX_CFG_TC_IRQ_ENABLE |
+				COH901318_CX_CFG_BE_IRQ_ENABLE,
+		.param.ctrl_lli_chained = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_DISABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY,
+		.param.ctrl_lli = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY,
+		.param.ctrl_lli_last = 0 |
+				COH901318_CX_CTRL_TC_ENABLE |
+				COH901318_CX_CTRL_MASTER_MODE_M1RW |
+				COH901318_CX_CTRL_TCP_DISABLE |
+				COH901318_CX_CTRL_TC_IRQ_ENABLE |
+				COH901318_CX_CTRL_HSP_ENABLE |
+				COH901318_CX_CTRL_HSS_DISABLE |
+				COH901318_CX_CTRL_DDMA_LEGACY,
+
+	},
+	{
+		.number = U300_DMA_GENERAL_PURPOSE_0,
+		.name = "GENERAL 00",
+		.priority_high = 0,
+
+		.param.config = flags_memcpy_config,
+		.param.ctrl_lli_chained = flags_memcpy_lli_chained,
+		.param.ctrl_lli = flags_memcpy_lli,
+		.param.ctrl_lli_last = flags_memcpy_lli_last,
+	},
+	{
+		.number = U300_DMA_GENERAL_PURPOSE_1,
+		.name = "GENERAL 01",
+		.priority_high = 0,
+
+		.param.config = flags_memcpy_config,
+		.param.ctrl_lli_chained = flags_memcpy_lli_chained,
+		.param.ctrl_lli = flags_memcpy_lli,
+		.param.ctrl_lli_last = flags_memcpy_lli_last,
+	},
+	{
+		.number = U300_DMA_GENERAL_PURPOSE_2,
+		.name = "GENERAL 02",
+		.priority_high = 0,
+
+		.param.config = flags_memcpy_config,
+		.param.ctrl_lli_chained = flags_memcpy_lli_chained,
+		.param.ctrl_lli = flags_memcpy_lli,
+		.param.ctrl_lli_last = flags_memcpy_lli_last,
+	},
+	{
+		.number = U300_DMA_GENERAL_PURPOSE_3,
+		.name = "GENERAL 03",
+		.priority_high = 0,
+
+		.param.config = flags_memcpy_config,
+		.param.ctrl_lli_chained = flags_memcpy_lli_chained,
+		.param.ctrl_lli = flags_memcpy_lli,
+		.param.ctrl_lli_last = flags_memcpy_lli_last,
+	},
+	{
+		.number = U300_DMA_GENERAL_PURPOSE_4,
+		.name = "GENERAL 04",
+		.priority_high = 0,
+
+		.param.config = flags_memcpy_config,
+		.param.ctrl_lli_chained = flags_memcpy_lli_chained,
+		.param.ctrl_lli = flags_memcpy_lli,
+		.param.ctrl_lli_last = flags_memcpy_lli_last,
+	},
+	{
+		.number = U300_DMA_GENERAL_PURPOSE_5,
+		.name = "GENERAL 05",
+		.priority_high = 0,
+
+		.param.config = flags_memcpy_config,
+		.param.ctrl_lli_chained = flags_memcpy_lli_chained,
+		.param.ctrl_lli = flags_memcpy_lli,
+		.param.ctrl_lli_last = flags_memcpy_lli_last,
+	},
+	{
+		.number = U300_DMA_GENERAL_PURPOSE_6,
+		.name = "GENERAL 06",
+		.priority_high = 0,
+
+		.param.config = flags_memcpy_config,
+		.param.ctrl_lli_chained = flags_memcpy_lli_chained,
+		.param.ctrl_lli = flags_memcpy_lli,
+		.param.ctrl_lli_last = flags_memcpy_lli_last,
+	},
+	{
+		.number = U300_DMA_GENERAL_PURPOSE_7,
+		.name = "GENERAL 07",
+		.priority_high = 0,
+
+		.param.config = flags_memcpy_config,
+		.param.ctrl_lli_chained = flags_memcpy_lli_chained,
+		.param.ctrl_lli = flags_memcpy_lli,
+		.param.ctrl_lli_last = flags_memcpy_lli_last,
+	},
+	{
+		.number = U300_DMA_GENERAL_PURPOSE_8,
+		.name = "GENERAL 08",
+		.priority_high = 0,
+
+		.param.config = flags_memcpy_config,
+		.param.ctrl_lli_chained = flags_memcpy_lli_chained,
+		.param.ctrl_lli = flags_memcpy_lli,
+		.param.ctrl_lli_last = flags_memcpy_lli_last,
+	},
+	{
+		.number = U300_DMA_UART1_TX,
+		.name = "UART1 TX",
+		.priority_high = 0,
+	},
+	{
+		.number = U300_DMA_UART1_RX,
+		.name = "UART1 RX",
+		.priority_high = 0,
+	}
+};
+
+#define COHC_2_DEV(cohc) (&(cohc)->chan.dev->device)
+
+#ifdef VERBOSE_DEBUG
+#define COH_DBG(x) ({ if (1) x; 0; })
+#else
+#define COH_DBG(x) ({ if (0) x; 0; })
+#endif
+
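+/*
+ * Driver-internal descriptor for one DMA job. It wraps the dmaengine
+ * async_tx descriptor, the lli chain that the hardware will walk and
+ * the CFG/CTRL head values programmed into the channel when the job
+ * is started. Descriptors move between the per-channel free, queue
+ * and active lists via @node.
+ */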
+struct coh901318_desc {
+	struct dma_async_tx_descriptor desc;
+	struct list_head node;
+	struct scatterlist *sg;
+	unsigned int sg_len;
+	struct coh901318_lli *lli;
+	enum dma_transfer_direction dir;
+	unsigned long flags;
+	u32 head_config;
+	u32 head_ctrl;
+};
+
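+/*
+ * Per-controller state: the remapped register base, the irq, the
+ * shared lli pool, power save book-keeping and the array of channel
+ * states. Two dma_device instances are exposed, one for slave
+ * transfers and one for memcpy.
+ */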
+struct coh901318_base {
+	struct device *dev;
+	void __iomem *virtbase;
+	unsigned int irq;
+	struct coh901318_pool pool;
+	struct powersave pm;
+	struct dma_device dma_slave;
+	struct dma_device dma_memcpy;
+	struct coh901318_chan *chans;
+};
+
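+/*
+ * Per-channel state. @lock protects the descriptor lists and the
+ * channel registers; @id is the physical channel number used to
+ * index chan_config and the per-channel register banks. Jobs move
+ * from @queue to @active when started and back onto @free when
+ * completed; @nbr_active_done counts finished jobs that the tasklet
+ * still has to handle. @addr and @ctrl carry the runtime
+ * (dma_slave_config) device address and control bits.
+ */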
+struct coh901318_chan {
+	spinlock_t lock;
+	int allocated;
+	int id;
+	int stopped;
+
+	struct work_struct free_work;
+	struct dma_chan chan;
+
+	struct tasklet_struct tasklet;
+
+	struct list_head active;
+	struct list_head queue;
+	struct list_head free;
+
+	unsigned long nbr_active_done;
+	unsigned long busy;
+
+	u32 addr;
+	u32 ctrl;
+
+	struct coh901318_base *base;
+};
+
+static void coh901318_list_print(struct coh901318_chan *cohc,
+				 struct coh901318_lli *lli)
+{
+	struct coh901318_lli *l = lli;
+	int i = 0;
+
+	while (l) {
+		dev_vdbg(COHC_2_DEV(cohc), "i %d, lli %p, ctrl 0x%x, src %pad"
+			 ", dst %pad, link %pad virt_link_addr 0x%p\n",
+			 i, l, l->control, &l->src_addr, &l->dst_addr,
+			 &l->link_addr, l->virt_link_addr);
+		i++;
+		l = l->virt_link_addr;
+	}
+}
+
+#ifdef CONFIG_DEBUG_FS
+
+#define COH901318_DEBUGFS_ASSIGN(x, y) (x = y)
+
+static struct coh901318_base *debugfs_dma_base;
+static struct dentry *dma_dentry;
+
+static ssize_t coh901318_debugfs_read(struct file *file, char __user *buf,
+				  size_t count, loff_t *f_pos)
+{
+	u64 started_channels = debugfs_dma_base->pm.started_channels;
+	int pool_count = debugfs_dma_base->pool.debugfs_pool_counter;
+	char *dev_buf;
+	char *tmp;
+	int ret;
+	int i;
+
+	dev_buf = kmalloc(4*1024, GFP_KERNEL);
+	if (dev_buf == NULL)
+		return -ENOMEM;
+	tmp = dev_buf;
+
+	tmp += sprintf(tmp, "DMA -- enabled dma channels\n");
+
+	for (i = 0; i < U300_DMA_CHANNELS; i++) {
+		if (started_channels & (1ULL << i))
+			tmp += sprintf(tmp, "channel %d\n", i);
+	}
+
+	tmp += sprintf(tmp, "Pool alloc nbr %d\n", pool_count);
+
+	ret = simple_read_from_buffer(buf, count, f_pos, dev_buf,
+					tmp - dev_buf);
+	kfree(dev_buf);
+	return ret;
+}
+
+static const struct file_operations coh901318_debugfs_status_operations = {
+	.open		= simple_open,
+	.read		= coh901318_debugfs_read,
+	.llseek		= default_llseek,
+};
+
+
+static int __init init_coh901318_debugfs(void)
+{
+
+	dma_dentry = debugfs_create_dir("dma", NULL);
+
+	(void) debugfs_create_file("status",
+				   S_IFREG | S_IRUGO,
+				   dma_dentry, NULL,
+				   &coh901318_debugfs_status_operations);
+	return 0;
+}
+
+static void __exit exit_coh901318_debugfs(void)
+{
+	debugfs_remove_recursive(dma_dentry);
+}
+
+module_init(init_coh901318_debugfs);
+module_exit(exit_coh901318_debugfs);
+#else
+
+#define COH901318_DEBUGFS_ASSIGN(x, y)
+
+#endif /* CONFIG_DEBUG_FS */
+
+static inline struct coh901318_chan *to_coh901318_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct coh901318_chan, chan);
+}
+
+static inline const struct coh901318_params *
+cohc_chan_param(struct coh901318_chan *cohc)
+{
+	return &chan_config[cohc->id].param;
+}
+
+static inline const struct coh_dma_channel *
+cohc_chan_conf(struct coh901318_chan *cohc)
+{
+	return &chan_config[cohc->id];
+}
+
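+/*
+ * Power save book-keeping: pm.started_channels is a bitmask with one
+ * bit per physical channel. enable_powersave() clears this channel's
+ * bit (the channel has gone idle), disable_powersave() sets it (the
+ * channel is running, so the controller must stay powered).
+ */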
+static void enable_powersave(struct coh901318_chan *cohc)
+{
+	unsigned long flags;
+	struct powersave *pm = &cohc->base->pm;
+
+	spin_lock_irqsave(&pm->lock, flags);
+
+	pm->started_channels &= ~(1ULL << cohc->id);
+
+	spin_unlock_irqrestore(&pm->lock, flags);
+}
+static void disable_powersave(struct coh901318_chan *cohc)
+{
+	unsigned long flags;
+	struct powersave *pm = &cohc->base->pm;
+
+	spin_lock_irqsave(&pm->lock, flags);
+
+	pm->started_channels |= (1ULL << cohc->id);
+
+	spin_unlock_irqrestore(&pm->lock, flags);
+}
+
+static inline int coh901318_set_ctrl(struct coh901318_chan *cohc, u32 control)
+{
+	int channel = cohc->id;
+	void __iomem *virtbase = cohc->base->virtbase;
+
+	writel(control,
+	       virtbase + COH901318_CX_CTRL +
+	       COH901318_CX_CTRL_SPACING * channel);
+	return 0;
+}
+
+static inline int coh901318_set_conf(struct coh901318_chan *cohc, u32 conf)
+{
+	int channel = cohc->id;
+	void __iomem *virtbase = cohc->base->virtbase;
+
+	writel(conf,
+	       virtbase + COH901318_CX_CFG +
+	       COH901318_CX_CFG_SPACING*channel);
+	return 0;
+}
+
+
+static int coh901318_start(struct coh901318_chan *cohc)
+{
+	u32 val;
+	int channel = cohc->id;
+	void __iomem *virtbase = cohc->base->virtbase;
+
+	disable_powersave(cohc);
+
+	val = readl(virtbase + COH901318_CX_CFG +
+		    COH901318_CX_CFG_SPACING * channel);
+
+	/* Enable channel */
+	val |= COH901318_CX_CFG_CH_ENABLE;
+	writel(val, virtbase + COH901318_CX_CFG +
+	       COH901318_CX_CFG_SPACING * channel);
+
+	return 0;
+}
+
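+/*
+ * Program the head of an lli chain into the channel registers:
+ * source address, destination address, link address and control
+ * word. The channel must not be active when this is called.
+ */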
+static int coh901318_prep_linked_list(struct coh901318_chan *cohc,
+				      struct coh901318_lli *lli)
+{
+	int channel = cohc->id;
+	void __iomem *virtbase = cohc->base->virtbase;
+
+	BUG_ON(readl(virtbase + COH901318_CX_STAT +
+		     COH901318_CX_STAT_SPACING*channel) &
+	       COH901318_CX_STAT_ACTIVE);
+
+	writel(lli->src_addr,
+	       virtbase + COH901318_CX_SRC_ADDR +
+	       COH901318_CX_SRC_ADDR_SPACING * channel);
+
+	writel(lli->dst_addr, virtbase +
+	       COH901318_CX_DST_ADDR +
+	       COH901318_CX_DST_ADDR_SPACING * channel);
+
+	writel(lli->link_addr, virtbase + COH901318_CX_LNK_ADDR +
+	       COH901318_CX_LNK_ADDR_SPACING * channel);
+
+	writel(lli->control, virtbase + COH901318_CX_CTRL +
+	       COH901318_CX_CTRL_SPACING * channel);
+
+	return 0;
+}
+
+static struct coh901318_desc *
+coh901318_desc_get(struct coh901318_chan *cohc)
+{
+	struct coh901318_desc *desc;
+
+	if (list_empty(&cohc->free)) {
+		/* Allocate a new desc because we are out of reusable ones.
+		 * TODO: allocate a pile of descs instead of just one to
+		 * avoid many small allocations.
+		 */
+		desc = kzalloc(sizeof(struct coh901318_desc), GFP_NOWAIT);
+		if (desc == NULL)
+			goto out;
+		INIT_LIST_HEAD(&desc->node);
+		dma_async_tx_descriptor_init(&desc->desc, &cohc->chan);
+	} else {
+		/* Reuse an old desc. */
+		desc = list_first_entry(&cohc->free,
+					struct coh901318_desc,
+					node);
+		list_del(&desc->node);
+		/* Initialize it a bit so it's not insane */
+		desc->sg = NULL;
+		desc->sg_len = 0;
+		desc->desc.callback = NULL;
+		desc->desc.callback_param = NULL;
+	}
+
+ out:
+	return desc;
+}
+
+static void
+coh901318_desc_free(struct coh901318_chan *cohc, struct coh901318_desc *cohd)
+{
+	list_add_tail(&cohd->node, &cohc->free);
+}
+
+/* call with irq lock held */
+static void
+coh901318_desc_submit(struct coh901318_chan *cohc, struct coh901318_desc *desc)
+{
+	list_add_tail(&desc->node, &cohc->active);
+}
+
+static struct coh901318_desc *
+coh901318_first_active_get(struct coh901318_chan *cohc)
+{
+	return list_first_entry_or_null(&cohc->active, struct coh901318_desc,
+					node);
+}
+
+static void
+coh901318_desc_remove(struct coh901318_desc *cohd)
+{
+	list_del(&cohd->node);
+}
+
+static void
+coh901318_desc_queue(struct coh901318_chan *cohc, struct coh901318_desc *desc)
+{
+	list_add_tail(&desc->node, &cohc->queue);
+}
+
+static struct coh901318_desc *
+coh901318_first_queued(struct coh901318_chan *cohc)
+{
+	return list_first_entry_or_null(&cohc->queue, struct coh901318_desc,
+					node);
+}
+
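+/* Sum the transfer count (TC) fields of all llis in a chain */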
+static inline u32 coh901318_get_bytes_in_lli(struct coh901318_lli *in_lli)
+{
+	struct coh901318_lli *lli = in_lli;
+	u32 bytes = 0;
+
+	while (lli) {
+		bytes += lli->control & COH901318_CX_CTRL_TC_VALUE_MASK;
+		lli = lli->virt_link_addr;
+	}
+	return bytes;
+}
+
+/*
+ * Get the number of bytes left to transfer on this channel. For an
+ * exact figure the channel should be stopped before calling this,
+ * but it can still be called for a rough estimate.
+ */
+static u32 coh901318_get_bytes_left(struct dma_chan *chan)
+{
+	struct coh901318_chan *cohc = to_coh901318_chan(chan);
+	struct coh901318_desc *cohd;
+	struct list_head *pos;
+	unsigned long flags;
+	u32 left = 0;
+	int i = 0;
+
+	spin_lock_irqsave(&cohc->lock, flags);
+
+	/*
+	 * If there are several jobs on the list, we iterate and add
+	 * the size of them all. We treat the first job specially,
+	 * since it is probably the one currently active.
+	 */
+	list_for_each(pos, &cohc->active) {
+		/*
+		 * The first job in the list is the one running on the
+		 * hardware. The job can be stopped but still active,
+		 * so that the transfer counter is somewhere inside
+		 * the buffer.
+		 */
+		cohd = list_entry(pos, struct coh901318_desc, node);
+
+		if (i == 0) {
+			struct coh901318_lli *lli;
+			dma_addr_t ladd;
+
+			/* Read current transfer count value */
+			left = readl(cohc->base->virtbase +
+				     COH901318_CX_CTRL +
+				     COH901318_CX_CTRL_SPACING * cohc->id) &
+				COH901318_CX_CTRL_TC_VALUE_MASK;
+
+			/* See if the transfer is linked... */
+			ladd = readl(cohc->base->virtbase +
+				     COH901318_CX_LNK_ADDR +
+				     COH901318_CX_LNK_ADDR_SPACING *
+				     cohc->id) &
+				~COH901318_CX_LNK_LINK_IMMEDIATE;
+			/* Single transaction */
+			if (!ladd)
+				continue;
+
+			/*
+			 * Linked transaction, follow the lli, find the
+			 * currently processing lli, and proceed to the next
+			 */
+			lli = cohd->lli;
+			while (lli && lli->link_addr != ladd)
+				lli = lli->virt_link_addr;
+
+			if (lli)
+				lli = lli->virt_link_addr;
+
+			/*
+			 * Follow remaining lli links around to count the total
+			 * number of bytes left
+			 */
+			left += coh901318_get_bytes_in_lli(lli);
+		} else {
+			left += coh901318_get_bytes_in_lli(cohd->lli);
+		}
+		i++;
+	}
+
+	/* Also count bytes in the queued jobs */
+	list_for_each(pos, &cohc->queue) {
+		cohd = list_entry(pos, struct coh901318_desc, node);
+		left += coh901318_get_bytes_in_lli(cohd->lli);
+	}
+
+	spin_unlock_irqrestore(&cohc->lock, flags);
+
+	return left;
+}
+
+/*
+ * Pauses a transfer without losing data. Enables power save.
+ * Use this function in conjunction with coh901318_resume.
+ */
+static int coh901318_pause(struct dma_chan *chan)
+{
+	u32 val;
+	unsigned long flags;
+	struct coh901318_chan *cohc = to_coh901318_chan(chan);
+	int channel = cohc->id;
+	void __iomem *virtbase = cohc->base->virtbase;
+
+	spin_lock_irqsave(&cohc->lock, flags);
+
+	/* Disable channel in HW */
+	val = readl(virtbase + COH901318_CX_CFG +
+		    COH901318_CX_CFG_SPACING * channel);
+
+	/* Stopping infinite transfer */
+	if ((val & COH901318_CX_CTRL_TC_ENABLE) == 0 &&
+	    (val & COH901318_CX_CFG_CH_ENABLE))
+		cohc->stopped = 1;
+
+
+	val &= ~COH901318_CX_CFG_CH_ENABLE;
+	/* Write twice, HW bug workaround */
+	writel(val, virtbase + COH901318_CX_CFG +
+	       COH901318_CX_CFG_SPACING * channel);
+	writel(val, virtbase + COH901318_CX_CFG +
+	       COH901318_CX_CFG_SPACING * channel);
+
+	/* Spin-wait for it to actually go inactive */
+	while (readl(virtbase + COH901318_CX_STAT+COH901318_CX_STAT_SPACING *
+		     channel) & COH901318_CX_STAT_ACTIVE)
+		cpu_relax();
+
+	/* Check if we stopped an active job */
+	if ((readl(virtbase + COH901318_CX_CTRL+COH901318_CX_CTRL_SPACING *
+		   channel) & COH901318_CX_CTRL_TC_VALUE_MASK) > 0)
+		cohc->stopped = 1;
+
+	enable_powersave(cohc);
+
+	spin_unlock_irqrestore(&cohc->lock, flags);
+	return 0;
+}
+
+/*
+ * Resumes a transfer that has been paused via coh901318_pause().
+ * Power save is handled.
+ */
+static int coh901318_resume(struct dma_chan *chan)
+{
+	u32 val;
+	unsigned long flags;
+	struct coh901318_chan *cohc = to_coh901318_chan(chan);
+	int channel = cohc->id;
+
+	spin_lock_irqsave(&cohc->lock, flags);
+
+	disable_powersave(cohc);
+
+	if (cohc->stopped) {
+		/* Enable channel in HW */
+		val = readl(cohc->base->virtbase + COH901318_CX_CFG +
+			    COH901318_CX_CFG_SPACING * channel);
+
+		val |= COH901318_CX_CFG_CH_ENABLE;
+
+		writel(val, cohc->base->virtbase + COH901318_CX_CFG +
+		       COH901318_CX_CFG_SPACING*channel);
+
+		cohc->stopped = 0;
+	}
+
+	spin_unlock_irqrestore(&cohc->lock, flags);
+	return 0;
+}
+
+bool coh901318_filter_id(struct dma_chan *chan, void *chan_id)
+{
+	unsigned long ch_nr = (unsigned long) chan_id;
+
+	if (ch_nr == to_coh901318_chan(chan)->id)
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL(coh901318_filter_id);
+
+struct coh901318_filter_args {
+	struct coh901318_base *base;
+	unsigned int ch_nr;
+};
+
+static bool coh901318_filter_base_and_id(struct dma_chan *chan, void *data)
+{
+	struct coh901318_filter_args *args = data;
+
+	if (&args->base->dma_slave == chan->device &&
+	    args->ch_nr == to_coh901318_chan(chan)->id)
+		return true;
+
+	return false;
+}
+
+static struct dma_chan *coh901318_xlate(struct of_phandle_args *dma_spec,
+					struct of_dma *ofdma)
+{
+	struct coh901318_filter_args args = {
+		.base = ofdma->of_dma_data,
+		.ch_nr = dma_spec->args[0],
+	};
+	dma_cap_mask_t cap;
+	dma_cap_zero(cap);
+	dma_cap_set(DMA_SLAVE, cap);
+
+	return dma_request_channel(cap, coh901318_filter_base_and_id, &args);
+}
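+
+/*
+ * Channels can also be looked up by physical number with the
+ * exported coh901318_filter_id() filter. A minimal, illustrative
+ * sketch of a client request (the surrounding client code is not
+ * part of this driver; U300_DMA_UART0_TX is just one example entry
+ * from the channel table above):
+ *
+ *	dma_cap_mask_t mask;
+ *	struct dma_chan *chan;
+ *
+ *	dma_cap_zero(mask);
+ *	dma_cap_set(DMA_SLAVE, mask);
+ *	chan = dma_request_channel(mask, coh901318_filter_id,
+ *				   (void *) U300_DMA_UART0_TX);
+ *
+ * Device tree clients instead go through coh901318_xlate() above,
+ * with the channel number as the single argument cell.
+ */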
+/*
+ * DMA channel allocation
+ */
+static int coh901318_config(struct coh901318_chan *cohc,
+			    struct coh901318_params *param)
+{
+	unsigned long flags;
+	const struct coh901318_params *p;
+	int channel = cohc->id;
+	void __iomem *virtbase = cohc->base->virtbase;
+
+	spin_lock_irqsave(&cohc->lock, flags);
+
+	if (param)
+		p = param;
+	else
+		p = cohc_chan_param(cohc);
+
+	/* Clear any pending BE or TC interrupt */
+	if (channel < 32) {
+		writel(1 << channel, virtbase + COH901318_BE_INT_CLEAR1);
+		writel(1 << channel, virtbase + COH901318_TC_INT_CLEAR1);
+	} else {
+		writel(1 << (channel - 32), virtbase +
+		       COH901318_BE_INT_CLEAR2);
+		writel(1 << (channel - 32), virtbase +
+		       COH901318_TC_INT_CLEAR2);
+	}
+
+	coh901318_set_conf(cohc, p->config);
+	coh901318_set_ctrl(cohc, p->ctrl_lli_last);
+
+	spin_unlock_irqrestore(&cohc->lock, flags);
+
+	return 0;
+}
+
+/*
+ * Start the next queued job, if any. Must be called with the channel
+ * lock held.
+ * TODO: start all queued jobs in one go
+ *
+ * Returns the descriptor if a queued job was started, or NULL if the
+ * queue was empty.
+ */
+static struct coh901318_desc *coh901318_queue_start(struct coh901318_chan *cohc)
+{
+	struct coh901318_desc *cohd;
+
+	/*
+	 * start queued jobs, if any
+	 * TODO: transmit all queued jobs in one go
+	 */
+	cohd = coh901318_first_queued(cohc);
+
+	if (cohd != NULL) {
+		/* Remove from queue */
+		coh901318_desc_remove(cohd);
+		/* initiate DMA job */
+		cohc->busy = 1;
+
+		coh901318_desc_submit(cohc, cohd);
+
+		/* Program the transaction head */
+		coh901318_set_conf(cohc, cohd->head_config);
+		coh901318_set_ctrl(cohc, cohd->head_ctrl);
+		coh901318_prep_linked_list(cohc, cohd->lli);
+
+		/* start dma job on this channel */
+		coh901318_start(cohc);
+
+	}
+
+	return cohd;
+}
+
+/*
+ * This tasklet is called from the interrupt handler to
+ * handle each descriptor (DMA job) that is sent to a channel.
+ */
+static void dma_tasklet(unsigned long data)
+{
+	struct coh901318_chan *cohc = (struct coh901318_chan *) data;
+	struct coh901318_desc *cohd_fin;
+	unsigned long flags;
+	struct dmaengine_desc_callback cb;
+
+	dev_vdbg(COHC_2_DEV(cohc), "[%s] chan_id %d"
+		 " nbr_active_done %ld\n", __func__,
+		 cohc->id, cohc->nbr_active_done);
+
+	spin_lock_irqsave(&cohc->lock, flags);
+
+	/* get first active descriptor entry from list */
+	cohd_fin = coh901318_first_active_get(cohc);
+
+	if (cohd_fin == NULL)
+		goto err;
+
+	/* locate callback to client */
+	dmaengine_desc_get_callback(&cohd_fin->desc, &cb);
+
+	/* mark this job as completed on the channel */
+	dma_cookie_complete(&cohd_fin->desc);
+
+	/* release the lli allocation and remove the descriptor */
+	coh901318_lli_free(&cohc->base->pool, &cohd_fin->lli);
+
+	/* return desc to free-list */
+	coh901318_desc_remove(cohd_fin);
+	coh901318_desc_free(cohc, cohd_fin);
+
+	spin_unlock_irqrestore(&cohc->lock, flags);
+
+	/* Call the callback when we're done */
+	dmaengine_desc_callback_invoke(&cb, NULL);
+
+	spin_lock_irqsave(&cohc->lock, flags);
+
+	/*
+	 * If another interrupt fired while this tasklet was being
+	 * scheduled, the tasklet does not get called twice. Instead we
+	 * keep a counter of the number of IRQs expected to be handled
+	 * for this channel; if more than one IRQ remains to be acked,
+	 * we simply schedule this tasklet again.
+	 */
+	cohc->nbr_active_done--;
+	if (cohc->nbr_active_done) {
+		dev_dbg(COHC_2_DEV(cohc), "scheduling tasklet again, new IRQs "
+			"came in while we were scheduling this tasklet\n");
+		if (cohc_chan_conf(cohc)->priority_high)
+			tasklet_hi_schedule(&cohc->tasklet);
+		else
+			tasklet_schedule(&cohc->tasklet);
+	}
+
+	spin_unlock_irqrestore(&cohc->lock, flags);
+
+	return;
+
+ err:
+	spin_unlock_irqrestore(&cohc->lock, flags);
+	dev_err(COHC_2_DEV(cohc), "[%s] No active dma desc\n", __func__);
+}
+
+
+/* called from interrupt context */
+static void dma_tc_handle(struct coh901318_chan *cohc)
+{
+	/*
+	 * If the channel is not allocated, then we shouldn't have
+	 * any TC interrupts on it.
+	 */
+	if (!cohc->allocated) {
+		dev_err(COHC_2_DEV(cohc), "spurious interrupt from "
+			"unallocated channel\n");
+		return;
+	}
+
+	spin_lock(&cohc->lock);
+
+	/*
+	 * When we reach this point, at least one queue item should
+	 * have been moved over from cohc->queue to cohc->active and
+	 * run to completion; that, after all, is why we are getting a
+	 * terminal count interrupt. If this BUG() triggers, the most
+	 * probable cause is that the individual nodes in the lli chain
+	 * have IRQ enabled, so check your platform config for the lli
+	 * chain ctrl.
+	 */
+	BUG_ON(list_empty(&cohc->active));
+
+	cohc->nbr_active_done++;
+
+	/*
+	 * This attempts to take a job from cohc->queue, put it
+	 * into cohc->active and start it.
+	 */
+	if (coh901318_queue_start(cohc) == NULL)
+		cohc->busy = 0;
+
+	spin_unlock(&cohc->lock);
+
+	/*
+	 * This tasklet will remove items from cohc->active
+	 * and thus terminate them.
+	 */
+	if (cohc_chan_conf(cohc)->priority_high)
+		tasklet_hi_schedule(&cohc->tasklet);
+	else
+		tasklet_schedule(&cohc->tasklet);
+}
+
+
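+/*
+ * The controller reports interrupts for 64 channels through two
+ * 32-bit status registers. For every bit set we distinguish a bus
+ * error (BE) from a terminal count (TC) event, clear the per-channel
+ * interrupt and, for TC, hand the channel over to dma_tc_handle().
+ */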
+static irqreturn_t dma_irq_handler(int irq, void *dev_id)
+{
+	u32 status1;
+	u32 status2;
+	int i;
+	int ch;
+	struct coh901318_base *base  = dev_id;
+	struct coh901318_chan *cohc;
+	void __iomem *virtbase = base->virtbase;
+
+	status1 = readl(virtbase + COH901318_INT_STATUS1);
+	status2 = readl(virtbase + COH901318_INT_STATUS2);
+
+	if (unlikely(status1 == 0 && status2 == 0)) {
+		dev_warn(base->dev, "spurious DMA IRQ from no channel!\n");
+		return IRQ_HANDLED;
+	}
+
+	/* TODO: consider handling the IRQ in a tasklet here to
+	 *       minimize interrupt latency */
+
+	/* Check the first 32 DMA channels for IRQ */
+	while (status1) {
+		/* Find first bit set, return as a number. */
+		i = ffs(status1) - 1;
+		ch = i;
+
+		cohc = &base->chans[ch];
+		spin_lock(&cohc->lock);
+
+		/* Mask off this bit */
+		status1 &= ~(1 << i);
+		/* Check the individual channel bits */
+		if (test_bit(i, virtbase + COH901318_BE_INT_STATUS1)) {
+			dev_crit(COHC_2_DEV(cohc),
+				 "DMA bus error on channel %d!\n", ch);
+			BUG_ON(1);
+			/* Clear BE interrupt */
+			__set_bit(i, virtbase + COH901318_BE_INT_CLEAR1);
+		} else {
+			/* Caused by TC, really? */
+			if (unlikely(!test_bit(i, virtbase +
+					       COH901318_TC_INT_STATUS1))) {
+				dev_warn(COHC_2_DEV(cohc),
+					 "ignoring interrupt not caused by terminal count on channel %d\n", ch);
+				/* Clear TC interrupt */
+				BUG_ON(1);
+				__set_bit(i, virtbase + COH901318_TC_INT_CLEAR1);
+			} else {
+				/* Enable powersave if transfer has finished */
+				if (!(readl(virtbase + COH901318_CX_STAT +
+					    COH901318_CX_STAT_SPACING*ch) &
+				      COH901318_CX_STAT_ENABLED)) {
+					enable_powersave(cohc);
+				}
+
+				/* Must clear TC interrupt before calling
+				 * dma_tc_handle
+				 * in case tc_handle initiates a new dma job
+				 */
+				__set_bit(i, virtbase + COH901318_TC_INT_CLEAR1);
+
+				dma_tc_handle(cohc);
+			}
+		}
+		spin_unlock(&cohc->lock);
+	}
+
+	/* Check the remaining 32 DMA channels for IRQ */
+	while (status2) {
+		/* Find first bit set, return as a number. */
+		i = ffs(status2) - 1;
+		ch = i + 32;
+		cohc = &base->chans[ch];
+		spin_lock(&cohc->lock);
+
+		/* Mask off this bit */
+		status2 &= ~(1 << i);
+		/* Check the individual channel bits */
+		if (test_bit(i, virtbase + COH901318_BE_INT_STATUS2)) {
+			dev_crit(COHC_2_DEV(cohc),
+				 "DMA bus error on channel %d!\n", ch);
+			/* Clear BE interrupt */
+			BUG_ON(1);
+			__set_bit(i, virtbase + COH901318_BE_INT_CLEAR2);
+		} else {
+			/* Caused by TC, really? */
+			if (unlikely(!test_bit(i, virtbase +
+					       COH901318_TC_INT_STATUS2))) {
+				dev_warn(COHC_2_DEV(cohc),
+					 "ignoring interrupt not caused by terminal count on channel %d\n", ch);
+				/* Clear TC interrupt */
+				__set_bit(i, virtbase + COH901318_TC_INT_CLEAR2);
+				BUG_ON(1);
+			} else {
+				/* Enable powersave if transfer has finished */
+				if (!(readl(virtbase + COH901318_CX_STAT +
+					    COH901318_CX_STAT_SPACING*ch) &
+				      COH901318_CX_STAT_ENABLED)) {
+					enable_powersave(cohc);
+				}
+				/* Must clear TC interrupt before calling
+				 * dma_tc_handle
+				 * in case tc_handle initiates a new dma job
+				 */
+				__set_bit(i, virtbase + COH901318_TC_INT_CLEAR2);
+
+				dma_tc_handle(cohc);
+			}
+		}
+		spin_unlock(&cohc->lock);
+	}
+
+	return IRQ_HANDLED;
+}
+
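+/*
+ * Abort everything on the channel: pause it, clear any pending BE/TC
+ * interrupts, release the lli allocations of all active and queued
+ * descriptors and return them to the free list, then re-enable power
+ * save for this channel.
+ */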
+static int coh901318_terminate_all(struct dma_chan *chan)
+{
+	unsigned long flags;
+	struct coh901318_chan *cohc = to_coh901318_chan(chan);
+	struct coh901318_desc *cohd;
+	void __iomem *virtbase = cohc->base->virtbase;
+
+	/* The remainder of this function terminates the transfer */
+	coh901318_pause(chan);
+	spin_lock_irqsave(&cohc->lock, flags);
+
+	/* Clear any pending BE or TC interrupt */
+	if (cohc->id < 32) {
+		writel(1 << cohc->id, virtbase + COH901318_BE_INT_CLEAR1);
+		writel(1 << cohc->id, virtbase + COH901318_TC_INT_CLEAR1);
+	} else {
+		writel(1 << (cohc->id - 32), virtbase +
+		       COH901318_BE_INT_CLEAR2);
+		writel(1 << (cohc->id - 32), virtbase +
+		       COH901318_TC_INT_CLEAR2);
+	}
+
+	enable_powersave(cohc);
+
+	while ((cohd = coh901318_first_active_get(cohc))) {
+		/* release the lli allocation*/
+		coh901318_lli_free(&cohc->base->pool, &cohd->lli);
+
+		/* return desc to free-list */
+		coh901318_desc_remove(cohd);
+		coh901318_desc_free(cohc, cohd);
+	}
+
+	while ((cohd = coh901318_first_queued(cohc))) {
+		/* release the lli allocation*/
+		coh901318_lli_free(&cohc->base->pool, &cohd->lli);
+
+		/* return desc to free-list */
+		coh901318_desc_remove(cohd);
+		coh901318_desc_free(cohc, cohd);
+	}
+
+
+	cohc->nbr_active_done = 0;
+	cohc->busy = 0;
+
+	spin_unlock_irqrestore(&cohc->lock, flags);
+
+	return 0;
+}
+
+static int coh901318_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct coh901318_chan	*cohc = to_coh901318_chan(chan);
+	unsigned long flags;
+
+	dev_vdbg(COHC_2_DEV(cohc), "[%s] DMA channel %d\n",
+		 __func__, cohc->id);
+
+	if (chan->client_count > 1)
+		return -EBUSY;
+
+	spin_lock_irqsave(&cohc->lock, flags);
+
+	coh901318_config(cohc, NULL);
+
+	cohc->allocated = 1;
+	dma_cookie_init(chan);
+
+	spin_unlock_irqrestore(&cohc->lock, flags);
+
+	return 1;
+}
+
+static void
+coh901318_free_chan_resources(struct dma_chan *chan)
+{
+	struct coh901318_chan	*cohc = to_coh901318_chan(chan);
+	int channel = cohc->id;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cohc->lock, flags);
+
+	/* Disable HW */
+	writel(0x00000000U, cohc->base->virtbase + COH901318_CX_CFG +
+	       COH901318_CX_CFG_SPACING*channel);
+	writel(0x00000000U, cohc->base->virtbase + COH901318_CX_CTRL +
+	       COH901318_CX_CTRL_SPACING*channel);
+
+	cohc->allocated = 0;
+
+	spin_unlock_irqrestore(&cohc->lock, flags);
+
+	coh901318_terminate_all(chan);
+}
+
+
+static dma_cookie_t
+coh901318_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct coh901318_desc *cohd = container_of(tx, struct coh901318_desc,
+						   desc);
+	struct coh901318_chan *cohc = to_coh901318_chan(tx->chan);
+	unsigned long flags;
+	dma_cookie_t cookie;
+
+	spin_lock_irqsave(&cohc->lock, flags);
+	cookie = dma_cookie_assign(tx);
+
+	coh901318_desc_queue(cohc, cohd);
+
+	spin_unlock_irqrestore(&cohc->lock, flags);
+
+	return cookie;
+}
+
+static struct dma_async_tx_descriptor *
+coh901318_prep_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		      size_t size, unsigned long flags)
+{
+	struct coh901318_lli *lli;
+	struct coh901318_desc *cohd;
+	unsigned long flg;
+	struct coh901318_chan *cohc = to_coh901318_chan(chan);
+	int lli_len;
+	u32 ctrl_last = cohc_chan_param(cohc)->ctrl_lli_last;
+	int ret;
+
+	spin_lock_irqsave(&cohc->lock, flg);
+
+	dev_vdbg(COHC_2_DEV(cohc),
+		 "[%s] channel %d src %pad dest %pad size %zu\n",
+		 __func__, cohc->id, &src, &dest, size);
+
+	if (flags & DMA_PREP_INTERRUPT)
+		/* Trigger interrupt after last lli */
+		ctrl_last |= COH901318_CX_CTRL_TC_IRQ_ENABLE;
+
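+	/*
+	 * Split the copy into llis of at most MAX_DMA_PACKET_SIZE
+	 * bytes each, i.e. round size / MAX_DMA_PACKET_SIZE upwards.
+	 */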
+	lli_len = size >> MAX_DMA_PACKET_SIZE_SHIFT;
+	if ((lli_len << MAX_DMA_PACKET_SIZE_SHIFT) < size)
+		lli_len++;
+
+	lli = coh901318_lli_alloc(&cohc->base->pool, lli_len);
+
+	if (lli == NULL)
+		goto err;
+
+	ret = coh901318_lli_fill_memcpy(
+		&cohc->base->pool, lli, src, size, dest,
+		cohc_chan_param(cohc)->ctrl_lli_chained,
+		ctrl_last);
+	if (ret)
+		goto err;
+
+	COH_DBG(coh901318_list_print(cohc, lli));
+
+	/* Pick a descriptor to handle this transfer */
+	cohd = coh901318_desc_get(cohc);
+	cohd->lli = lli;
+	cohd->flags = flags;
+	cohd->desc.tx_submit = coh901318_tx_submit;
+
+	spin_unlock_irqrestore(&cohc->lock, flg);
+
+	return &cohd->desc;
+ err:
+	spin_unlock_irqrestore(&cohc->lock, flg);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+			unsigned int sg_len, enum dma_transfer_direction direction,
+			unsigned long flags, void *context)
+{
+	struct coh901318_chan *cohc = to_coh901318_chan(chan);
+	struct coh901318_lli *lli;
+	struct coh901318_desc *cohd;
+	const struct coh901318_params *params;
+	struct scatterlist *sg;
+	int len = 0;
+	int size;
+	int i;
+	u32 ctrl_chained = cohc_chan_param(cohc)->ctrl_lli_chained;
+	u32 ctrl = cohc_chan_param(cohc)->ctrl_lli;
+	u32 ctrl_last = cohc_chan_param(cohc)->ctrl_lli_last;
+	u32 config;
+	unsigned long flg;
+	int ret;
+
+	if (!sgl)
+		goto out;
+	if (sg_dma_len(sgl) == 0)
+		goto out;
+
+	spin_lock_irqsave(&cohc->lock, flg);
+
+	dev_vdbg(COHC_2_DEV(cohc), "[%s] sg_len %d dir %d\n",
+		 __func__, sg_len, direction);
+
+	if (flags & DMA_PREP_INTERRUPT)
+		/* Trigger interrupt after last lli */
+		ctrl_last |= COH901318_CX_CTRL_TC_IRQ_ENABLE;
+
+	params = cohc_chan_param(cohc);
+	config = params->config;
+	/*
+	/*
+	 * Add runtime-specific control on top; make sure that the
+	 * bits set per peripheral channel here are cleared in the
+	 * default config from the platform.
+	 */
+	ctrl_chained |= cohc->ctrl;
+	ctrl_last |= cohc->ctrl;
+	ctrl |= cohc->ctrl;
+
+	if (direction == DMA_MEM_TO_DEV) {
+		u32 tx_flags = COH901318_CX_CTRL_PRDD_SOURCE |
+			COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE;
+
+		config |= COH901318_CX_CFG_RM_MEMORY_TO_PRIMARY;
+		ctrl_chained |= tx_flags;
+		ctrl_last |= tx_flags;
+		ctrl |= tx_flags;
+	} else if (direction == DMA_DEV_TO_MEM) {
+		u32 rx_flags = COH901318_CX_CTRL_PRDD_DEST |
+			COH901318_CX_CTRL_DST_ADDR_INC_ENABLE;
+
+		config |= COH901318_CX_CFG_RM_PRIMARY_TO_MEMORY;
+		ctrl_chained |= rx_flags;
+		ctrl_last |= rx_flags;
+		ctrl |= rx_flags;
+	} else
+		goto err_direction;
+
+	/* The DMA hardware only supports transmitting packets up to
+	 * MAX_DMA_PACKET_SIZE. Calculate the total number of DMA
+	 * elements required to send the entire sg list.
+	 */
+	for_each_sg(sgl, sg, sg_len, i) {
+		unsigned int factor;
+		size = sg_dma_len(sg);
+
+		if (size <= MAX_DMA_PACKET_SIZE) {
+			len++;
+			continue;
+		}
+
+		factor = size >> MAX_DMA_PACKET_SIZE_SHIFT;
+		if ((factor << MAX_DMA_PACKET_SIZE_SHIFT) < size)
+			factor++;
+
+		len += factor;
+	}
+
+	pr_debug("Allocate %d lli:s for this transfer\n", len);
+	lli = coh901318_lli_alloc(&cohc->base->pool, len);
+
+	if (lli == NULL)
+		goto err_dma_alloc;
+
+	/* Fill in the allocated lli list */
+	ret = coh901318_lli_fill_sg(&cohc->base->pool, lli, sgl, sg_len,
+				    cohc->addr,
+				    ctrl_chained,
+				    ctrl,
+				    ctrl_last,
+				    direction, COH901318_CX_CTRL_TC_IRQ_ENABLE);
+	if (ret)
+		goto err_lli_fill;
+
+
+	COH_DBG(coh901318_list_print(cohc, lli));
+
+	/* Pick a descriptor to handle this transfer */
+	cohd = coh901318_desc_get(cohc);
+	cohd->head_config = config;
+	/*
+	 * Set the default head ctrl for the channel to the one from
+	 * the lli; things may have changed due to odd buffer alignment
+	 * etc.
+	 */
+	cohd->head_ctrl = lli->control;
+	cohd->dir = direction;
+	cohd->flags = flags;
+	cohd->desc.tx_submit = coh901318_tx_submit;
+	cohd->lli = lli;
+
+	spin_unlock_irqrestore(&cohc->lock, flg);
+
+	return &cohd->desc;
+ err_lli_fill:
+ err_dma_alloc:
+ err_direction:
+	spin_unlock_irqrestore(&cohc->lock, flg);
+ out:
+	return NULL;
+}
+
+static enum dma_status
+coh901318_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+		 struct dma_tx_state *txstate)
+{
+	struct coh901318_chan *cohc = to_coh901318_chan(chan);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE || !txstate)
+		return ret;
+
+	dma_set_residue(txstate, coh901318_get_bytes_left(chan));
+
+	if (ret == DMA_IN_PROGRESS && cohc->stopped)
+		ret = DMA_PAUSED;
+
+	return ret;
+}
+
+static void
+coh901318_issue_pending(struct dma_chan *chan)
+{
+	struct coh901318_chan *cohc = to_coh901318_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&cohc->lock, flags);
+
+	/*
+	 * Busy means that pending jobs are already being processed,
+	 * and then there is no point in starting the queue: the
+	 * terminal count interrupt on the channel will take the next
+	 * job on the queue and execute it anyway.
+	 */
+	if (!cohc->busy)
+		coh901318_queue_start(cohc);
+
+	spin_unlock_irqrestore(&cohc->lock, flags);
+}
+
+/*
+ * Here we wrap in the runtime dma control interface
+ */
+struct burst_table {
+	int burst_8bit;
+	int burst_16bit;
+	int burst_32bit;
+	u32 reg;
+};
+
+static const struct burst_table burst_sizes[] = {
+	{
+		.burst_8bit = 64,
+		.burst_16bit = 32,
+		.burst_32bit = 16,
+		.reg = COH901318_CX_CTRL_BURST_COUNT_64_BYTES,
+	},
+	{
+		.burst_8bit = 48,
+		.burst_16bit = 24,
+		.burst_32bit = 12,
+		.reg = COH901318_CX_CTRL_BURST_COUNT_48_BYTES,
+	},
+	{
+		.burst_8bit = 32,
+		.burst_16bit = 16,
+		.burst_32bit = 8,
+		.reg = COH901318_CX_CTRL_BURST_COUNT_32_BYTES,
+	},
+	{
+		.burst_8bit = 16,
+		.burst_16bit = 8,
+		.burst_32bit = 4,
+		.reg = COH901318_CX_CTRL_BURST_COUNT_16_BYTES,
+	},
+	{
+		.burst_8bit = 8,
+		.burst_16bit = 4,
+		.burst_32bit = 2,
+		.reg = COH901318_CX_CTRL_BURST_COUNT_8_BYTES,
+	},
+	{
+		.burst_8bit = 4,
+		.burst_16bit = 2,
+		.burst_32bit = 1,
+		.reg = COH901318_CX_CTRL_BURST_COUNT_4_BYTES,
+	},
+	{
+		.burst_8bit = 2,
+		.burst_16bit = 1,
+		.burst_32bit = 0,
+		.reg = COH901318_CX_CTRL_BURST_COUNT_2_BYTES,
+	},
+	{
+		.burst_8bit = 1,
+		.burst_16bit = 0,
+		.burst_32bit = 0,
+		.reg = COH901318_CX_CTRL_BURST_COUNT_1_BYTE,
+	},
+};
+
+static int coh901318_dma_set_runtimeconfig(struct dma_chan *chan,
+					   struct dma_slave_config *config)
+{
+	struct coh901318_chan *cohc = to_coh901318_chan(chan);
+	dma_addr_t addr;
+	enum dma_slave_buswidth addr_width;
+	u32 maxburst;
+	u32 ctrl = 0;
+	int i = 0;
+
+	/* We only support mem to per or per to mem transfers */
+	if (config->direction == DMA_DEV_TO_MEM) {
+		addr = config->src_addr;
+		addr_width = config->src_addr_width;
+		maxburst = config->src_maxburst;
+	} else if (config->direction == DMA_MEM_TO_DEV) {
+		addr = config->dst_addr;
+		addr_width = config->dst_addr_width;
+		maxburst = config->dst_maxburst;
+	} else {
+		dev_err(COHC_2_DEV(cohc), "illegal channel mode\n");
+		return -EINVAL;
+	}
+
+	dev_dbg(COHC_2_DEV(cohc), "configure channel for %d byte transfers\n",
+		addr_width);
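+	/*
+	 * burst_sizes[] is ordered from largest to smallest burst count; the
+	 * loops below pick the first (largest) burst count that does not
+	 * exceed the client's maxburst for the selected bus width.
+	 */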
+	switch (addr_width)  {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		ctrl |=
+			COH901318_CX_CTRL_SRC_BUS_SIZE_8_BITS |
+			COH901318_CX_CTRL_DST_BUS_SIZE_8_BITS;
+
+		while (i < ARRAY_SIZE(burst_sizes)) {
+			if (burst_sizes[i].burst_8bit <= maxburst)
+				break;
+			i++;
+		}
+
+		break;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		ctrl |=
+			COH901318_CX_CTRL_SRC_BUS_SIZE_16_BITS |
+			COH901318_CX_CTRL_DST_BUS_SIZE_16_BITS;
+
+		while (i < ARRAY_SIZE(burst_sizes)) {
+			if (burst_sizes[i].burst_16bit <= maxburst)
+				break;
+			i++;
+		}
+
+		break;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		/* Direction doesn't matter here, it's 32/32 bits */
+		ctrl |=
+			COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
+			COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS;
+
+		while (i < ARRAY_SIZE(burst_sizes)) {
+			if (burst_sizes[i].burst_32bit <= maxburst)
+				break;
+			i++;
+		}
+
+		break;
+	default:
+		dev_err(COHC_2_DEV(cohc),
+			"bad runtimeconfig: alien address width\n");
+		return -EINVAL;
+	}
+
+	ctrl |= burst_sizes[i].reg;
+	dev_dbg(COHC_2_DEV(cohc),
+		"selected burst size %d bytes for address width %d bytes, maxburst %d\n",
+		burst_sizes[i].burst_8bit, addr_width, maxburst);
+
+	cohc->addr = addr;
+	cohc->ctrl = ctrl;
+
+	return 0;
+}
+
+static void coh901318_base_init(struct dma_device *dma, const int *pick_chans,
+				struct coh901318_base *base)
+{
+	int chans_i;
+	int i = 0;
+	struct coh901318_chan *cohc;
+
+	INIT_LIST_HEAD(&dma->channels);
+
+	for (chans_i = 0; pick_chans[chans_i] != -1; chans_i += 2) {
+		for (i = pick_chans[chans_i]; i <= pick_chans[chans_i+1]; i++) {
+			cohc = &base->chans[i];
+
+			cohc->base = base;
+			cohc->chan.device = dma;
+			cohc->id = i;
+
+			/* TODO: do we really need this lock if only one
+			 * client is connected to each channel?
+			 */
+
+			spin_lock_init(&cohc->lock);
+
+			cohc->nbr_active_done = 0;
+			cohc->busy = 0;
+			INIT_LIST_HEAD(&cohc->free);
+			INIT_LIST_HEAD(&cohc->active);
+			INIT_LIST_HEAD(&cohc->queue);
+
+			tasklet_init(&cohc->tasklet, dma_tasklet,
+				     (unsigned long) cohc);
+
+			list_add_tail(&cohc->chan.device_node,
+				      &dma->channels);
+		}
+	}
+}
+
+static int __init coh901318_probe(struct platform_device *pdev)
+{
+	int err = 0;
+	struct coh901318_base *base;
+	int irq;
+	struct resource *io;
+
+	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!io)
+		return -ENODEV;
+
+	/* Map DMA controller registers to virtual memory */
+	if (devm_request_mem_region(&pdev->dev,
+				    io->start,
+				    resource_size(io),
+				    pdev->dev.driver->name) == NULL)
+		return -ENOMEM;
+
+	base = devm_kzalloc(&pdev->dev,
+			    ALIGN(sizeof(struct coh901318_base), 4) +
+			    U300_DMA_CHANNELS *
+			    sizeof(struct coh901318_chan),
+			    GFP_KERNEL);
+	if (!base)
+		return -ENOMEM;
+
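+	/*
+	 * The channel array lives in the same allocation as the base
+	 * structure, starting right after the (4-byte aligned) base.
+	 */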
+	base->chans = ((void *)base) + ALIGN(sizeof(struct coh901318_base), 4);
+
+	base->virtbase = devm_ioremap(&pdev->dev, io->start, resource_size(io));
+	if (!base->virtbase)
+		return -ENOMEM;
+
+	base->dev = &pdev->dev;
+	spin_lock_init(&base->pm.lock);
+	base->pm.started_channels = 0;
+
+	COH901318_DEBUGFS_ASSIGN(debugfs_dma_base, base);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	err = devm_request_irq(&pdev->dev, irq, dma_irq_handler, 0,
+			       "coh901318", base);
+	if (err)
+		return err;
+
+	base->irq = irq;
+
+	err = coh901318_pool_create(&base->pool, &pdev->dev,
+				    sizeof(struct coh901318_lli),
+				    32);
+	if (err)
+		return err;
+
+	/* init channels for device transfers */
+	coh901318_base_init(&base->dma_slave, dma_slave_channels,
+			    base);
+
+	dma_cap_zero(base->dma_slave.cap_mask);
+	dma_cap_set(DMA_SLAVE, base->dma_slave.cap_mask);
+
+	base->dma_slave.device_alloc_chan_resources = coh901318_alloc_chan_resources;
+	base->dma_slave.device_free_chan_resources = coh901318_free_chan_resources;
+	base->dma_slave.device_prep_slave_sg = coh901318_prep_slave_sg;
+	base->dma_slave.device_tx_status = coh901318_tx_status;
+	base->dma_slave.device_issue_pending = coh901318_issue_pending;
+	base->dma_slave.device_config = coh901318_dma_set_runtimeconfig;
+	base->dma_slave.device_pause = coh901318_pause;
+	base->dma_slave.device_resume = coh901318_resume;
+	base->dma_slave.device_terminate_all = coh901318_terminate_all;
+	base->dma_slave.dev = &pdev->dev;
+
+	err = dma_async_device_register(&base->dma_slave);
+
+	if (err)
+		goto err_register_slave;
+
+	/* init channels for memcpy */
+	coh901318_base_init(&base->dma_memcpy, dma_memcpy_channels,
+			    base);
+
+	dma_cap_zero(base->dma_memcpy.cap_mask);
+	dma_cap_set(DMA_MEMCPY, base->dma_memcpy.cap_mask);
+
+	base->dma_memcpy.device_alloc_chan_resources = coh901318_alloc_chan_resources;
+	base->dma_memcpy.device_free_chan_resources = coh901318_free_chan_resources;
+	base->dma_memcpy.device_prep_dma_memcpy = coh901318_prep_memcpy;
+	base->dma_memcpy.device_tx_status = coh901318_tx_status;
+	base->dma_memcpy.device_issue_pending = coh901318_issue_pending;
+	base->dma_memcpy.device_config = coh901318_dma_set_runtimeconfig;
+	base->dma_memcpy.device_pause = coh901318_pause;
+	base->dma_memcpy.device_resume = coh901318_resume;
+	base->dma_memcpy.device_terminate_all = coh901318_terminate_all;
+	base->dma_memcpy.dev = &pdev->dev;
+	/*
+	 * This controller can only access addresses at even 32-bit boundaries,
+	 * i.e. with an alignment of 2^2 bytes.
+	 */
+	base->dma_memcpy.copy_align = DMAENGINE_ALIGN_4_BYTES;
+	err = dma_async_device_register(&base->dma_memcpy);
+
+	if (err)
+		goto err_register_memcpy;
+
+	err = of_dma_controller_register(pdev->dev.of_node, coh901318_xlate,
+					 base);
+	if (err)
+		goto err_register_of_dma;
+
+	platform_set_drvdata(pdev, base);
+	dev_info(&pdev->dev, "Initialized COH901318 DMA on virtual base 0x%p\n",
+		base->virtbase);
+
+	return err;
+
+ err_register_of_dma:
+	dma_async_device_unregister(&base->dma_memcpy);
+ err_register_memcpy:
+	dma_async_device_unregister(&base->dma_slave);
+ err_register_slave:
+	coh901318_pool_destroy(&base->pool);
+	return err;
+}
+
+static void coh901318_base_remove(struct coh901318_base *base, const int *pick_chans)
+{
+	int chans_i;
+	int i = 0;
+	struct coh901318_chan *cohc;
+
+	for (chans_i = 0; pick_chans[chans_i] != -1; chans_i += 2) {
+		for (i = pick_chans[chans_i]; i <= pick_chans[chans_i+1]; i++) {
+			cohc = &base->chans[i];
+
+			tasklet_kill(&cohc->tasklet);
+		}
+	}
+
+}
+
+static int coh901318_remove(struct platform_device *pdev)
+{
+	struct coh901318_base *base = platform_get_drvdata(pdev);
+
+	devm_free_irq(&pdev->dev, base->irq, base);
+
+	coh901318_base_remove(base, dma_slave_channels);
+	coh901318_base_remove(base, dma_memcpy_channels);
+
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&base->dma_memcpy);
+	dma_async_device_unregister(&base->dma_slave);
+	coh901318_pool_destroy(&base->pool);
+	return 0;
+}
+
+static const struct of_device_id coh901318_dt_match[] = {
+	{ .compatible = "stericsson,coh901318" },
+	{},
+};
+
+static struct platform_driver coh901318_driver = {
+	.remove = coh901318_remove,
+	.driver = {
+		.name	= "coh901318",
+		.of_match_table = coh901318_dt_match,
+	},
+};
+
+static int __init coh901318_init(void)
+{
+	return platform_driver_probe(&coh901318_driver, coh901318_probe);
+}
+subsys_initcall(coh901318_init);
+
+static void __exit coh901318_exit(void)
+{
+	platform_driver_unregister(&coh901318_driver);
+}
+module_exit(coh901318_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Per Friden");
diff --git a/drivers/dma/coh901318.h b/drivers/dma/coh901318.h
new file mode 100644
index 0000000..95ce1e2
--- /dev/null
+++ b/drivers/dma/coh901318.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2007-2013 ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ * DMA driver for COH 901 318
+ * Author: Per Friden <per.friden@stericsson.com>
+ */
+
+#ifndef COH901318_H
+#define COH901318_H
+
+#define MAX_DMA_PACKET_SIZE_SHIFT 11
+#define MAX_DMA_PACKET_SIZE (1 << MAX_DMA_PACKET_SIZE_SHIFT)
+
+struct device;
+
+struct coh901318_pool {
+	spinlock_t lock;
+	struct dma_pool *dmapool;
+	struct device *dev;
+
+#ifdef CONFIG_DEBUG_FS
+	int debugfs_pool_counter;
+#endif
+};
+
+/**
+ * struct coh901318_lli - linked list item for DMAC
+ * @control: control settings for DMAC
+ * @src_addr: transfer source address
+ * @dst_addr: transfer destination address
+ * @link_addr: physical address of next lli
+ * @virt_link_addr: virtual address of next lli (only used by pool_free)
+ * @phy_this: physical address of current lli (only used by pool_free)
+ */
+struct coh901318_lli {
+	u32 control;
+	dma_addr_t src_addr;
+	dma_addr_t dst_addr;
+	dma_addr_t link_addr;
+
+	void *virt_link_addr;
+	dma_addr_t phy_this;
+};
+
+/**
+ * coh901318_pool_create() - Creates a dma pool for lli:s
+ * @pool: pool handle
+ * @dev: dma device
+ * @lli_nbr: number of lli:s in the pool
+ * @align: address alignment of lli:s
+ * returns 0 on success, otherwise non-zero
+ */
+int coh901318_pool_create(struct coh901318_pool *pool,
+			  struct device *dev,
+			  size_t lli_nbr, size_t align);
+
+/**
+ * coh901318_pool_destroy() - Destroys the dma pool
+ * @pool: pool handle
+ * returns 0 on success, otherwise non-zero
+ */
+int coh901318_pool_destroy(struct coh901318_pool *pool);
+
+/**
+ * coh901318_lli_alloc() - Allocates a linked list
+ *
+ * @pool: pool handle
+ * @len: number of lli:s in the list
+ * return: non-NULL on success, otherwise NULL
+ */
+struct coh901318_lli *
+coh901318_lli_alloc(struct coh901318_pool *pool,
+		    unsigned int len);
+
+/**
+ * coh901318_lli_free() - Returns the linked list items to the pool
+ * @pool: pool handle
+ * @lli: reference to lli pointer to be freed
+ */
+void coh901318_lli_free(struct coh901318_pool *pool,
+			struct coh901318_lli **lli);
+
+/**
+ * coh901318_lli_fill_memcpy() - Prepares the lli:s for dma memcpy
+ * @pool: pool handle
+ * @lli: allocated lli
+ * @src: src address
+ * @size: transfer size
+ * @dst: destination address
+ * @ctrl_chained: ctrl for chained lli
+ * @ctrl_last: ctrl for the last lli
+ * returns number of CPU interrupts for the lli, negative on error.
+ */
+int
+coh901318_lli_fill_memcpy(struct coh901318_pool *pool,
+			  struct coh901318_lli *lli,
+			  dma_addr_t src, unsigned int size,
+			  dma_addr_t dst, u32 ctrl_chained, u32 ctrl_last);
+
+/**
+ * coh901318_lli_fill_single() - Prepares the lli:s for dma single transfer
+ * @pool: pool handle
+ * @lli: allocated lli
+ * @buf: transfer buffer
+ * @size: transfer size
+ * @dev_addr: address of peripheral
+ * @ctrl_chained: ctrl for chained lli
+ * @ctrl_last: ctrl for the last lli
+ * @dir: direction of transfer (to or from device)
+ * returns number of CPU interrupts for the lli, negative on error.
+ */
+int
+coh901318_lli_fill_single(struct coh901318_pool *pool,
+			  struct coh901318_lli *lli,
+			  dma_addr_t buf, unsigned int size,
+			  dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl_last,
+			  enum dma_transfer_direction dir);
+
+/**
+ * coh901318_lli_fill_sg() - Prepares the lli:s for dma scatter list transfer
+ * @pool: pool handle
+ * @lli: allocated lli
+ * @sg: scatter gather list
+ * @nents: number of entries in sg
+ * @dev_addr: address of peripheral
+ * @ctrl_chained: ctrl for chained lli
+ * @ctrl: ctrl of middle lli
+ * @ctrl_last: ctrl for the last lli
+ * @dir: direction of transfer (to or from device)
+ * @ctrl_irq_mask: ctrl mask for CPU interrupt
+ * returns number of CPU interrupts for the lli, negative on error.
+ */
+int
+coh901318_lli_fill_sg(struct coh901318_pool *pool,
+		      struct coh901318_lli *lli,
+		      struct scatterlist *sg, unsigned int nents,
+		      dma_addr_t dev_addr, u32 ctrl_chained,
+		      u32 ctrl, u32 ctrl_last,
+		      enum dma_transfer_direction dir, u32 ctrl_irq_mask);
+
+#endif /* COH901318_H */
diff --git a/drivers/dma/coh901318_lli.c b/drivers/dma/coh901318_lli.c
new file mode 100644
index 0000000..d612b2e
--- /dev/null
+++ b/drivers/dma/coh901318_lli.c
@@ -0,0 +1,313 @@
+/*
+ * drivers/dma/coh901318_lli.c
+ *
+ * Copyright (C) 2007-2009 ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ * Support functions for handling lli for dma
+ * Author: Per Friden <per.friden@stericsson.com>
+ */
+
+#include <linux/spinlock.h>
+#include <linux/memory.h>
+#include <linux/gfp.h>
+#include <linux/dmapool.h>
+#include <linux/dmaengine.h>
+
+#include "coh901318.h"
+
+#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_U300_DEBUG))
+#define DEBUGFS_POOL_COUNTER_RESET(pool) (pool->debugfs_pool_counter = 0)
+#define DEBUGFS_POOL_COUNTER_ADD(pool, add) (pool->debugfs_pool_counter += add)
+#else
+#define DEBUGFS_POOL_COUNTER_RESET(pool)
+#define DEBUGFS_POOL_COUNTER_ADD(pool, add)
+#endif
+
+static struct coh901318_lli *
+coh901318_lli_next(struct coh901318_lli *data)
+{
+	if (data == NULL || data->link_addr == 0)
+		return NULL;
+
+	return (struct coh901318_lli *) data->virt_link_addr;
+}
+
+int coh901318_pool_create(struct coh901318_pool *pool,
+			  struct device *dev,
+			  size_t size, size_t align)
+{
+	spin_lock_init(&pool->lock);
+	pool->dev = dev;
+	pool->dmapool = dma_pool_create("lli_pool", dev, size, align, 0);
+
+	DEBUGFS_POOL_COUNTER_RESET(pool);
+	return 0;
+}
+
+int coh901318_pool_destroy(struct coh901318_pool *pool)
+{
+
+	dma_pool_destroy(pool->dmapool);
+	return 0;
+}
+
+struct coh901318_lli *
+coh901318_lli_alloc(struct coh901318_pool *pool, unsigned int len)
+{
+	int i;
+	struct coh901318_lli *head;
+	struct coh901318_lli *lli;
+	struct coh901318_lli *lli_prev;
+	dma_addr_t phy;
+
+	if (len == 0)
+		return NULL;
+
+	spin_lock(&pool->lock);
+
+	head = dma_pool_alloc(pool->dmapool, GFP_NOWAIT, &phy);
+
+	if (head == NULL)
+		goto err;
+
+	DEBUGFS_POOL_COUNTER_ADD(pool, 1);
+
+	lli = head;
+	lli->phy_this = phy;
+	lli->link_addr = 0x00000000;
+	lli->virt_link_addr = NULL;
+
+	for (i = 1; i < len; i++) {
+		lli_prev = lli;
+
+		lli = dma_pool_alloc(pool->dmapool, GFP_NOWAIT, &phy);
+
+		if (lli == NULL)
+			goto err_clean_up;
+
+		DEBUGFS_POOL_COUNTER_ADD(pool, 1);
+		lli->phy_this = phy;
+		lli->link_addr = 0x00000000;
+		lli->virt_link_addr = NULL;
+
+		lli_prev->link_addr = phy;
+		lli_prev->virt_link_addr = lli;
+	}
+
+	spin_unlock(&pool->lock);
+
+	return head;
+
+ err:
+	spin_unlock(&pool->lock);
+	return NULL;
+
+ err_clean_up:
+	lli_prev->link_addr = 0x00000000U;
+	spin_unlock(&pool->lock);
+	coh901318_lli_free(pool, &head);
+	return NULL;
+}
+
+void coh901318_lli_free(struct coh901318_pool *pool,
+			struct coh901318_lli **lli)
+{
+	struct coh901318_lli *l;
+	struct coh901318_lli *next;
+
+	if (lli == NULL)
+		return;
+
+	l = *lli;
+
+	if (l == NULL)
+		return;
+
+	spin_lock(&pool->lock);
+
+	while (l->link_addr) {
+		next = l->virt_link_addr;
+		dma_pool_free(pool->dmapool, l, l->phy_this);
+		DEBUGFS_POOL_COUNTER_ADD(pool, -1);
+		l = next;
+	}
+	dma_pool_free(pool->dmapool, l, l->phy_this);
+	DEBUGFS_POOL_COUNTER_ADD(pool, -1);
+
+	spin_unlock(&pool->lock);
+	*lli = NULL;
+}
+
+int
+coh901318_lli_fill_memcpy(struct coh901318_pool *pool,
+			  struct coh901318_lli *lli,
+			  dma_addr_t source, unsigned int size,
+			  dma_addr_t destination, u32 ctrl_chained,
+			  u32 ctrl_eom)
+{
+	int s = size;
+	dma_addr_t src = source;
+	dma_addr_t dst = destination;
+
+	lli->src_addr = src;
+	lli->dst_addr = dst;
+
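+	/*
+	 * Chain full MAX_DMA_PACKET_SIZE packets; the final lli carries the
+	 * remainder together with the end-of-message control word.
+	 */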
+	while (lli->link_addr) {
+		lli->control = ctrl_chained | MAX_DMA_PACKET_SIZE;
+		lli->src_addr = src;
+		lli->dst_addr = dst;
+
+		s -= MAX_DMA_PACKET_SIZE;
+		lli = coh901318_lli_next(lli);
+
+		src += MAX_DMA_PACKET_SIZE;
+		dst += MAX_DMA_PACKET_SIZE;
+	}
+
+	lli->control = ctrl_eom | s;
+	lli->src_addr = src;
+	lli->dst_addr = dst;
+
+	return 0;
+}
+
+int
+coh901318_lli_fill_single(struct coh901318_pool *pool,
+			  struct coh901318_lli *lli,
+			  dma_addr_t buf, unsigned int size,
+			  dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl_eom,
+			  enum dma_transfer_direction dir)
+{
+	int s = size;
+	dma_addr_t src;
+	dma_addr_t dst;
+
+
+	if (dir == DMA_MEM_TO_DEV) {
+		src = buf;
+		dst = dev_addr;
+
+	} else if (dir == DMA_DEV_TO_MEM) {
+
+		src = dev_addr;
+		dst = buf;
+	} else {
+		return -EINVAL;
+	}
+
+	while (lli->link_addr) {
+		size_t block_size = MAX_DMA_PACKET_SIZE;
+		lli->control = ctrl_chained | MAX_DMA_PACKET_SIZE;
+
+		/* If we are on the next-to-final block and there will
+		 * be less than half a DMA packet left for the last
+		 * block, then we want to make this block a little
+		 * smaller to balance the sizes. This is meant to
+		 * avoid too small transfers if the buffer size is
+		 * (MAX_DMA_PACKET_SIZE*N + 1) */
+		if (s < (MAX_DMA_PACKET_SIZE + MAX_DMA_PACKET_SIZE/2))
+			block_size = MAX_DMA_PACKET_SIZE/2;
+
+		s -= block_size;
+		lli->src_addr = src;
+		lli->dst_addr = dst;
+
+		lli = coh901318_lli_next(lli);
+
+		if (dir == DMA_MEM_TO_DEV)
+			src += block_size;
+		else if (dir == DMA_DEV_TO_MEM)
+			dst += block_size;
+	}
+
+	lli->control = ctrl_eom | s;
+	lli->src_addr = src;
+	lli->dst_addr = dst;
+
+	return 0;
+}
+
+int
+coh901318_lli_fill_sg(struct coh901318_pool *pool,
+		      struct coh901318_lli *lli,
+		      struct scatterlist *sgl, unsigned int nents,
+		      dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl,
+		      u32 ctrl_last,
+		      enum dma_transfer_direction dir, u32 ctrl_irq_mask)
+{
+	int i;
+	struct scatterlist *sg;
+	u32 ctrl_sg;
+	dma_addr_t src = 0;
+	dma_addr_t dst = 0;
+	u32 bytes_to_transfer;
+	u32 elem_size;
+
+	if (lli == NULL)
+		goto err;
+
+	spin_lock(&pool->lock);
+
+	if (dir == DMA_MEM_TO_DEV)
+		dst = dev_addr;
+	else if (dir == DMA_DEV_TO_MEM)
+		src = dev_addr;
+	else
+		goto err;
+
+	for_each_sg(sgl, sg, nents, i) {
+		if (sg_is_chain(sg)) {
+			/* sg continues into the next sg-element; don't
+			 * send ctrl_finish until the last
+			 * sg-element in the chain
+			 */
+			ctrl_sg = ctrl_chained;
+		} else if (i == nents - 1)
+			ctrl_sg = ctrl_last;
+		else
+			ctrl_sg = ctrl ? ctrl : ctrl_last;
+
+
+		if (dir == DMA_MEM_TO_DEV)
+			/* memory is the source; address from the sg */
+			src = sg_dma_address(sg);
+		else
+			/* memory is the destination; address from the sg */
+			dst = sg_dma_address(sg);
+
+		bytes_to_transfer = sg_dma_len(sg);
+
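+		/*
+		 * Split each sg element into MAX_DMA_PACKET_SIZE chunks;
+		 * intermediate chunks use the chained control word while the
+		 * last chunk of the element uses ctrl_sg selected above.
+		 */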
+		while (bytes_to_transfer) {
+			u32 val;
+
+			if (bytes_to_transfer > MAX_DMA_PACKET_SIZE) {
+				elem_size = MAX_DMA_PACKET_SIZE;
+				val = ctrl_chained;
+			} else {
+				elem_size = bytes_to_transfer;
+				val = ctrl_sg;
+			}
+
+			lli->control = val | elem_size;
+			lli->src_addr = src;
+			lli->dst_addr = dst;
+
+			if (dir == DMA_DEV_TO_MEM)
+				dst += elem_size;
+			else
+				src += elem_size;
+
+			BUG_ON(lli->link_addr & 3);
+
+			bytes_to_transfer -= elem_size;
+			lli = coh901318_lli_next(lli);
+		}
+
+	}
+	spin_unlock(&pool->lock);
+
+	return 0;
+ err:
+	spin_unlock(&pool->lock);
+	return -EINVAL;
+}
diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c
new file mode 100644
index 0000000..15b2453
--- /dev/null
+++ b/drivers/dma/dma-axi-dmac.c
@@ -0,0 +1,740 @@
+/*
+ * Driver for the Analog Devices AXI-DMAC core
+ *
+ * Copyright 2013-2015 Analog Devices Inc.
+ *  Author: Lars-Peter Clausen <lars@metafoo.de>
+ *
+ * Licensed under the GPL-2.
+ */
+
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <dt-bindings/dma/axi-dmac.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+/*
+ * The AXI-DMAC is a soft IP core that is used in FPGA designs. The core has
+ * various instantiation parameters which decide the exact feature set
+ * supported by the core.
+ *
+ * Each channel of the core has a source interface and a destination interface.
+ * The number of channels and the type of the channel interfaces is selected at
+ * configuration time. An interface can either be connected to a central memory
+ * interconnect, which allows access to system memory, or it can be connected to
+ * a dedicated bus which is directly connected to a data port on a peripheral.
+ * Given that these are configuration options of the core that are selected when
+ * it is instantiated, they cannot be changed by software at runtime. By
+ * extension this means that each channel is uni-directional: it can either be
+ * device to memory or memory to device, but not both. Also, since the device
+ * side is a dedicated data bus only connected to a single peripheral, there is
+ * no address that can or needs to be configured for the device side.
+ */
+
+#define AXI_DMAC_REG_IRQ_MASK		0x80
+#define AXI_DMAC_REG_IRQ_PENDING	0x84
+#define AXI_DMAC_REG_IRQ_SOURCE		0x88
+
+#define AXI_DMAC_REG_CTRL		0x400
+#define AXI_DMAC_REG_TRANSFER_ID	0x404
+#define AXI_DMAC_REG_START_TRANSFER	0x408
+#define AXI_DMAC_REG_FLAGS		0x40c
+#define AXI_DMAC_REG_DEST_ADDRESS	0x410
+#define AXI_DMAC_REG_SRC_ADDRESS	0x414
+#define AXI_DMAC_REG_X_LENGTH		0x418
+#define AXI_DMAC_REG_Y_LENGTH		0x41c
+#define AXI_DMAC_REG_DEST_STRIDE	0x420
+#define AXI_DMAC_REG_SRC_STRIDE		0x424
+#define AXI_DMAC_REG_TRANSFER_DONE	0x428
+#define AXI_DMAC_REG_ACTIVE_TRANSFER_ID 0x42c
+#define AXI_DMAC_REG_STATUS		0x430
+#define AXI_DMAC_REG_CURRENT_SRC_ADDR	0x434
+#define AXI_DMAC_REG_CURRENT_DEST_ADDR	0x438
+
+#define AXI_DMAC_CTRL_ENABLE		BIT(0)
+#define AXI_DMAC_CTRL_PAUSE		BIT(1)
+
+#define AXI_DMAC_IRQ_SOT		BIT(0)
+#define AXI_DMAC_IRQ_EOT		BIT(1)
+
+#define AXI_DMAC_FLAG_CYCLIC		BIT(0)
+
+/* The maximum ID allocated by the hardware is 31 */
+#define AXI_DMAC_SG_UNUSED 32U
+
+struct axi_dmac_sg {
+	dma_addr_t src_addr;
+	dma_addr_t dest_addr;
+	unsigned int x_len;
+	unsigned int y_len;
+	unsigned int dest_stride;
+	unsigned int src_stride;
+	unsigned int id;
+	bool schedule_when_free;
+};
+
+struct axi_dmac_desc {
+	struct virt_dma_desc vdesc;
+	bool cyclic;
+
+	unsigned int num_submitted;
+	unsigned int num_completed;
+	unsigned int num_sgs;
+	struct axi_dmac_sg sg[];
+};
+
+struct axi_dmac_chan {
+	struct virt_dma_chan vchan;
+
+	struct axi_dmac_desc *next_desc;
+	struct list_head active_descs;
+	enum dma_transfer_direction direction;
+
+	unsigned int src_width;
+	unsigned int dest_width;
+	unsigned int src_type;
+	unsigned int dest_type;
+
+	unsigned int max_length;
+	unsigned int align_mask;
+
+	bool hw_cyclic;
+	bool hw_2d;
+};
+
+struct axi_dmac {
+	void __iomem *base;
+	int irq;
+
+	struct clk *clk;
+
+	struct dma_device dma_dev;
+	struct axi_dmac_chan chan;
+
+	struct device_dma_parameters dma_parms;
+};
+
+static struct axi_dmac *chan_to_axi_dmac(struct axi_dmac_chan *chan)
+{
+	return container_of(chan->vchan.chan.device, struct axi_dmac,
+		dma_dev);
+}
+
+static struct axi_dmac_chan *to_axi_dmac_chan(struct dma_chan *c)
+{
+	return container_of(c, struct axi_dmac_chan, vchan.chan);
+}
+
+static struct axi_dmac_desc *to_axi_dmac_desc(struct virt_dma_desc *vdesc)
+{
+	return container_of(vdesc, struct axi_dmac_desc, vdesc);
+}
+
+static void axi_dmac_write(struct axi_dmac *axi_dmac, unsigned int reg,
+	unsigned int val)
+{
+	writel(val, axi_dmac->base + reg);
+}
+
+static int axi_dmac_read(struct axi_dmac *axi_dmac, unsigned int reg)
+{
+	return readl(axi_dmac->base + reg);
+}
+
+static int axi_dmac_src_is_mem(struct axi_dmac_chan *chan)
+{
+	return chan->src_type == AXI_DMAC_BUS_TYPE_AXI_MM;
+}
+
+static int axi_dmac_dest_is_mem(struct axi_dmac_chan *chan)
+{
+	return chan->dest_type == AXI_DMAC_BUS_TYPE_AXI_MM;
+}
+
+static bool axi_dmac_check_len(struct axi_dmac_chan *chan, unsigned int len)
+{
+	if (len == 0 || len > chan->max_length)
+		return false;
+	if ((len & chan->align_mask) != 0) /* Not aligned */
+		return false;
+	return true;
+}
+
+static bool axi_dmac_check_addr(struct axi_dmac_chan *chan, dma_addr_t addr)
+{
+	if ((addr & chan->align_mask) != 0) /* Not aligned */
+		return false;
+	return true;
+}
+
+static void axi_dmac_start_transfer(struct axi_dmac_chan *chan)
+{
+	struct axi_dmac *dmac = chan_to_axi_dmac(chan);
+	struct virt_dma_desc *vdesc;
+	struct axi_dmac_desc *desc;
+	struct axi_dmac_sg *sg;
+	unsigned int flags = 0;
+	unsigned int val;
+
+	val = axi_dmac_read(dmac, AXI_DMAC_REG_START_TRANSFER);
+	if (val) /* Queue is full, wait for the next SOT IRQ */
+		return;
+
+	desc = chan->next_desc;
+
+	if (!desc) {
+		vdesc = vchan_next_desc(&chan->vchan);
+		if (!vdesc)
+			return;
+		list_move_tail(&vdesc->node, &chan->active_descs);
+		desc = to_axi_dmac_desc(vdesc);
+	}
+	sg = &desc->sg[desc->num_submitted];
+
+	/* Already queued in cyclic mode. Wait for it to finish */
+	if (sg->id != AXI_DMAC_SG_UNUSED) {
+		sg->schedule_when_free = true;
+		return;
+	}
+
+	desc->num_submitted++;
+	if (desc->num_submitted == desc->num_sgs) {
+		if (desc->cyclic)
+			desc->num_submitted = 0; /* Start again */
+		else
+			chan->next_desc = NULL;
+	} else {
+		chan->next_desc = desc;
+	}
+
+	sg->id = axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_ID);
+
+	if (axi_dmac_dest_is_mem(chan)) {
+		axi_dmac_write(dmac, AXI_DMAC_REG_DEST_ADDRESS, sg->dest_addr);
+		axi_dmac_write(dmac, AXI_DMAC_REG_DEST_STRIDE, sg->dest_stride);
+	}
+
+	if (axi_dmac_src_is_mem(chan)) {
+		axi_dmac_write(dmac, AXI_DMAC_REG_SRC_ADDRESS, sg->src_addr);
+		axi_dmac_write(dmac, AXI_DMAC_REG_SRC_STRIDE, sg->src_stride);
+	}
+
+	/*
+	 * If the hardware supports cyclic transfers and there is no callback to
+	 * call and only a single segment, enable hw cyclic mode to avoid
+	 * unnecessary interrupts.
+	 */
+	if (chan->hw_cyclic && desc->cyclic && !desc->vdesc.tx.callback &&
+		desc->num_sgs == 1)
+		flags |= AXI_DMAC_FLAG_CYCLIC;
+
+	axi_dmac_write(dmac, AXI_DMAC_REG_X_LENGTH, sg->x_len - 1);
+	axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, sg->y_len - 1);
+	axi_dmac_write(dmac, AXI_DMAC_REG_FLAGS, flags);
+	axi_dmac_write(dmac, AXI_DMAC_REG_START_TRANSFER, 1);
+}
+
+static struct axi_dmac_desc *axi_dmac_active_desc(struct axi_dmac_chan *chan)
+{
+	return list_first_entry_or_null(&chan->active_descs,
+		struct axi_dmac_desc, vdesc.node);
+}
+
+static bool axi_dmac_transfer_done(struct axi_dmac_chan *chan,
+	unsigned int completed_transfers)
+{
+	struct axi_dmac_desc *active;
+	struct axi_dmac_sg *sg;
+	bool start_next = false;
+
+	active = axi_dmac_active_desc(chan);
+	if (!active)
+		return false;
+
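+	/*
+	 * Retire segments in submission order and stop at the first segment
+	 * that the hardware has not yet reported as completed.
+	 */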
+	do {
+		sg = &active->sg[active->num_completed];
+		if (sg->id == AXI_DMAC_SG_UNUSED) /* Not yet submitted */
+			break;
+		if (!(BIT(sg->id) & completed_transfers))
+			break;
+		active->num_completed++;
+		sg->id = AXI_DMAC_SG_UNUSED;
+		if (sg->schedule_when_free) {
+			sg->schedule_when_free = false;
+			start_next = true;
+		}
+
+		if (active->cyclic)
+			vchan_cyclic_callback(&active->vdesc);
+
+		if (active->num_completed == active->num_sgs) {
+			if (active->cyclic) {
+				active->num_completed = 0; /* wrap around */
+			} else {
+				list_del(&active->vdesc.node);
+				vchan_cookie_complete(&active->vdesc);
+				active = axi_dmac_active_desc(chan);
+			}
+		}
+	} while (active);
+
+	return start_next;
+}
+
+static irqreturn_t axi_dmac_interrupt_handler(int irq, void *devid)
+{
+	struct axi_dmac *dmac = devid;
+	unsigned int pending;
+	bool start_next = false;
+
+	pending = axi_dmac_read(dmac, AXI_DMAC_REG_IRQ_PENDING);
+	if (!pending)
+		return IRQ_NONE;
+
+	axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_PENDING, pending);
+
+	spin_lock(&dmac->chan.vchan.lock);
+	/* One or more transfers have finished */
+	if (pending & AXI_DMAC_IRQ_EOT) {
+		unsigned int completed;
+
+		completed = axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_DONE);
+		start_next = axi_dmac_transfer_done(&dmac->chan, completed);
+	}
+	/* Space has become available in the descriptor queue */
+	if ((pending & AXI_DMAC_IRQ_SOT) || start_next)
+		axi_dmac_start_transfer(&dmac->chan);
+	spin_unlock(&dmac->chan.vchan.lock);
+
+	return IRQ_HANDLED;
+}
+
+static int axi_dmac_terminate_all(struct dma_chan *c)
+{
+	struct axi_dmac_chan *chan = to_axi_dmac_chan(c);
+	struct axi_dmac *dmac = chan_to_axi_dmac(chan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+	axi_dmac_write(dmac, AXI_DMAC_REG_CTRL, 0);
+	chan->next_desc = NULL;
+	vchan_get_all_descriptors(&chan->vchan, &head);
+	list_splice_tail_init(&chan->active_descs, &head);
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+	vchan_dma_desc_free_list(&chan->vchan, &head);
+
+	return 0;
+}
+
+static void axi_dmac_synchronize(struct dma_chan *c)
+{
+	struct axi_dmac_chan *chan = to_axi_dmac_chan(c);
+
+	vchan_synchronize(&chan->vchan);
+}
+
+static void axi_dmac_issue_pending(struct dma_chan *c)
+{
+	struct axi_dmac_chan *chan = to_axi_dmac_chan(c);
+	struct axi_dmac *dmac = chan_to_axi_dmac(chan);
+	unsigned long flags;
+
+	axi_dmac_write(dmac, AXI_DMAC_REG_CTRL, AXI_DMAC_CTRL_ENABLE);
+
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+	if (vchan_issue_pending(&chan->vchan))
+		axi_dmac_start_transfer(chan);
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+}
+
+static struct axi_dmac_desc *axi_dmac_alloc_desc(unsigned int num_sgs)
+{
+	struct axi_dmac_desc *desc;
+	unsigned int i;
+
+	desc = kzalloc(sizeof(struct axi_dmac_desc) +
+		sizeof(struct axi_dmac_sg) * num_sgs, GFP_NOWAIT);
+	if (!desc)
+		return NULL;
+
+	for (i = 0; i < num_sgs; i++)
+		desc->sg[i].id = AXI_DMAC_SG_UNUSED;
+
+	desc->num_sgs = num_sgs;
+
+	return desc;
+}
+
+static struct dma_async_tx_descriptor *axi_dmac_prep_slave_sg(
+	struct dma_chan *c, struct scatterlist *sgl,
+	unsigned int sg_len, enum dma_transfer_direction direction,
+	unsigned long flags, void *context)
+{
+	struct axi_dmac_chan *chan = to_axi_dmac_chan(c);
+	struct axi_dmac_desc *desc;
+	struct scatterlist *sg;
+	unsigned int i;
+
+	if (direction != chan->direction)
+		return NULL;
+
+	desc = axi_dmac_alloc_desc(sg_len);
+	if (!desc)
+		return NULL;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		if (!axi_dmac_check_addr(chan, sg_dma_address(sg)) ||
+		    !axi_dmac_check_len(chan, sg_dma_len(sg))) {
+			kfree(desc);
+			return NULL;
+		}
+
+		if (direction == DMA_DEV_TO_MEM)
+			desc->sg[i].dest_addr = sg_dma_address(sg);
+		else
+			desc->sg[i].src_addr = sg_dma_address(sg);
+		desc->sg[i].x_len = sg_dma_len(sg);
+		desc->sg[i].y_len = 1;
+	}
+
+	desc->cyclic = false;
+
+	return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+}
+
+static struct dma_async_tx_descriptor *axi_dmac_prep_dma_cyclic(
+	struct dma_chan *c, dma_addr_t buf_addr, size_t buf_len,
+	size_t period_len, enum dma_transfer_direction direction,
+	unsigned long flags)
+{
+	struct axi_dmac_chan *chan = to_axi_dmac_chan(c);
+	struct axi_dmac_desc *desc;
+	unsigned int num_periods, i;
+
+	if (direction != chan->direction)
+		return NULL;
+
+	if (!axi_dmac_check_len(chan, buf_len) ||
+	    !axi_dmac_check_addr(chan, buf_addr))
+		return NULL;
+
+	if (period_len == 0 || buf_len % period_len)
+		return NULL;
+
+	num_periods = buf_len / period_len;
+
+	desc = axi_dmac_alloc_desc(num_periods);
+	if (!desc)
+		return NULL;
+
+	for (i = 0; i < num_periods; i++) {
+		if (direction == DMA_DEV_TO_MEM)
+			desc->sg[i].dest_addr = buf_addr;
+		else
+			desc->sg[i].src_addr = buf_addr;
+		desc->sg[i].x_len = period_len;
+		desc->sg[i].y_len = 1;
+		buf_addr += period_len;
+	}
+
+	desc->cyclic = true;
+
+	return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+}
+
+static struct dma_async_tx_descriptor *axi_dmac_prep_interleaved(
+	struct dma_chan *c, struct dma_interleaved_template *xt,
+	unsigned long flags)
+{
+	struct axi_dmac_chan *chan = to_axi_dmac_chan(c);
+	struct axi_dmac_desc *desc;
+	size_t dst_icg, src_icg;
+
+	if (xt->frame_size != 1)
+		return NULL;
+
+	if (xt->dir != chan->direction)
+		return NULL;
+
+	if (axi_dmac_src_is_mem(chan)) {
+		if (!xt->src_inc || !axi_dmac_check_addr(chan, xt->src_start))
+			return NULL;
+	}
+
+	if (axi_dmac_dest_is_mem(chan)) {
+		if (!xt->dst_inc || !axi_dmac_check_addr(chan, xt->dst_start))
+			return NULL;
+	}
+
+	dst_icg = dmaengine_get_dst_icg(xt, &xt->sgl[0]);
+	src_icg = dmaengine_get_src_icg(xt, &xt->sgl[0]);
+
+	if (chan->hw_2d) {
+		if (!axi_dmac_check_len(chan, xt->sgl[0].size) ||
+		    !axi_dmac_check_len(chan, xt->numf))
+			return NULL;
+		if (xt->sgl[0].size + dst_icg > chan->max_length ||
+		    xt->sgl[0].size + src_icg > chan->max_length)
+			return NULL;
+	} else {
+		if (dst_icg != 0 || src_icg != 0)
+			return NULL;
+		if (chan->max_length / xt->sgl[0].size < xt->numf)
+			return NULL;
+		if (!axi_dmac_check_len(chan, xt->sgl[0].size * xt->numf))
+			return NULL;
+	}
+
+	desc = axi_dmac_alloc_desc(1);
+	if (!desc)
+		return NULL;
+
+	if (axi_dmac_src_is_mem(chan)) {
+		desc->sg[0].src_addr = xt->src_start;
+		desc->sg[0].src_stride = xt->sgl[0].size + src_icg;
+	}
+
+	if (axi_dmac_dest_is_mem(chan)) {
+		desc->sg[0].dest_addr = xt->dst_start;
+		desc->sg[0].dest_stride = xt->sgl[0].size + dst_icg;
+	}
+
+	if (chan->hw_2d) {
+		desc->sg[0].x_len = xt->sgl[0].size;
+		desc->sg[0].y_len = xt->numf;
+	} else {
+		desc->sg[0].x_len = xt->sgl[0].size * xt->numf;
+		desc->sg[0].y_len = 1;
+	}
+
+	return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+}
+
+static void axi_dmac_free_chan_resources(struct dma_chan *c)
+{
+	vchan_free_chan_resources(to_virt_chan(c));
+}
+
+static void axi_dmac_desc_free(struct virt_dma_desc *vdesc)
+{
+	kfree(container_of(vdesc, struct axi_dmac_desc, vdesc));
+}
+
+/*
+ * The configuration stored in the devicetree matches the configuration
+ * parameters of the peripheral instance and allows the driver to know which
+ * features are implemented and how it should behave.
+ */
+static int axi_dmac_parse_chan_dt(struct device_node *of_chan,
+	struct axi_dmac_chan *chan)
+{
+	u32 val;
+	int ret;
+
+	ret = of_property_read_u32(of_chan, "reg", &val);
+	if (ret)
+		return ret;
+
+	/* We only support 1 channel for now */
+	if (val != 0)
+		return -EINVAL;
+
+	ret = of_property_read_u32(of_chan, "adi,source-bus-type", &val);
+	if (ret)
+		return ret;
+	if (val > AXI_DMAC_BUS_TYPE_FIFO)
+		return -EINVAL;
+	chan->src_type = val;
+
+	ret = of_property_read_u32(of_chan, "adi,destination-bus-type", &val);
+	if (ret)
+		return ret;
+	if (val > AXI_DMAC_BUS_TYPE_FIFO)
+		return -EINVAL;
+	chan->dest_type = val;
+
+	ret = of_property_read_u32(of_chan, "adi,source-bus-width", &val);
+	if (ret)
+		return ret;
+	chan->src_width = val / 8;
+
+	ret = of_property_read_u32(of_chan, "adi,destination-bus-width", &val);
+	if (ret)
+		return ret;
+	chan->dest_width = val / 8;
+
+	ret = of_property_read_u32(of_chan, "adi,length-width", &val);
+	if (ret)
+		return ret;
+
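+	/*
+	 * adi,length-width is the width in bits of the transfer length
+	 * register, so the largest supported transfer is 2^width - 1 bytes.
+	 */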
+	if (val >= 32)
+		chan->max_length = UINT_MAX;
+	else
+		chan->max_length = (1ULL << val) - 1;
+
+	chan->align_mask = max(chan->dest_width, chan->src_width) - 1;
+
+	if (axi_dmac_dest_is_mem(chan) && axi_dmac_src_is_mem(chan))
+		chan->direction = DMA_MEM_TO_MEM;
+	else if (!axi_dmac_dest_is_mem(chan) && axi_dmac_src_is_mem(chan))
+		chan->direction = DMA_MEM_TO_DEV;
+	else if (axi_dmac_dest_is_mem(chan) && !axi_dmac_src_is_mem(chan))
+		chan->direction = DMA_DEV_TO_MEM;
+	else
+		chan->direction = DMA_DEV_TO_DEV;
+
+	chan->hw_cyclic = of_property_read_bool(of_chan, "adi,cyclic");
+	chan->hw_2d = of_property_read_bool(of_chan, "adi,2d");
+
+	return 0;
+}
+
+static int axi_dmac_probe(struct platform_device *pdev)
+{
+	struct device_node *of_channels, *of_chan;
+	struct dma_device *dma_dev;
+	struct axi_dmac *dmac;
+	struct resource *res;
+	int ret;
+
+	dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
+	if (!dmac)
+		return -ENOMEM;
+
+	dmac->irq = platform_get_irq(pdev, 0);
+	if (dmac->irq < 0)
+		return dmac->irq;
+	if (dmac->irq == 0)
+		return -EINVAL;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	dmac->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(dmac->base))
+		return PTR_ERR(dmac->base);
+
+	dmac->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(dmac->clk))
+		return PTR_ERR(dmac->clk);
+
+	INIT_LIST_HEAD(&dmac->chan.active_descs);
+
+	of_channels = of_get_child_by_name(pdev->dev.of_node, "adi,channels");
+	if (of_channels == NULL)
+		return -ENODEV;
+
+	for_each_child_of_node(of_channels, of_chan) {
+		ret = axi_dmac_parse_chan_dt(of_chan, &dmac->chan);
+		if (ret) {
+			of_node_put(of_chan);
+			of_node_put(of_channels);
+			return -EINVAL;
+		}
+	}
+	of_node_put(of_channels);
+
+	pdev->dev.dma_parms = &dmac->dma_parms;
+	dma_set_max_seg_size(&pdev->dev, dmac->chan.max_length);
+
+	dma_dev = &dmac->dma_dev;
+	dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
+	dma_cap_set(DMA_CYCLIC, dma_dev->cap_mask);
+	dma_dev->device_free_chan_resources = axi_dmac_free_chan_resources;
+	dma_dev->device_tx_status = dma_cookie_status;
+	dma_dev->device_issue_pending = axi_dmac_issue_pending;
+	dma_dev->device_prep_slave_sg = axi_dmac_prep_slave_sg;
+	dma_dev->device_prep_dma_cyclic = axi_dmac_prep_dma_cyclic;
+	dma_dev->device_prep_interleaved_dma = axi_dmac_prep_interleaved;
+	dma_dev->device_terminate_all = axi_dmac_terminate_all;
+	dma_dev->device_synchronize = axi_dmac_synchronize;
+	dma_dev->dev = &pdev->dev;
+	dma_dev->chancnt = 1;
+	dma_dev->src_addr_widths = BIT(dmac->chan.src_width);
+	dma_dev->dst_addr_widths = BIT(dmac->chan.dest_width);
+	dma_dev->directions = BIT(dmac->chan.direction);
+	dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+	INIT_LIST_HEAD(&dma_dev->channels);
+
+	dmac->chan.vchan.desc_free = axi_dmac_desc_free;
+	vchan_init(&dmac->chan.vchan, dma_dev);
+
+	ret = clk_prepare_enable(dmac->clk);
+	if (ret < 0)
+		return ret;
+
+	axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_MASK, 0x00);
+
+	ret = dma_async_device_register(dma_dev);
+	if (ret)
+		goto err_clk_disable;
+
+	ret = of_dma_controller_register(pdev->dev.of_node,
+		of_dma_xlate_by_chan_id, dma_dev);
+	if (ret)
+		goto err_unregister_device;
+
+	ret = request_irq(dmac->irq, axi_dmac_interrupt_handler, IRQF_SHARED,
+		dev_name(&pdev->dev), dmac);
+	if (ret)
+		goto err_unregister_of;
+
+	platform_set_drvdata(pdev, dmac);
+
+	return 0;
+
+err_unregister_of:
+	of_dma_controller_free(pdev->dev.of_node);
+err_unregister_device:
+	dma_async_device_unregister(&dmac->dma_dev);
+err_clk_disable:
+	clk_disable_unprepare(dmac->clk);
+
+	return ret;
+}
+
+static int axi_dmac_remove(struct platform_device *pdev)
+{
+	struct axi_dmac *dmac = platform_get_drvdata(pdev);
+
+	of_dma_controller_free(pdev->dev.of_node);
+	free_irq(dmac->irq, dmac);
+	tasklet_kill(&dmac->chan.vchan.task);
+	dma_async_device_unregister(&dmac->dma_dev);
+	clk_disable_unprepare(dmac->clk);
+
+	return 0;
+}
+
+static const struct of_device_id axi_dmac_of_match_table[] = {
+	{ .compatible = "adi,axi-dmac-1.00.a" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, axi_dmac_of_match_table);
+
+static struct platform_driver axi_dmac_driver = {
+	.driver = {
+		.name = "dma-axi-dmac",
+		.of_match_table = axi_dmac_of_match_table,
+	},
+	.probe = axi_dmac_probe,
+	.remove = axi_dmac_remove,
+};
+module_platform_driver(axi_dmac_driver);
+
+MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
+MODULE_DESCRIPTION("DMA controller driver for the AXI-DMAC controller");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/dma-jz4740.c b/drivers/dma/dma-jz4740.c
new file mode 100644
index 0000000..afd5e10
--- /dev/null
+++ b/drivers/dma/dma-jz4740.c
@@ -0,0 +1,613 @@
+/*
+ *  Copyright (C) 2013, Lars-Peter Clausen <lars@metafoo.de>
+ *  JZ4740 DMAC support
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License as published by the
+ *  Free Software Foundation; either version 2 of the License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/irq.h>
+#include <linux/clk.h>
+
+#include "virt-dma.h"
+
+#define JZ_DMA_NR_CHANS 6
+
+#define JZ_REG_DMA_SRC_ADDR(x)		(0x00 + (x) * 0x20)
+#define JZ_REG_DMA_DST_ADDR(x)		(0x04 + (x) * 0x20)
+#define JZ_REG_DMA_TRANSFER_COUNT(x)	(0x08 + (x) * 0x20)
+#define JZ_REG_DMA_REQ_TYPE(x)		(0x0C + (x) * 0x20)
+#define JZ_REG_DMA_STATUS_CTRL(x)	(0x10 + (x) * 0x20)
+#define JZ_REG_DMA_CMD(x)		(0x14 + (x) * 0x20)
+#define JZ_REG_DMA_DESC_ADDR(x)		(0x18 + (x) * 0x20)
+
+#define JZ_REG_DMA_CTRL			0x300
+#define JZ_REG_DMA_IRQ			0x304
+#define JZ_REG_DMA_DOORBELL		0x308
+#define JZ_REG_DMA_DOORBELL_SET		0x30C
+
+#define JZ_DMA_STATUS_CTRL_NO_DESC		BIT(31)
+#define JZ_DMA_STATUS_CTRL_DESC_INV		BIT(6)
+#define JZ_DMA_STATUS_CTRL_ADDR_ERR		BIT(4)
+#define JZ_DMA_STATUS_CTRL_TRANSFER_DONE	BIT(3)
+#define JZ_DMA_STATUS_CTRL_HALT			BIT(2)
+#define JZ_DMA_STATUS_CTRL_COUNT_TERMINATE	BIT(1)
+#define JZ_DMA_STATUS_CTRL_ENABLE		BIT(0)
+
+#define JZ_DMA_CMD_SRC_INC			BIT(23)
+#define JZ_DMA_CMD_DST_INC			BIT(22)
+#define JZ_DMA_CMD_RDIL_MASK			(0xf << 16)
+#define JZ_DMA_CMD_SRC_WIDTH_MASK		(0x3 << 14)
+#define JZ_DMA_CMD_DST_WIDTH_MASK		(0x3 << 12)
+#define JZ_DMA_CMD_INTERVAL_LENGTH_MASK		(0x7 << 8)
+#define JZ_DMA_CMD_BLOCK_MODE			BIT(7)
+#define JZ_DMA_CMD_DESC_VALID			BIT(4)
+#define JZ_DMA_CMD_DESC_VALID_MODE		BIT(3)
+#define JZ_DMA_CMD_VALID_IRQ_ENABLE		BIT(2)
+#define JZ_DMA_CMD_TRANSFER_IRQ_ENABLE		BIT(1)
+#define JZ_DMA_CMD_LINK_ENABLE			BIT(0)
+
+#define JZ_DMA_CMD_FLAGS_OFFSET 22
+#define JZ_DMA_CMD_RDIL_OFFSET 16
+#define JZ_DMA_CMD_SRC_WIDTH_OFFSET 14
+#define JZ_DMA_CMD_DST_WIDTH_OFFSET 12
+#define JZ_DMA_CMD_TRANSFER_SIZE_OFFSET 8
+#define JZ_DMA_CMD_MODE_OFFSET 7
+
+#define JZ_DMA_CTRL_PRIORITY_MASK		(0x3 << 8)
+#define JZ_DMA_CTRL_HALT			BIT(3)
+#define JZ_DMA_CTRL_ADDRESS_ERROR		BIT(2)
+#define JZ_DMA_CTRL_ENABLE			BIT(0)
+
+enum jz4740_dma_width {
+	JZ4740_DMA_WIDTH_32BIT	= 0,
+	JZ4740_DMA_WIDTH_8BIT	= 1,
+	JZ4740_DMA_WIDTH_16BIT	= 2,
+};
+
+enum jz4740_dma_transfer_size {
+	JZ4740_DMA_TRANSFER_SIZE_4BYTE	= 0,
+	JZ4740_DMA_TRANSFER_SIZE_1BYTE	= 1,
+	JZ4740_DMA_TRANSFER_SIZE_2BYTE	= 2,
+	JZ4740_DMA_TRANSFER_SIZE_16BYTE = 3,
+	JZ4740_DMA_TRANSFER_SIZE_32BYTE = 4,
+};
+
+enum jz4740_dma_flags {
+	JZ4740_DMA_SRC_AUTOINC = 0x2,
+	JZ4740_DMA_DST_AUTOINC = 0x1,
+};
+
+enum jz4740_dma_mode {
+	JZ4740_DMA_MODE_SINGLE	= 0,
+	JZ4740_DMA_MODE_BLOCK	= 1,
+};
+
+struct jz4740_dma_sg {
+	dma_addr_t addr;
+	unsigned int len;
+};
+
+struct jz4740_dma_desc {
+	struct virt_dma_desc vdesc;
+
+	enum dma_transfer_direction direction;
+	bool cyclic;
+
+	unsigned int num_sgs;
+	struct jz4740_dma_sg sg[];
+};
+
+struct jz4740_dmaengine_chan {
+	struct virt_dma_chan vchan;
+	unsigned int id;
+
+	dma_addr_t fifo_addr;
+	unsigned int transfer_shift;
+
+	struct jz4740_dma_desc *desc;
+	unsigned int next_sg;
+};
+
+struct jz4740_dma_dev {
+	struct dma_device ddev;
+	void __iomem *base;
+	struct clk *clk;
+
+	struct jz4740_dmaengine_chan chan[JZ_DMA_NR_CHANS];
+};
+
+static struct jz4740_dma_dev *jz4740_dma_chan_get_dev(
+	struct jz4740_dmaengine_chan *chan)
+{
+	return container_of(chan->vchan.chan.device, struct jz4740_dma_dev,
+		ddev);
+}
+
+static struct jz4740_dmaengine_chan *to_jz4740_dma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct jz4740_dmaengine_chan, vchan.chan);
+}
+
+static struct jz4740_dma_desc *to_jz4740_dma_desc(struct virt_dma_desc *vdesc)
+{
+	return container_of(vdesc, struct jz4740_dma_desc, vdesc);
+}
+
+static inline uint32_t jz4740_dma_read(struct jz4740_dma_dev *dmadev,
+	unsigned int reg)
+{
+	return readl(dmadev->base + reg);
+}
+
+static inline void jz4740_dma_write(struct jz4740_dma_dev *dmadev,
+	unsigned reg, uint32_t val)
+{
+	writel(val, dmadev->base + reg);
+}
+
+static inline void jz4740_dma_write_mask(struct jz4740_dma_dev *dmadev,
+	unsigned int reg, uint32_t val, uint32_t mask)
+{
+	uint32_t tmp;
+
+	tmp = jz4740_dma_read(dmadev, reg);
+	tmp &= ~mask;
+	tmp |= val;
+	jz4740_dma_write(dmadev, reg, tmp);
+}
+
+static struct jz4740_dma_desc *jz4740_dma_alloc_desc(unsigned int num_sgs)
+{
+	return kzalloc(sizeof(struct jz4740_dma_desc) +
+		sizeof(struct jz4740_dma_sg) * num_sgs, GFP_ATOMIC);
+}
+
+static enum jz4740_dma_width jz4740_dma_width(enum dma_slave_buswidth width)
+{
+	switch (width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		return JZ4740_DMA_WIDTH_8BIT;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		return JZ4740_DMA_WIDTH_16BIT;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		return JZ4740_DMA_WIDTH_32BIT;
+	default:
+		return JZ4740_DMA_WIDTH_32BIT;
+	}
+}
+
+static enum jz4740_dma_transfer_size jz4740_dma_maxburst(u32 maxburst)
+{
+	if (maxburst <= 1)
+		return JZ4740_DMA_TRANSFER_SIZE_1BYTE;
+	else if (maxburst <= 3)
+		return JZ4740_DMA_TRANSFER_SIZE_2BYTE;
+	else if (maxburst <= 15)
+		return JZ4740_DMA_TRANSFER_SIZE_4BYTE;
+	else if (maxburst <= 31)
+		return JZ4740_DMA_TRANSFER_SIZE_16BYTE;
+
+	return JZ4740_DMA_TRANSFER_SIZE_32BYTE;
+}
+
+static int jz4740_dma_slave_config(struct dma_chan *c,
+				   struct dma_slave_config *config)
+{
+	struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c);
+	struct jz4740_dma_dev *dmadev = jz4740_dma_chan_get_dev(chan);
+	enum jz4740_dma_width src_width;
+	enum jz4740_dma_width dst_width;
+	enum jz4740_dma_transfer_size transfer_size;
+	enum jz4740_dma_flags flags;
+	uint32_t cmd;
+
+	switch (config->direction) {
+	case DMA_MEM_TO_DEV:
+		flags = JZ4740_DMA_SRC_AUTOINC;
+		transfer_size = jz4740_dma_maxburst(config->dst_maxburst);
+		chan->fifo_addr = config->dst_addr;
+		break;
+	case DMA_DEV_TO_MEM:
+		flags = JZ4740_DMA_DST_AUTOINC;
+		transfer_size = jz4740_dma_maxburst(config->src_maxburst);
+		chan->fifo_addr = config->src_addr;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	src_width = jz4740_dma_width(config->src_addr_width);
+	dst_width = jz4740_dma_width(config->dst_addr_width);
+
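+	/*
+	 * transfer_shift converts between bytes and transfer-size units; it
+	 * is used when programming and reading back the transfer count
+	 * register.
+	 */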
+	switch (transfer_size) {
+	case JZ4740_DMA_TRANSFER_SIZE_2BYTE:
+		chan->transfer_shift = 1;
+		break;
+	case JZ4740_DMA_TRANSFER_SIZE_4BYTE:
+		chan->transfer_shift = 2;
+		break;
+	case JZ4740_DMA_TRANSFER_SIZE_16BYTE:
+		chan->transfer_shift = 4;
+		break;
+	case JZ4740_DMA_TRANSFER_SIZE_32BYTE:
+		chan->transfer_shift = 5;
+		break;
+	default:
+		chan->transfer_shift = 0;
+		break;
+	}
+
+	cmd = flags << JZ_DMA_CMD_FLAGS_OFFSET;
+	cmd |= src_width << JZ_DMA_CMD_SRC_WIDTH_OFFSET;
+	cmd |= dst_width << JZ_DMA_CMD_DST_WIDTH_OFFSET;
+	cmd |= transfer_size << JZ_DMA_CMD_TRANSFER_SIZE_OFFSET;
+	cmd |= JZ4740_DMA_MODE_SINGLE << JZ_DMA_CMD_MODE_OFFSET;
+	cmd |= JZ_DMA_CMD_TRANSFER_IRQ_ENABLE;
+
+	jz4740_dma_write(dmadev, JZ_REG_DMA_CMD(chan->id), cmd);
+	jz4740_dma_write(dmadev, JZ_REG_DMA_STATUS_CTRL(chan->id), 0);
+	jz4740_dma_write(dmadev, JZ_REG_DMA_REQ_TYPE(chan->id),
+		config->slave_id);
+
+	return 0;
+}
+
+static int jz4740_dma_terminate_all(struct dma_chan *c)
+{
+	struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c);
+	struct jz4740_dma_dev *dmadev = jz4740_dma_chan_get_dev(chan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+	jz4740_dma_write_mask(dmadev, JZ_REG_DMA_STATUS_CTRL(chan->id), 0,
+			JZ_DMA_STATUS_CTRL_ENABLE);
+	chan->desc = NULL;
+	vchan_get_all_descriptors(&chan->vchan, &head);
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+	vchan_dma_desc_free_list(&chan->vchan, &head);
+
+	return 0;
+}
+
+static int jz4740_dma_start_transfer(struct jz4740_dmaengine_chan *chan)
+{
+	struct jz4740_dma_dev *dmadev = jz4740_dma_chan_get_dev(chan);
+	dma_addr_t src_addr, dst_addr;
+	struct virt_dma_desc *vdesc;
+	struct jz4740_dma_sg *sg;
+
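+	/* Disable the channel before reprogramming its registers. */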
+	jz4740_dma_write_mask(dmadev, JZ_REG_DMA_STATUS_CTRL(chan->id), 0,
+			JZ_DMA_STATUS_CTRL_ENABLE);
+
+	if (!chan->desc) {
+		vdesc = vchan_next_desc(&chan->vchan);
+		if (!vdesc)
+			return 0;
+		chan->desc = to_jz4740_dma_desc(vdesc);
+		chan->next_sg = 0;
+	}
+
+	if (chan->next_sg == chan->desc->num_sgs)
+		chan->next_sg = 0;
+
+	sg = &chan->desc->sg[chan->next_sg];
+
+	if (chan->desc->direction == DMA_MEM_TO_DEV) {
+		src_addr = sg->addr;
+		dst_addr = chan->fifo_addr;
+	} else {
+		src_addr = chan->fifo_addr;
+		dst_addr = sg->addr;
+	}
+	jz4740_dma_write(dmadev, JZ_REG_DMA_SRC_ADDR(chan->id), src_addr);
+	jz4740_dma_write(dmadev, JZ_REG_DMA_DST_ADDR(chan->id), dst_addr);
+	jz4740_dma_write(dmadev, JZ_REG_DMA_TRANSFER_COUNT(chan->id),
+			sg->len >> chan->transfer_shift);
+
+	chan->next_sg++;
+
+	jz4740_dma_write_mask(dmadev, JZ_REG_DMA_STATUS_CTRL(chan->id),
+			JZ_DMA_STATUS_CTRL_NO_DESC | JZ_DMA_STATUS_CTRL_ENABLE,
+			JZ_DMA_STATUS_CTRL_HALT | JZ_DMA_STATUS_CTRL_NO_DESC |
+			JZ_DMA_STATUS_CTRL_ENABLE);
+
+	jz4740_dma_write_mask(dmadev, JZ_REG_DMA_CTRL,
+			JZ_DMA_CTRL_ENABLE,
+			JZ_DMA_CTRL_HALT | JZ_DMA_CTRL_ENABLE);
+
+	return 0;
+}
+
+static void jz4740_dma_chan_irq(struct jz4740_dmaengine_chan *chan)
+{
+	spin_lock(&chan->vchan.lock);
+	if (chan->desc) {
+		if (chan->desc->cyclic) {
+			vchan_cyclic_callback(&chan->desc->vdesc);
+		} else {
+			if (chan->next_sg == chan->desc->num_sgs) {
+				list_del(&chan->desc->vdesc.node);
+				vchan_cookie_complete(&chan->desc->vdesc);
+				chan->desc = NULL;
+			}
+		}
+	}
+	jz4740_dma_start_transfer(chan);
+	spin_unlock(&chan->vchan.lock);
+}
+
+static irqreturn_t jz4740_dma_irq(int irq, void *devid)
+{
+	struct jz4740_dma_dev *dmadev = devid;
+	uint32_t irq_status;
+	unsigned int i;
+
+	irq_status = readl(dmadev->base + JZ_REG_DMA_IRQ);
+
+	for (i = 0; i < JZ_DMA_NR_CHANS; ++i) {
+		if (irq_status & (1 << i)) {
+			jz4740_dma_write_mask(dmadev,
+				JZ_REG_DMA_STATUS_CTRL(i), 0,
+				JZ_DMA_STATUS_CTRL_ENABLE |
+				JZ_DMA_STATUS_CTRL_TRANSFER_DONE);
+
+			jz4740_dma_chan_irq(&dmadev->chan[i]);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void jz4740_dma_issue_pending(struct dma_chan *c)
+{
+	struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+	if (vchan_issue_pending(&chan->vchan) && !chan->desc)
+		jz4740_dma_start_transfer(chan);
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+}
+
+static struct dma_async_tx_descriptor *jz4740_dma_prep_slave_sg(
+	struct dma_chan *c, struct scatterlist *sgl,
+	unsigned int sg_len, enum dma_transfer_direction direction,
+	unsigned long flags, void *context)
+{
+	struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c);
+	struct jz4740_dma_desc *desc;
+	struct scatterlist *sg;
+	unsigned int i;
+
+	desc = jz4740_dma_alloc_desc(sg_len);
+	if (!desc)
+		return NULL;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		desc->sg[i].addr = sg_dma_address(sg);
+		desc->sg[i].len = sg_dma_len(sg);
+	}
+
+	desc->num_sgs = sg_len;
+	desc->direction = direction;
+	desc->cyclic = false;
+
+	return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+}
+
+static struct dma_async_tx_descriptor *jz4740_dma_prep_dma_cyclic(
+	struct dma_chan *c, dma_addr_t buf_addr, size_t buf_len,
+	size_t period_len, enum dma_transfer_direction direction,
+	unsigned long flags)
+{
+	struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c);
+	struct jz4740_dma_desc *desc;
+	unsigned int num_periods, i;
+
+	if (buf_len % period_len)
+		return NULL;
+
+	num_periods = buf_len / period_len;
+
+	desc = jz4740_dma_alloc_desc(num_periods);
+	if (!desc)
+		return NULL;
+
+	for (i = 0; i < num_periods; i++) {
+		desc->sg[i].addr = buf_addr;
+		desc->sg[i].len = period_len;
+		buf_addr += period_len;
+	}
+
+	desc->num_sgs = num_periods;
+	desc->direction = direction;
+	desc->cyclic = true;
+
+	return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+}
+
+static size_t jz4740_dma_desc_residue(struct jz4740_dmaengine_chan *chan,
+	struct jz4740_dma_desc *desc, unsigned int next_sg)
+{
+	struct jz4740_dma_dev *dmadev = jz4740_dma_chan_get_dev(chan);
+	unsigned int residue, count;
+	unsigned int i;
+
+	residue = 0;
+
+	for (i = next_sg; i < desc->num_sgs; i++)
+		residue += desc->sg[i].len;
+
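+	/*
+	 * The segment at next_sg - 1 is currently in flight; the transfer
+	 * count register holds its remaining length in transfer-size units.
+	 */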
+	if (next_sg != 0) {
+		count = jz4740_dma_read(dmadev,
+			JZ_REG_DMA_TRANSFER_COUNT(chan->id));
+		residue += count << chan->transfer_shift;
+	}
+
+	return residue;
+}
+
+static enum dma_status jz4740_dma_tx_status(struct dma_chan *c,
+	dma_cookie_t cookie, struct dma_tx_state *state)
+{
+	struct jz4740_dmaengine_chan *chan = to_jz4740_dma_chan(c);
+	struct virt_dma_desc *vdesc;
+	enum dma_status status;
+	unsigned long flags;
+
+	status = dma_cookie_status(c, cookie, state);
+	if (status == DMA_COMPLETE || !state)
+		return status;
+
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+	vdesc = vchan_find_desc(&chan->vchan, cookie);
+	if (chan->desc && cookie == chan->desc->vdesc.tx.cookie) {
+		state->residue = jz4740_dma_desc_residue(chan, chan->desc,
+				chan->next_sg);
+	} else if (vdesc) {
+		state->residue = jz4740_dma_desc_residue(chan,
+				to_jz4740_dma_desc(vdesc), 0);
+	} else {
+		state->residue = 0;
+	}
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+	return status;
+}
+
+static void jz4740_dma_free_chan_resources(struct dma_chan *c)
+{
+	vchan_free_chan_resources(to_virt_chan(c));
+}
+
+static void jz4740_dma_desc_free(struct virt_dma_desc *vdesc)
+{
+	kfree(container_of(vdesc, struct jz4740_dma_desc, vdesc));
+}
+
+#define JZ4740_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+	BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
+static int jz4740_dma_probe(struct platform_device *pdev)
+{
+	struct jz4740_dmaengine_chan *chan;
+	struct jz4740_dma_dev *dmadev;
+	struct dma_device *dd;
+	unsigned int i;
+	struct resource *res;
+	int ret;
+	int irq;
+
+	dmadev = devm_kzalloc(&pdev->dev, sizeof(*dmadev), GFP_KERNEL);
+	if (!dmadev)
+		return -EINVAL;
+
+	dd = &dmadev->ddev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	dmadev->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(dmadev->base))
+		return PTR_ERR(dmadev->base);
+
+	dmadev->clk = clk_get(&pdev->dev, "dma");
+	if (IS_ERR(dmadev->clk))
+		return PTR_ERR(dmadev->clk);
+
+	clk_prepare_enable(dmadev->clk);
+
+	dma_cap_set(DMA_SLAVE, dd->cap_mask);
+	dma_cap_set(DMA_CYCLIC, dd->cap_mask);
+	dd->device_free_chan_resources = jz4740_dma_free_chan_resources;
+	dd->device_tx_status = jz4740_dma_tx_status;
+	dd->device_issue_pending = jz4740_dma_issue_pending;
+	dd->device_prep_slave_sg = jz4740_dma_prep_slave_sg;
+	dd->device_prep_dma_cyclic = jz4740_dma_prep_dma_cyclic;
+	dd->device_config = jz4740_dma_slave_config;
+	dd->device_terminate_all = jz4740_dma_terminate_all;
+	dd->src_addr_widths = JZ4740_DMA_BUSWIDTHS;
+	dd->dst_addr_widths = JZ4740_DMA_BUSWIDTHS;
+	dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+	dd->dev = &pdev->dev;
+	INIT_LIST_HEAD(&dd->channels);
+
+	for (i = 0; i < JZ_DMA_NR_CHANS; i++) {
+		chan = &dmadev->chan[i];
+		chan->id = i;
+		chan->vchan.desc_free = jz4740_dma_desc_free;
+		vchan_init(&chan->vchan, dd);
+	}
+
+	ret = dma_async_device_register(dd);
+	if (ret)
+		goto err_clk;
+
+	irq = platform_get_irq(pdev, 0);
+	ret = request_irq(irq, jz4740_dma_irq, 0, dev_name(&pdev->dev), dmadev);
+	if (ret)
+		goto err_unregister;
+
+	platform_set_drvdata(pdev, dmadev);
+
+	return 0;
+
+err_unregister:
+	dma_async_device_unregister(dd);
+err_clk:
+	clk_disable_unprepare(dmadev->clk);
+	return ret;
+}
+
+static void jz4740_cleanup_vchan(struct dma_device *dmadev)
+{
+	struct jz4740_dmaengine_chan *chan, *_chan;
+
+	list_for_each_entry_safe(chan, _chan,
+				&dmadev->channels, vchan.chan.device_node) {
+		list_del(&chan->vchan.chan.device_node);
+		tasklet_kill(&chan->vchan.task);
+	}
+}
+
+static int jz4740_dma_remove(struct platform_device *pdev)
+{
+	struct jz4740_dma_dev *dmadev = platform_get_drvdata(pdev);
+	int irq = platform_get_irq(pdev, 0);
+
+	free_irq(irq, dmadev);
+
+	jz4740_cleanup_vchan(&dmadev->ddev);
+	dma_async_device_unregister(&dmadev->ddev);
+	clk_disable_unprepare(dmadev->clk);
+
+	return 0;
+}
+
+static struct platform_driver jz4740_dma_driver = {
+	.probe = jz4740_dma_probe,
+	.remove = jz4740_dma_remove,
+	.driver = {
+		.name = "jz4740-dma",
+	},
+};
+module_platform_driver(jz4740_dma_driver);
+
+MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
+MODULE_DESCRIPTION("JZ4740 DMA driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c
new file mode 100644
index 0000000..9878996
--- /dev/null
+++ b/drivers/dma/dma-jz4780.c
@@ -0,0 +1,928 @@
+/*
+ * Ingenic JZ4780 DMA controller
+ *
+ * Copyright (c) 2015 Imagination Technologies
+ * Author: Alex Smith <alex@alex-smith.me.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/clk.h>
+#include <linux/dmapool.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+#define JZ_DMA_NR_CHANNELS	32
+
+/* Global registers. */
+#define JZ_DMA_REG_DMAC		0x1000
+#define JZ_DMA_REG_DIRQP	0x1004
+#define JZ_DMA_REG_DDR		0x1008
+#define JZ_DMA_REG_DDRS		0x100c
+#define JZ_DMA_REG_DMACP	0x101c
+#define JZ_DMA_REG_DSIRQP	0x1020
+#define JZ_DMA_REG_DSIRQM	0x1024
+#define JZ_DMA_REG_DCIRQP	0x1028
+#define JZ_DMA_REG_DCIRQM	0x102c
+
+/* Per-channel registers. */
+#define JZ_DMA_REG_CHAN(n)	((n) * 0x20)
+#define JZ_DMA_REG_DSA(n)	(0x00 + JZ_DMA_REG_CHAN(n))
+#define JZ_DMA_REG_DTA(n)	(0x04 + JZ_DMA_REG_CHAN(n))
+#define JZ_DMA_REG_DTC(n)	(0x08 + JZ_DMA_REG_CHAN(n))
+#define JZ_DMA_REG_DRT(n)	(0x0c + JZ_DMA_REG_CHAN(n))
+#define JZ_DMA_REG_DCS(n)	(0x10 + JZ_DMA_REG_CHAN(n))
+#define JZ_DMA_REG_DCM(n)	(0x14 + JZ_DMA_REG_CHAN(n))
+#define JZ_DMA_REG_DDA(n)	(0x18 + JZ_DMA_REG_CHAN(n))
+#define JZ_DMA_REG_DSD(n)	(0x1c + JZ_DMA_REG_CHAN(n))
+
+#define JZ_DMA_DMAC_DMAE	BIT(0)
+#define JZ_DMA_DMAC_AR		BIT(2)
+#define JZ_DMA_DMAC_HLT		BIT(3)
+#define JZ_DMA_DMAC_FMSC	BIT(31)
+
+#define JZ_DMA_DRT_AUTO		0x8
+
+#define JZ_DMA_DCS_CTE		BIT(0)
+#define JZ_DMA_DCS_HLT		BIT(2)
+#define JZ_DMA_DCS_TT		BIT(3)
+#define JZ_DMA_DCS_AR		BIT(4)
+#define JZ_DMA_DCS_DES8		BIT(30)
+
+#define JZ_DMA_DCM_LINK		BIT(0)
+#define JZ_DMA_DCM_TIE		BIT(1)
+#define JZ_DMA_DCM_STDE		BIT(2)
+#define JZ_DMA_DCM_TSZ_SHIFT	8
+#define JZ_DMA_DCM_TSZ_MASK	(0x7 << JZ_DMA_DCM_TSZ_SHIFT)
+#define JZ_DMA_DCM_DP_SHIFT	12
+#define JZ_DMA_DCM_SP_SHIFT	14
+#define JZ_DMA_DCM_DAI		BIT(22)
+#define JZ_DMA_DCM_SAI		BIT(23)
+
+#define JZ_DMA_SIZE_4_BYTE	0x0
+#define JZ_DMA_SIZE_1_BYTE	0x1
+#define JZ_DMA_SIZE_2_BYTE	0x2
+#define JZ_DMA_SIZE_16_BYTE	0x3
+#define JZ_DMA_SIZE_32_BYTE	0x4
+#define JZ_DMA_SIZE_64_BYTE	0x5
+#define JZ_DMA_SIZE_128_BYTE	0x6
+
+#define JZ_DMA_WIDTH_32_BIT	0x0
+#define JZ_DMA_WIDTH_8_BIT	0x1
+#define JZ_DMA_WIDTH_16_BIT	0x2
+
+#define JZ_DMA_BUSWIDTHS	(BIT(DMA_SLAVE_BUSWIDTH_1_BYTE)	 | \
+				 BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
+/**
+ * struct jz4780_dma_hwdesc - descriptor structure read by the DMA controller.
+ * @dcm: value for the DCM (channel command) register
+ * @dsa: source address
+ * @dta: target address
+ * @dtc: transfer count (number of blocks of the transfer size specified in DCM
+ * to transfer) in the low 24 bits, and the offset of the next descriptor from
+ * the descriptor base address, divided by 16, in the upper 8 bits.
+ * @sd: target/source stride difference (in stride transfer mode).
+ * @drt: request type
+ */
+struct jz4780_dma_hwdesc {
+	uint32_t dcm;
+	uint32_t dsa;
+	uint32_t dta;
+	uint32_t dtc;
+	uint32_t sd;
+	uint32_t drt;
+	uint32_t reserved[2];
+};
+
+/* Size of allocations for hardware descriptor blocks. */
+#define JZ_DMA_DESC_BLOCK_SIZE	PAGE_SIZE
+#define JZ_DMA_MAX_DESC		\
+	(JZ_DMA_DESC_BLOCK_SIZE / sizeof(struct jz4780_dma_hwdesc))
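+
+/*
+ * Illustrative note (not part of the original code): the hardware descriptor
+ * above is eight 32-bit words, i.e. 32 bytes, which matches the 8-word
+ * descriptor fetch mode (JZ_DMA_DCS_DES8) programmed in jz4780_dma_begin().
+ * Assuming a 4 KiB PAGE_SIZE, JZ_DMA_MAX_DESC therefore works out to
+ * 4096 / 32 = 128 descriptors per pool block.
+ */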
+
+struct jz4780_dma_desc {
+	struct virt_dma_desc vdesc;
+
+	struct jz4780_dma_hwdesc *desc;
+	dma_addr_t desc_phys;
+	unsigned int count;
+	enum dma_transaction_type type;
+	uint32_t status;
+};
+
+struct jz4780_dma_chan {
+	struct virt_dma_chan vchan;
+	unsigned int id;
+	struct dma_pool *desc_pool;
+
+	uint32_t transfer_type;
+	uint32_t transfer_shift;
+	struct dma_slave_config	config;
+
+	struct jz4780_dma_desc *desc;
+	unsigned int curr_hwdesc;
+};
+
+struct jz4780_dma_dev {
+	struct dma_device dma_device;
+	void __iomem *base;
+	struct clk *clk;
+	unsigned int irq;
+
+	uint32_t chan_reserved;
+	struct jz4780_dma_chan chan[JZ_DMA_NR_CHANNELS];
+};
+
+struct jz4780_dma_filter_data {
+	struct device_node *of_node;
+	uint32_t transfer_type;
+	int channel;
+};
+
+static inline struct jz4780_dma_chan *to_jz4780_dma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct jz4780_dma_chan, vchan.chan);
+}
+
+static inline struct jz4780_dma_desc *to_jz4780_dma_desc(
+	struct virt_dma_desc *vdesc)
+{
+	return container_of(vdesc, struct jz4780_dma_desc, vdesc);
+}
+
+static inline struct jz4780_dma_dev *jz4780_dma_chan_parent(
+	struct jz4780_dma_chan *jzchan)
+{
+	return container_of(jzchan->vchan.chan.device, struct jz4780_dma_dev,
+			    dma_device);
+}
+
+static inline uint32_t jz4780_dma_readl(struct jz4780_dma_dev *jzdma,
+	unsigned int reg)
+{
+	return readl(jzdma->base + reg);
+}
+
+static inline void jz4780_dma_writel(struct jz4780_dma_dev *jzdma,
+	unsigned int reg, uint32_t val)
+{
+	writel(val, jzdma->base + reg);
+}
+
+static struct jz4780_dma_desc *jz4780_dma_desc_alloc(
+	struct jz4780_dma_chan *jzchan, unsigned int count,
+	enum dma_transaction_type type)
+{
+	struct jz4780_dma_desc *desc;
+
+	if (count > JZ_DMA_MAX_DESC)
+		return NULL;
+
+	desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+	if (!desc)
+		return NULL;
+
+	desc->desc = dma_pool_alloc(jzchan->desc_pool, GFP_NOWAIT,
+				    &desc->desc_phys);
+	if (!desc->desc) {
+		kfree(desc);
+		return NULL;
+	}
+
+	desc->count = count;
+	desc->type = type;
+	return desc;
+}
+
+static void jz4780_dma_desc_free(struct virt_dma_desc *vdesc)
+{
+	struct jz4780_dma_desc *desc = to_jz4780_dma_desc(vdesc);
+	struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(vdesc->tx.chan);
+
+	dma_pool_free(jzchan->desc_pool, desc->desc, desc->desc_phys);
+	kfree(desc);
+}
+
+static uint32_t jz4780_dma_transfer_size(unsigned long val, uint32_t *shift)
+{
+	int ord = ffs(val) - 1;
+
+	/*
+	 * 8-byte transfer sizes are unsupported, so fall back to 4 bytes. If
+	 * the requested size is larger than the maximum, clamp it to the
+	 * maximum. Falling back like this is safe because we never exceed the
+	 * maximum burst size supported by the device; the only effect is
+	 * reduced efficiency, which is better than refusing to perform the
+	 * request at all.
+	 */
+	if (ord == 3)
+		ord = 2;
+	else if (ord > 7)
+		ord = 7;
+
+	*shift = ord;
+
+	switch (ord) {
+	case 0:
+		return JZ_DMA_SIZE_1_BYTE;
+	case 1:
+		return JZ_DMA_SIZE_2_BYTE;
+	case 2:
+		return JZ_DMA_SIZE_4_BYTE;
+	case 4:
+		return JZ_DMA_SIZE_16_BYTE;
+	case 5:
+		return JZ_DMA_SIZE_32_BYTE;
+	case 6:
+		return JZ_DMA_SIZE_64_BYTE;
+	default:
+		return JZ_DMA_SIZE_128_BYTE;
+	}
+}
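+
+/*
+ * Worked examples (illustrative, not part of the original code): a value of
+ * 64 gives ord = 6, i.e. JZ_DMA_SIZE_64_BYTE with *shift = 6; a value of 8
+ * gives ord = 3, which is unsupported and falls back to JZ_DMA_SIZE_4_BYTE
+ * with *shift = 2; a value of 512 gives ord = 9, which is clamped to
+ * JZ_DMA_SIZE_128_BYTE with *shift = 7.
+ */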
+
+static int jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan,
+	struct jz4780_dma_hwdesc *desc, dma_addr_t addr, size_t len,
+	enum dma_transfer_direction direction)
+{
+	struct dma_slave_config *config = &jzchan->config;
+	uint32_t width, maxburst, tsz;
+
+	if (direction == DMA_MEM_TO_DEV) {
+		desc->dcm = JZ_DMA_DCM_SAI;
+		desc->dsa = addr;
+		desc->dta = config->dst_addr;
+		desc->drt = jzchan->transfer_type;
+
+		width = config->dst_addr_width;
+		maxburst = config->dst_maxburst;
+	} else {
+		desc->dcm = JZ_DMA_DCM_DAI;
+		desc->dsa = config->src_addr;
+		desc->dta = addr;
+		desc->drt = jzchan->transfer_type;
+
+		width = config->src_addr_width;
+		maxburst = config->src_maxburst;
+	}
+
+	/*
+	 * This calculates the maximum transfer size that can be used with the
+	 * given address, length, width and maximum burst size. The address
+	 * must be aligned to the transfer size, the total length must be
+	 * divisible by the transfer size, and we must not use more than the
+	 * maximum burst specified by the user.
+	 */
+	tsz = jz4780_dma_transfer_size(addr | len | (width * maxburst),
+				       &jzchan->transfer_shift);
+
+	switch (width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		break;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		width = JZ_DMA_WIDTH_32_BIT;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	desc->dcm |= tsz << JZ_DMA_DCM_TSZ_SHIFT;
+	desc->dcm |= width << JZ_DMA_DCM_SP_SHIFT;
+	desc->dcm |= width << JZ_DMA_DCM_DP_SHIFT;
+
+	desc->dtc = len >> jzchan->transfer_shift;
+	return 0;
+}
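+
+/*
+ * Worked example for the transfer-size derivation above (illustrative, the
+ * numbers are hypothetical): with addr = 0x1000, len = 0x200, a 4-byte bus
+ * width and maxburst = 16, the ORed value is 0x1000 | 0x200 | 0x40 = 0x1240,
+ * whose lowest set bit is 0x40, so a 64-byte transfer size is selected,
+ * transfer_shift becomes 6 and dtc = 0x200 >> 6 = 8 blocks.
+ */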
+
+static struct dma_async_tx_descriptor *jz4780_dma_prep_slave_sg(
+	struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
+	enum dma_transfer_direction direction, unsigned long flags,
+	void *context)
+{
+	struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+	struct jz4780_dma_desc *desc;
+	unsigned int i;
+	int err;
+
+	desc = jz4780_dma_desc_alloc(jzchan, sg_len, DMA_SLAVE);
+	if (!desc)
+		return NULL;
+
+	for (i = 0; i < sg_len; i++) {
+		err = jz4780_dma_setup_hwdesc(jzchan, &desc->desc[i],
+					      sg_dma_address(&sgl[i]),
+					      sg_dma_len(&sgl[i]),
+					      direction);
+		if (err < 0) {
+			jz4780_dma_desc_free(&desc->vdesc);
+			return NULL;
+		}
+
+		desc->desc[i].dcm |= JZ_DMA_DCM_TIE;
+
+		if (i != (sg_len - 1)) {
+			/* Automatically proceed to the next descriptor. */
+			desc->desc[i].dcm |= JZ_DMA_DCM_LINK;
+
+			/*
+			 * The upper 8 bits of the DTC field in the descriptor
+			 * must be set to (offset from descriptor base of next
+			 * descriptor >> 4).
+			 */
+			desc->desc[i].dtc |=
+				(((i + 1) * sizeof(*desc->desc)) >> 4) << 24;
+		}
+	}
+
+	return vchan_tx_prep(&jzchan->vchan, &desc->vdesc, flags);
+}
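+
+/*
+ * Illustrative note (not part of the original code): for a three-entry
+ * scatterlist the hardware descriptors sit at byte offsets 0, 32 and 64 from
+ * desc_phys, so descriptor 0 stores 32 >> 4 = 2 and descriptor 1 stores
+ * 64 >> 4 = 4 in the upper 8 bits of DTC.  The last descriptor leaves
+ * JZ_DMA_DCM_LINK clear, so the chain terminates there and the transfer
+ * interrupt fires.
+ */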
+
+static struct dma_async_tx_descriptor *jz4780_dma_prep_dma_cyclic(
+	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+	size_t period_len, enum dma_transfer_direction direction,
+	unsigned long flags)
+{
+	struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+	struct jz4780_dma_desc *desc;
+	unsigned int periods, i;
+	int err;
+
+	if (buf_len % period_len)
+		return NULL;
+
+	periods = buf_len / period_len;
+
+	desc = jz4780_dma_desc_alloc(jzchan, periods, DMA_CYCLIC);
+	if (!desc)
+		return NULL;
+
+	for (i = 0; i < periods; i++) {
+		err = jz4780_dma_setup_hwdesc(jzchan, &desc->desc[i], buf_addr,
+					      period_len, direction);
+		if (err < 0) {
+			jz4780_dma_desc_free(&desc->vdesc);
+			return NULL;
+		}
+
+		buf_addr += period_len;
+
+		/*
+		 * Set the link bit to indicate that the controller should
+		 * automatically proceed to the next descriptor. In
+		 * jz4780_dma_begin(), this will be cleared if we need to issue
+		 * an interrupt after each period.
+		 */
+		desc->desc[i].dcm |= JZ_DMA_DCM_TIE | JZ_DMA_DCM_LINK;
+
+		/*
+		 * The upper 8 bits of the DTC field in the descriptor must be
+		 * set to (offset from descriptor base of next descriptor >> 4).
+		 * If this is the last descriptor, link it back to the first,
+		 * i.e. leave offset set to 0, otherwise point to the next one.
+		 */
+		if (i != (periods - 1)) {
+			desc->desc[i].dtc |=
+				(((i + 1) * sizeof(*desc->desc)) >> 4) << 24;
+		}
+	}
+
+	return vchan_tx_prep(&jzchan->vchan, &desc->vdesc, flags);
+}
+
+static struct dma_async_tx_descriptor *jz4780_dma_prep_dma_memcpy(
+	struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+	size_t len, unsigned long flags)
+{
+	struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+	struct jz4780_dma_desc *desc;
+	uint32_t tsz;
+
+	desc = jz4780_dma_desc_alloc(jzchan, 1, DMA_MEMCPY);
+	if (!desc)
+		return NULL;
+
+	tsz = jz4780_dma_transfer_size(dest | src | len,
+				       &jzchan->transfer_shift);
+
+	desc->desc[0].dsa = src;
+	desc->desc[0].dta = dest;
+	desc->desc[0].drt = JZ_DMA_DRT_AUTO;
+	desc->desc[0].dcm = JZ_DMA_DCM_TIE | JZ_DMA_DCM_SAI | JZ_DMA_DCM_DAI |
+			    tsz << JZ_DMA_DCM_TSZ_SHIFT |
+			    JZ_DMA_WIDTH_32_BIT << JZ_DMA_DCM_SP_SHIFT |
+			    JZ_DMA_WIDTH_32_BIT << JZ_DMA_DCM_DP_SHIFT;
+	desc->desc[0].dtc = len >> jzchan->transfer_shift;
+
+	return vchan_tx_prep(&jzchan->vchan, &desc->vdesc, flags);
+}
+
+static void jz4780_dma_begin(struct jz4780_dma_chan *jzchan)
+{
+	struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan);
+	struct virt_dma_desc *vdesc;
+	unsigned int i;
+	dma_addr_t desc_phys;
+
+	if (!jzchan->desc) {
+		vdesc = vchan_next_desc(&jzchan->vchan);
+		if (!vdesc)
+			return;
+
+		list_del(&vdesc->node);
+
+		jzchan->desc = to_jz4780_dma_desc(vdesc);
+		jzchan->curr_hwdesc = 0;
+
+		if (jzchan->desc->type == DMA_CYCLIC && vdesc->tx.callback) {
+			/*
+			 * The DMA controller doesn't support triggering an
+			 * interrupt after processing each descriptor, only
+			 * after processing an entire terminated list of
+			 * descriptors. For a cyclic DMA setup the list of
+			 * descriptors is not terminated so we can never get an
+			 * interrupt.
+			 *
+			 * If the user requested a callback for a cyclic DMA
+			 * setup, we work around this hardware limitation here
+			 * by degrading to a set of unlinked descriptors, which
+			 * we submit in sequence as each preceding descriptor
+			 * completes.
+			 */
+			for (i = 0; i < jzchan->desc->count; i++)
+				jzchan->desc->desc[i].dcm &= ~JZ_DMA_DCM_LINK;
+		}
+	} else {
+		/*
+		 * There is an existing transfer, therefore this must be one
+		 * for which we unlinked the descriptors above. Advance to the
+		 * next one in the list.
+		 */
+		jzchan->curr_hwdesc =
+			(jzchan->curr_hwdesc + 1) % jzchan->desc->count;
+	}
+
+	/* Use 8-word descriptors. */
+	jz4780_dma_writel(jzdma, JZ_DMA_REG_DCS(jzchan->id), JZ_DMA_DCS_DES8);
+
+	/* Write descriptor address and initiate descriptor fetch. */
+	desc_phys = jzchan->desc->desc_phys +
+		    (jzchan->curr_hwdesc * sizeof(*jzchan->desc->desc));
+	jz4780_dma_writel(jzdma, JZ_DMA_REG_DDA(jzchan->id), desc_phys);
+	jz4780_dma_writel(jzdma, JZ_DMA_REG_DDRS, BIT(jzchan->id));
+
+	/* Enable the channel. */
+	jz4780_dma_writel(jzdma, JZ_DMA_REG_DCS(jzchan->id),
+			  JZ_DMA_DCS_DES8 | JZ_DMA_DCS_CTE);
+}
+
+static void jz4780_dma_issue_pending(struct dma_chan *chan)
+{
+	struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&jzchan->vchan.lock, flags);
+
+	if (vchan_issue_pending(&jzchan->vchan) && !jzchan->desc)
+		jz4780_dma_begin(jzchan);
+
+	spin_unlock_irqrestore(&jzchan->vchan.lock, flags);
+}
+
+static int jz4780_dma_terminate_all(struct dma_chan *chan)
+{
+	struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+	struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&jzchan->vchan.lock, flags);
+
+	/* Clear the DMA status and stop the transfer. */
+	jz4780_dma_writel(jzdma, JZ_DMA_REG_DCS(jzchan->id), 0);
+	if (jzchan->desc) {
+		vchan_terminate_vdesc(&jzchan->desc->vdesc);
+		jzchan->desc = NULL;
+	}
+
+	vchan_get_all_descriptors(&jzchan->vchan, &head);
+
+	spin_unlock_irqrestore(&jzchan->vchan.lock, flags);
+
+	vchan_dma_desc_free_list(&jzchan->vchan, &head);
+	return 0;
+}
+
+static void jz4780_dma_synchronize(struct dma_chan *chan)
+{
+	struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+
+	vchan_synchronize(&jzchan->vchan);
+}
+
+static int jz4780_dma_config(struct dma_chan *chan,
+	struct dma_slave_config *config)
+{
+	struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+
+	if ((config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)
+	   || (config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES))
+		return -EINVAL;
+
+	/* Copy the rest of the slave configuration; it is used later. */
+	memcpy(&jzchan->config, config, sizeof(jzchan->config));
+
+	return 0;
+}
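+
+/*
+ * Illustrative client-side sketch (not part of this driver; the FIFO address
+ * and channel variable are hypothetical).  A peripheral driver would normally
+ * reach this callback through dmaengine_slave_config() before preparing any
+ * transfer, e.g.:
+ *
+ *	struct dma_slave_config cfg = {
+ *		.direction	= DMA_MEM_TO_DEV,
+ *		.dst_addr	= fifo_phys_addr,
+ *		.dst_addr_width	= DMA_SLAVE_BUSWIDTH_4_BYTES,
+ *		.dst_maxburst	= 16,
+ *	};
+ *
+ *	ret = dmaengine_slave_config(chan, &cfg);
+ */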
+
+static size_t jz4780_dma_desc_residue(struct jz4780_dma_chan *jzchan,
+	struct jz4780_dma_desc *desc, unsigned int next_sg)
+{
+	struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan);
+	unsigned int residue, count;
+	unsigned int i;
+
+	residue = 0;
+
+	for (i = next_sg; i < desc->count; i++)
+		residue += desc->desc[i].dtc << jzchan->transfer_shift;
+
+	if (next_sg != 0) {
+		count = jz4780_dma_readl(jzdma,
+					 JZ_DMA_REG_DTC(jzchan->id));
+		residue += count << jzchan->transfer_shift;
+	}
+
+	return residue;
+}
+
+static enum dma_status jz4780_dma_tx_status(struct dma_chan *chan,
+	dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+	struct virt_dma_desc *vdesc;
+	enum dma_status status;
+	unsigned long flags;
+
+	status = dma_cookie_status(chan, cookie, txstate);
+	if ((status == DMA_COMPLETE) || (txstate == NULL))
+		return status;
+
+	spin_lock_irqsave(&jzchan->vchan.lock, flags);
+
+	vdesc = vchan_find_desc(&jzchan->vchan, cookie);
+	if (vdesc) {
+		/* On the issued list, so hasn't been processed yet */
+		txstate->residue = jz4780_dma_desc_residue(jzchan,
+					to_jz4780_dma_desc(vdesc), 0);
+	} else if (jzchan->desc && cookie == jzchan->desc->vdesc.tx.cookie) {
+		txstate->residue = jz4780_dma_desc_residue(jzchan, jzchan->desc,
+			  (jzchan->curr_hwdesc + 1) % jzchan->desc->count);
+	} else
+		txstate->residue = 0;
+
+	if (vdesc && jzchan->desc && vdesc == &jzchan->desc->vdesc
+	    && jzchan->desc->status & (JZ_DMA_DCS_AR | JZ_DMA_DCS_HLT))
+		status = DMA_ERROR;
+
+	spin_unlock_irqrestore(&jzchan->vchan.lock, flags);
+	return status;
+}
+
+static void jz4780_dma_chan_irq(struct jz4780_dma_dev *jzdma,
+	struct jz4780_dma_chan *jzchan)
+{
+	uint32_t dcs;
+
+	spin_lock(&jzchan->vchan.lock);
+
+	dcs = jz4780_dma_readl(jzdma, JZ_DMA_REG_DCS(jzchan->id));
+	jz4780_dma_writel(jzdma, JZ_DMA_REG_DCS(jzchan->id), 0);
+
+	if (dcs & JZ_DMA_DCS_AR) {
+		dev_warn(&jzchan->vchan.chan.dev->device,
+			 "address error (DCS=0x%x)\n", dcs);
+	}
+
+	if (dcs & JZ_DMA_DCS_HLT) {
+		dev_warn(&jzchan->vchan.chan.dev->device,
+			 "channel halt (DCS=0x%x)\n", dcs);
+	}
+
+	if (jzchan->desc) {
+		jzchan->desc->status = dcs;
+
+		if ((dcs & (JZ_DMA_DCS_AR | JZ_DMA_DCS_HLT)) == 0) {
+			if (jzchan->desc->type == DMA_CYCLIC) {
+				vchan_cyclic_callback(&jzchan->desc->vdesc);
+			} else {
+				vchan_cookie_complete(&jzchan->desc->vdesc);
+				jzchan->desc = NULL;
+			}
+
+			jz4780_dma_begin(jzchan);
+		}
+	} else {
+		dev_err(&jzchan->vchan.chan.dev->device,
+			"channel IRQ with no active transfer\n");
+	}
+
+	spin_unlock(&jzchan->vchan.lock);
+}
+
+static irqreturn_t jz4780_dma_irq_handler(int irq, void *data)
+{
+	struct jz4780_dma_dev *jzdma = data;
+	uint32_t pending, dmac;
+	int i;
+
+	pending = jz4780_dma_readl(jzdma, JZ_DMA_REG_DIRQP);
+
+	for (i = 0; i < JZ_DMA_NR_CHANNELS; i++) {
+		if (!(pending & (1<<i)))
+			continue;
+
+		jz4780_dma_chan_irq(jzdma, &jzdma->chan[i]);
+	}
+
+	/* Clear halt and address error status of all channels. */
+	dmac = jz4780_dma_readl(jzdma, JZ_DMA_REG_DMAC);
+	dmac &= ~(JZ_DMA_DMAC_HLT | JZ_DMA_DMAC_AR);
+	jz4780_dma_writel(jzdma, JZ_DMA_REG_DMAC, dmac);
+
+	/* Clear interrupt pending status. */
+	jz4780_dma_writel(jzdma, JZ_DMA_REG_DIRQP, 0);
+
+	return IRQ_HANDLED;
+}
+
+static int jz4780_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+
+	jzchan->desc_pool = dma_pool_create(dev_name(&chan->dev->device),
+					    chan->device->dev,
+					    JZ_DMA_DESC_BLOCK_SIZE,
+					    PAGE_SIZE, 0);
+	if (!jzchan->desc_pool) {
+		dev_err(&chan->dev->device,
+			"failed to allocate descriptor pool\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void jz4780_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+
+	vchan_free_chan_resources(&jzchan->vchan);
+	dma_pool_destroy(jzchan->desc_pool);
+	jzchan->desc_pool = NULL;
+}
+
+static bool jz4780_dma_filter_fn(struct dma_chan *chan, void *param)
+{
+	struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+	struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan);
+	struct jz4780_dma_filter_data *data = param;
+
+	if (jzdma->dma_device.dev->of_node != data->of_node)
+		return false;
+
+	if (data->channel > -1) {
+		if (data->channel != jzchan->id)
+			return false;
+	} else if (jzdma->chan_reserved & BIT(jzchan->id)) {
+		return false;
+	}
+
+	jzchan->transfer_type = data->transfer_type;
+
+	return true;
+}
+
+static struct dma_chan *jz4780_of_dma_xlate(struct of_phandle_args *dma_spec,
+	struct of_dma *ofdma)
+{
+	struct jz4780_dma_dev *jzdma = ofdma->of_dma_data;
+	dma_cap_mask_t mask = jzdma->dma_device.cap_mask;
+	struct jz4780_dma_filter_data data;
+
+	if (dma_spec->args_count != 2)
+		return NULL;
+
+	data.of_node = ofdma->of_node;
+	data.transfer_type = dma_spec->args[0];
+	data.channel = dma_spec->args[1];
+
+	if (data.channel > -1) {
+		if (data.channel >= JZ_DMA_NR_CHANNELS) {
+			dev_err(jzdma->dma_device.dev,
+				"device requested non-existent channel %u\n",
+				data.channel);
+			return NULL;
+		}
+
+		/* Can only select a channel marked as reserved. */
+		if (!(jzdma->chan_reserved & BIT(data.channel))) {
+			dev_err(jzdma->dma_device.dev,
+				"device requested unreserved channel %u\n",
+				data.channel);
+			return NULL;
+		}
+
+		jzdma->chan[data.channel].transfer_type = data.transfer_type;
+
+		return dma_get_slave_channel(
+			&jzdma->chan[data.channel].vchan.chan);
+	} else {
+		return dma_request_channel(mask, jz4780_dma_filter_fn, &data);
+	}
+}
+
+static int jz4780_dma_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct jz4780_dma_dev *jzdma;
+	struct jz4780_dma_chan *jzchan;
+	struct dma_device *dd;
+	struct resource *res;
+	int i, ret;
+
+	if (!dev->of_node) {
+		dev_err(dev, "This driver must be probed from devicetree\n");
+		return -EINVAL;
+	}
+
+	jzdma = devm_kzalloc(dev, sizeof(*jzdma), GFP_KERNEL);
+	if (!jzdma)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, jzdma);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(dev, "failed to get I/O memory\n");
+		return -EINVAL;
+	}
+
+	jzdma->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(jzdma->base))
+		return PTR_ERR(jzdma->base);
+
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0) {
+		dev_err(dev, "failed to get IRQ: %d\n", ret);
+		return ret;
+	}
+
+	jzdma->irq = ret;
+
+	ret = request_irq(jzdma->irq, jz4780_dma_irq_handler, 0, dev_name(dev),
+			  jzdma);
+	if (ret) {
+		dev_err(dev, "failed to request IRQ %u!\n", jzdma->irq);
+		return ret;
+	}
+
+	jzdma->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(jzdma->clk)) {
+		dev_err(dev, "failed to get clock\n");
+		ret = PTR_ERR(jzdma->clk);
+		goto err_free_irq;
+	}
+
+	clk_prepare_enable(jzdma->clk);
+
+	/* Property is optional, if it doesn't exist the value will remain 0. */
+	of_property_read_u32_index(dev->of_node, "ingenic,reserved-channels",
+				   0, &jzdma->chan_reserved);
+
+	dd = &jzdma->dma_device;
+
+	dma_cap_set(DMA_MEMCPY, dd->cap_mask);
+	dma_cap_set(DMA_SLAVE, dd->cap_mask);
+	dma_cap_set(DMA_CYCLIC, dd->cap_mask);
+
+	dd->dev = dev;
+	dd->copy_align = DMAENGINE_ALIGN_4_BYTES;
+	dd->device_alloc_chan_resources = jz4780_dma_alloc_chan_resources;
+	dd->device_free_chan_resources = jz4780_dma_free_chan_resources;
+	dd->device_prep_slave_sg = jz4780_dma_prep_slave_sg;
+	dd->device_prep_dma_cyclic = jz4780_dma_prep_dma_cyclic;
+	dd->device_prep_dma_memcpy = jz4780_dma_prep_dma_memcpy;
+	dd->device_config = jz4780_dma_config;
+	dd->device_terminate_all = jz4780_dma_terminate_all;
+	dd->device_synchronize = jz4780_dma_synchronize;
+	dd->device_tx_status = jz4780_dma_tx_status;
+	dd->device_issue_pending = jz4780_dma_issue_pending;
+	dd->src_addr_widths = JZ_DMA_BUSWIDTHS;
+	dd->dst_addr_widths = JZ_DMA_BUSWIDTHS;
+	dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+	/*
+	 * Enable DMA controller, mark all channels as not programmable.
+	 * Also set the FMSC bit - it increases MSC performance, so it makes
+	 * little sense not to enable it.
+	 */
+	jz4780_dma_writel(jzdma, JZ_DMA_REG_DMAC,
+			  JZ_DMA_DMAC_DMAE | JZ_DMA_DMAC_FMSC);
+	jz4780_dma_writel(jzdma, JZ_DMA_REG_DMACP, 0);
+
+	INIT_LIST_HEAD(&dd->channels);
+
+	for (i = 0; i < JZ_DMA_NR_CHANNELS; i++) {
+		jzchan = &jzdma->chan[i];
+		jzchan->id = i;
+
+		vchan_init(&jzchan->vchan, dd);
+		jzchan->vchan.desc_free = jz4780_dma_desc_free;
+	}
+
+	ret = dma_async_device_register(dd);
+	if (ret) {
+		dev_err(dev, "failed to register device\n");
+		goto err_disable_clk;
+	}
+
+	/* Register with OF DMA helpers. */
+	ret = of_dma_controller_register(dev->of_node, jz4780_of_dma_xlate,
+					 jzdma);
+	if (ret) {
+		dev_err(dev, "failed to register OF DMA controller\n");
+		goto err_unregister_dev;
+	}
+
+	dev_info(dev, "JZ4780 DMA controller initialised\n");
+	return 0;
+
+err_unregister_dev:
+	dma_async_device_unregister(dd);
+
+err_disable_clk:
+	clk_disable_unprepare(jzdma->clk);
+
+err_free_irq:
+	free_irq(jzdma->irq, jzdma);
+	return ret;
+}
+
+static int jz4780_dma_remove(struct platform_device *pdev)
+{
+	struct jz4780_dma_dev *jzdma = platform_get_drvdata(pdev);
+	int i;
+
+	of_dma_controller_free(pdev->dev.of_node);
+
+	free_irq(jzdma->irq, jzdma);
+
+	for (i = 0; i < JZ_DMA_NR_CHANNELS; i++)
+		tasklet_kill(&jzdma->chan[i].vchan.task);
+
+	dma_async_device_unregister(&jzdma->dma_device);
+	return 0;
+}
+
+static const struct of_device_id jz4780_dma_dt_match[] = {
+	{ .compatible = "ingenic,jz4780-dma", .data = NULL },
+	{},
+};
+MODULE_DEVICE_TABLE(of, jz4780_dma_dt_match);
+
+static struct platform_driver jz4780_dma_driver = {
+	.probe		= jz4780_dma_probe,
+	.remove		= jz4780_dma_remove,
+	.driver	= {
+		.name	= "jz4780-dma",
+		.of_match_table = of_match_ptr(jz4780_dma_dt_match),
+	},
+};
+
+static int __init jz4780_dma_init(void)
+{
+	return platform_driver_register(&jz4780_dma_driver);
+}
+subsys_initcall(jz4780_dma_init);
+
+static void __exit jz4780_dma_exit(void)
+{
+	platform_driver_unregister(&jz4780_dma_driver);
+}
+module_exit(jz4780_dma_exit);
+
+MODULE_AUTHOR("Alex Smith <alex@alex-smith.me.uk>");
+MODULE_DESCRIPTION("Ingenic JZ4780 DMA controller driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
new file mode 100644
index 0000000..f1a441a
--- /dev/null
+++ b/drivers/dma/dmaengine.c
@@ -0,0 +1,1381 @@
+/*
+ * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This code implements the DMA subsystem. It provides a HW-neutral interface
+ * for other kernel code to use asynchronous memory copy capabilities,
+ * if present, and allows different HW DMA drivers to register as providing
+ * this capability.
+ *
+ * Due to the fact we are accelerating what is already a relatively fast
+ * operation, the code goes to great lengths to avoid additional overhead,
+ * such as locking.
+ *
+ * LOCKING:
+ *
+ * The subsystem keeps a global list of dma_device structs; it is protected by
+ * a mutex, dma_list_mutex.
+ *
+ * A subsystem can get access to a channel by calling dmaengine_get() followed
+ * by dma_find_channel(), or if it has need for an exclusive channel it can call
+ * dma_request_channel().  Once a channel is allocated a reference is taken
+ * against its corresponding driver to disable removal.
+ *
+ * Each device has a channels list, which runs unlocked but is never modified
+ * once the device is registered; it is just set up by the driver.
+ *
+ * See Documentation/driver-api/dmaengine for more details
+ */
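+
+/*
+ * Illustrative usage sketch (not part of this file): a client of the
+ * opportunistic offload path described above would typically do
+ *
+ *	dmaengine_get();
+ *	chan = dma_find_channel(DMA_MEMCPY);
+ *	if (chan) {
+ *		... submit descriptors ...
+ *		dma_async_issue_pending(chan);
+ *	}
+ *	dmaengine_put();
+ *
+ * whereas a client that needs a channel to itself uses dma_request_channel()
+ * and dma_release_channel() instead.
+ */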
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/hardirq.h>
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/mutex.h>
+#include <linux/jiffies.h>
+#include <linux/rculist.h>
+#include <linux/idr.h>
+#include <linux/slab.h>
+#include <linux/acpi.h>
+#include <linux/acpi_dma.h>
+#include <linux/of_dma.h>
+#include <linux/mempool.h>
+
+static DEFINE_MUTEX(dma_list_mutex);
+static DEFINE_IDA(dma_ida);
+static LIST_HEAD(dma_device_list);
+static long dmaengine_ref_count;
+
+/* --- sysfs implementation --- */
+
+/**
+ * dev_to_dma_chan - convert a device pointer to its sysfs container object
+ * @dev - device node
+ *
+ * Must be called under dma_list_mutex
+ */
+static struct dma_chan *dev_to_dma_chan(struct device *dev)
+{
+	struct dma_chan_dev *chan_dev;
+
+	chan_dev = container_of(dev, typeof(*chan_dev), device);
+	return chan_dev->chan;
+}
+
+static ssize_t memcpy_count_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct dma_chan *chan;
+	unsigned long count = 0;
+	int i;
+	int err;
+
+	mutex_lock(&dma_list_mutex);
+	chan = dev_to_dma_chan(dev);
+	if (chan) {
+		for_each_possible_cpu(i)
+			count += per_cpu_ptr(chan->local, i)->memcpy_count;
+		err = sprintf(buf, "%lu\n", count);
+	} else
+		err = -ENODEV;
+	mutex_unlock(&dma_list_mutex);
+
+	return err;
+}
+static DEVICE_ATTR_RO(memcpy_count);
+
+static ssize_t bytes_transferred_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct dma_chan *chan;
+	unsigned long count = 0;
+	int i;
+	int err;
+
+	mutex_lock(&dma_list_mutex);
+	chan = dev_to_dma_chan(dev);
+	if (chan) {
+		for_each_possible_cpu(i)
+			count += per_cpu_ptr(chan->local, i)->bytes_transferred;
+		err = sprintf(buf, "%lu\n", count);
+	} else
+		err = -ENODEV;
+	mutex_unlock(&dma_list_mutex);
+
+	return err;
+}
+static DEVICE_ATTR_RO(bytes_transferred);
+
+static ssize_t in_use_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	struct dma_chan *chan;
+	int err;
+
+	mutex_lock(&dma_list_mutex);
+	chan = dev_to_dma_chan(dev);
+	if (chan)
+		err = sprintf(buf, "%d\n", chan->client_count);
+	else
+		err = -ENODEV;
+	mutex_unlock(&dma_list_mutex);
+
+	return err;
+}
+static DEVICE_ATTR_RO(in_use);
+
+static struct attribute *dma_dev_attrs[] = {
+	&dev_attr_memcpy_count.attr,
+	&dev_attr_bytes_transferred.attr,
+	&dev_attr_in_use.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(dma_dev);
+
+static void chan_dev_release(struct device *dev)
+{
+	struct dma_chan_dev *chan_dev;
+
+	chan_dev = container_of(dev, typeof(*chan_dev), device);
+	if (atomic_dec_and_test(chan_dev->idr_ref)) {
+		ida_free(&dma_ida, chan_dev->dev_id);
+		kfree(chan_dev->idr_ref);
+	}
+	kfree(chan_dev);
+}
+
+static struct class dma_devclass = {
+	.name		= "dma",
+	.dev_groups	= dma_dev_groups,
+	.dev_release	= chan_dev_release,
+};
+
+/* --- client and device registration --- */
+
+#define dma_device_satisfies_mask(device, mask) \
+	__dma_device_satisfies_mask((device), &(mask))
+static int
+__dma_device_satisfies_mask(struct dma_device *device,
+			    const dma_cap_mask_t *want)
+{
+	dma_cap_mask_t has;
+
+	bitmap_and(has.bits, want->bits, device->cap_mask.bits,
+		DMA_TX_TYPE_END);
+	return bitmap_equal(want->bits, has.bits, DMA_TX_TYPE_END);
+}
+
+static struct module *dma_chan_to_owner(struct dma_chan *chan)
+{
+	return chan->device->dev->driver->owner;
+}
+
+/**
+ * balance_ref_count - catch up the channel reference count
+ * @chan - channel to balance ->client_count versus dmaengine_ref_count
+ *
+ * balance_ref_count must be called under dma_list_mutex
+ */
+static void balance_ref_count(struct dma_chan *chan)
+{
+	struct module *owner = dma_chan_to_owner(chan);
+
+	while (chan->client_count < dmaengine_ref_count) {
+		__module_get(owner);
+		chan->client_count++;
+	}
+}
+
+/**
+ * dma_chan_get - try to grab a dma channel's parent driver module
+ * @chan - channel to grab
+ *
+ * Must be called under dma_list_mutex
+ */
+static int dma_chan_get(struct dma_chan *chan)
+{
+	struct module *owner = dma_chan_to_owner(chan);
+	int ret;
+
+	/* The channel is already in use, update client count */
+	if (chan->client_count) {
+		__module_get(owner);
+		goto out;
+	}
+
+	if (!try_module_get(owner))
+		return -ENODEV;
+
+	/* allocate upon first client reference */
+	if (chan->device->device_alloc_chan_resources) {
+		ret = chan->device->device_alloc_chan_resources(chan);
+		if (ret < 0)
+			goto err_out;
+	}
+
+	if (!dma_has_cap(DMA_PRIVATE, chan->device->cap_mask))
+		balance_ref_count(chan);
+
+out:
+	chan->client_count++;
+	return 0;
+
+err_out:
+	module_put(owner);
+	return ret;
+}
+
+/**
+ * dma_chan_put - drop a reference to a dma channel's parent driver module
+ * @chan - channel to release
+ *
+ * Must be called under dma_list_mutex
+ */
+static void dma_chan_put(struct dma_chan *chan)
+{
+	/* This channel is not in use, bail out */
+	if (!chan->client_count)
+		return;
+
+	chan->client_count--;
+	module_put(dma_chan_to_owner(chan));
+
+	/* This channel is not in use anymore, free it */
+	if (!chan->client_count && chan->device->device_free_chan_resources) {
+		/* Make sure all operations have completed */
+		dmaengine_synchronize(chan);
+		chan->device->device_free_chan_resources(chan);
+	}
+
+	/* If the channel is used via a DMA request router, free the mapping */
+	if (chan->router && chan->router->route_free) {
+		chan->router->route_free(chan->router->dev, chan->route_data);
+		chan->router = NULL;
+		chan->route_data = NULL;
+	}
+}
+
+enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
+{
+	enum dma_status status;
+	unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
+
+	dma_async_issue_pending(chan);
+	do {
+		status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
+		if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
+			dev_err(chan->device->dev, "%s: timeout!\n", __func__);
+			return DMA_ERROR;
+		}
+		if (status != DMA_IN_PROGRESS)
+			break;
+		cpu_relax();
+	} while (1);
+
+	return status;
+}
+EXPORT_SYMBOL(dma_sync_wait);
+
+/**
+ * dma_cap_mask_all - enable iteration over all operation types
+ */
+static dma_cap_mask_t dma_cap_mask_all;
+
+/**
+ * dma_chan_tbl_ent - tracks channel allocations per core/operation
+ * @chan - associated channel for this entry
+ */
+struct dma_chan_tbl_ent {
+	struct dma_chan *chan;
+};
+
+/**
+ * channel_table - percpu lookup table for memory-to-memory offload providers
+ */
+static struct dma_chan_tbl_ent __percpu *channel_table[DMA_TX_TYPE_END];
+
+static int __init dma_channel_table_init(void)
+{
+	enum dma_transaction_type cap;
+	int err = 0;
+
+	bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);
+
+	/* 'interrupt', 'private', and 'slave' are channel capabilities,
+	 * but are not associated with an operation so they do not need
+	 * an entry in the channel_table
+	 */
+	clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);
+	clear_bit(DMA_PRIVATE, dma_cap_mask_all.bits);
+	clear_bit(DMA_SLAVE, dma_cap_mask_all.bits);
+
+	for_each_dma_cap_mask(cap, dma_cap_mask_all) {
+		channel_table[cap] = alloc_percpu(struct dma_chan_tbl_ent);
+		if (!channel_table[cap]) {
+			err = -ENOMEM;
+			break;
+		}
+	}
+
+	if (err) {
+		pr_err("initialization failure\n");
+		for_each_dma_cap_mask(cap, dma_cap_mask_all)
+			free_percpu(channel_table[cap]);
+	}
+
+	return err;
+}
+arch_initcall(dma_channel_table_init);
+
+/**
+ * dma_find_channel - find a channel to carry out the operation
+ * @tx_type: transaction type
+ */
+struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type)
+{
+	return this_cpu_read(channel_table[tx_type]->chan);
+}
+EXPORT_SYMBOL(dma_find_channel);
+
+/**
+ * dma_issue_pending_all - flush all pending operations across all channels
+ */
+void dma_issue_pending_all(void)
+{
+	struct dma_device *device;
+	struct dma_chan *chan;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(device, &dma_device_list, global_node) {
+		if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+			continue;
+		list_for_each_entry(chan, &device->channels, device_node)
+			if (chan->client_count)
+				device->device_issue_pending(chan);
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(dma_issue_pending_all);
+
+/**
+ * dma_chan_is_local - returns true if the channel is in the same numa-node as the cpu
+ */
+static bool dma_chan_is_local(struct dma_chan *chan, int cpu)
+{
+	int node = dev_to_node(chan->device->dev);
+	return node == -1 || cpumask_test_cpu(cpu, cpumask_of_node(node));
+}
+
+/**
+ * min_chan - returns the channel with min count and in the same numa-node as the cpu
+ * @cap: capability to match
+ * @cpu: cpu index which the channel should be close to
+ *
+ * If some channels are close to the given cpu, the one with the lowest
+ * reference count is returned. Otherwise, cpu is ignored and only the
+ * reference count is taken into account.
+ * Must be called under dma_list_mutex.
+ */
+static struct dma_chan *min_chan(enum dma_transaction_type cap, int cpu)
+{
+	struct dma_device *device;
+	struct dma_chan *chan;
+	struct dma_chan *min = NULL;
+	struct dma_chan *localmin = NULL;
+
+	list_for_each_entry(device, &dma_device_list, global_node) {
+		if (!dma_has_cap(cap, device->cap_mask) ||
+		    dma_has_cap(DMA_PRIVATE, device->cap_mask))
+			continue;
+		list_for_each_entry(chan, &device->channels, device_node) {
+			if (!chan->client_count)
+				continue;
+			if (!min || chan->table_count < min->table_count)
+				min = chan;
+
+			if (dma_chan_is_local(chan, cpu))
+				if (!localmin ||
+				    chan->table_count < localmin->table_count)
+					localmin = chan;
+		}
+	}
+
+	chan = localmin ? localmin : min;
+
+	if (chan)
+		chan->table_count++;
+
+	return chan;
+}
+
+/**
+ * dma_channel_rebalance - redistribute the available channels
+ *
+ * Optimize for cpu isolation (each cpu gets a dedicated channel for an
+ * operation type) in the SMP case,  and operation isolation (avoid
+ * multi-tasking channels) in the non-SMP case.  Must be called under
+ * dma_list_mutex.
+ */
+static void dma_channel_rebalance(void)
+{
+	struct dma_chan *chan;
+	struct dma_device *device;
+	int cpu;
+	int cap;
+
+	/* undo the last distribution */
+	for_each_dma_cap_mask(cap, dma_cap_mask_all)
+		for_each_possible_cpu(cpu)
+			per_cpu_ptr(channel_table[cap], cpu)->chan = NULL;
+
+	list_for_each_entry(device, &dma_device_list, global_node) {
+		if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+			continue;
+		list_for_each_entry(chan, &device->channels, device_node)
+			chan->table_count = 0;
+	}
+
+	/* don't populate the channel_table if no clients are available */
+	if (!dmaengine_ref_count)
+		return;
+
+	/* redistribute available channels */
+	for_each_dma_cap_mask(cap, dma_cap_mask_all)
+		for_each_online_cpu(cpu) {
+			chan = min_chan(cap, cpu);
+			per_cpu_ptr(channel_table[cap], cpu)->chan = chan;
+		}
+}
+
+int dma_get_slave_caps(struct dma_chan *chan, struct dma_slave_caps *caps)
+{
+	struct dma_device *device;
+
+	if (!chan || !caps)
+		return -EINVAL;
+
+	device = chan->device;
+
+	/* check if the channel supports slave transactions */
+	if (!(test_bit(DMA_SLAVE, device->cap_mask.bits) ||
+	      test_bit(DMA_CYCLIC, device->cap_mask.bits)))
+		return -ENXIO;
+
+	/*
+	 * Check whether it reports it uses the generic slave
+	 * capabilities, if not, that means it doesn't support any
+	 * kind of slave capabilities reporting.
+	 */
+	if (!device->directions)
+		return -ENXIO;
+
+	caps->src_addr_widths = device->src_addr_widths;
+	caps->dst_addr_widths = device->dst_addr_widths;
+	caps->directions = device->directions;
+	caps->max_burst = device->max_burst;
+	caps->residue_granularity = device->residue_granularity;
+	caps->descriptor_reuse = device->descriptor_reuse;
+	caps->cmd_pause = !!device->device_pause;
+	caps->cmd_resume = !!device->device_resume;
+	caps->cmd_terminate = !!device->device_terminate_all;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dma_get_slave_caps);
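+
+/*
+ * Illustrative sketch (hypothetical caller, not part of this file): a slave
+ * driver can use this to check what the channel supports before relying on
+ * optional operations, e.g.:
+ *
+ *	struct dma_slave_caps caps;
+ *
+ *	if (!dma_get_slave_caps(chan, &caps) && caps.cmd_pause)
+ *		dmaengine_pause(chan);
+ */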
+
+static struct dma_chan *private_candidate(const dma_cap_mask_t *mask,
+					  struct dma_device *dev,
+					  dma_filter_fn fn, void *fn_param)
+{
+	struct dma_chan *chan;
+
+	if (mask && !__dma_device_satisfies_mask(dev, mask)) {
+		dev_dbg(dev->dev, "%s: wrong capabilities\n", __func__);
+		return NULL;
+	}
+	/* devices with multiple channels need special handling as we need to
+	 * ensure that all channels are either private or public.
+	 */
+	if (dev->chancnt > 1 && !dma_has_cap(DMA_PRIVATE, dev->cap_mask))
+		list_for_each_entry(chan, &dev->channels, device_node) {
+			/* some channels are already publicly allocated */
+			if (chan->client_count)
+				return NULL;
+		}
+
+	list_for_each_entry(chan, &dev->channels, device_node) {
+		if (chan->client_count) {
+			dev_dbg(dev->dev, "%s: %s busy\n",
+				 __func__, dma_chan_name(chan));
+			continue;
+		}
+		if (fn && !fn(chan, fn_param)) {
+			dev_dbg(dev->dev, "%s: %s filter said false\n",
+				 __func__, dma_chan_name(chan));
+			continue;
+		}
+		return chan;
+	}
+
+	return NULL;
+}
+
+static struct dma_chan *find_candidate(struct dma_device *device,
+				       const dma_cap_mask_t *mask,
+				       dma_filter_fn fn, void *fn_param)
+{
+	struct dma_chan *chan = private_candidate(mask, device, fn, fn_param);
+	int err;
+
+	if (chan) {
+		/* Found a suitable channel, try to grab, prep, and return it.
+		 * We first set DMA_PRIVATE to disable balance_ref_count as this
+		 * channel will not be published in the general-purpose
+		 * allocator
+		 */
+		dma_cap_set(DMA_PRIVATE, device->cap_mask);
+		device->privatecnt++;
+		err = dma_chan_get(chan);
+
+		if (err) {
+			if (err == -ENODEV) {
+				dev_dbg(device->dev, "%s: %s module removed\n",
+					__func__, dma_chan_name(chan));
+				list_del_rcu(&device->global_node);
+			} else
+				dev_dbg(device->dev,
+					"%s: failed to get %s: (%d)\n",
+					 __func__, dma_chan_name(chan), err);
+
+			if (--device->privatecnt == 0)
+				dma_cap_clear(DMA_PRIVATE, device->cap_mask);
+
+			chan = ERR_PTR(err);
+		}
+	}
+
+	return chan ? chan : ERR_PTR(-EPROBE_DEFER);
+}
+
+/**
+ * dma_get_slave_channel - try to get specific channel exclusively
+ * @chan: target channel
+ */
+struct dma_chan *dma_get_slave_channel(struct dma_chan *chan)
+{
+	int err = -EBUSY;
+
+	/* lock against __dma_request_channel */
+	mutex_lock(&dma_list_mutex);
+
+	if (chan->client_count == 0) {
+		struct dma_device *device = chan->device;
+
+		dma_cap_set(DMA_PRIVATE, device->cap_mask);
+		device->privatecnt++;
+		err = dma_chan_get(chan);
+		if (err) {
+			dev_dbg(chan->device->dev,
+				"%s: failed to get %s: (%d)\n",
+				__func__, dma_chan_name(chan), err);
+			chan = NULL;
+			if (--device->privatecnt == 0)
+				dma_cap_clear(DMA_PRIVATE, device->cap_mask);
+		}
+	} else
+		chan = NULL;
+
+	mutex_unlock(&dma_list_mutex);
+
+	return chan;
+}
+EXPORT_SYMBOL_GPL(dma_get_slave_channel);
+
+struct dma_chan *dma_get_any_slave_channel(struct dma_device *device)
+{
+	dma_cap_mask_t mask;
+	struct dma_chan *chan;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	/* lock against __dma_request_channel */
+	mutex_lock(&dma_list_mutex);
+
+	chan = find_candidate(device, &mask, NULL, NULL);
+
+	mutex_unlock(&dma_list_mutex);
+
+	return IS_ERR(chan) ? NULL : chan;
+}
+EXPORT_SYMBOL_GPL(dma_get_any_slave_channel);
+
+/**
+ * __dma_request_channel - try to allocate an exclusive channel
+ * @mask: capabilities that the channel must satisfy
+ * @fn: optional callback used to filter available channels
+ * @fn_param: opaque parameter to pass to dma_filter_fn
+ *
+ * Returns pointer to appropriate DMA channel on success or NULL.
+ */
+struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask,
+				       dma_filter_fn fn, void *fn_param)
+{
+	struct dma_device *device, *_d;
+	struct dma_chan *chan = NULL;
+
+	/* Find a channel */
+	mutex_lock(&dma_list_mutex);
+	list_for_each_entry_safe(device, _d, &dma_device_list, global_node) {
+		chan = find_candidate(device, mask, fn, fn_param);
+		if (!IS_ERR(chan))
+			break;
+
+		chan = NULL;
+	}
+	mutex_unlock(&dma_list_mutex);
+
+	pr_debug("%s: %s (%s)\n",
+		 __func__,
+		 chan ? "success" : "fail",
+		 chan ? dma_chan_name(chan) : NULL);
+
+	return chan;
+}
+EXPORT_SYMBOL_GPL(__dma_request_channel);
+
+static const struct dma_slave_map *dma_filter_match(struct dma_device *device,
+						    const char *name,
+						    struct device *dev)
+{
+	int i;
+
+	if (!device->filter.mapcnt)
+		return NULL;
+
+	for (i = 0; i < device->filter.mapcnt; i++) {
+		const struct dma_slave_map *map = &device->filter.map[i];
+
+		if (!strcmp(map->devname, dev_name(dev)) &&
+		    !strcmp(map->slave, name))
+			return map;
+	}
+
+	return NULL;
+}
+
+/**
+ * dma_request_chan - try to allocate an exclusive slave channel
+ * @dev:	pointer to client device structure
+ * @name:	slave channel name
+ *
+ * Returns pointer to appropriate DMA channel on success or an error pointer.
+ */
+struct dma_chan *dma_request_chan(struct device *dev, const char *name)
+{
+	struct dma_device *d, *_d;
+	struct dma_chan *chan = NULL;
+
+	/* If device-tree is present get slave info from here */
+	if (dev->of_node)
+		chan = of_dma_request_slave_channel(dev->of_node, name);
+
+	/* If device was enumerated by ACPI get slave info from here */
+	if (has_acpi_companion(dev) && !chan)
+		chan = acpi_dma_request_slave_chan_by_name(dev, name);
+
+	if (chan) {
+		/* Valid channel found or requester needs to be deferred */
+		if (!IS_ERR(chan) || PTR_ERR(chan) == -EPROBE_DEFER)
+			return chan;
+	}
+
+	/* Try to find the channel via the DMA filter map(s) */
+	mutex_lock(&dma_list_mutex);
+	list_for_each_entry_safe(d, _d, &dma_device_list, global_node) {
+		dma_cap_mask_t mask;
+		const struct dma_slave_map *map = dma_filter_match(d, name, dev);
+
+		if (!map)
+			continue;
+
+		dma_cap_zero(mask);
+		dma_cap_set(DMA_SLAVE, mask);
+
+		chan = find_candidate(d, &mask, d->filter.fn, map->param);
+		if (!IS_ERR(chan))
+			break;
+	}
+	mutex_unlock(&dma_list_mutex);
+
+	return chan ? chan : ERR_PTR(-EPROBE_DEFER);
+}
+EXPORT_SYMBOL_GPL(dma_request_chan);
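+
+/*
+ * Illustrative sketch (hypothetical consumer, not part of this file):
+ *
+ *	chan = dma_request_chan(&pdev->dev, "rx");
+ *	if (IS_ERR(chan))
+ *		return PTR_ERR(chan);	(may be -EPROBE_DEFER)
+ *	...
+ *	dma_release_channel(chan);
+ */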
+
+/**
+ * dma_request_slave_channel - try to allocate an exclusive slave channel
+ * @dev:	pointer to client device structure
+ * @name:	slave channel name
+ *
+ * Returns pointer to appropriate DMA channel on success or NULL.
+ */
+struct dma_chan *dma_request_slave_channel(struct device *dev,
+					   const char *name)
+{
+	struct dma_chan *ch = dma_request_chan(dev, name);
+	if (IS_ERR(ch))
+		return NULL;
+
+	return ch;
+}
+EXPORT_SYMBOL_GPL(dma_request_slave_channel);
+
+/**
+ * dma_request_chan_by_mask - allocate a channel satisfying certain capabilities
+ * @mask: capabilities that the channel must satisfy
+ *
+ * Returns pointer to appropriate DMA channel on success or an error pointer.
+ */
+struct dma_chan *dma_request_chan_by_mask(const dma_cap_mask_t *mask)
+{
+	struct dma_chan *chan;
+
+	if (!mask)
+		return ERR_PTR(-ENODEV);
+
+	chan = __dma_request_channel(mask, NULL, NULL);
+	if (!chan) {
+		mutex_lock(&dma_list_mutex);
+		if (list_empty(&dma_device_list))
+			chan = ERR_PTR(-EPROBE_DEFER);
+		else
+			chan = ERR_PTR(-ENODEV);
+		mutex_unlock(&dma_list_mutex);
+	}
+
+	return chan;
+}
+EXPORT_SYMBOL_GPL(dma_request_chan_by_mask);
+
+void dma_release_channel(struct dma_chan *chan)
+{
+	mutex_lock(&dma_list_mutex);
+	WARN_ONCE(chan->client_count != 1,
+		  "chan reference count %d != 1\n", chan->client_count);
+	dma_chan_put(chan);
+	/* drop PRIVATE cap enabled by __dma_request_channel() */
+	if (--chan->device->privatecnt == 0)
+		dma_cap_clear(DMA_PRIVATE, chan->device->cap_mask);
+	mutex_unlock(&dma_list_mutex);
+}
+EXPORT_SYMBOL_GPL(dma_release_channel);
+
+/**
+ * dmaengine_get - register interest in dma_channels
+ */
+void dmaengine_get(void)
+{
+	struct dma_device *device, *_d;
+	struct dma_chan *chan;
+	int err;
+
+	mutex_lock(&dma_list_mutex);
+	dmaengine_ref_count++;
+
+	/* try to grab channels */
+	list_for_each_entry_safe(device, _d, &dma_device_list, global_node) {
+		if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+			continue;
+		list_for_each_entry(chan, &device->channels, device_node) {
+			err = dma_chan_get(chan);
+			if (err == -ENODEV) {
+				/* module removed before we could use it */
+				list_del_rcu(&device->global_node);
+				break;
+			} else if (err)
+				dev_dbg(chan->device->dev,
+					"%s: failed to get %s: (%d)\n",
+					__func__, dma_chan_name(chan), err);
+		}
+	}
+
+	/* if this is the first reference and there were channels
+	 * waiting we need to rebalance to get those channels
+	 * incorporated into the channel table
+	 */
+	if (dmaengine_ref_count == 1)
+		dma_channel_rebalance();
+	mutex_unlock(&dma_list_mutex);
+}
+EXPORT_SYMBOL(dmaengine_get);
+
+/**
+ * dmaengine_put - let dma drivers be removed when ref_count == 0
+ */
+void dmaengine_put(void)
+{
+	struct dma_device *device;
+	struct dma_chan *chan;
+
+	mutex_lock(&dma_list_mutex);
+	dmaengine_ref_count--;
+	BUG_ON(dmaengine_ref_count < 0);
+	/* drop channel references */
+	list_for_each_entry(device, &dma_device_list, global_node) {
+		if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+			continue;
+		list_for_each_entry(chan, &device->channels, device_node)
+			dma_chan_put(chan);
+	}
+	mutex_unlock(&dma_list_mutex);
+}
+EXPORT_SYMBOL(dmaengine_put);
+
+static bool device_has_all_tx_types(struct dma_device *device)
+{
+	/* A device that satisfies this test has channels that will never cause
+	 * an async_tx channel switch event as all possible operation types can
+	 * be handled.
+	 */
+	#ifdef CONFIG_ASYNC_TX_DMA
+	if (!dma_has_cap(DMA_INTERRUPT, device->cap_mask))
+		return false;
+	#endif
+
+	#if IS_ENABLED(CONFIG_ASYNC_MEMCPY)
+	if (!dma_has_cap(DMA_MEMCPY, device->cap_mask))
+		return false;
+	#endif
+
+	#if IS_ENABLED(CONFIG_ASYNC_XOR)
+	if (!dma_has_cap(DMA_XOR, device->cap_mask))
+		return false;
+
+	#ifndef CONFIG_ASYNC_TX_DISABLE_XOR_VAL_DMA
+	if (!dma_has_cap(DMA_XOR_VAL, device->cap_mask))
+		return false;
+	#endif
+	#endif
+
+	#if IS_ENABLED(CONFIG_ASYNC_PQ)
+	if (!dma_has_cap(DMA_PQ, device->cap_mask))
+		return false;
+
+	#ifndef CONFIG_ASYNC_TX_DISABLE_PQ_VAL_DMA
+	if (!dma_has_cap(DMA_PQ_VAL, device->cap_mask))
+		return false;
+	#endif
+	#endif
+
+	return true;
+}
+
+static int get_dma_id(struct dma_device *device)
+{
+	int rc = ida_alloc(&dma_ida, GFP_KERNEL);
+
+	if (rc < 0)
+		return rc;
+	device->dev_id = rc;
+	return 0;
+}
+
+/**
+ * dma_async_device_register - registers DMA devices found
+ * @device: &dma_device
+ */
+int dma_async_device_register(struct dma_device *device)
+{
+	int chancnt = 0, rc;
+	struct dma_chan *chan;
+	atomic_t *idr_ref;
+
+	if (!device)
+		return -ENODEV;
+
+	/* validate device routines */
+	if (!device->dev) {
+		pr_err("DMA device must have dev\n");
+		return -EIO;
+	}
+
+	if (dma_has_cap(DMA_MEMCPY, device->cap_mask) && !device->device_prep_dma_memcpy) {
+		dev_err(device->dev,
+			"Device claims capability %s, but op is not defined\n",
+			"DMA_MEMCPY");
+		return -EIO;
+	}
+
+	if (dma_has_cap(DMA_XOR, device->cap_mask) && !device->device_prep_dma_xor) {
+		dev_err(device->dev,
+			"Device claims capability %s, but op is not defined\n",
+			"DMA_XOR");
+		return -EIO;
+	}
+
+	if (dma_has_cap(DMA_XOR_VAL, device->cap_mask) && !device->device_prep_dma_xor_val) {
+		dev_err(device->dev,
+			"Device claims capability %s, but op is not defined\n",
+			"DMA_XOR_VAL");
+		return -EIO;
+	}
+
+	if (dma_has_cap(DMA_PQ, device->cap_mask) && !device->device_prep_dma_pq) {
+		dev_err(device->dev,
+			"Device claims capability %s, but op is not defined\n",
+			"DMA_PQ");
+		return -EIO;
+	}
+
+	if (dma_has_cap(DMA_PQ_VAL, device->cap_mask) && !device->device_prep_dma_pq_val) {
+		dev_err(device->dev,
+			"Device claims capability %s, but op is not defined\n",
+			"DMA_PQ_VAL");
+		return -EIO;
+	}
+
+	if (dma_has_cap(DMA_MEMSET, device->cap_mask) && !device->device_prep_dma_memset) {
+		dev_err(device->dev,
+			"Device claims capability %s, but op is not defined\n",
+			"DMA_MEMSET");
+		return -EIO;
+	}
+
+	if (dma_has_cap(DMA_INTERRUPT, device->cap_mask) && !device->device_prep_dma_interrupt) {
+		dev_err(device->dev,
+			"Device claims capability %s, but op is not defined\n",
+			"DMA_INTERRUPT");
+		return -EIO;
+	}
+
+	if (dma_has_cap(DMA_CYCLIC, device->cap_mask) && !device->device_prep_dma_cyclic) {
+		dev_err(device->dev,
+			"Device claims capability %s, but op is not defined\n",
+			"DMA_CYCLIC");
+		return -EIO;
+	}
+
+	if (dma_has_cap(DMA_INTERLEAVE, device->cap_mask) && !device->device_prep_interleaved_dma) {
+		dev_err(device->dev,
+			"Device claims capability %s, but op is not defined\n",
+			"DMA_INTERLEAVE");
+		return -EIO;
+	}
+
+	if (!device->device_tx_status) {
+		dev_err(device->dev, "Device tx_status is not defined\n");
+		return -EIO;
+	}
+
+	if (!device->device_issue_pending) {
+		dev_err(device->dev, "Device issue_pending is not defined\n");
+		return -EIO;
+	}
+
+	/* note: this only matters in the
+	 * CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH=n case
+	 */
+	if (device_has_all_tx_types(device))
+		dma_cap_set(DMA_ASYNC_TX, device->cap_mask);
+
+	idr_ref = kmalloc(sizeof(*idr_ref), GFP_KERNEL);
+	if (!idr_ref)
+		return -ENOMEM;
+	rc = get_dma_id(device);
+	if (rc != 0) {
+		kfree(idr_ref);
+		return rc;
+	}
+
+	atomic_set(idr_ref, 0);
+
+	/* represent channels in sysfs. Probably want devs too */
+	list_for_each_entry(chan, &device->channels, device_node) {
+		rc = -ENOMEM;
+		chan->local = alloc_percpu(typeof(*chan->local));
+		if (chan->local == NULL)
+			goto err_out;
+		chan->dev = kzalloc(sizeof(*chan->dev), GFP_KERNEL);
+		if (chan->dev == NULL) {
+			free_percpu(chan->local);
+			chan->local = NULL;
+			goto err_out;
+		}
+
+		chan->chan_id = chancnt++;
+		chan->dev->device.class = &dma_devclass;
+		chan->dev->device.parent = device->dev;
+		chan->dev->chan = chan;
+		chan->dev->idr_ref = idr_ref;
+		chan->dev->dev_id = device->dev_id;
+		atomic_inc(idr_ref);
+		dev_set_name(&chan->dev->device, "dma%dchan%d",
+			     device->dev_id, chan->chan_id);
+
+		rc = device_register(&chan->dev->device);
+		if (rc) {
+			free_percpu(chan->local);
+			chan->local = NULL;
+			kfree(chan->dev);
+			atomic_dec(idr_ref);
+			goto err_out;
+		}
+		chan->client_count = 0;
+	}
+
+	if (!chancnt) {
+		dev_err(device->dev, "%s: device has no channels!\n", __func__);
+		rc = -ENODEV;
+		goto err_out;
+	}
+
+	device->chancnt = chancnt;
+
+	mutex_lock(&dma_list_mutex);
+	/* take references on public channels */
+	if (dmaengine_ref_count && !dma_has_cap(DMA_PRIVATE, device->cap_mask))
+		list_for_each_entry(chan, &device->channels, device_node) {
+			/* if clients are already waiting for channels we need
+			 * to take references on their behalf
+			 */
+			if (dma_chan_get(chan) == -ENODEV) {
+				/* note we can only get here for the first
+				 * channel as the remaining channels are
+				 * guaranteed to get a reference
+				 */
+				rc = -ENODEV;
+				mutex_unlock(&dma_list_mutex);
+				goto err_out;
+			}
+		}
+	list_add_tail_rcu(&device->global_node, &dma_device_list);
+	if (dma_has_cap(DMA_PRIVATE, device->cap_mask))
+		device->privatecnt++;	/* Always private */
+	dma_channel_rebalance();
+	mutex_unlock(&dma_list_mutex);
+
+	return 0;
+
+err_out:
+	/* if we never registered a channel just release the idr */
+	if (atomic_read(idr_ref) == 0) {
+		ida_free(&dma_ida, device->dev_id);
+		kfree(idr_ref);
+		return rc;
+	}
+
+	list_for_each_entry(chan, &device->channels, device_node) {
+		if (chan->local == NULL)
+			continue;
+		mutex_lock(&dma_list_mutex);
+		chan->dev->chan = NULL;
+		mutex_unlock(&dma_list_mutex);
+		device_unregister(&chan->dev->device);
+		free_percpu(chan->local);
+	}
+	return rc;
+}
+EXPORT_SYMBOL(dma_async_device_register);
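+
+/*
+ * Illustrative sketch (not part of the original file; foo_* names are
+ * hypothetical): a provider fills in the capability mask and the matching
+ * callbacks before registering, otherwise the checks above reject it:
+ *
+ *	dma_cap_set(DMA_MEMCPY, dd->cap_mask);
+ *	dd->device_prep_dma_memcpy = foo_prep_dma_memcpy;
+ *	dd->device_tx_status = foo_tx_status;
+ *	dd->device_issue_pending = foo_issue_pending;
+ *	INIT_LIST_HEAD(&dd->channels);
+ *	list_add_tail(&foo_chan->device_node, &dd->channels);
+ *	err = dma_async_device_register(dd);
+ */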
+
+/**
+ * dma_async_device_unregister - unregister a DMA device
+ * @device: &dma_device
+ *
+ * This routine is called by dma driver exit routines; dmaengine holds module
+ * references to prevent it from being called while channels are in use.
+ */
+void dma_async_device_unregister(struct dma_device *device)
+{
+	struct dma_chan *chan;
+
+	mutex_lock(&dma_list_mutex);
+	list_del_rcu(&device->global_node);
+	dma_channel_rebalance();
+	mutex_unlock(&dma_list_mutex);
+
+	list_for_each_entry(chan, &device->channels, device_node) {
+		WARN_ONCE(chan->client_count,
+			  "%s called while %d clients hold a reference\n",
+			  __func__, chan->client_count);
+		mutex_lock(&dma_list_mutex);
+		chan->dev->chan = NULL;
+		mutex_unlock(&dma_list_mutex);
+		device_unregister(&chan->dev->device);
+		free_percpu(chan->local);
+	}
+}
+EXPORT_SYMBOL(dma_async_device_unregister);
+
+static void dmam_device_release(struct device *dev, void *res)
+{
+	struct dma_device *device;
+
+	device = *(struct dma_device **)res;
+	dma_async_device_unregister(device);
+}
+
+/**
+ * dmaenginem_async_device_register - register a DMA device (managed)
+ * @device: &dma_device
+ *
+ * The operation is managed and will be undone on driver detach.
+ */
+int dmaenginem_async_device_register(struct dma_device *device)
+{
+	void *p;
+	int ret;
+
+	p = devres_alloc(dmam_device_release, sizeof(void *), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	ret = dma_async_device_register(device);
+	if (!ret) {
+		*(struct dma_device **)p = device;
+		devres_add(device->dev, p);
+	} else {
+		devres_free(p);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(dmaenginem_async_device_register);
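+
+/*
+ * With the managed variant above, a driver that registers from its probe
+ * path does not need to call dma_async_device_unregister() in its remove
+ * path; devres undoes the registration on driver detach via
+ * dmam_device_release().
+ */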
+
+struct dmaengine_unmap_pool {
+	struct kmem_cache *cache;
+	const char *name;
+	mempool_t *pool;
+	size_t size;
+};
+
+#define __UNMAP_POOL(x) { .size = x, .name = "dmaengine-unmap-" __stringify(x) }
+static struct dmaengine_unmap_pool unmap_pool[] = {
+	__UNMAP_POOL(2),
+	#if IS_ENABLED(CONFIG_DMA_ENGINE_RAID)
+	__UNMAP_POOL(16),
+	__UNMAP_POOL(128),
+	__UNMAP_POOL(256),
+	#endif
+};
+
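+/*
+ * Map a requested address count onto one of the pools above: nr <= 2 uses
+ * the 2-entry pool and, with CONFIG_DMA_ENGINE_RAID enabled, 3..16 uses the
+ * 16-entry pool, 17..128 the 128-entry pool and 129..256 the 256-entry pool;
+ * larger requests are a bug.
+ */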
+static struct dmaengine_unmap_pool *__get_unmap_pool(int nr)
+{
+	int order = get_count_order(nr);
+
+	switch (order) {
+	case 0 ... 1:
+		return &unmap_pool[0];
+#if IS_ENABLED(CONFIG_DMA_ENGINE_RAID)
+	case 2 ... 4:
+		return &unmap_pool[1];
+	case 5 ... 7:
+		return &unmap_pool[2];
+	case 8:
+		return &unmap_pool[3];
+#endif
+	default:
+		BUG();
+		return NULL;
+	}
+}
+
+static void dmaengine_unmap(struct kref *kref)
+{
+	struct dmaengine_unmap_data *unmap = container_of(kref, typeof(*unmap), kref);
+	struct device *dev = unmap->dev;
+	int cnt, i;
+
+	cnt = unmap->to_cnt;
+	for (i = 0; i < cnt; i++)
+		dma_unmap_page(dev, unmap->addr[i], unmap->len,
+			       DMA_TO_DEVICE);
+	cnt += unmap->from_cnt;
+	for (; i < cnt; i++)
+		dma_unmap_page(dev, unmap->addr[i], unmap->len,
+			       DMA_FROM_DEVICE);
+	cnt += unmap->bidi_cnt;
+	for (; i < cnt; i++) {
+		if (unmap->addr[i] == 0)
+			continue;
+		dma_unmap_page(dev, unmap->addr[i], unmap->len,
+			       DMA_BIDIRECTIONAL);
+	}
+	cnt = unmap->map_cnt;
+	mempool_free(unmap, __get_unmap_pool(cnt)->pool);
+}
+
+void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap)
+{
+	if (unmap)
+		kref_put(&unmap->kref, dmaengine_unmap);
+}
+EXPORT_SYMBOL_GPL(dmaengine_unmap_put);
+
+static void dmaengine_destroy_unmap_pool(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(unmap_pool); i++) {
+		struct dmaengine_unmap_pool *p = &unmap_pool[i];
+
+		mempool_destroy(p->pool);
+		p->pool = NULL;
+		kmem_cache_destroy(p->cache);
+		p->cache = NULL;
+	}
+}
+
+static int __init dmaengine_init_unmap_pool(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(unmap_pool); i++) {
+		struct dmaengine_unmap_pool *p = &unmap_pool[i];
+		size_t size;
+
+		size = sizeof(struct dmaengine_unmap_data) +
+		       sizeof(dma_addr_t) * p->size;
+
+		p->cache = kmem_cache_create(p->name, size, 0,
+					     SLAB_HWCACHE_ALIGN, NULL);
+		if (!p->cache)
+			break;
+		p->pool = mempool_create_slab_pool(1, p->cache);
+		if (!p->pool)
+			break;
+	}
+
+	if (i == ARRAY_SIZE(unmap_pool))
+		return 0;
+
+	dmaengine_destroy_unmap_pool();
+	return -ENOMEM;
+}
+
+struct dmaengine_unmap_data *
+dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags)
+{
+	struct dmaengine_unmap_data *unmap;
+
+	unmap = mempool_alloc(__get_unmap_pool(nr)->pool, flags);
+	if (!unmap)
+		return NULL;
+
+	memset(unmap, 0, sizeof(*unmap));
+	kref_init(&unmap->kref);
+	unmap->dev = dev;
+	unmap->map_cnt = nr;
+
+	return unmap;
+}
+EXPORT_SYMBOL(dmaengine_get_unmap_data);
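+
+/*
+ * Illustrative use (a condensed version of what dmatest.c below does):
+ *
+ *	um = dmaengine_get_unmap_data(dev->dev, src_cnt + dst_cnt, GFP_KERNEL);
+ *	um->len = buf_size;
+ *	um->addr[0] = dma_map_page(dev->dev, pg, pg_off, um->len, DMA_TO_DEVICE);
+ *	um->to_cnt++;
+ *	...submit the descriptor and wait for completion...
+ *	dmaengine_unmap_put(um);
+ *
+ * dmaengine_unmap() above then unmaps to_cnt TO_DEVICE entries, from_cnt
+ * FROM_DEVICE entries and bidi_cnt BIDIRECTIONAL entries, in that order
+ * within addr[].
+ */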
+
+void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
+	struct dma_chan *chan)
+{
+	tx->chan = chan;
+	#ifdef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH
+	spin_lock_init(&tx->lock);
+	#endif
+}
+EXPORT_SYMBOL(dma_async_tx_descriptor_init);
+
+/* dma_wait_for_async_tx - spin wait for a transaction to complete
+ * @tx: in-flight transaction to wait on
+ */
+enum dma_status
+dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
+{
+	unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
+
+	if (!tx)
+		return DMA_COMPLETE;
+
+	while (tx->cookie == -EBUSY) {
+		if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
+			dev_err(tx->chan->device->dev,
+				"%s timeout waiting for descriptor submission\n",
+				__func__);
+			return DMA_ERROR;
+		}
+		cpu_relax();
+	}
+	return dma_sync_wait(tx->chan, tx->cookie);
+}
+EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
+
+/* dma_run_dependencies - helper routine for dma drivers to process
+ *	(start) dependent operations on their target channel
+ * @tx: transaction with dependencies
+ */
+void dma_run_dependencies(struct dma_async_tx_descriptor *tx)
+{
+	struct dma_async_tx_descriptor *dep = txd_next(tx);
+	struct dma_async_tx_descriptor *dep_next;
+	struct dma_chan *chan;
+
+	if (!dep)
+		return;
+
+	/* we'll submit tx->next now, so clear the link */
+	txd_clear_next(tx);
+	chan = dep->chan;
+
+	/* keep submitting up until a channel switch is detected;
+	 * in that case we will be called again as a result of
+	 * processing the interrupt from async_tx_channel_switch
+	 */
+	for (; dep; dep = dep_next) {
+		txd_lock(dep);
+		txd_clear_parent(dep);
+		dep_next = txd_next(dep);
+		if (dep_next && dep_next->chan == chan)
+			txd_clear_next(dep); /* ->next will be submitted */
+		else
+			dep_next = NULL; /* submit current dep and terminate */
+		txd_unlock(dep);
+
+		dep->tx_submit(dep);
+	}
+
+	chan->device->device_issue_pending(chan);
+}
+EXPORT_SYMBOL_GPL(dma_run_dependencies);
+
+static int __init dma_bus_init(void)
+{
+	int err = dmaengine_init_unmap_pool();
+
+	if (err)
+		return err;
+	return class_register(&dma_devclass);
+}
+arch_initcall(dma_bus_init);
+
+
diff --git a/drivers/dma/dmaengine.h b/drivers/dma/dmaengine.h
new file mode 100644
index 0000000..501c0b0
--- /dev/null
+++ b/drivers/dma/dmaengine.h
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * The contents of this file are private to DMA engine drivers and are not
+ * part of the API to be used by DMA engine users.
+ */
+#ifndef DMAENGINE_H
+#define DMAENGINE_H
+
+#include <linux/bug.h>
+#include <linux/dmaengine.h>
+
+/**
+ * dma_cookie_init - initialize the cookies for a DMA channel
+ * @chan: dma channel to initialize
+ */
+static inline void dma_cookie_init(struct dma_chan *chan)
+{
+	chan->cookie = DMA_MIN_COOKIE;
+	chan->completed_cookie = DMA_MIN_COOKIE;
+}
+
+/**
+ * dma_cookie_assign - assign a DMA engine cookie to the descriptor
+ * @tx: descriptor needing cookie
+ *
+ * Assign a unique non-zero per-channel cookie to the descriptor.
+ * Note: caller is expected to hold a lock to prevent concurrency.
+ */
+static inline dma_cookie_t dma_cookie_assign(struct dma_async_tx_descriptor *tx)
+{
+	struct dma_chan *chan = tx->chan;
+	dma_cookie_t cookie;
+
+	cookie = chan->cookie + 1;
+	if (cookie < DMA_MIN_COOKIE)
+		cookie = DMA_MIN_COOKIE;
+	tx->cookie = chan->cookie = cookie;
+
+	return cookie;
+}
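+
+/*
+ * Illustrative sketch (hypothetical foo_* names, not part of this file):
+ * a driver's tx_submit callback typically assigns the cookie under its
+ * channel lock and queues the descriptor:
+ *
+ *	static dma_cookie_t foo_tx_submit(struct dma_async_tx_descriptor *tx)
+ *	{
+ *		struct foo_chan *fc = to_foo_chan(tx->chan);
+ *		dma_cookie_t cookie;
+ *		unsigned long flags;
+ *
+ *		spin_lock_irqsave(&fc->lock, flags);
+ *		cookie = dma_cookie_assign(tx);
+ *		list_add_tail(&to_foo_desc(tx)->node, &fc->queued);
+ *		spin_unlock_irqrestore(&fc->lock, flags);
+ *		return cookie;
+ *	}
+ */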
+
+/**
+ * dma_cookie_complete - complete a descriptor
+ * @tx: descriptor to complete
+ *
+ * Mark this descriptor complete by updating the channel's completed
+ * cookie marker.  Zero the descriptor's cookie to prevent accidental
+ * repeated completions.
+ *
+ * Note: caller is expected to hold a lock to prevent concurrency.
+ */
+static inline void dma_cookie_complete(struct dma_async_tx_descriptor *tx)
+{
+	BUG_ON(tx->cookie < DMA_MIN_COOKIE);
+	tx->chan->completed_cookie = tx->cookie;
+	tx->cookie = 0;
+}
+
+/**
+ * dma_cookie_status - report cookie status
+ * @chan: dma channel
+ * @cookie: cookie we are interested in
+ * @state: dma_tx_state structure to return last/used cookies
+ *
+ * Report the status of the cookie, filling in the state structure if
+ * non-NULL.  No locking is required.
+ */
+static inline enum dma_status dma_cookie_status(struct dma_chan *chan,
+	dma_cookie_t cookie, struct dma_tx_state *state)
+{
+	dma_cookie_t used, complete;
+
+	used = chan->cookie;
+	complete = chan->completed_cookie;
+	barrier();
+	if (state) {
+		state->last = complete;
+		state->used = used;
+		state->residue = 0;
+	}
+	return dma_async_is_complete(cookie, complete, used);
+}
+
+static inline void dma_set_residue(struct dma_tx_state *state, u32 residue)
+{
+	if (state)
+		state->residue = residue;
+}
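+
+/*
+ * Illustrative sketch (hypothetical foo_* names): device_tx_status
+ * implementations usually combine the two helpers above:
+ *
+ *	static enum dma_status foo_tx_status(struct dma_chan *chan,
+ *			dma_cookie_t cookie, struct dma_tx_state *state)
+ *	{
+ *		enum dma_status ret = dma_cookie_status(chan, cookie, state);
+ *
+ *		if (ret != DMA_COMPLETE)
+ *			dma_set_residue(state, foo_bytes_left(chan, cookie));
+ *		return ret;
+ *	}
+ */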
+
+struct dmaengine_desc_callback {
+	dma_async_tx_callback callback;
+	dma_async_tx_callback_result callback_result;
+	void *callback_param;
+};
+
+/**
+ * dmaengine_desc_get_callback - get the passed in callback function
+ * @tx: tx descriptor
+ * @cb: temp struct to hold the callback info
+ *
+ * Fill the passed-in cb struct with what's available in the passed-in
+ * tx descriptor struct.
+ * No locking is required.
+ */
+static inline void
+dmaengine_desc_get_callback(struct dma_async_tx_descriptor *tx,
+			    struct dmaengine_desc_callback *cb)
+{
+	cb->callback = tx->callback;
+	cb->callback_result = tx->callback_result;
+	cb->callback_param = tx->callback_param;
+}
+
+/**
+ * dmaengine_desc_callback_invoke - call the callback function in cb struct
+ * @cb: temp struct that is holding the callback info
+ * @result: transaction result
+ *
+ * Call the callback function provided in the cb struct with the parameter
+ * in the cb struct.
+ * Locking is dependent on the driver.
+ */
+static inline void
+dmaengine_desc_callback_invoke(struct dmaengine_desc_callback *cb,
+			       const struct dmaengine_result *result)
+{
+	struct dmaengine_result dummy_result = {
+		.result = DMA_TRANS_NOERROR,
+		.residue = 0
+	};
+
+	if (cb->callback_result) {
+		if (!result)
+			result = &dummy_result;
+		cb->callback_result(cb->callback_param, result);
+	} else if (cb->callback) {
+		cb->callback(cb->callback_param);
+	}
+}
+
+/**
+ * dmaengine_desc_get_callback_invoke - get the callback in tx descriptor and
+ * 					then immediately call the callback.
+ * @tx: dma async tx descriptor
+ * @result: transaction result
+ *
+ * Call dmaengine_desc_get_callback() and dmaengine_desc_callback_invoke()
+ * in a single function since no work is necessary in between for the driver.
+ * Locking is dependent on the driver.
+ */
+static inline void
+dmaengine_desc_get_callback_invoke(struct dma_async_tx_descriptor *tx,
+				   const struct dmaengine_result *result)
+{
+	struct dmaengine_desc_callback cb;
+
+	dmaengine_desc_get_callback(tx, &cb);
+	dmaengine_desc_callback_invoke(&cb, result);
+}
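+
+/*
+ * Illustrative completion-path sketch (hypothetical names): drivers normally
+ * capture the callback under their lock, mark the cookie complete, and only
+ * invoke the callback after dropping the lock:
+ *
+ *	spin_lock_irqsave(&fc->lock, flags);
+ *	dma_cookie_complete(&desc->tx);
+ *	dmaengine_desc_get_callback(&desc->tx, &cb);
+ *	spin_unlock_irqrestore(&fc->lock, flags);
+ *	dmaengine_desc_callback_invoke(&cb, NULL);
+ */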
+
+/**
+ * dmaengine_desc_callback_valid - verify the callback is valid in cb
+ * @cb: callback info struct
+ *
+ * Return a bool indicating whether a callback is set in cb.
+ * No locking is required.
+ */
+static inline bool
+dmaengine_desc_callback_valid(struct dmaengine_desc_callback *cb)
+{
+	return (cb->callback) ? true : false;
+}
+
+#endif
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
new file mode 100644
index 0000000..aa1712b
--- /dev/null
+++ b/drivers/dma/dmatest.c
@@ -0,0 +1,1107 @@
+/*
+ * DMA Engine test module
+ *
+ * Copyright (C) 2007 Atmel Corporation
+ * Copyright (C) 2013 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/freezer.h>
+#include <linux/init.h>
+#include <linux/kthread.h>
+#include <linux/sched/task.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+
+static unsigned int test_buf_size = 16384;
+module_param(test_buf_size, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(test_buf_size, "Size of the memcpy test buffer");
+
+static char test_channel[20];
+module_param_string(channel, test_channel, sizeof(test_channel),
+		S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(channel, "Bus ID of the channel to test (default: any)");
+
+static char test_device[32];
+module_param_string(device, test_device, sizeof(test_device),
+		S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(device, "Bus ID of the DMA Engine to test (default: any)");
+
+static unsigned int threads_per_chan = 1;
+module_param(threads_per_chan, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(threads_per_chan,
+		"Number of threads to start per channel (default: 1)");
+
+static unsigned int max_channels;
+module_param(max_channels, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(max_channels,
+		"Maximum number of channels to use (default: all)");
+
+static unsigned int iterations;
+module_param(iterations, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(iterations,
+		"Iterations before stopping test (default: infinite)");
+
+static unsigned int dmatest;
+module_param(dmatest, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(dmatest,
+		"dmatest 0-memcpy 1-memset (default: 0)");
+
+static unsigned int xor_sources = 3;
+module_param(xor_sources, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(xor_sources,
+		"Number of xor source buffers (default: 3)");
+
+static unsigned int pq_sources = 3;
+module_param(pq_sources, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(pq_sources,
+		"Number of p+q source buffers (default: 3)");
+
+static int timeout = 3000;
+module_param(timeout, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(timeout, "Transfer Timeout in msec (default: 3000), "
+		 "Pass -1 for infinite timeout");
+
+static bool noverify;
+module_param(noverify, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(noverify, "Disable data verification (default: verify)");
+
+static bool norandom;
+module_param(norandom, bool, 0644);
+MODULE_PARM_DESC(norandom, "Disable random offset setup (default: random)");
+
+static bool verbose;
+module_param(verbose, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(verbose, "Enable \"success\" result messages (default: off)");
+
+/**
+ * struct dmatest_params - test parameters.
+ * @buf_size:		size of the memcpy test buffer
+ * @channel:		bus ID of the channel to test
+ * @device:		bus ID of the DMA Engine to test
+ * @threads_per_chan:	number of threads to start per channel
+ * @max_channels:	maximum number of channels to use
+ * @iterations:		iterations before stopping test
+ * @xor_sources:	number of xor source buffers
+ * @pq_sources:		number of p+q source buffers
+ * @timeout:		transfer timeout in msec, -1 for infinite timeout
+ * @noverify:		disable data verification
+ * @norandom:		disable random offset setup
+ */
+struct dmatest_params {
+	unsigned int	buf_size;
+	char		channel[20];
+	char		device[32];
+	unsigned int	threads_per_chan;
+	unsigned int	max_channels;
+	unsigned int	iterations;
+	unsigned int	xor_sources;
+	unsigned int	pq_sources;
+	int		timeout;
+	bool		noverify;
+	bool		norandom;
+};
+
+/**
+ * struct dmatest_info - test information.
+ * @params:		test parameters
+ * @lock:		access protection to the fields of this structure
+ */
+static struct dmatest_info {
+	/* Test parameters */
+	struct dmatest_params	params;
+
+	/* Internal state */
+	struct list_head	channels;
+	unsigned int		nr_channels;
+	struct mutex		lock;
+	bool			did_init;
+} test_info = {
+	.channels = LIST_HEAD_INIT(test_info.channels),
+	.lock = __MUTEX_INITIALIZER(test_info.lock),
+};
+
+static int dmatest_run_set(const char *val, const struct kernel_param *kp);
+static int dmatest_run_get(char *val, const struct kernel_param *kp);
+static const struct kernel_param_ops run_ops = {
+	.set = dmatest_run_set,
+	.get = dmatest_run_get,
+};
+static bool dmatest_run;
+module_param_cb(run, &run_ops, &dmatest_run, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(run, "Run the test (default: false)");
+
+/* Maximum amount of mismatched bytes in buffer to print */
+#define MAX_ERROR_COUNT		32
+
+/*
+ * Initialization patterns. All bytes in the source buffer have bit 7
+ * set, all bytes in the destination buffer have bit 7 cleared.
+ *
+ * Bit 6 is set for all bytes which are to be copied by the DMA
+ * engine. Bit 5 is set for all bytes which are to be overwritten by
+ * the DMA engine.
+ *
+ * The remaining bits are the inverse of a counter which increments by
+ * one for each byte address.
+ */
+#define PATTERN_SRC		0x80
+#define PATTERN_DST		0x00
+#define PATTERN_COPY		0x40
+#define PATTERN_OVERWRITE	0x20
+#define PATTERN_COUNT_MASK	0x1f
+#define PATTERN_MEMSET_IDX	0x01
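+
+/*
+ * Worked example of the encoding above: the source byte at offset 3 inside
+ * the region to be copied is PATTERN_SRC | PATTERN_COPY | (~3 & 0x1f)
+ * = 0x80 | 0x40 | 0x1c = 0xdc, while the matching destination byte starts
+ * out as PATTERN_DST | PATTERN_OVERWRITE | 0x1c = 0x3c.
+ */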
+
+/* poor man's completion - we want to use wait_event_freezable() on it */
+struct dmatest_done {
+	bool			done;
+	wait_queue_head_t	*wait;
+};
+
+struct dmatest_thread {
+	struct list_head	node;
+	struct dmatest_info	*info;
+	struct task_struct	*task;
+	struct dma_chan		*chan;
+	u8			**srcs;
+	u8			**usrcs;
+	u8			**dsts;
+	u8			**udsts;
+	enum dma_transaction_type type;
+	wait_queue_head_t done_wait;
+	struct dmatest_done test_done;
+	bool			done;
+};
+
+struct dmatest_chan {
+	struct list_head	node;
+	struct dma_chan		*chan;
+	struct list_head	threads;
+};
+
+static DECLARE_WAIT_QUEUE_HEAD(thread_wait);
+static bool wait;
+
+static bool is_threaded_test_run(struct dmatest_info *info)
+{
+	struct dmatest_chan *dtc;
+
+	list_for_each_entry(dtc, &info->channels, node) {
+		struct dmatest_thread *thread;
+
+		list_for_each_entry(thread, &dtc->threads, node) {
+			if (!thread->done)
+				return true;
+		}
+	}
+
+	return false;
+}
+
+static int dmatest_wait_get(char *val, const struct kernel_param *kp)
+{
+	struct dmatest_info *info = &test_info;
+	struct dmatest_params *params = &info->params;
+
+	if (params->iterations)
+		wait_event(thread_wait, !is_threaded_test_run(info));
+	wait = true;
+	return param_get_bool(val, kp);
+}
+
+static const struct kernel_param_ops wait_ops = {
+	.get = dmatest_wait_get,
+	.set = param_set_bool,
+};
+module_param_cb(wait, &wait_ops, &wait, S_IRUGO);
+MODULE_PARM_DESC(wait, "Wait for tests to complete (default: false)");
+
+static bool dmatest_match_channel(struct dmatest_params *params,
+		struct dma_chan *chan)
+{
+	if (params->channel[0] == '\0')
+		return true;
+	return strcmp(dma_chan_name(chan), params->channel) == 0;
+}
+
+static bool dmatest_match_device(struct dmatest_params *params,
+		struct dma_device *device)
+{
+	if (params->device[0] == '\0')
+		return true;
+	return strcmp(dev_name(device->dev), params->device) == 0;
+}
+
+static unsigned long dmatest_random(void)
+{
+	unsigned long buf;
+
+	prandom_bytes(&buf, sizeof(buf));
+	return buf;
+}
+
+static inline u8 gen_inv_idx(u8 index, bool is_memset)
+{
+	u8 val = is_memset ? PATTERN_MEMSET_IDX : index;
+
+	return ~val & PATTERN_COUNT_MASK;
+}
+
+static inline u8 gen_src_value(u8 index, bool is_memset)
+{
+	return PATTERN_SRC | gen_inv_idx(index, is_memset);
+}
+
+static inline u8 gen_dst_value(u8 index, bool is_memset)
+{
+	return PATTERN_DST | gen_inv_idx(index, is_memset);
+}
+
+static void dmatest_init_srcs(u8 **bufs, unsigned int start, unsigned int len,
+		unsigned int buf_size, bool is_memset)
+{
+	unsigned int i;
+	u8 *buf;
+
+	for (; (buf = *bufs); bufs++) {
+		for (i = 0; i < start; i++)
+			buf[i] = gen_src_value(i, is_memset);
+		for ( ; i < start + len; i++)
+			buf[i] = gen_src_value(i, is_memset) | PATTERN_COPY;
+		for ( ; i < buf_size; i++)
+			buf[i] = gen_src_value(i, is_memset);
+		buf++;
+	}
+}
+
+static void dmatest_init_dsts(u8 **bufs, unsigned int start, unsigned int len,
+		unsigned int buf_size, bool is_memset)
+{
+	unsigned int i;
+	u8 *buf;
+
+	for (; (buf = *bufs); bufs++) {
+		for (i = 0; i < start; i++)
+			buf[i] = gen_dst_value(i, is_memset);
+		for ( ; i < start + len; i++)
+			buf[i] = gen_dst_value(i, is_memset) |
+						PATTERN_OVERWRITE;
+		for ( ; i < buf_size; i++)
+			buf[i] = gen_dst_value(i, is_memset);
+	}
+}
+
+static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index,
+		unsigned int counter, bool is_srcbuf, bool is_memset)
+{
+	u8		diff = actual ^ pattern;
+	u8		expected = pattern | gen_inv_idx(counter, is_memset);
+	const char	*thread_name = current->comm;
+
+	if (is_srcbuf)
+		pr_warn("%s: srcbuf[0x%x] overwritten! Expected %02x, got %02x\n",
+			thread_name, index, expected, actual);
+	else if ((pattern & PATTERN_COPY)
+			&& (diff & (PATTERN_COPY | PATTERN_OVERWRITE)))
+		pr_warn("%s: dstbuf[0x%x] not copied! Expected %02x, got %02x\n",
+			thread_name, index, expected, actual);
+	else if (diff & PATTERN_SRC)
+		pr_warn("%s: dstbuf[0x%x] was copied! Expected %02x, got %02x\n",
+			thread_name, index, expected, actual);
+	else
+		pr_warn("%s: dstbuf[0x%x] mismatch! Expected %02x, got %02x\n",
+			thread_name, index, expected, actual);
+}
+
+static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
+		unsigned int end, unsigned int counter, u8 pattern,
+		bool is_srcbuf, bool is_memset)
+{
+	unsigned int i;
+	unsigned int error_count = 0;
+	u8 actual;
+	u8 expected;
+	u8 *buf;
+	unsigned int counter_orig = counter;
+
+	for (; (buf = *bufs); bufs++) {
+		counter = counter_orig;
+		for (i = start; i < end; i++) {
+			actual = buf[i];
+			expected = pattern | gen_inv_idx(counter, is_memset);
+			if (actual != expected) {
+				if (error_count < MAX_ERROR_COUNT)
+					dmatest_mismatch(actual, pattern, i,
+							 counter, is_srcbuf,
+							 is_memset);
+				error_count++;
+			}
+			counter++;
+		}
+	}
+
+	if (error_count > MAX_ERROR_COUNT)
+		pr_warn("%s: %u errors suppressed\n",
+			current->comm, error_count - MAX_ERROR_COUNT);
+
+	return error_count;
+}
+
+
+static void dmatest_callback(void *arg)
+{
+	struct dmatest_done *done = arg;
+	struct dmatest_thread *thread =
+		container_of(done, struct dmatest_thread, test_done);
+	if (!thread->done) {
+		done->done = true;
+		wake_up_all(done->wait);
+	} else {
+		/*
+		 * If thread->done, it means that this callback occurred
+		 * after the parent thread has cleaned up. This can
+		 * happen if the driver doesn't implement the
+		 * terminate_all() functionality and a dma operation
+		 * did not occur within the timeout period.
+		 */
+		WARN(1, "dmatest: Kernel memory may be corrupted!!\n");
+	}
+}
+
+static unsigned int min_odd(unsigned int x, unsigned int y)
+{
+	unsigned int val = min(x, y);
+
+	return val % 2 ? val : val - 1;
+}
+
+static void result(const char *err, unsigned int n, unsigned int src_off,
+		   unsigned int dst_off, unsigned int len, unsigned long data)
+{
+	pr_info("%s: result #%u: '%s' with src_off=0x%x dst_off=0x%x len=0x%x (%lu)\n",
+		current->comm, n, err, src_off, dst_off, len, data);
+}
+
+static void dbg_result(const char *err, unsigned int n, unsigned int src_off,
+		       unsigned int dst_off, unsigned int len,
+		       unsigned long data)
+{
+	pr_debug("%s: result #%u: '%s' with src_off=0x%x dst_off=0x%x len=0x%x (%lu)\n",
+		 current->comm, n, err, src_off, dst_off, len, data);
+}
+
+#define verbose_result(err, n, src_off, dst_off, len, data) ({	\
+	if (verbose)						\
+		result(err, n, src_off, dst_off, len, data);	\
+	else							\
+		dbg_result(err, n, src_off, dst_off, len, data);\
+})
+
+static unsigned long long dmatest_persec(s64 runtime, unsigned int val)
+{
+	unsigned long long per_sec = 1000000;
+
+	if (runtime <= 0)
+		return 0;
+
+	/* drop precision until runtime is 32-bits */
+	while (runtime > UINT_MAX) {
+		runtime >>= 1;
+		per_sec <<= 1;
+	}
+
+	per_sec *= val;
+	do_div(per_sec, runtime);
+	return per_sec;
+}
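+
+/*
+ * Example: 1000 completed tests over a runtime of 2,000,000 us gives
+ * 1000000 * 1000 / 2000000 = 500 iops; dmatest_KBs() below reuses the same
+ * math with the byte count shifted down to KiB.
+ */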
+
+static unsigned long long dmatest_KBs(s64 runtime, unsigned long long len)
+{
+	return dmatest_persec(runtime, len >> 10);
+}
+
+/*
+ * This function repeatedly tests DMA transfers of various lengths and
+ * offsets for a given operation type until it is told to exit by
+ * kthread_stop(). There may be multiple threads running this function
+ * in parallel for a single channel, and there may be multiple channels
+ * being tested in parallel.
+ *
+ * Before each test, the source and destination buffer is initialized
+ * with a known pattern. This pattern is different depending on
+ * whether it's in an area which is supposed to be copied or
+ * overwritten, and different in the source and destination buffers.
+ * So if the DMA engine doesn't copy exactly what we tell it to copy,
+ * we'll notice.
+ */
+static int dmatest_func(void *data)
+{
+	struct dmatest_thread	*thread = data;
+	struct dmatest_done	*done = &thread->test_done;
+	struct dmatest_info	*info;
+	struct dmatest_params	*params;
+	struct dma_chan		*chan;
+	struct dma_device	*dev;
+	unsigned int		error_count;
+	unsigned int		failed_tests = 0;
+	unsigned int		total_tests = 0;
+	dma_cookie_t		cookie;
+	enum dma_status		status;
+	enum dma_ctrl_flags 	flags;
+	u8			*pq_coefs = NULL;
+	int			ret;
+	int			src_cnt;
+	int			dst_cnt;
+	int			i;
+	ktime_t			ktime, start, diff;
+	ktime_t			filltime = 0;
+	ktime_t			comparetime = 0;
+	s64			runtime = 0;
+	unsigned long long	total_len = 0;
+	u8			align = 0;
+	bool			is_memset = false;
+	dma_addr_t		*srcs;
+	dma_addr_t		*dma_pq;
+
+	set_freezable();
+
+	ret = -ENOMEM;
+
+	smp_rmb();
+	info = thread->info;
+	params = &info->params;
+	chan = thread->chan;
+	dev = chan->device;
+	if (thread->type == DMA_MEMCPY) {
+		align = dev->copy_align;
+		src_cnt = dst_cnt = 1;
+	} else if (thread->type == DMA_MEMSET) {
+		align = dev->fill_align;
+		src_cnt = dst_cnt = 1;
+		is_memset = true;
+	} else if (thread->type == DMA_XOR) {
+		/* force odd to ensure dst = src */
+		src_cnt = min_odd(params->xor_sources | 1, dev->max_xor);
+		dst_cnt = 1;
+		align = dev->xor_align;
+	} else if (thread->type == DMA_PQ) {
+		/* force odd to ensure dst = src */
+		src_cnt = min_odd(params->pq_sources | 1, dma_maxpq(dev, 0));
+		dst_cnt = 2;
+		align = dev->pq_align;
+
+		pq_coefs = kmalloc(params->pq_sources + 1, GFP_KERNEL);
+		if (!pq_coefs)
+			goto err_thread_type;
+
+		for (i = 0; i < src_cnt; i++)
+			pq_coefs[i] = 1;
+	} else
+		goto err_thread_type;
+
+	thread->srcs = kcalloc(src_cnt + 1, sizeof(u8 *), GFP_KERNEL);
+	if (!thread->srcs)
+		goto err_srcs;
+
+	thread->usrcs = kcalloc(src_cnt + 1, sizeof(u8 *), GFP_KERNEL);
+	if (!thread->usrcs)
+		goto err_usrcs;
+
+	for (i = 0; i < src_cnt; i++) {
+		thread->usrcs[i] = kmalloc(params->buf_size + align,
+					   GFP_KERNEL);
+		if (!thread->usrcs[i])
+			goto err_srcbuf;
+
+		/* align srcs to alignment restriction */
+		if (align)
+			thread->srcs[i] = PTR_ALIGN(thread->usrcs[i], align);
+		else
+			thread->srcs[i] = thread->usrcs[i];
+	}
+	thread->srcs[i] = NULL;
+
+	thread->dsts = kcalloc(dst_cnt + 1, sizeof(u8 *), GFP_KERNEL);
+	if (!thread->dsts)
+		goto err_dsts;
+
+	thread->udsts = kcalloc(dst_cnt + 1, sizeof(u8 *), GFP_KERNEL);
+	if (!thread->udsts)
+		goto err_udsts;
+
+	for (i = 0; i < dst_cnt; i++) {
+		thread->udsts[i] = kmalloc(params->buf_size + align,
+					   GFP_KERNEL);
+		if (!thread->udsts[i])
+			goto err_dstbuf;
+
+		/* align dsts to alignment restriction */
+		if (align)
+			thread->dsts[i] = PTR_ALIGN(thread->udsts[i], align);
+		else
+			thread->dsts[i] = thread->udsts[i];
+	}
+	thread->dsts[i] = NULL;
+
+	set_user_nice(current, 10);
+
+	srcs = kcalloc(src_cnt, sizeof(dma_addr_t), GFP_KERNEL);
+	if (!srcs)
+		goto err_dstbuf;
+
+	dma_pq = kcalloc(dst_cnt, sizeof(dma_addr_t), GFP_KERNEL);
+	if (!dma_pq)
+		goto err_srcs_array;
+
+	/*
+	 * src and dst buffers are freed by ourselves below
+	 */
+	flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
+
+	ktime = ktime_get();
+	while (!kthread_should_stop()
+	       && !(params->iterations && total_tests >= params->iterations)) {
+		struct dma_async_tx_descriptor *tx = NULL;
+		struct dmaengine_unmap_data *um;
+		dma_addr_t *dsts;
+		unsigned int src_off, dst_off, len;
+
+		total_tests++;
+
+		/* Check if buffer count fits into map count variable (u8) */
+		if ((src_cnt + dst_cnt) >= 255) {
+			pr_err("too many buffers (%d of 255 supported)\n",
+			       src_cnt + dst_cnt);
+			break;
+		}
+
+		if (1 << align > params->buf_size) {
+			pr_err("%u-byte buffer too small for %d-byte alignment\n",
+			       params->buf_size, 1 << align);
+			break;
+		}
+
+		if (params->norandom)
+			len = params->buf_size;
+		else
+			len = dmatest_random() % params->buf_size + 1;
+
+		len = (len >> align) << align;
+		if (!len)
+			len = 1 << align;
+
+		total_len += len;
+
+		if (params->norandom) {
+			src_off = 0;
+			dst_off = 0;
+		} else {
+			src_off = dmatest_random() % (params->buf_size - len + 1);
+			dst_off = dmatest_random() % (params->buf_size - len + 1);
+
+			src_off = (src_off >> align) << align;
+			dst_off = (dst_off >> align) << align;
+		}
+
+		if (!params->noverify) {
+			start = ktime_get();
+			dmatest_init_srcs(thread->srcs, src_off, len,
+					  params->buf_size, is_memset);
+			dmatest_init_dsts(thread->dsts, dst_off, len,
+					  params->buf_size, is_memset);
+
+			diff = ktime_sub(ktime_get(), start);
+			filltime = ktime_add(filltime, diff);
+		}
+
+		um = dmaengine_get_unmap_data(dev->dev, src_cnt + dst_cnt,
+					      GFP_KERNEL);
+		if (!um) {
+			failed_tests++;
+			result("unmap data NULL", total_tests,
+			       src_off, dst_off, len, ret);
+			continue;
+		}
+
+		um->len = params->buf_size;
+		for (i = 0; i < src_cnt; i++) {
+			void *buf = thread->srcs[i];
+			struct page *pg = virt_to_page(buf);
+			unsigned long pg_off = offset_in_page(buf);
+
+			um->addr[i] = dma_map_page(dev->dev, pg, pg_off,
+						   um->len, DMA_TO_DEVICE);
+			srcs[i] = um->addr[i] + src_off;
+			ret = dma_mapping_error(dev->dev, um->addr[i]);
+			if (ret) {
+				dmaengine_unmap_put(um);
+				result("src mapping error", total_tests,
+				       src_off, dst_off, len, ret);
+				failed_tests++;
+				continue;
+			}
+			um->to_cnt++;
+		}
+		/* map with DMA_BIDIRECTIONAL to force writeback/invalidate */
+		dsts = &um->addr[src_cnt];
+		for (i = 0; i < dst_cnt; i++) {
+			void *buf = thread->dsts[i];
+			struct page *pg = virt_to_page(buf);
+			unsigned long pg_off = offset_in_page(buf);
+
+			dsts[i] = dma_map_page(dev->dev, pg, pg_off, um->len,
+					       DMA_BIDIRECTIONAL);
+			ret = dma_mapping_error(dev->dev, dsts[i]);
+			if (ret) {
+				dmaengine_unmap_put(um);
+				result("dst mapping error", total_tests,
+				       src_off, dst_off, len, ret);
+				failed_tests++;
+				continue;
+			}
+			um->bidi_cnt++;
+		}
+
+		if (thread->type == DMA_MEMCPY)
+			tx = dev->device_prep_dma_memcpy(chan,
+							 dsts[0] + dst_off,
+							 srcs[0], len, flags);
+		else if (thread->type == DMA_MEMSET)
+			tx = dev->device_prep_dma_memset(chan,
+						dsts[0] + dst_off,
+						*(thread->srcs[0] + src_off),
+						len, flags);
+		else if (thread->type == DMA_XOR)
+			tx = dev->device_prep_dma_xor(chan,
+						      dsts[0] + dst_off,
+						      srcs, src_cnt,
+						      len, flags);
+		else if (thread->type == DMA_PQ) {
+			for (i = 0; i < dst_cnt; i++)
+				dma_pq[i] = dsts[i] + dst_off;
+			tx = dev->device_prep_dma_pq(chan, dma_pq, srcs,
+						     src_cnt, pq_coefs,
+						     len, flags);
+		}
+
+		if (!tx) {
+			dmaengine_unmap_put(um);
+			result("prep error", total_tests, src_off,
+			       dst_off, len, ret);
+			msleep(100);
+			failed_tests++;
+			continue;
+		}
+
+		done->done = false;
+		tx->callback = dmatest_callback;
+		tx->callback_param = done;
+		cookie = tx->tx_submit(tx);
+
+		if (dma_submit_error(cookie)) {
+			dmaengine_unmap_put(um);
+			result("submit error", total_tests, src_off,
+			       dst_off, len, ret);
+			msleep(100);
+			failed_tests++;
+			continue;
+		}
+		dma_async_issue_pending(chan);
+
+		wait_event_freezable_timeout(thread->done_wait, done->done,
+					     msecs_to_jiffies(params->timeout));
+
+		status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
+
+		if (!done->done) {
+			dmaengine_unmap_put(um);
+			result("test timed out", total_tests, src_off, dst_off,
+			       len, 0);
+			failed_tests++;
+			continue;
+		} else if (status != DMA_COMPLETE) {
+			dmaengine_unmap_put(um);
+			result(status == DMA_ERROR ?
+			       "completion error status" :
+			       "completion busy status", total_tests, src_off,
+			       dst_off, len, ret);
+			failed_tests++;
+			continue;
+		}
+
+		dmaengine_unmap_put(um);
+
+		if (params->noverify) {
+			verbose_result("test passed", total_tests, src_off,
+				       dst_off, len, 0);
+			continue;
+		}
+
+		start = ktime_get();
+		pr_debug("%s: verifying source buffer...\n", current->comm);
+		error_count = dmatest_verify(thread->srcs, 0, src_off,
+				0, PATTERN_SRC, true, is_memset);
+		error_count += dmatest_verify(thread->srcs, src_off,
+				src_off + len, src_off,
+				PATTERN_SRC | PATTERN_COPY, true, is_memset);
+		error_count += dmatest_verify(thread->srcs, src_off + len,
+				params->buf_size, src_off + len,
+				PATTERN_SRC, true, is_memset);
+
+		pr_debug("%s: verifying dest buffer...\n", current->comm);
+		error_count += dmatest_verify(thread->dsts, 0, dst_off,
+				0, PATTERN_DST, false, is_memset);
+
+		error_count += dmatest_verify(thread->dsts, dst_off,
+				dst_off + len, src_off,
+				PATTERN_SRC | PATTERN_COPY, false, is_memset);
+
+		error_count += dmatest_verify(thread->dsts, dst_off + len,
+				params->buf_size, dst_off + len,
+				PATTERN_DST, false, is_memset);
+
+		diff = ktime_sub(ktime_get(), start);
+		comparetime = ktime_add(comparetime, diff);
+
+		if (error_count) {
+			result("data error", total_tests, src_off, dst_off,
+			       len, error_count);
+			failed_tests++;
+		} else {
+			verbose_result("test passed", total_tests, src_off,
+				       dst_off, len, 0);
+		}
+	}
+	ktime = ktime_sub(ktime_get(), ktime);
+	ktime = ktime_sub(ktime, comparetime);
+	ktime = ktime_sub(ktime, filltime);
+	runtime = ktime_to_us(ktime);
+
+	ret = 0;
+	kfree(dma_pq);
+err_srcs_array:
+	kfree(srcs);
+err_dstbuf:
+	for (i = 0; thread->udsts[i]; i++)
+		kfree(thread->udsts[i]);
+	kfree(thread->udsts);
+err_udsts:
+	kfree(thread->dsts);
+err_dsts:
+err_srcbuf:
+	for (i = 0; thread->usrcs[i]; i++)
+		kfree(thread->usrcs[i]);
+	kfree(thread->usrcs);
+err_usrcs:
+	kfree(thread->srcs);
+err_srcs:
+	kfree(pq_coefs);
+err_thread_type:
+	pr_info("%s: summary %u tests, %u failures %llu iops %llu KB/s (%d)\n",
+		current->comm, total_tests, failed_tests,
+		dmatest_persec(runtime, total_tests),
+		dmatest_KBs(runtime, total_len), ret);
+
+	/* terminate all transfers on specified channels */
+	if (ret || failed_tests)
+		dmaengine_terminate_all(chan);
+
+	thread->done = true;
+	wake_up(&thread_wait);
+
+	return ret;
+}
+
+static void dmatest_cleanup_channel(struct dmatest_chan *dtc)
+{
+	struct dmatest_thread	*thread;
+	struct dmatest_thread	*_thread;
+	int			ret;
+
+	list_for_each_entry_safe(thread, _thread, &dtc->threads, node) {
+		ret = kthread_stop(thread->task);
+		pr_debug("thread %s exited with status %d\n",
+			 thread->task->comm, ret);
+		list_del(&thread->node);
+		put_task_struct(thread->task);
+		kfree(thread);
+	}
+
+	/* terminate all transfers on specified channels */
+	dmaengine_terminate_all(dtc->chan);
+
+	kfree(dtc);
+}
+
+static int dmatest_add_threads(struct dmatest_info *info,
+		struct dmatest_chan *dtc, enum dma_transaction_type type)
+{
+	struct dmatest_params *params = &info->params;
+	struct dmatest_thread *thread;
+	struct dma_chan *chan = dtc->chan;
+	char *op;
+	unsigned int i;
+
+	if (type == DMA_MEMCPY)
+		op = "copy";
+	else if (type == DMA_MEMSET)
+		op = "set";
+	else if (type == DMA_XOR)
+		op = "xor";
+	else if (type == DMA_PQ)
+		op = "pq";
+	else
+		return -EINVAL;
+
+	for (i = 0; i < params->threads_per_chan; i++) {
+		thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL);
+		if (!thread) {
+			pr_warn("No memory for %s-%s%u\n",
+				dma_chan_name(chan), op, i);
+			break;
+		}
+		thread->info = info;
+		thread->chan = dtc->chan;
+		thread->type = type;
+		thread->test_done.wait = &thread->done_wait;
+		init_waitqueue_head(&thread->done_wait);
+		smp_wmb();
+		thread->task = kthread_create(dmatest_func, thread, "%s-%s%u",
+				dma_chan_name(chan), op, i);
+		if (IS_ERR(thread->task)) {
+			pr_warn("Failed to create thread %s-%s%u\n",
+				dma_chan_name(chan), op, i);
+			kfree(thread);
+			break;
+		}
+
+		/* srcbuf and dstbuf are allocated by the thread itself */
+		get_task_struct(thread->task);
+		list_add_tail(&thread->node, &dtc->threads);
+		wake_up_process(thread->task);
+	}
+
+	return i;
+}
+
+static int dmatest_add_channel(struct dmatest_info *info,
+		struct dma_chan *chan)
+{
+	struct dmatest_chan	*dtc;
+	struct dma_device	*dma_dev = chan->device;
+	unsigned int		thread_count = 0;
+	int cnt;
+
+	dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
+	if (!dtc) {
+		pr_warn("No memory for %s\n", dma_chan_name(chan));
+		return -ENOMEM;
+	}
+
+	dtc->chan = chan;
+	INIT_LIST_HEAD(&dtc->threads);
+
+	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+		if (dmatest == 0) {
+			cnt = dmatest_add_threads(info, dtc, DMA_MEMCPY);
+			thread_count += cnt > 0 ? cnt : 0;
+		}
+	}
+
+	if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
+		if (dmatest == 1) {
+			cnt = dmatest_add_threads(info, dtc, DMA_MEMSET);
+			thread_count += cnt > 0 ? cnt : 0;
+		}
+	}
+
+	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+		cnt = dmatest_add_threads(info, dtc, DMA_XOR);
+		thread_count += cnt > 0 ? cnt : 0;
+	}
+	if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+		cnt = dmatest_add_threads(info, dtc, DMA_PQ);
+		thread_count += cnt > 0 ? cnt : 0;
+	}
+
+	pr_info("Started %u threads using %s\n",
+		thread_count, dma_chan_name(chan));
+
+	list_add_tail(&dtc->node, &info->channels);
+	info->nr_channels++;
+
+	return 0;
+}
+
+static bool filter(struct dma_chan *chan, void *param)
+{
+	struct dmatest_params *params = param;
+
+	if (!dmatest_match_channel(params, chan) ||
+	    !dmatest_match_device(params, chan->device))
+		return false;
+	else
+		return true;
+}
+
+static void request_channels(struct dmatest_info *info,
+			     enum dma_transaction_type type)
+{
+	dma_cap_mask_t mask;
+
+	dma_cap_zero(mask);
+	dma_cap_set(type, mask);
+	for (;;) {
+		struct dmatest_params *params = &info->params;
+		struct dma_chan *chan;
+
+		chan = dma_request_channel(mask, filter, params);
+		if (chan) {
+			if (dmatest_add_channel(info, chan)) {
+				dma_release_channel(chan);
+				break; /* add_channel failed, punt */
+			}
+		} else
+			break; /* no more channels available */
+		if (params->max_channels &&
+		    info->nr_channels >= params->max_channels)
+			break; /* we have all we need */
+	}
+}
+
+static void run_threaded_test(struct dmatest_info *info)
+{
+	struct dmatest_params *params = &info->params;
+
+	/* Copy test parameters */
+	params->buf_size = test_buf_size;
+	strlcpy(params->channel, strim(test_channel), sizeof(params->channel));
+	strlcpy(params->device, strim(test_device), sizeof(params->device));
+	params->threads_per_chan = threads_per_chan;
+	params->max_channels = max_channels;
+	params->iterations = iterations;
+	params->xor_sources = xor_sources;
+	params->pq_sources = pq_sources;
+	params->timeout = timeout;
+	params->noverify = noverify;
+	params->norandom = norandom;
+
+	request_channels(info, DMA_MEMCPY);
+	request_channels(info, DMA_MEMSET);
+	request_channels(info, DMA_XOR);
+	request_channels(info, DMA_PQ);
+}
+
+static void stop_threaded_test(struct dmatest_info *info)
+{
+	struct dmatest_chan *dtc, *_dtc;
+	struct dma_chan *chan;
+
+	list_for_each_entry_safe(dtc, _dtc, &info->channels, node) {
+		list_del(&dtc->node);
+		chan = dtc->chan;
+		dmatest_cleanup_channel(dtc);
+		pr_debug("dropped channel %s\n", dma_chan_name(chan));
+		dma_release_channel(chan);
+	}
+
+	info->nr_channels = 0;
+}
+
+static void restart_threaded_test(struct dmatest_info *info, bool run)
+{
+	/* we might be called early to set run=, defer running until all
+	 * parameters have been evaluated
+	 */
+	if (!info->did_init)
+		return;
+
+	/* Stop any running test first */
+	stop_threaded_test(info);
+
+	/* Run test with new parameters */
+	run_threaded_test(info);
+}
+
+static int dmatest_run_get(char *val, const struct kernel_param *kp)
+{
+	struct dmatest_info *info = &test_info;
+
+	mutex_lock(&info->lock);
+	if (is_threaded_test_run(info)) {
+		dmatest_run = true;
+	} else {
+		stop_threaded_test(info);
+		dmatest_run = false;
+	}
+	mutex_unlock(&info->lock);
+
+	return param_get_bool(val, kp);
+}
+
+static int dmatest_run_set(const char *val, const struct kernel_param *kp)
+{
+	struct dmatest_info *info = &test_info;
+	int ret;
+
+	mutex_lock(&info->lock);
+	ret = param_set_bool(val, kp);
+	if (ret) {
+		mutex_unlock(&info->lock);
+		return ret;
+	}
+
+	if (is_threaded_test_run(info))
+		ret = -EBUSY;
+	else if (dmatest_run)
+		restart_threaded_test(info, dmatest_run);
+
+	mutex_unlock(&info->lock);
+
+	return ret;
+}
+
+static int __init dmatest_init(void)
+{
+	struct dmatest_info *info = &test_info;
+	struct dmatest_params *params = &info->params;
+
+	if (dmatest_run) {
+		mutex_lock(&info->lock);
+		run_threaded_test(info);
+		mutex_unlock(&info->lock);
+	}
+
+	if (params->iterations && wait)
+		wait_event(thread_wait, !is_threaded_test_run(info));
+
+	/* module parameters are stable, inittime tests are started,
+	 * let userspace take over 'run' control
+	 */
+	info->did_init = true;
+
+	return 0;
+}
+/* when compiled in, wait for drivers to load first */
+late_initcall(dmatest_init);
+
+static void __exit dmatest_exit(void)
+{
+	struct dmatest_info *info = &test_info;
+
+	mutex_lock(&info->lock);
+	stop_threaded_test(info);
+	mutex_unlock(&info->lock);
+}
+module_exit(dmatest_exit);
+
+MODULE_AUTHOR("Haavard Skinnemoen (Atmel)");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/dw-axi-dmac/Makefile b/drivers/dma/dw-axi-dmac/Makefile
new file mode 100644
index 0000000..4bfa462
--- /dev/null
+++ b/drivers/dma/dw-axi-dmac/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_DW_AXI_DMAC) += dw-axi-dmac-platform.o
diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
new file mode 100644
index 0000000..c4eb55e
--- /dev/null
+++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
@@ -0,0 +1,1008 @@
+// SPDX-License-Identifier:  GPL-2.0
+// (C) 2017-2018 Synopsys, Inc. (www.synopsys.com)
+
+/*
+ * Synopsys DesignWare AXI DMA Controller driver.
+ *
+ * Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/property.h>
+#include <linux/types.h>
+
+#include "dw-axi-dmac.h"
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+/*
+ * The set of bus widths supported by the DMA controller. DW AXI DMAC supports
+ * master data bus width up to 512 bits (for both AXI master interfaces), but
+ * it depends on IP block configuration.
+ */
+#define AXI_DMA_BUSWIDTHS		  \
+	(DMA_SLAVE_BUSWIDTH_1_BYTE	| \
+	DMA_SLAVE_BUSWIDTH_2_BYTES	| \
+	DMA_SLAVE_BUSWIDTH_4_BYTES	| \
+	DMA_SLAVE_BUSWIDTH_8_BYTES	| \
+	DMA_SLAVE_BUSWIDTH_16_BYTES	| \
+	DMA_SLAVE_BUSWIDTH_32_BYTES	| \
+	DMA_SLAVE_BUSWIDTH_64_BYTES)
+
+static inline void
+axi_dma_iowrite32(struct axi_dma_chip *chip, u32 reg, u32 val)
+{
+	iowrite32(val, chip->regs + reg);
+}
+
+static inline u32 axi_dma_ioread32(struct axi_dma_chip *chip, u32 reg)
+{
+	return ioread32(chip->regs + reg);
+}
+
+static inline void
+axi_chan_iowrite32(struct axi_dma_chan *chan, u32 reg, u32 val)
+{
+	iowrite32(val, chan->chan_regs + reg);
+}
+
+static inline u32 axi_chan_ioread32(struct axi_dma_chan *chan, u32 reg)
+{
+	return ioread32(chan->chan_regs + reg);
+}
+
+static inline void
+axi_chan_iowrite64(struct axi_dma_chan *chan, u32 reg, u64 val)
+{
+	/*
+	 * We split one 64-bit write into two 32-bit writes as some HW doesn't
+	 * support 64-bit access.
+	 */
+	iowrite32(lower_32_bits(val), chan->chan_regs + reg);
+	iowrite32(upper_32_bits(val), chan->chan_regs + reg + 4);
+}
+
+static inline void axi_dma_disable(struct axi_dma_chip *chip)
+{
+	u32 val;
+
+	val = axi_dma_ioread32(chip, DMAC_CFG);
+	val &= ~DMAC_EN_MASK;
+	axi_dma_iowrite32(chip, DMAC_CFG, val);
+}
+
+static inline void axi_dma_enable(struct axi_dma_chip *chip)
+{
+	u32 val;
+
+	val = axi_dma_ioread32(chip, DMAC_CFG);
+	val |= DMAC_EN_MASK;
+	axi_dma_iowrite32(chip, DMAC_CFG, val);
+}
+
+static inline void axi_dma_irq_disable(struct axi_dma_chip *chip)
+{
+	u32 val;
+
+	val = axi_dma_ioread32(chip, DMAC_CFG);
+	val &= ~INT_EN_MASK;
+	axi_dma_iowrite32(chip, DMAC_CFG, val);
+}
+
+static inline void axi_dma_irq_enable(struct axi_dma_chip *chip)
+{
+	u32 val;
+
+	val = axi_dma_ioread32(chip, DMAC_CFG);
+	val |= INT_EN_MASK;
+	axi_dma_iowrite32(chip, DMAC_CFG, val);
+}
+
+static inline void axi_chan_irq_disable(struct axi_dma_chan *chan, u32 irq_mask)
+{
+	u32 val;
+
+	if (likely(irq_mask == DWAXIDMAC_IRQ_ALL)) {
+		axi_chan_iowrite32(chan, CH_INTSTATUS_ENA, DWAXIDMAC_IRQ_NONE);
+	} else {
+		val = axi_chan_ioread32(chan, CH_INTSTATUS_ENA);
+		val &= ~irq_mask;
+		axi_chan_iowrite32(chan, CH_INTSTATUS_ENA, val);
+	}
+}
+
+static inline void axi_chan_irq_set(struct axi_dma_chan *chan, u32 irq_mask)
+{
+	axi_chan_iowrite32(chan, CH_INTSTATUS_ENA, irq_mask);
+}
+
+static inline void axi_chan_irq_sig_set(struct axi_dma_chan *chan, u32 irq_mask)
+{
+	axi_chan_iowrite32(chan, CH_INTSIGNAL_ENA, irq_mask);
+}
+
+static inline void axi_chan_irq_clear(struct axi_dma_chan *chan, u32 irq_mask)
+{
+	axi_chan_iowrite32(chan, CH_INTCLEAR, irq_mask);
+}
+
+static inline u32 axi_chan_irq_read(struct axi_dma_chan *chan)
+{
+	return axi_chan_ioread32(chan, CH_INTSTATUS);
+}
+
+static inline void axi_chan_disable(struct axi_dma_chan *chan)
+{
+	u32 val;
+
+	val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+	val &= ~(BIT(chan->id) << DMAC_CHAN_EN_SHIFT);
+	val |=   BIT(chan->id) << DMAC_CHAN_EN_WE_SHIFT;
+	axi_dma_iowrite32(chan->chip, DMAC_CHEN, val);
+}
+
+static inline void axi_chan_enable(struct axi_dma_chan *chan)
+{
+	u32 val;
+
+	val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+	val |= BIT(chan->id) << DMAC_CHAN_EN_SHIFT |
+	       BIT(chan->id) << DMAC_CHAN_EN_WE_SHIFT;
+	axi_dma_iowrite32(chan->chip, DMAC_CHEN, val);
+}
+
+static inline bool axi_chan_is_hw_enable(struct axi_dma_chan *chan)
+{
+	u32 val;
+
+	val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+
+	return !!(val & (BIT(chan->id) << DMAC_CHAN_EN_SHIFT));
+}
+
+static void axi_dma_hw_init(struct axi_dma_chip *chip)
+{
+	u32 i;
+
+	for (i = 0; i < chip->dw->hdata->nr_channels; i++) {
+		axi_chan_irq_disable(&chip->dw->chan[i], DWAXIDMAC_IRQ_ALL);
+		axi_chan_disable(&chip->dw->chan[i]);
+	}
+}
+
+static u32 axi_chan_get_xfer_width(struct axi_dma_chan *chan, dma_addr_t src,
+				   dma_addr_t dst, size_t len)
+{
+	u32 max_width = chan->chip->dw->hdata->m_data_width;
+
+	return __ffs(src | dst | len | BIT(max_width));
+}
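+
+/*
+ * Example: with src = 0x1000, dst = 0x2004, len = 0x100 and m_data_width = 5,
+ * the OR above is 0x3124 and the lowest set bit is bit 2, so the transfer
+ * width is limited to 2 (4-byte beats) by the destination address.
+ */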
+
+static inline const char *axi_chan_name(struct axi_dma_chan *chan)
+{
+	return dma_chan_name(&chan->vc.chan);
+}
+
+static struct axi_dma_desc *axi_desc_get(struct axi_dma_chan *chan)
+{
+	struct dw_axi_dma *dw = chan->chip->dw;
+	struct axi_dma_desc *desc;
+	dma_addr_t phys;
+
+	desc = dma_pool_zalloc(dw->desc_pool, GFP_NOWAIT, &phys);
+	if (unlikely(!desc)) {
+		dev_err(chan2dev(chan), "%s: not enough descriptors available\n",
+			axi_chan_name(chan));
+		return NULL;
+	}
+
+	atomic_inc(&chan->descs_allocated);
+	INIT_LIST_HEAD(&desc->xfer_list);
+	desc->vd.tx.phys = phys;
+	desc->chan = chan;
+
+	return desc;
+}
+
+static void axi_desc_put(struct axi_dma_desc *desc)
+{
+	struct axi_dma_chan *chan = desc->chan;
+	struct dw_axi_dma *dw = chan->chip->dw;
+	struct axi_dma_desc *child, *_next;
+	unsigned int descs_put = 0;
+
+	list_for_each_entry_safe(child, _next, &desc->xfer_list, xfer_list) {
+		list_del(&child->xfer_list);
+		dma_pool_free(dw->desc_pool, child, child->vd.tx.phys);
+		descs_put++;
+	}
+
+	dma_pool_free(dw->desc_pool, desc, desc->vd.tx.phys);
+	descs_put++;
+
+	atomic_sub(descs_put, &chan->descs_allocated);
+	dev_vdbg(chan2dev(chan), "%s: %d descs put, %d still allocated\n",
+		axi_chan_name(chan), descs_put,
+		atomic_read(&chan->descs_allocated));
+}
+
+static void vchan_desc_put(struct virt_dma_desc *vdesc)
+{
+	axi_desc_put(vd_to_axi_desc(vdesc));
+}
+
+static enum dma_status
+dma_chan_tx_status(struct dma_chan *dchan, dma_cookie_t cookie,
+		  struct dma_tx_state *txstate)
+{
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(dchan, cookie, txstate);
+
+	if (chan->is_paused && ret == DMA_IN_PROGRESS)
+		ret = DMA_PAUSED;
+
+	return ret;
+}
+
+static void write_desc_llp(struct axi_dma_desc *desc, dma_addr_t adr)
+{
+	desc->lli.llp = cpu_to_le64(adr);
+}
+
+static void write_chan_llp(struct axi_dma_chan *chan, dma_addr_t adr)
+{
+	axi_chan_iowrite64(chan, CH_LLP, adr);
+}
+
+/* Called in chan locked context */
+static void axi_chan_block_xfer_start(struct axi_dma_chan *chan,
+				      struct axi_dma_desc *first)
+{
+	u32 priority = chan->chip->dw->hdata->priority[chan->id];
+	u32 reg, irq_mask;
+	u8 lms = 0; /* Select AXI0 master for LLI fetching */
+
+	if (unlikely(axi_chan_is_hw_enable(chan))) {
+		dev_err(chan2dev(chan), "%s is non-idle!\n",
+			axi_chan_name(chan));
+
+		return;
+	}
+
+	axi_dma_enable(chan->chip);
+
+	reg = (DWAXIDMAC_MBLK_TYPE_LL << CH_CFG_L_DST_MULTBLK_TYPE_POS |
+	       DWAXIDMAC_MBLK_TYPE_LL << CH_CFG_L_SRC_MULTBLK_TYPE_POS);
+	axi_chan_iowrite32(chan, CH_CFG_L, reg);
+
+	reg = (DWAXIDMAC_TT_FC_MEM_TO_MEM_DMAC << CH_CFG_H_TT_FC_POS |
+	       priority << CH_CFG_H_PRIORITY_POS |
+	       DWAXIDMAC_HS_SEL_HW << CH_CFG_H_HS_SEL_DST_POS |
+	       DWAXIDMAC_HS_SEL_HW << CH_CFG_H_HS_SEL_SRC_POS);
+	axi_chan_iowrite32(chan, CH_CFG_H, reg);
+
+	write_chan_llp(chan, first->vd.tx.phys | lms);
+
+	irq_mask = DWAXIDMAC_IRQ_DMA_TRF | DWAXIDMAC_IRQ_ALL_ERR;
+	axi_chan_irq_sig_set(chan, irq_mask);
+
+	/* Generate 'suspend' status but don't generate interrupt */
+	irq_mask |= DWAXIDMAC_IRQ_SUSPENDED;
+	axi_chan_irq_set(chan, irq_mask);
+
+	axi_chan_enable(chan);
+}
+
+static void axi_chan_start_first_queued(struct axi_dma_chan *chan)
+{
+	struct axi_dma_desc *desc;
+	struct virt_dma_desc *vd;
+
+	vd = vchan_next_desc(&chan->vc);
+	if (!vd)
+		return;
+
+	desc = vd_to_axi_desc(vd);
+	dev_vdbg(chan2dev(chan), "%s: started %u\n", axi_chan_name(chan),
+		vd->tx.cookie);
+	axi_chan_block_xfer_start(chan, desc);
+}
+
+static void dma_chan_issue_pending(struct dma_chan *dchan)
+{
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+	if (vchan_issue_pending(&chan->vc))
+		axi_chan_start_first_queued(chan);
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+static int dma_chan_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+
+	/* ASSERT: channel is idle */
+	if (axi_chan_is_hw_enable(chan)) {
+		dev_err(chan2dev(chan), "%s is non-idle!\n",
+			axi_chan_name(chan));
+		return -EBUSY;
+	}
+
+	dev_vdbg(dchan2dev(dchan), "%s: allocating\n", axi_chan_name(chan));
+
+	pm_runtime_get(chan->chip->dev);
+
+	return 0;
+}
+
+static void dma_chan_free_chan_resources(struct dma_chan *dchan)
+{
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+
+	/* ASSERT: channel is idle */
+	if (axi_chan_is_hw_enable(chan))
+		dev_err(dchan2dev(dchan), "%s is non-idle!\n",
+			axi_chan_name(chan));
+
+	axi_chan_disable(chan);
+	axi_chan_irq_disable(chan, DWAXIDMAC_IRQ_ALL);
+
+	vchan_free_chan_resources(&chan->vc);
+
+	dev_vdbg(dchan2dev(dchan),
+		 "%s: free resources, descriptor still allocated: %u\n",
+		 axi_chan_name(chan), atomic_read(&chan->descs_allocated));
+
+	pm_runtime_put(chan->chip->dev);
+}
+
+/*
+ * If DW_axi_dmac sees CHx_CTL.ShadowReg_Or_LLI_Last bit of the fetched LLI
+ * as 1, it understands that the current block is the final block in the
+ * transfer and completes the DMA transfer operation at the end of the
+ * current block transfer.
+ */
+static void set_desc_last(struct axi_dma_desc *desc)
+{
+	u32 val;
+
+	val = le32_to_cpu(desc->lli.ctl_hi);
+	val |= CH_CTL_H_LLI_LAST;
+	desc->lli.ctl_hi = cpu_to_le32(val);
+}
+
+static void write_desc_sar(struct axi_dma_desc *desc, dma_addr_t adr)
+{
+	desc->lli.sar = cpu_to_le64(adr);
+}
+
+static void write_desc_dar(struct axi_dma_desc *desc, dma_addr_t adr)
+{
+	desc->lli.dar = cpu_to_le64(adr);
+}
+
+static void set_desc_src_master(struct axi_dma_desc *desc)
+{
+	u32 val;
+
+	/* Select AXI0 for source master */
+	val = le32_to_cpu(desc->lli.ctl_lo);
+	val &= ~CH_CTL_L_SRC_MAST;
+	desc->lli.ctl_lo = cpu_to_le32(val);
+}
+
+static void set_desc_dest_master(struct axi_dma_desc *desc)
+{
+	u32 val;
+
+	/* Select AXI1 for destination master if available */
+	val = le32_to_cpu(desc->lli.ctl_lo);
+	if (desc->chan->chip->dw->hdata->nr_masters > 1)
+		val |= CH_CTL_L_DST_MAST;
+	else
+		val &= ~CH_CTL_L_DST_MAST;
+
+	desc->lli.ctl_lo = cpu_to_le32(val);
+}
+
+static struct dma_async_tx_descriptor *
+dma_chan_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dst_adr,
+			 dma_addr_t src_adr, size_t len, unsigned long flags)
+{
+	struct axi_dma_desc *first = NULL, *desc = NULL, *prev = NULL;
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+	size_t block_ts, max_block_ts, xfer_len;
+	u32 xfer_width, reg;
+	u8 lms = 0; /* Select AXI0 master for LLI fetching */
+
+	dev_dbg(chan2dev(chan), "%s: memcpy: src: %pad dst: %pad length: %zd flags: %#lx",
+		axi_chan_name(chan), &src_adr, &dst_adr, len, flags);
+
+	max_block_ts = chan->chip->dw->hdata->block_size[chan->id];
+
+	while (len) {
+		xfer_len = len;
+
+		/*
+		 * Take care of the alignment.
+		 * Source and destination widths could differ, but we keep
+		 * them the same to be simpler.
+		 */
+		xfer_width = axi_chan_get_xfer_width(chan, src_adr, dst_adr, xfer_len);
+
+		/*
+		 * block_ts indicates the total number of data units of
+		 * xfer_width to be transferred in a DMA block transfer.
+		 * The BLOCK_TS register should be set to block_ts - 1.
+		 */
+		block_ts = xfer_len >> xfer_width;
+		if (block_ts > max_block_ts) {
+			block_ts = max_block_ts;
+			xfer_len = max_block_ts << xfer_width;
+		}
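+		/*
+		 * Example: with max_block_ts = 4096 and xfer_width = 2
+		 * (4-byte units), a 0x5000-byte request is clipped to one
+		 * 0x4000-byte block here; the remaining 0x1000 bytes get
+		 * their own LLI on the next loop iteration.
+		 */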
+
+		desc = axi_desc_get(chan);
+		if (unlikely(!desc))
+			goto err_desc_get;
+
+		write_desc_sar(desc, src_adr);
+		write_desc_dar(desc, dst_adr);
+		desc->lli.block_ts_lo = cpu_to_le32(block_ts - 1);
+
+		reg = CH_CTL_H_LLI_VALID;
+		if (chan->chip->dw->hdata->restrict_axi_burst_len) {
+			u32 burst_len = chan->chip->dw->hdata->axi_rw_burst_len;
+
+			reg |= (CH_CTL_H_ARLEN_EN |
+				burst_len << CH_CTL_H_ARLEN_POS |
+				CH_CTL_H_AWLEN_EN |
+				burst_len << CH_CTL_H_AWLEN_POS);
+		}
+		desc->lli.ctl_hi = cpu_to_le32(reg);
+
+		reg = (DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_DST_MSIZE_POS |
+		       DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_SRC_MSIZE_POS |
+		       xfer_width << CH_CTL_L_DST_WIDTH_POS |
+		       xfer_width << CH_CTL_L_SRC_WIDTH_POS |
+		       DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_DST_INC_POS |
+		       DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_SRC_INC_POS);
+		desc->lli.ctl_lo = cpu_to_le32(reg);
+
+		set_desc_src_master(desc);
+		set_desc_dest_master(desc);
+
+		/* Manage transfer list (xfer_list) */
+		if (!first) {
+			first = desc;
+		} else {
+			list_add_tail(&desc->xfer_list, &first->xfer_list);
+			write_desc_llp(prev, desc->vd.tx.phys | lms);
+		}
+		prev = desc;
+
+		/* update the length and addresses for the next loop cycle */
+		len -= xfer_len;
+		dst_adr += xfer_len;
+		src_adr += xfer_len;
+	}
+
+	/* The total length was zero, so no descriptors were allocated */
+	if (unlikely(!first))
+		return NULL;
+
+	/* Set end-of-link to the last link descriptor of list */
+	set_desc_last(desc);
+
+	return vchan_tx_prep(&chan->vc, &first->vd, flags);
+
+err_desc_get:
+	axi_desc_put(first);
+	return NULL;
+}
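+
+/*
+ * Minimal client-side sketch (an illustration, not part of this driver):
+ * a dmaengine consumer would exercise the memcpy path above roughly as
+ *
+ *	dma_cap_mask_t mask;
+ *	struct dma_chan *chan;
+ *	struct dma_async_tx_descriptor *tx;
+ *
+ *	dma_cap_zero(mask);
+ *	dma_cap_set(DMA_MEMCPY, mask);
+ *	chan = dma_request_chan_by_mask(&mask);
+ *	tx = dmaengine_prep_dma_memcpy(chan, dst_dma, src_dma, len,
+ *				       DMA_PREP_INTERRUPT);
+ *	dmaengine_submit(tx);
+ *	dma_async_issue_pending(chan);
+ *
+ * where dst_dma, src_dma and len are hypothetical, already DMA-mapped
+ * buffers (error handling omitted).
+ */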
+
+static void axi_chan_dump_lli(struct axi_dma_chan *chan,
+			      struct axi_dma_desc *desc)
+{
+	dev_err(dchan2dev(&chan->vc.chan),
+		"SAR: 0x%llx DAR: 0x%llx LLP: 0x%llx BTS 0x%x CTL: 0x%x:%08x",
+		le64_to_cpu(desc->lli.sar),
+		le64_to_cpu(desc->lli.dar),
+		le64_to_cpu(desc->lli.llp),
+		le32_to_cpu(desc->lli.block_ts_lo),
+		le32_to_cpu(desc->lli.ctl_hi),
+		le32_to_cpu(desc->lli.ctl_lo));
+}
+
+static void axi_chan_list_dump_lli(struct axi_dma_chan *chan,
+				   struct axi_dma_desc *desc_head)
+{
+	struct axi_dma_desc *desc;
+
+	axi_chan_dump_lli(chan, desc_head);
+	list_for_each_entry(desc, &desc_head->xfer_list, xfer_list)
+		axi_chan_dump_lli(chan, desc);
+}
+
+static noinline void axi_chan_handle_err(struct axi_dma_chan *chan, u32 status)
+{
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+
+	axi_chan_disable(chan);
+
+	/* The bad descriptor is currently at the head of the vc list */
+	vd = vchan_next_desc(&chan->vc);
+	/* Remove the failed descriptor from the issued list */
+	list_del(&vd->node);
+
+	/* WARN about bad descriptor */
+	dev_err(chan2dev(chan),
+		"Bad descriptor submitted for %s, cookie: %d, irq: 0x%08x\n",
+		axi_chan_name(chan), vd->tx.cookie, status);
+	axi_chan_list_dump_lli(chan, vd_to_axi_desc(vd));
+
+	vchan_cookie_complete(vd);
+
+	/* Try to restart the controller */
+	axi_chan_start_first_queued(chan);
+
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+static void axi_chan_block_xfer_complete(struct axi_dma_chan *chan)
+{
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+	if (unlikely(axi_chan_is_hw_enable(chan))) {
+		dev_err(chan2dev(chan), "BUG: %s caught DWAXIDMAC_IRQ_DMA_TRF, but channel not idle!\n",
+			axi_chan_name(chan));
+		axi_chan_disable(chan);
+	}
+
+	/* The completed descriptor is currently at the head of the vc list */
+	vd = vchan_next_desc(&chan->vc);
+	/* Remove the completed descriptor from issued list before completing */
+	list_del(&vd->node);
+	vchan_cookie_complete(vd);
+
+	/* Submit queued descriptors after processing the completed ones */
+	axi_chan_start_first_queued(chan);
+
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+static irqreturn_t dw_axi_dma_interrupt(int irq, void *dev_id)
+{
+	struct axi_dma_chip *chip = dev_id;
+	struct dw_axi_dma *dw = chip->dw;
+	struct axi_dma_chan *chan;
+
+	u32 status, i;
+
+	/* Disable DMAC interrupts; we'll re-enable them after processing the channels */
+	axi_dma_irq_disable(chip);
+
+	/* Poll, clear and process every channel interrupt status */
+	for (i = 0; i < dw->hdata->nr_channels; i++) {
+		chan = &dw->chan[i];
+		status = axi_chan_irq_read(chan);
+		axi_chan_irq_clear(chan, status);
+
+		dev_vdbg(chip->dev, "%s %u IRQ status: 0x%08x\n",
+			axi_chan_name(chan), i, status);
+
+		if (status & DWAXIDMAC_IRQ_ALL_ERR)
+			axi_chan_handle_err(chan, status);
+		else if (status & DWAXIDMAC_IRQ_DMA_TRF)
+			axi_chan_block_xfer_complete(chan);
+	}
+
+	/* Re-enable interrupts */
+	axi_dma_irq_enable(chip);
+
+	return IRQ_HANDLED;
+}
+
+static int dma_chan_terminate_all(struct dma_chan *dchan)
+{
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+
+	axi_chan_disable(chan);
+
+	vchan_get_all_descriptors(&chan->vc, &head);
+
+	/*
+	 * Since vchan_dma_desc_free_list() can access the allocated
+	 * descriptors, it must be called while holding vc.lock.
+	 */
+	vchan_dma_desc_free_list(&chan->vc, &head);
+
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+	dev_vdbg(dchan2dev(dchan), "terminated: %s\n", axi_chan_name(chan));
+
+	return 0;
+}
+
+static int dma_chan_pause(struct dma_chan *dchan)
+{
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+	unsigned long flags;
+	unsigned int timeout = 20; /* timeout iterations */
+	u32 val;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+
+	val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+	val |= BIT(chan->id) << DMAC_CHAN_SUSP_SHIFT |
+	       BIT(chan->id) << DMAC_CHAN_SUSP_WE_SHIFT;
+	axi_dma_iowrite32(chan->chip, DMAC_CHEN, val);
+
+	do  {
+		if (axi_chan_irq_read(chan) & DWAXIDMAC_IRQ_SUSPENDED)
+			break;
+
+		udelay(2);
+	} while (--timeout);
+
+	axi_chan_irq_clear(chan, DWAXIDMAC_IRQ_SUSPENDED);
+
+	chan->is_paused = true;
+
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+	return timeout ? 0 : -EAGAIN;
+}
+
+/* Called with the channel's vc.lock held */
+static inline void axi_chan_resume(struct axi_dma_chan *chan)
+{
+	u32 val;
+
+	val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
+	val &= ~(BIT(chan->id) << DMAC_CHAN_SUSP_SHIFT);
+	val |=  (BIT(chan->id) << DMAC_CHAN_SUSP_WE_SHIFT);
+	axi_dma_iowrite32(chan->chip, DMAC_CHEN, val);
+
+	chan->is_paused = false;
+}
+
+static int dma_chan_resume(struct dma_chan *dchan)
+{
+	struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+
+	if (chan->is_paused)
+		axi_chan_resume(chan);
+
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+	return 0;
+}
+
+static int axi_dma_suspend(struct axi_dma_chip *chip)
+{
+	axi_dma_irq_disable(chip);
+	axi_dma_disable(chip);
+
+	clk_disable_unprepare(chip->core_clk);
+	clk_disable_unprepare(chip->cfgr_clk);
+
+	return 0;
+}
+
+static int axi_dma_resume(struct axi_dma_chip *chip)
+{
+	int ret;
+
+	ret = clk_prepare_enable(chip->cfgr_clk);
+	if (ret < 0)
+		return ret;
+
+	ret = clk_prepare_enable(chip->core_clk);
+	if (ret < 0)
+		return ret;
+
+	axi_dma_enable(chip);
+	axi_dma_irq_enable(chip);
+
+	return 0;
+}
+
+static int __maybe_unused axi_dma_runtime_suspend(struct device *dev)
+{
+	struct axi_dma_chip *chip = dev_get_drvdata(dev);
+
+	return axi_dma_suspend(chip);
+}
+
+static int __maybe_unused axi_dma_runtime_resume(struct device *dev)
+{
+	struct axi_dma_chip *chip = dev_get_drvdata(dev);
+
+	return axi_dma_resume(chip);
+}
+
+static int parse_device_properties(struct axi_dma_chip *chip)
+{
+	struct device *dev = chip->dev;
+	u32 tmp, carr[DMAC_MAX_CHANNELS];
+	int ret;
+
+	ret = device_property_read_u32(dev, "dma-channels", &tmp);
+	if (ret)
+		return ret;
+	if (tmp == 0 || tmp > DMAC_MAX_CHANNELS)
+		return -EINVAL;
+
+	chip->dw->hdata->nr_channels = tmp;
+
+	ret = device_property_read_u32(dev, "snps,dma-masters", &tmp);
+	if (ret)
+		return ret;
+	if (tmp == 0 || tmp > DMAC_MAX_MASTERS)
+		return -EINVAL;
+
+	chip->dw->hdata->nr_masters = tmp;
+
+	ret = device_property_read_u32(dev, "snps,data-width", &tmp);
+	if (ret)
+		return ret;
+	if (tmp > DWAXIDMAC_TRANS_WIDTH_MAX)
+		return -EINVAL;
+
+	chip->dw->hdata->m_data_width = tmp;
+
+	ret = device_property_read_u32_array(dev, "snps,block-size", carr,
+					     chip->dw->hdata->nr_channels);
+	if (ret)
+		return ret;
+	for (tmp = 0; tmp < chip->dw->hdata->nr_channels; tmp++) {
+		if (carr[tmp] == 0 || carr[tmp] > DMAC_MAX_BLK_SIZE)
+			return -EINVAL;
+
+		chip->dw->hdata->block_size[tmp] = carr[tmp];
+	}
+
+	ret = device_property_read_u32_array(dev, "snps,priority", carr,
+					     chip->dw->hdata->nr_channels);
+	if (ret)
+		return ret;
+	/* Priority value must be programmed within [0:nr_channels-1] range */
+	for (tmp = 0; tmp < chip->dw->hdata->nr_channels; tmp++) {
+		if (carr[tmp] >= chip->dw->hdata->nr_channels)
+			return -EINVAL;
+
+		chip->dw->hdata->priority[tmp] = carr[tmp];
+	}
+
+	/* axi-max-burst-len is an optional property */
+	ret = device_property_read_u32(dev, "snps,axi-max-burst-len", &tmp);
+	if (!ret) {
+		if (tmp > DWAXIDMAC_ARWLEN_MAX + 1)
+			return -EINVAL;
+		if (tmp < DWAXIDMAC_ARWLEN_MIN + 1)
+			return -EINVAL;
+
+		chip->dw->hdata->restrict_axi_burst_len = true;
+		chip->dw->hdata->axi_rw_burst_len = tmp - 1;
+	}
+
+	return 0;
+}
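+
+/*
+ * Hypothetical device-tree fragment matching the properties parsed above
+ * (all values are illustrative only; the snps,axi-dma-1.01a DT binding is
+ * the authoritative reference):
+ *
+ *	dmac: dma-controller@80000 {
+ *		compatible = "snps,axi-dma-1.01a";
+ *		reg = <0x80000 0x400>;
+ *		clocks = <&core_clk>, <&cfgr_clk>;
+ *		clock-names = "core-clk", "cfgr-clk";
+ *		interrupts = <27>;
+ *		dma-channels = <4>;
+ *		snps,dma-masters = <2>;
+ *		snps,data-width = <3>;
+ *		snps,block-size = <4096 4096 4096 4096>;
+ *		snps,priority = <0 1 2 3>;
+ *		snps,axi-max-burst-len = <16>;
+ *	};
+ */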
+
+static int dw_probe(struct platform_device *pdev)
+{
+	struct axi_dma_chip *chip;
+	struct resource *mem;
+	struct dw_axi_dma *dw;
+	struct dw_axi_dma_hcfg *hdata;
+	u32 i;
+	int ret;
+
+	chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	dw = devm_kzalloc(&pdev->dev, sizeof(*dw), GFP_KERNEL);
+	if (!dw)
+		return -ENOMEM;
+
+	hdata = devm_kzalloc(&pdev->dev, sizeof(*hdata), GFP_KERNEL);
+	if (!hdata)
+		return -ENOMEM;
+
+	chip->dw = dw;
+	chip->dev = &pdev->dev;
+	chip->dw->hdata = hdata;
+
+	chip->irq = platform_get_irq(pdev, 0);
+	if (chip->irq < 0)
+		return chip->irq;
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	chip->regs = devm_ioremap_resource(chip->dev, mem);
+	if (IS_ERR(chip->regs))
+		return PTR_ERR(chip->regs);
+
+	chip->core_clk = devm_clk_get(chip->dev, "core-clk");
+	if (IS_ERR(chip->core_clk))
+		return PTR_ERR(chip->core_clk);
+
+	chip->cfgr_clk = devm_clk_get(chip->dev, "cfgr-clk");
+	if (IS_ERR(chip->cfgr_clk))
+		return PTR_ERR(chip->cfgr_clk);
+
+	ret = parse_device_properties(chip);
+	if (ret)
+		return ret;
+
+	dw->chan = devm_kcalloc(chip->dev, hdata->nr_channels,
+				sizeof(*dw->chan), GFP_KERNEL);
+	if (!dw->chan)
+		return -ENOMEM;
+
+	ret = devm_request_irq(chip->dev, chip->irq, dw_axi_dma_interrupt,
+			       IRQF_SHARED, KBUILD_MODNAME, chip);
+	if (ret)
+		return ret;
+
+	/* LLI addresses must be aligned to a 64-byte boundary */
+	dw->desc_pool = dmam_pool_create(KBUILD_MODNAME, chip->dev,
+					 sizeof(struct axi_dma_desc), 64, 0);
+	if (!dw->desc_pool) {
+		dev_err(chip->dev, "No memory for descriptors dma pool\n");
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&dw->dma.channels);
+	for (i = 0; i < hdata->nr_channels; i++) {
+		struct axi_dma_chan *chan = &dw->chan[i];
+
+		chan->chip = chip;
+		chan->id = i;
+		chan->chan_regs = chip->regs + COMMON_REG_LEN + i * CHAN_REG_LEN;
+		atomic_set(&chan->descs_allocated, 0);
+
+		chan->vc.desc_free = vchan_desc_put;
+		vchan_init(&chan->vc, &dw->dma);
+	}
+
+	/* Set capabilities */
+	dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
+
+	/* DMA capabilities */
+	dw->dma.chancnt = hdata->nr_channels;
+	dw->dma.src_addr_widths = AXI_DMA_BUSWIDTHS;
+	dw->dma.dst_addr_widths = AXI_DMA_BUSWIDTHS;
+	dw->dma.directions = BIT(DMA_MEM_TO_MEM);
+	dw->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+
+	dw->dma.dev = chip->dev;
+	dw->dma.device_tx_status = dma_chan_tx_status;
+	dw->dma.device_issue_pending = dma_chan_issue_pending;
+	dw->dma.device_terminate_all = dma_chan_terminate_all;
+	dw->dma.device_pause = dma_chan_pause;
+	dw->dma.device_resume = dma_chan_resume;
+
+	dw->dma.device_alloc_chan_resources = dma_chan_alloc_chan_resources;
+	dw->dma.device_free_chan_resources = dma_chan_free_chan_resources;
+
+	dw->dma.device_prep_dma_memcpy = dma_chan_prep_dma_memcpy;
+
+	platform_set_drvdata(pdev, chip);
+
+	pm_runtime_enable(chip->dev);
+
+	/*
+	 * We can't just call pm_runtime_get here instead of
+	 * pm_runtime_get_noresume + axi_dma_resume because we need
+	 * driver to work also without Runtime PM.
+	 */
+	pm_runtime_get_noresume(chip->dev);
+	ret = axi_dma_resume(chip);
+	if (ret < 0)
+		goto err_pm_disable;
+
+	axi_dma_hw_init(chip);
+
+	pm_runtime_put(chip->dev);
+
+	ret = dma_async_device_register(&dw->dma);
+	if (ret)
+		goto err_pm_disable;
+
+	dev_info(chip->dev, "DesignWare AXI DMA Controller, %d channels\n",
+		 dw->hdata->nr_channels);
+
+	return 0;
+
+err_pm_disable:
+	pm_runtime_disable(chip->dev);
+
+	return ret;
+}
+
+static int dw_remove(struct platform_device *pdev)
+{
+	struct axi_dma_chip *chip = platform_get_drvdata(pdev);
+	struct dw_axi_dma *dw = chip->dw;
+	struct axi_dma_chan *chan, *_chan;
+	u32 i;
+
+	/* Enable the clocks before accessing the registers */
+	clk_prepare_enable(chip->cfgr_clk);
+	clk_prepare_enable(chip->core_clk);
+	axi_dma_irq_disable(chip);
+	for (i = 0; i < dw->hdata->nr_channels; i++) {
+		axi_chan_disable(&chip->dw->chan[i]);
+		axi_chan_irq_disable(&chip->dw->chan[i], DWAXIDMAC_IRQ_ALL);
+	}
+	axi_dma_disable(chip);
+
+	pm_runtime_disable(chip->dev);
+	axi_dma_suspend(chip);
+
+	devm_free_irq(chip->dev, chip->irq, chip);
+
+	list_for_each_entry_safe(chan, _chan, &dw->dma.channels,
+			vc.chan.device_node) {
+		list_del(&chan->vc.chan.device_node);
+		tasklet_kill(&chan->vc.task);
+	}
+
+	dma_async_device_unregister(&dw->dma);
+
+	return 0;
+}
+
+static const struct dev_pm_ops dw_axi_dma_pm_ops = {
+	SET_RUNTIME_PM_OPS(axi_dma_runtime_suspend, axi_dma_runtime_resume, NULL)
+};
+
+static const struct of_device_id dw_dma_of_id_table[] = {
+	{ .compatible = "snps,axi-dma-1.01a" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, dw_dma_of_id_table);
+
+static struct platform_driver dw_driver = {
+	.probe		= dw_probe,
+	.remove		= dw_remove,
+	.driver = {
+		.name	= KBUILD_MODNAME,
+		.of_match_table = of_match_ptr(dw_dma_of_id_table),
+		.pm = &dw_axi_dma_pm_ops,
+	},
+};
+module_platform_driver(dw_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Synopsys DesignWare AXI DMA Controller platform driver");
+MODULE_AUTHOR("Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>");
diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac.h b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h
new file mode 100644
index 0000000..f8888dc
--- /dev/null
+++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier:  GPL-2.0
+// (C) 2017-2018 Synopsys, Inc. (www.synopsys.com)
+
+/*
+ * Synopsys DesignWare AXI DMA Controller driver.
+ *
+ * Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+ */
+
+#ifndef _AXI_DMA_PLATFORM_H
+#define _AXI_DMA_PLATFORM_H
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/types.h>
+
+#include "../virt-dma.h"
+
+#define DMAC_MAX_CHANNELS	8
+#define DMAC_MAX_MASTERS	2
+#define DMAC_MAX_BLK_SIZE	0x200000
+
+struct dw_axi_dma_hcfg {
+	u32	nr_channels;
+	u32	nr_masters;
+	u32	m_data_width;
+	u32	block_size[DMAC_MAX_CHANNELS];
+	u32	priority[DMAC_MAX_CHANNELS];
+	/* maximum supported axi burst length */
+	u32	axi_rw_burst_len;
+	bool	restrict_axi_burst_len;
+};
+
+struct axi_dma_chan {
+	struct axi_dma_chip		*chip;
+	void __iomem			*chan_regs;
+	u8				id;
+	atomic_t			descs_allocated;
+
+	struct virt_dma_chan		vc;
+
+	/* these other elements are all protected by vc.lock */
+	bool				is_paused;
+};
+
+struct dw_axi_dma {
+	struct dma_device	dma;
+	struct dw_axi_dma_hcfg	*hdata;
+	struct dma_pool		*desc_pool;
+
+	/* channels */
+	struct axi_dma_chan	*chan;
+};
+
+struct axi_dma_chip {
+	struct device		*dev;
+	int			irq;
+	void __iomem		*regs;
+	struct clk		*core_clk;
+	struct clk		*cfgr_clk;
+	struct dw_axi_dma	*dw;
+};
+
+/* LLI == Linked List Item */
+struct __packed axi_dma_lli {
+	__le64		sar;
+	__le64		dar;
+	__le32		block_ts_lo;
+	__le32		block_ts_hi;
+	__le64		llp;
+	__le32		ctl_lo;
+	__le32		ctl_hi;
+	__le32		sstat;
+	__le32		dstat;
+	__le32		status_lo;
+	__le32		status_hi;
+	__le32		reserved_lo;
+	__le32		reserved_hi;
+};
+
+struct axi_dma_desc {
+	struct axi_dma_lli		lli;
+
+	struct virt_dma_desc		vd;
+	struct axi_dma_chan		*chan;
+	struct list_head		xfer_list;
+};
+
+static inline struct device *dchan2dev(struct dma_chan *dchan)
+{
+	return &dchan->dev->device;
+}
+
+static inline struct device *chan2dev(struct axi_dma_chan *chan)
+{
+	return &chan->vc.chan.dev->device;
+}
+
+static inline struct axi_dma_desc *vd_to_axi_desc(struct virt_dma_desc *vd)
+{
+	return container_of(vd, struct axi_dma_desc, vd);
+}
+
+static inline struct axi_dma_chan *vc_to_axi_dma_chan(struct virt_dma_chan *vc)
+{
+	return container_of(vc, struct axi_dma_chan, vc);
+}
+
+static inline struct axi_dma_chan *dchan_to_axi_dma_chan(struct dma_chan *dchan)
+{
+	return vc_to_axi_dma_chan(to_virt_chan(dchan));
+}
+
+
+#define COMMON_REG_LEN		0x100
+#define CHAN_REG_LEN		0x100
+
+/* Common registers offset */
+#define DMAC_ID			0x000 /* R DMAC ID */
+#define DMAC_COMPVER		0x008 /* R DMAC Component Version */
+#define DMAC_CFG		0x010 /* R/W DMAC Configuration */
+#define DMAC_CHEN		0x018 /* R/W DMAC Channel Enable */
+#define DMAC_CHEN_L		0x018 /* R/W DMAC Channel Enable 00-31 */
+#define DMAC_CHEN_H		0x01C /* R/W DMAC Channel Enable 32-63 */
+#define DMAC_INTSTATUS		0x030 /* R DMAC Interrupt Status */
+#define DMAC_COMMON_INTCLEAR	0x038 /* W DMAC Interrupt Clear */
+#define DMAC_COMMON_INTSTATUS_ENA 0x040 /* R DMAC Interrupt Status Enable */
+#define DMAC_COMMON_INTSIGNAL_ENA 0x048 /* R/W DMAC Interrupt Signal Enable */
+#define DMAC_COMMON_INTSTATUS	0x050 /* R DMAC Interrupt Status */
+#define DMAC_RESET		0x058 /* R DMAC Reset Register1 */
+
+/* DMA channel registers offset */
+#define CH_SAR			0x000 /* R/W Chan Source Address */
+#define CH_DAR			0x008 /* R/W Chan Destination Address */
+#define CH_BLOCK_TS		0x010 /* R/W Chan Block Transfer Size */
+#define CH_CTL			0x018 /* R/W Chan Control */
+#define CH_CTL_L		0x018 /* R/W Chan Control 00-31 */
+#define CH_CTL_H		0x01C /* R/W Chan Control 32-63 */
+#define CH_CFG			0x020 /* R/W Chan Configuration */
+#define CH_CFG_L		0x020 /* R/W Chan Configuration 00-31 */
+#define CH_CFG_H		0x024 /* R/W Chan Configuration 32-63 */
+#define CH_LLP			0x028 /* R/W Chan Linked List Pointer */
+#define CH_STATUS		0x030 /* R Chan Status */
+#define CH_SWHSSRC		0x038 /* R/W Chan SW Handshake Source */
+#define CH_SWHSDST		0x040 /* R/W Chan SW Handshake Destination */
+#define CH_BLK_TFR_RESUMEREQ	0x048 /* W Chan Block Transfer Resume Req */
+#define CH_AXI_ID		0x050 /* R/W Chan AXI ID */
+#define CH_AXI_QOS		0x058 /* R/W Chan AXI QOS */
+#define CH_SSTAT		0x060 /* R Chan Source Status */
+#define CH_DSTAT		0x068 /* R Chan Destination Status */
+#define CH_SSTATAR		0x070 /* R/W Chan Source Status Fetch Addr */
+#define CH_DSTATAR		0x078 /* R/W Chan Destination Status Fetch Addr */
+#define CH_INTSTATUS_ENA	0x080 /* R/W Chan Interrupt Status Enable */
+#define CH_INTSTATUS		0x088 /* R/W Chan Interrupt Status */
+#define CH_INTSIGNAL_ENA	0x090 /* R/W Chan Interrupt Signal Enable */
+#define CH_INTCLEAR		0x098 /* W Chan Interrupt Clear */
+
+
+/* DMAC_CFG */
+#define DMAC_EN_POS			0
+#define DMAC_EN_MASK			BIT(DMAC_EN_POS)
+
+#define INT_EN_POS			1
+#define INT_EN_MASK			BIT(INT_EN_POS)
+
+#define DMAC_CHAN_EN_SHIFT		0
+#define DMAC_CHAN_EN_WE_SHIFT		8
+
+#define DMAC_CHAN_SUSP_SHIFT		16
+#define DMAC_CHAN_SUSP_WE_SHIFT		24
+
+/* CH_CTL_H */
+#define CH_CTL_H_ARLEN_EN		BIT(6)
+#define CH_CTL_H_ARLEN_POS		7
+#define CH_CTL_H_AWLEN_EN		BIT(15)
+#define CH_CTL_H_AWLEN_POS		16
+
+enum {
+	DWAXIDMAC_ARWLEN_1		= 0,
+	DWAXIDMAC_ARWLEN_2		= 1,
+	DWAXIDMAC_ARWLEN_4		= 3,
+	DWAXIDMAC_ARWLEN_8		= 7,
+	DWAXIDMAC_ARWLEN_16		= 15,
+	DWAXIDMAC_ARWLEN_32		= 31,
+	DWAXIDMAC_ARWLEN_64		= 63,
+	DWAXIDMAC_ARWLEN_128		= 127,
+	DWAXIDMAC_ARWLEN_256		= 255,
+	DWAXIDMAC_ARWLEN_MIN		= DWAXIDMAC_ARWLEN_1,
+	DWAXIDMAC_ARWLEN_MAX		= DWAXIDMAC_ARWLEN_256
+};
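+
+/*
+ * Worked example: "snps,axi-max-burst-len = <16>" is stored by
+ * parse_device_properties() as axi_rw_burst_len = 15 (DWAXIDMAC_ARWLEN_16),
+ * which dma_chan_prep_dma_memcpy() then shifts into CH_CTL_H at
+ * CH_CTL_H_ARLEN_POS and CH_CTL_H_AWLEN_POS.
+ */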
+
+#define CH_CTL_H_LLI_LAST		BIT(30)
+#define CH_CTL_H_LLI_VALID		BIT(31)
+
+/* CH_CTL_L */
+#define CH_CTL_L_LAST_WRITE_EN		BIT(30)
+
+#define CH_CTL_L_DST_MSIZE_POS		18
+#define CH_CTL_L_SRC_MSIZE_POS		14
+
+enum {
+	DWAXIDMAC_BURST_TRANS_LEN_1	= 0,
+	DWAXIDMAC_BURST_TRANS_LEN_4,
+	DWAXIDMAC_BURST_TRANS_LEN_8,
+	DWAXIDMAC_BURST_TRANS_LEN_16,
+	DWAXIDMAC_BURST_TRANS_LEN_32,
+	DWAXIDMAC_BURST_TRANS_LEN_64,
+	DWAXIDMAC_BURST_TRANS_LEN_128,
+	DWAXIDMAC_BURST_TRANS_LEN_256,
+	DWAXIDMAC_BURST_TRANS_LEN_512,
+	DWAXIDMAC_BURST_TRANS_LEN_1024
+};
+
+#define CH_CTL_L_DST_WIDTH_POS		11
+#define CH_CTL_L_SRC_WIDTH_POS		8
+
+#define CH_CTL_L_DST_INC_POS		6
+#define CH_CTL_L_SRC_INC_POS		4
+enum {
+	DWAXIDMAC_CH_CTL_L_INC		= 0,
+	DWAXIDMAC_CH_CTL_L_NOINC
+};
+
+#define CH_CTL_L_DST_MAST		BIT(2)
+#define CH_CTL_L_SRC_MAST		BIT(0)
+
+/* CH_CFG_H */
+#define CH_CFG_H_PRIORITY_POS		17
+#define CH_CFG_H_HS_SEL_DST_POS		4
+#define CH_CFG_H_HS_SEL_SRC_POS		3
+enum {
+	DWAXIDMAC_HS_SEL_HW		= 0,
+	DWAXIDMAC_HS_SEL_SW
+};
+
+#define CH_CFG_H_TT_FC_POS		0
+enum {
+	DWAXIDMAC_TT_FC_MEM_TO_MEM_DMAC	= 0,
+	DWAXIDMAC_TT_FC_MEM_TO_PER_DMAC,
+	DWAXIDMAC_TT_FC_PER_TO_MEM_DMAC,
+	DWAXIDMAC_TT_FC_PER_TO_PER_DMAC,
+	DWAXIDMAC_TT_FC_PER_TO_MEM_SRC,
+	DWAXIDMAC_TT_FC_PER_TO_PER_SRC,
+	DWAXIDMAC_TT_FC_MEM_TO_PER_DST,
+	DWAXIDMAC_TT_FC_PER_TO_PER_DST
+};
+
+/* CH_CFG_L */
+#define CH_CFG_L_DST_MULTBLK_TYPE_POS	2
+#define CH_CFG_L_SRC_MULTBLK_TYPE_POS	0
+enum {
+	DWAXIDMAC_MBLK_TYPE_CONTIGUOUS	= 0,
+	DWAXIDMAC_MBLK_TYPE_RELOAD,
+	DWAXIDMAC_MBLK_TYPE_SHADOW_REG,
+	DWAXIDMAC_MBLK_TYPE_LL
+};
+
+/**
+ * DW AXI DMA channel interrupts
+ *
+ * @DWAXIDMAC_IRQ_NONE: No interrupts
+ * @DWAXIDMAC_IRQ_BLOCK_TRF: Block transfer complete
+ * @DWAXIDMAC_IRQ_DMA_TRF: DMA transfer complete
+ * @DWAXIDMAC_IRQ_SRC_TRAN: Source transaction complete
+ * @DWAXIDMAC_IRQ_DST_TRAN: Destination transaction complete
+ * @DWAXIDMAC_IRQ_SRC_DEC_ERR: Source decode error
+ * @DWAXIDMAC_IRQ_DST_DEC_ERR: Destination decode error
+ * @DWAXIDMAC_IRQ_SRC_SLV_ERR: Source slave error
+ * @DWAXIDMAC_IRQ_DST_SLV_ERR: Destination slave error
+ * @DWAXIDMAC_IRQ_LLI_RD_DEC_ERR: LLI read decode error
+ * @DWAXIDMAC_IRQ_LLI_WR_DEC_ERR: LLI write decode error
+ * @DWAXIDMAC_IRQ_LLI_RD_SLV_ERR: LLI read slave error
+ * @DWAXIDMAC_IRQ_LLI_WR_SLV_ERR: LLI write slave error
+ * @DWAXIDMAC_IRQ_INVALID_ERR: LLI invalid error or Shadow register error
+ * @DWAXIDMAC_IRQ_MULTIBLKTYPE_ERR: Slave Interface Multiblock type error
+ * @DWAXIDMAC_IRQ_DEC_ERR: Slave Interface decode error
+ * @DWAXIDMAC_IRQ_WR2RO_ERR: Slave Interface write to read only error
+ * @DWAXIDMAC_IRQ_RD2RWO_ERR: Slave Interface read to write only error
+ * @DWAXIDMAC_IRQ_WRONCHEN_ERR: Slave Interface write to channel error
+ * @DWAXIDMAC_IRQ_SHADOWREG_ERR: Slave Interface shadow reg error
+ * @DWAXIDMAC_IRQ_WRONHOLD_ERR: Slave Interface hold error
+ * @DWAXIDMAC_IRQ_LOCK_CLEARED: Lock Cleared Status
+ * @DWAXIDMAC_IRQ_SRC_SUSPENDED: Source Suspended Status
+ * @DWAXIDMAC_IRQ_SUSPENDED: Channel Suspended Status
+ * @DWAXIDMAC_IRQ_DISABLED: Channel Disabled Status
+ * @DWAXIDMAC_IRQ_ABORTED: Channel Aborted Status
+ * @DWAXIDMAC_IRQ_ALL_ERR: Bitmask of all error interrupts
+ * @DWAXIDMAC_IRQ_ALL: Bitmask of all interrupts
+ */
+enum {
+	DWAXIDMAC_IRQ_NONE		= 0,
+	DWAXIDMAC_IRQ_BLOCK_TRF		= BIT(0),
+	DWAXIDMAC_IRQ_DMA_TRF		= BIT(1),
+	DWAXIDMAC_IRQ_SRC_TRAN		= BIT(3),
+	DWAXIDMAC_IRQ_DST_TRAN		= BIT(4),
+	DWAXIDMAC_IRQ_SRC_DEC_ERR	= BIT(5),
+	DWAXIDMAC_IRQ_DST_DEC_ERR	= BIT(6),
+	DWAXIDMAC_IRQ_SRC_SLV_ERR	= BIT(7),
+	DWAXIDMAC_IRQ_DST_SLV_ERR	= BIT(8),
+	DWAXIDMAC_IRQ_LLI_RD_DEC_ERR	= BIT(9),
+	DWAXIDMAC_IRQ_LLI_WR_DEC_ERR	= BIT(10),
+	DWAXIDMAC_IRQ_LLI_RD_SLV_ERR	= BIT(11),
+	DWAXIDMAC_IRQ_LLI_WR_SLV_ERR	= BIT(12),
+	DWAXIDMAC_IRQ_INVALID_ERR	= BIT(13),
+	DWAXIDMAC_IRQ_MULTIBLKTYPE_ERR	= BIT(14),
+	DWAXIDMAC_IRQ_DEC_ERR		= BIT(16),
+	DWAXIDMAC_IRQ_WR2RO_ERR		= BIT(17),
+	DWAXIDMAC_IRQ_RD2RWO_ERR	= BIT(18),
+	DWAXIDMAC_IRQ_WRONCHEN_ERR	= BIT(19),
+	DWAXIDMAC_IRQ_SHADOWREG_ERR	= BIT(20),
+	DWAXIDMAC_IRQ_WRONHOLD_ERR	= BIT(21),
+	DWAXIDMAC_IRQ_LOCK_CLEARED	= BIT(27),
+	DWAXIDMAC_IRQ_SRC_SUSPENDED	= BIT(28),
+	DWAXIDMAC_IRQ_SUSPENDED		= BIT(29),
+	DWAXIDMAC_IRQ_DISABLED		= BIT(30),
+	DWAXIDMAC_IRQ_ABORTED		= BIT(31),
+	DWAXIDMAC_IRQ_ALL_ERR		= (GENMASK(21, 16) | GENMASK(14, 5)),
+	DWAXIDMAC_IRQ_ALL		= GENMASK(31, 0)
+};
+
+enum {
+	DWAXIDMAC_TRANS_WIDTH_8		= 0,
+	DWAXIDMAC_TRANS_WIDTH_16,
+	DWAXIDMAC_TRANS_WIDTH_32,
+	DWAXIDMAC_TRANS_WIDTH_64,
+	DWAXIDMAC_TRANS_WIDTH_128,
+	DWAXIDMAC_TRANS_WIDTH_256,
+	DWAXIDMAC_TRANS_WIDTH_512,
+	DWAXIDMAC_TRANS_WIDTH_MAX	= DWAXIDMAC_TRANS_WIDTH_512
+};
+
+#endif /* _AXI_DMA_PLATFORM_H */
diff --git a/drivers/dma/dw/Kconfig b/drivers/dma/dw/Kconfig
new file mode 100644
index 0000000..04b9728
--- /dev/null
+++ b/drivers/dma/dw/Kconfig
@@ -0,0 +1,23 @@
+#
+# DMA engine configuration for dw
+#
+
+config DW_DMAC_CORE
+	tristate
+	select DMA_ENGINE
+
+config DW_DMAC
+	tristate "Synopsys DesignWare AHB DMA platform driver"
+	select DW_DMAC_CORE
+	help
+	  Support the Synopsys DesignWare AHB DMA controller. This
+	  can be integrated in chips such as the Intel Cherrytrail.
+
+config DW_DMAC_PCI
+	tristate "Synopsys DesignWare AHB DMA PCI driver"
+	depends on PCI
+	select DW_DMAC_CORE
+	help
+	  Support the Synopsys DesignWare AHB DMA controller on the
+	  platforms that enumerate it as a PCI device. For example,
+	  Intel Medfield has integrated this GPDMA controller.
diff --git a/drivers/dma/dw/Makefile b/drivers/dma/dw/Makefile
new file mode 100644
index 0000000..2b949c2
--- /dev/null
+++ b/drivers/dma/dw/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_DW_DMAC_CORE)	+= dw_dmac_core.o
+dw_dmac_core-objs	:= core.o
+
+obj-$(CONFIG_DW_DMAC)		+= dw_dmac.o
+dw_dmac-objs		:= platform.o
+
+obj-$(CONFIG_DW_DMAC_PCI)	+= dw_dmac_pci.o
+dw_dmac_pci-objs	:= pci.o
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
new file mode 100644
index 0000000..0f389e0
--- /dev/null
+++ b/drivers/dma/dw/core.c
@@ -0,0 +1,1439 @@
+/*
+ * Core driver for the Synopsys DesignWare DMA Controller
+ *
+ * Copyright (C) 2007-2008 Atmel Corporation
+ * Copyright (C) 2010-2011 ST Microelectronics
+ * Copyright (C) 2013 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+
+#include "../dmaengine.h"
+#include "internal.h"
+
+/*
+ * This supports the Synopsys "DesignWare AHB Central DMA Controller",
+ * (DW_ahb_dmac) which is used with various AMBA 2.0 systems (not all
+ * of which use ARM any more).  See the "Databook" from Synopsys for
+ * information beyond what licensees probably provide.
+ *
+ * The driver has been tested with the Atmel AT32AP7000, which does not
+ * support descriptor writeback.
+ */
+
+#define DWC_DEFAULT_CTLLO(_chan) ({				\
+		struct dw_dma_chan *_dwc = to_dw_dma_chan(_chan);	\
+		struct dma_slave_config	*_sconfig = &_dwc->dma_sconfig;	\
+		bool _is_slave = is_slave_direction(_dwc->direction);	\
+		u8 _smsize = _is_slave ? _sconfig->src_maxburst :	\
+			DW_DMA_MSIZE_16;			\
+		u8 _dmsize = _is_slave ? _sconfig->dst_maxburst :	\
+			DW_DMA_MSIZE_16;			\
+		u8 _dms = (_dwc->direction == DMA_MEM_TO_DEV) ?		\
+			_dwc->dws.p_master : _dwc->dws.m_master;	\
+		u8 _sms = (_dwc->direction == DMA_DEV_TO_MEM) ?		\
+			_dwc->dws.p_master : _dwc->dws.m_master;	\
+								\
+		(DWC_CTLL_DST_MSIZE(_dmsize)			\
+		 | DWC_CTLL_SRC_MSIZE(_smsize)			\
+		 | DWC_CTLL_LLP_D_EN				\
+		 | DWC_CTLL_LLP_S_EN				\
+		 | DWC_CTLL_DMS(_dms)				\
+		 | DWC_CTLL_SMS(_sms));				\
+	})
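+
+/*
+ * Illustrative expansion: for a DMA_MEM_TO_MEM transfer (not a slave
+ * direction) the macro above evaluates to
+ *
+ *	DWC_CTLL_DST_MSIZE(DW_DMA_MSIZE_16) | DWC_CTLL_SRC_MSIZE(DW_DMA_MSIZE_16)
+ *	| DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN
+ *	| DWC_CTLL_DMS(dws.m_master) | DWC_CTLL_SMS(dws.m_master)
+ *
+ * i.e. 16-item bursts on both sides, LLP updates enabled, and both AHB
+ * masters taken from the channel's memory-side master.
+ */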
+
+/* The set of bus widths supported by the DMA controller */
+#define DW_DMA_BUSWIDTHS			  \
+	BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED)	| \
+	BIT(DMA_SLAVE_BUSWIDTH_1_BYTE)		| \
+	BIT(DMA_SLAVE_BUSWIDTH_2_BYTES)		| \
+	BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)
+
+/*----------------------------------------------------------------------*/
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+	return &chan->dev->device;
+}
+
+static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc)
+{
+	return to_dw_desc(dwc->active_list.next);
+}
+
+static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct dw_desc		*desc = txd_to_dw_desc(tx);
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(tx->chan);
+	dma_cookie_t		cookie;
+	unsigned long		flags;
+
+	spin_lock_irqsave(&dwc->lock, flags);
+	cookie = dma_cookie_assign(tx);
+
+	/*
+	 * REVISIT: We should attempt to chain as many descriptors as
+	 * possible, perhaps even appending to those already submitted
+	 * for DMA. But this is hard to do in a race-free manner.
+	 */
+
+	list_add_tail(&desc->desc_node, &dwc->queue);
+	spin_unlock_irqrestore(&dwc->lock, flags);
+	dev_vdbg(chan2dev(tx->chan), "%s: queued %u\n",
+		 __func__, desc->txd.cookie);
+
+	return cookie;
+}
+
+static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc)
+{
+	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+	struct dw_desc *desc;
+	dma_addr_t phys;
+
+	desc = dma_pool_zalloc(dw->desc_pool, GFP_ATOMIC, &phys);
+	if (!desc)
+		return NULL;
+
+	dwc->descs_allocated++;
+	INIT_LIST_HEAD(&desc->tx_list);
+	dma_async_tx_descriptor_init(&desc->txd, &dwc->chan);
+	desc->txd.tx_submit = dwc_tx_submit;
+	desc->txd.flags = DMA_CTRL_ACK;
+	desc->txd.phys = phys;
+	return desc;
+}
+
+static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
+{
+	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+	struct dw_desc *child, *_next;
+
+	if (unlikely(!desc))
+		return;
+
+	list_for_each_entry_safe(child, _next, &desc->tx_list, desc_node) {
+		list_del(&child->desc_node);
+		dma_pool_free(dw->desc_pool, child, child->txd.phys);
+		dwc->descs_allocated--;
+	}
+
+	dma_pool_free(dw->desc_pool, desc, desc->txd.phys);
+	dwc->descs_allocated--;
+}
+
+static void dwc_initialize_chan_idma32(struct dw_dma_chan *dwc)
+{
+	u32 cfghi = 0;
+	u32 cfglo = 0;
+
+	/* Set default burst alignment */
+	cfglo |= IDMA32C_CFGL_DST_BURST_ALIGN | IDMA32C_CFGL_SRC_BURST_ALIGN;
+
+	/* Low 4 bits of the request lines */
+	cfghi |= IDMA32C_CFGH_DST_PER(dwc->dws.dst_id & 0xf);
+	cfghi |= IDMA32C_CFGH_SRC_PER(dwc->dws.src_id & 0xf);
+
+	/* Request line extension (2 bits) */
+	cfghi |= IDMA32C_CFGH_DST_PER_EXT(dwc->dws.dst_id >> 4 & 0x3);
+	cfghi |= IDMA32C_CFGH_SRC_PER_EXT(dwc->dws.src_id >> 4 & 0x3);
+
+	channel_writel(dwc, CFG_LO, cfglo);
+	channel_writel(dwc, CFG_HI, cfghi);
+}
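+
+/*
+ * Worked example: a hypothetical request line dst_id = 0x15 is split into
+ * IDMA32C_CFGH_DST_PER(0x5) for the low four bits plus
+ * IDMA32C_CFGH_DST_PER_EXT(0x1) for the two-bit extension.
+ */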
+
+static void dwc_initialize_chan_dw(struct dw_dma_chan *dwc)
+{
+	u32 cfghi = DWC_CFGH_FIFO_MODE;
+	u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority);
+	bool hs_polarity = dwc->dws.hs_polarity;
+
+	cfghi |= DWC_CFGH_DST_PER(dwc->dws.dst_id);
+	cfghi |= DWC_CFGH_SRC_PER(dwc->dws.src_id);
+
+	/* Set polarity of handshake interface */
+	cfglo |= hs_polarity ? DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL : 0;
+
+	channel_writel(dwc, CFG_LO, cfglo);
+	channel_writel(dwc, CFG_HI, cfghi);
+}
+
+static void dwc_initialize(struct dw_dma_chan *dwc)
+{
+	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+
+	if (test_bit(DW_DMA_IS_INITIALIZED, &dwc->flags))
+		return;
+
+	if (dw->pdata->is_idma32)
+		dwc_initialize_chan_idma32(dwc);
+	else
+		dwc_initialize_chan_dw(dwc);
+
+	/* Enable interrupts */
+	channel_set_bit(dw, MASK.XFER, dwc->mask);
+	channel_set_bit(dw, MASK.ERROR, dwc->mask);
+
+	set_bit(DW_DMA_IS_INITIALIZED, &dwc->flags);
+}
+
+/*----------------------------------------------------------------------*/
+
+static inline void dwc_dump_chan_regs(struct dw_dma_chan *dwc)
+{
+	dev_err(chan2dev(&dwc->chan),
+		"  SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n",
+		channel_readl(dwc, SAR),
+		channel_readl(dwc, DAR),
+		channel_readl(dwc, LLP),
+		channel_readl(dwc, CTL_HI),
+		channel_readl(dwc, CTL_LO));
+}
+
+static inline void dwc_chan_disable(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+	channel_clear_bit(dw, CH_EN, dwc->mask);
+	while (dma_readl(dw, CH_EN) & dwc->mask)
+		cpu_relax();
+}
+
+static u32 bytes2block(struct dw_dma_chan *dwc, size_t bytes,
+			  unsigned int width, size_t *len)
+{
+	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+	u32 block;
+
+	/* Always in bytes for iDMA 32-bit */
+	if (dw->pdata->is_idma32)
+		width = 0;
+
+	if ((bytes >> width) > dwc->block_size) {
+		block = dwc->block_size;
+		*len = block << width;
+	} else {
+		block = bytes >> width;
+		*len = bytes;
+	}
+
+	return block;
+}
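+
+/*
+ * Worked example: with width = 2 (32-bit items) and dwc->block_size = 4095,
+ * a 10000-byte request returns block = 2500 with *len = 10000, whereas on
+ * iDMA 32-bit (width forced to 0) the same request is capped to
+ * block = 4095 with *len = 4095 bytes.
+ */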
+
+static size_t block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width)
+{
+	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+
+	if (dw->pdata->is_idma32)
+		return IDMA32C_CTLH_BLOCK_TS(block);
+
+	return DWC_CTLH_BLOCK_TS(block) << width;
+}
+
+/*----------------------------------------------------------------------*/
+
+/* Perform single block transfer */
+static inline void dwc_do_single_block(struct dw_dma_chan *dwc,
+				       struct dw_desc *desc)
+{
+	struct dw_dma	*dw = to_dw_dma(dwc->chan.device);
+	u32		ctllo;
+
+	/*
+	 * Software emulation of LLP mode relies on interrupts to continue
+	 * multi-block transfers.
+	 */
+	ctllo = lli_read(desc, ctllo) | DWC_CTLL_INT_EN;
+
+	channel_writel(dwc, SAR, lli_read(desc, sar));
+	channel_writel(dwc, DAR, lli_read(desc, dar));
+	channel_writel(dwc, CTL_LO, ctllo);
+	channel_writel(dwc, CTL_HI, lli_read(desc, ctlhi));
+	channel_set_bit(dw, CH_EN, dwc->mask);
+
+	/* Move pointer to next descriptor */
+	dwc->tx_node_active = dwc->tx_node_active->next;
+}
+
+/* Called with dwc->lock held and bh disabled */
+static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
+{
+	struct dw_dma	*dw = to_dw_dma(dwc->chan.device);
+	u8		lms = DWC_LLP_LMS(dwc->dws.m_master);
+	unsigned long	was_soft_llp;
+
+	/* ASSERT:  channel is idle */
+	if (dma_readl(dw, CH_EN) & dwc->mask) {
+		dev_err(chan2dev(&dwc->chan),
+			"%s: BUG: Attempted to start non-idle channel\n",
+			__func__);
+		dwc_dump_chan_regs(dwc);
+
+		/* The tasklet will hopefully advance the queue... */
+		return;
+	}
+
+	if (dwc->nollp) {
+		was_soft_llp = test_and_set_bit(DW_DMA_IS_SOFT_LLP,
+						&dwc->flags);
+		if (was_soft_llp) {
+			dev_err(chan2dev(&dwc->chan),
+				"BUG: Attempted to start new LLP transfer inside ongoing one\n");
+			return;
+		}
+
+		dwc_initialize(dwc);
+
+		first->residue = first->total_len;
+		dwc->tx_node_active = &first->tx_list;
+
+		/* Submit first block */
+		dwc_do_single_block(dwc, first);
+
+		return;
+	}
+
+	dwc_initialize(dwc);
+
+	channel_writel(dwc, LLP, first->txd.phys | lms);
+	channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+	channel_writel(dwc, CTL_HI, 0);
+	channel_set_bit(dw, CH_EN, dwc->mask);
+}
+
+static void dwc_dostart_first_queued(struct dw_dma_chan *dwc)
+{
+	struct dw_desc *desc;
+
+	if (list_empty(&dwc->queue))
+		return;
+
+	list_move(dwc->queue.next, &dwc->active_list);
+	desc = dwc_first_active(dwc);
+	dev_vdbg(chan2dev(&dwc->chan), "%s: started %u\n", __func__, desc->txd.cookie);
+	dwc_dostart(dwc, desc);
+}
+
+/*----------------------------------------------------------------------*/
+
+static void
+dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc,
+		bool callback_required)
+{
+	struct dma_async_tx_descriptor	*txd = &desc->txd;
+	struct dw_desc			*child;
+	unsigned long			flags;
+	struct dmaengine_desc_callback	cb;
+
+	dev_vdbg(chan2dev(&dwc->chan), "descriptor %u complete\n", txd->cookie);
+
+	spin_lock_irqsave(&dwc->lock, flags);
+	dma_cookie_complete(txd);
+	if (callback_required)
+		dmaengine_desc_get_callback(txd, &cb);
+	else
+		memset(&cb, 0, sizeof(cb));
+
+	/* async_tx_ack */
+	list_for_each_entry(child, &desc->tx_list, desc_node)
+		async_tx_ack(&child->txd);
+	async_tx_ack(&desc->txd);
+	dwc_desc_put(dwc, desc);
+	spin_unlock_irqrestore(&dwc->lock, flags);
+
+	dmaengine_desc_callback_invoke(&cb, NULL);
+}
+
+static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+	struct dw_desc *desc, *_desc;
+	LIST_HEAD(list);
+	unsigned long flags;
+
+	spin_lock_irqsave(&dwc->lock, flags);
+	if (dma_readl(dw, CH_EN) & dwc->mask) {
+		dev_err(chan2dev(&dwc->chan),
+			"BUG: XFER bit set, but channel not idle!\n");
+
+		/* Try to continue after resetting the channel... */
+		dwc_chan_disable(dw, dwc);
+	}
+
+	/*
+	 * Submit queued descriptors ASAP, i.e. before we go through
+	 * the completed ones.
+	 */
+	list_splice_init(&dwc->active_list, &list);
+	dwc_dostart_first_queued(dwc);
+
+	spin_unlock_irqrestore(&dwc->lock, flags);
+
+	list_for_each_entry_safe(desc, _desc, &list, desc_node)
+		dwc_descriptor_complete(dwc, desc, true);
+}
+
+/* Returns how many bytes were already received from source */
+static inline u32 dwc_get_sent(struct dw_dma_chan *dwc)
+{
+	u32 ctlhi = channel_readl(dwc, CTL_HI);
+	u32 ctllo = channel_readl(dwc, CTL_LO);
+
+	return block2bytes(dwc, ctlhi, ctllo >> 4 & 7);
+}
+
+static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+	dma_addr_t llp;
+	struct dw_desc *desc, *_desc;
+	struct dw_desc *child;
+	u32 status_xfer;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dwc->lock, flags);
+	llp = channel_readl(dwc, LLP);
+	status_xfer = dma_readl(dw, RAW.XFER);
+
+	if (status_xfer & dwc->mask) {
+		/* Everything we've submitted is done */
+		dma_writel(dw, CLEAR.XFER, dwc->mask);
+
+		if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags)) {
+			struct list_head *head, *active = dwc->tx_node_active;
+
+			/*
+			 * We are inside first active descriptor.
+			 * Otherwise something is really wrong.
+			 */
+			desc = dwc_first_active(dwc);
+
+			head = &desc->tx_list;
+			if (active != head) {
+				/* Update residue to reflect last sent descriptor */
+				if (active == head->next)
+					desc->residue -= desc->len;
+				else
+					desc->residue -= to_dw_desc(active->prev)->len;
+
+				child = to_dw_desc(active);
+
+				/* Submit next block */
+				dwc_do_single_block(dwc, child);
+
+				spin_unlock_irqrestore(&dwc->lock, flags);
+				return;
+			}
+
+			/* We are done here */
+			clear_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags);
+		}
+
+		spin_unlock_irqrestore(&dwc->lock, flags);
+
+		dwc_complete_all(dw, dwc);
+		return;
+	}
+
+	if (list_empty(&dwc->active_list)) {
+		spin_unlock_irqrestore(&dwc->lock, flags);
+		return;
+	}
+
+	if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags)) {
+		dev_vdbg(chan2dev(&dwc->chan), "%s: soft LLP mode\n", __func__);
+		spin_unlock_irqrestore(&dwc->lock, flags);
+		return;
+	}
+
+	dev_vdbg(chan2dev(&dwc->chan), "%s: llp=%pad\n", __func__, &llp);
+
+	list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) {
+		/* Initial residue value */
+		desc->residue = desc->total_len;
+
+		/* Check the first descriptor's address */
+		if (desc->txd.phys == DWC_LLP_LOC(llp)) {
+			spin_unlock_irqrestore(&dwc->lock, flags);
+			return;
+		}
+
+		/* Check the first descriptor's LLP */
+		if (lli_read(desc, llp) == llp) {
+			/* This one is currently in progress */
+			desc->residue -= dwc_get_sent(dwc);
+			spin_unlock_irqrestore(&dwc->lock, flags);
+			return;
+		}
+
+		desc->residue -= desc->len;
+		list_for_each_entry(child, &desc->tx_list, desc_node) {
+			if (lli_read(child, llp) == llp) {
+				/* Currently in progress */
+				desc->residue -= dwc_get_sent(dwc);
+				spin_unlock_irqrestore(&dwc->lock, flags);
+				return;
+			}
+			desc->residue -= child->len;
+		}
+
+		/*
+		 * No descriptors so far seem to be in progress, i.e.
+		 * this one must be done.
+		 */
+		spin_unlock_irqrestore(&dwc->lock, flags);
+		dwc_descriptor_complete(dwc, desc, true);
+		spin_lock_irqsave(&dwc->lock, flags);
+	}
+
+	dev_err(chan2dev(&dwc->chan),
+		"BUG: All descriptors done, but channel not idle!\n");
+
+	/* Try to continue after resetting the channel... */
+	dwc_chan_disable(dw, dwc);
+
+	dwc_dostart_first_queued(dwc);
+	spin_unlock_irqrestore(&dwc->lock, flags);
+}
+
+static inline void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_desc *desc)
+{
+	dev_crit(chan2dev(&dwc->chan), "  desc: s0x%x d0x%x l0x%x c0x%x:%x\n",
+		 lli_read(desc, sar),
+		 lli_read(desc, dar),
+		 lli_read(desc, llp),
+		 lli_read(desc, ctlhi),
+		 lli_read(desc, ctllo));
+}
+
+static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+	struct dw_desc *bad_desc;
+	struct dw_desc *child;
+	unsigned long flags;
+
+	dwc_scan_descriptors(dw, dwc);
+
+	spin_lock_irqsave(&dwc->lock, flags);
+
+	/*
+	 * The descriptor currently at the head of the active list is
+	 * borked. Since we don't have any way to report errors, we'll
+	 * just have to scream loudly and try to carry on.
+	 */
+	bad_desc = dwc_first_active(dwc);
+	list_del_init(&bad_desc->desc_node);
+	list_move(dwc->queue.next, dwc->active_list.prev);
+
+	/* Clear the error flag and try to restart the controller */
+	dma_writel(dw, CLEAR.ERROR, dwc->mask);
+	if (!list_empty(&dwc->active_list))
+		dwc_dostart(dwc, dwc_first_active(dwc));
+
+	/*
+	 * WARN may seem harsh, but since this only happens
+	 * when someone submits a bad physical address in a
+	 * descriptor, we should consider ourselves lucky that the
+	 * controller flagged an error instead of scribbling over
+	 * random memory locations.
+	 */
+	dev_WARN(chan2dev(&dwc->chan), "Bad descriptor submitted for DMA!\n"
+				       "  cookie: %d\n", bad_desc->txd.cookie);
+	dwc_dump_lli(dwc, bad_desc);
+	list_for_each_entry(child, &bad_desc->tx_list, desc_node)
+		dwc_dump_lli(dwc, child);
+
+	spin_unlock_irqrestore(&dwc->lock, flags);
+
+	/* Pretend the descriptor completed successfully */
+	dwc_descriptor_complete(dwc, bad_desc, true);
+}
+
+static void dw_dma_tasklet(unsigned long data)
+{
+	struct dw_dma *dw = (struct dw_dma *)data;
+	struct dw_dma_chan *dwc;
+	u32 status_xfer;
+	u32 status_err;
+	unsigned int i;
+
+	status_xfer = dma_readl(dw, RAW.XFER);
+	status_err = dma_readl(dw, RAW.ERROR);
+
+	dev_vdbg(dw->dma.dev, "%s: status_err=%x\n", __func__, status_err);
+
+	for (i = 0; i < dw->dma.chancnt; i++) {
+		dwc = &dw->chan[i];
+		if (test_bit(DW_DMA_IS_CYCLIC, &dwc->flags))
+			dev_vdbg(dw->dma.dev, "Cyclic xfer is not implemented\n");
+		else if (status_err & (1 << i))
+			dwc_handle_error(dw, dwc);
+		else if (status_xfer & (1 << i))
+			dwc_scan_descriptors(dw, dwc);
+	}
+
+	/* Re-enable interrupts */
+	channel_set_bit(dw, MASK.XFER, dw->all_chan_mask);
+	channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask);
+}
+
+static irqreturn_t dw_dma_interrupt(int irq, void *dev_id)
+{
+	struct dw_dma *dw = dev_id;
+	u32 status;
+
+	/* If the DMAC is not in use, this (shared) interrupt is not for us */
+	if (!dw->in_use)
+		return IRQ_NONE;
+
+	status = dma_readl(dw, STATUS_INT);
+	dev_vdbg(dw->dma.dev, "%s: status=0x%x\n", __func__, status);
+
+	/* Check if we have any interrupt from the DMAC */
+	if (!status)
+		return IRQ_NONE;
+
+	/*
+	 * Just disable the interrupts. We'll turn them back on in the
+	 * softirq handler.
+	 */
+	channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
+
+	status = dma_readl(dw, STATUS_INT);
+	if (status) {
+		dev_err(dw->dma.dev,
+			"BUG: Unexpected interrupts pending: 0x%x\n",
+			status);
+
+		/* Try to recover */
+		channel_clear_bit(dw, MASK.XFER, (1 << 8) - 1);
+		channel_clear_bit(dw, MASK.BLOCK, (1 << 8) - 1);
+		channel_clear_bit(dw, MASK.SRC_TRAN, (1 << 8) - 1);
+		channel_clear_bit(dw, MASK.DST_TRAN, (1 << 8) - 1);
+		channel_clear_bit(dw, MASK.ERROR, (1 << 8) - 1);
+	}
+
+	tasklet_schedule(&dw->tasklet);
+
+	return IRQ_HANDLED;
+}
+
+/*----------------------------------------------------------------------*/
+
+static struct dma_async_tx_descriptor *
+dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_dma		*dw = to_dw_dma(chan->device);
+	struct dw_desc		*desc;
+	struct dw_desc		*first;
+	struct dw_desc		*prev;
+	size_t			xfer_count;
+	size_t			offset;
+	u8			m_master = dwc->dws.m_master;
+	unsigned int		src_width;
+	unsigned int		dst_width;
+	unsigned int		data_width = dw->pdata->data_width[m_master];
+	u32			ctllo;
+	u8			lms = DWC_LLP_LMS(m_master);
+
+	dev_vdbg(chan2dev(chan),
+			"%s: d%pad s%pad l0x%zx f0x%lx\n", __func__,
+			&dest, &src, len, flags);
+
+	if (unlikely(!len)) {
+		dev_dbg(chan2dev(chan), "%s: length is zero!\n", __func__);
+		return NULL;
+	}
+
+	dwc->direction = DMA_MEM_TO_MEM;
+
+	src_width = dst_width = __ffs(data_width | src | dest | len);
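+	/*
+	 * Example: with data_width = 4 and 4-byte-aligned src, dest and len,
+	 * __ffs(4 | ...) = 2, i.e. 32-bit transfers; any odd address or
+	 * length degrades this to byte-wide transfers (__ffs(...) = 0).
+	 */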
+
+	ctllo = DWC_DEFAULT_CTLLO(chan)
+			| DWC_CTLL_DST_WIDTH(dst_width)
+			| DWC_CTLL_SRC_WIDTH(src_width)
+			| DWC_CTLL_DST_INC
+			| DWC_CTLL_SRC_INC
+			| DWC_CTLL_FC_M2M;
+	prev = first = NULL;
+
+	for (offset = 0; offset < len; offset += xfer_count) {
+		desc = dwc_desc_get(dwc);
+		if (!desc)
+			goto err_desc_get;
+
+		lli_write(desc, sar, src + offset);
+		lli_write(desc, dar, dest + offset);
+		lli_write(desc, ctllo, ctllo);
+		lli_write(desc, ctlhi, bytes2block(dwc, len - offset, src_width, &xfer_count));
+		desc->len = xfer_count;
+
+		if (!first) {
+			first = desc;
+		} else {
+			lli_write(prev, llp, desc->txd.phys | lms);
+			list_add_tail(&desc->desc_node, &first->tx_list);
+		}
+		prev = desc;
+	}
+
+	if (flags & DMA_PREP_INTERRUPT)
+		/* Trigger interrupt after last block */
+		lli_set(prev, ctllo, DWC_CTLL_INT_EN);
+
+	prev->lli.llp = 0;
+	lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+	first->txd.flags = flags;
+	first->total_len = len;
+
+	return &first->txd;
+
+err_desc_get:
+	dwc_desc_put(dwc, first);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_dma		*dw = to_dw_dma(chan->device);
+	struct dma_slave_config	*sconfig = &dwc->dma_sconfig;
+	struct dw_desc		*prev;
+	struct dw_desc		*first;
+	u32			ctllo;
+	u8			m_master = dwc->dws.m_master;
+	u8			lms = DWC_LLP_LMS(m_master);
+	dma_addr_t		reg;
+	unsigned int		reg_width;
+	unsigned int		mem_width;
+	unsigned int		data_width = dw->pdata->data_width[m_master];
+	unsigned int		i;
+	struct scatterlist	*sg;
+	size_t			total_len = 0;
+
+	dev_vdbg(chan2dev(chan), "%s\n", __func__);
+
+	if (unlikely(!is_slave_direction(direction) || !sg_len))
+		return NULL;
+
+	dwc->direction = direction;
+
+	prev = first = NULL;
+
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		reg_width = __ffs(sconfig->dst_addr_width);
+		reg = sconfig->dst_addr;
+		ctllo = (DWC_DEFAULT_CTLLO(chan)
+				| DWC_CTLL_DST_WIDTH(reg_width)
+				| DWC_CTLL_DST_FIX
+				| DWC_CTLL_SRC_INC);
+
+		ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
+			DWC_CTLL_FC(DW_DMA_FC_D_M2P);
+
+		for_each_sg(sgl, sg, sg_len, i) {
+			struct dw_desc	*desc;
+			u32		len, mem;
+			size_t		dlen;
+
+			mem = sg_dma_address(sg);
+			len = sg_dma_len(sg);
+
+			mem_width = __ffs(data_width | mem | len);
+
+slave_sg_todev_fill_desc:
+			desc = dwc_desc_get(dwc);
+			if (!desc)
+				goto err_desc_get;
+
+			lli_write(desc, sar, mem);
+			lli_write(desc, dar, reg);
+			lli_write(desc, ctlhi, bytes2block(dwc, len, mem_width, &dlen));
+			lli_write(desc, ctllo, ctllo | DWC_CTLL_SRC_WIDTH(mem_width));
+			desc->len = dlen;
+
+			if (!first) {
+				first = desc;
+			} else {
+				lli_write(prev, llp, desc->txd.phys | lms);
+				list_add_tail(&desc->desc_node, &first->tx_list);
+			}
+			prev = desc;
+
+			mem += dlen;
+			len -= dlen;
+			total_len += dlen;
+
+			if (len)
+				goto slave_sg_todev_fill_desc;
+		}
+		break;
+	case DMA_DEV_TO_MEM:
+		reg_width = __ffs(sconfig->src_addr_width);
+		reg = sconfig->src_addr;
+		ctllo = (DWC_DEFAULT_CTLLO(chan)
+				| DWC_CTLL_SRC_WIDTH(reg_width)
+				| DWC_CTLL_DST_INC
+				| DWC_CTLL_SRC_FIX);
+
+		ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
+			DWC_CTLL_FC(DW_DMA_FC_D_P2M);
+
+		for_each_sg(sgl, sg, sg_len, i) {
+			struct dw_desc	*desc;
+			u32		len, mem;
+			size_t		dlen;
+
+			mem = sg_dma_address(sg);
+			len = sg_dma_len(sg);
+
+slave_sg_fromdev_fill_desc:
+			desc = dwc_desc_get(dwc);
+			if (!desc)
+				goto err_desc_get;
+
+			lli_write(desc, sar, reg);
+			lli_write(desc, dar, mem);
+			lli_write(desc, ctlhi, bytes2block(dwc, len, reg_width, &dlen));
+			mem_width = __ffs(data_width | mem | dlen);
+			lli_write(desc, ctllo, ctllo | DWC_CTLL_DST_WIDTH(mem_width));
+			desc->len = dlen;
+
+			if (!first) {
+				first = desc;
+			} else {
+				lli_write(prev, llp, desc->txd.phys | lms);
+				list_add_tail(&desc->desc_node, &first->tx_list);
+			}
+			prev = desc;
+
+			mem += dlen;
+			len -= dlen;
+			total_len += dlen;
+
+			if (len)
+				goto slave_sg_fromdev_fill_desc;
+		}
+		break;
+	default:
+		return NULL;
+	}
+
+	if (flags & DMA_PREP_INTERRUPT)
+		/* Trigger interrupt after last block */
+		lli_set(prev, ctllo, DWC_CTLL_INT_EN);
+
+	prev->lli.llp = 0;
+	lli_clear(prev, ctllo, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+	first->total_len = total_len;
+
+	return &first->txd;
+
+err_desc_get:
+	dev_err(chan2dev(chan),
+		"not enough descriptors available. Direction %d\n", direction);
+	dwc_desc_put(dwc, first);
+	return NULL;
+}
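+
+/*
+ * Minimal client-side sketch (an illustration, not part of this driver):
+ * a peripheral driver would typically drive the slave path above as
+ *
+ *	struct dma_slave_config cfg = {
+ *		.direction = DMA_MEM_TO_DEV,
+ *		.dst_addr = fifo_phys,
+ *		.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+ *		.dst_maxburst = 8,
+ *	};
+ *
+ *	dmaengine_slave_config(chan, &cfg);
+ *	desc = dmaengine_prep_slave_sg(chan, sgl, sg_len, DMA_MEM_TO_DEV,
+ *				       DMA_PREP_INTERRUPT);
+ *	dmaengine_submit(desc);
+ *	dma_async_issue_pending(chan);
+ *
+ * where fifo_phys, sgl and sg_len are hypothetical, pre-mapped resources
+ * (error handling omitted).
+ */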
+
+bool dw_dma_filter(struct dma_chan *chan, void *param)
+{
+	struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+	struct dw_dma_slave *dws = param;
+
+	if (dws->dma_dev != chan->device->dev)
+		return false;
+
+	/* We have to copy data since dws can be temporary storage */
+	memcpy(&dwc->dws, dws, sizeof(struct dw_dma_slave));
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(dw_dma_filter);
+
+static int dwc_config(struct dma_chan *chan, struct dma_slave_config *sconfig)
+{
+	struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+	struct dma_slave_config *sc = &dwc->dma_sconfig;
+	struct dw_dma *dw = to_dw_dma(chan->device);
+	/*
+	 * Fix sconfig's burst sizes according to the controller: the register
+	 * encoding maps them as 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3.
+	 *
+	 * NOTE: burst size 2 is not supported by the DesignWare controller,
+	 *       while iDMA 32-bit does support it.
+	 */
+	u32 s = dw->pdata->is_idma32 ? 1 : 2;
+
+	/* Check if chan will be configured for slave transfers */
+	if (!is_slave_direction(sconfig->direction))
+		return -EINVAL;
+
+	memcpy(&dwc->dma_sconfig, sconfig, sizeof(*sconfig));
+	dwc->direction = sconfig->direction;
+
+	sc->src_maxburst = sc->src_maxburst > 1 ? fls(sc->src_maxburst) - s : 0;
+	sc->dst_maxburst = sc->dst_maxburst > 1 ? fls(sc->dst_maxburst) - s : 0;
+
+	return 0;
+}
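+
+/*
+ * Worked example: a client requesting src_maxburst = 8 ends up with the
+ * register encoding fls(8) - 2 = 2 on DesignWare, or fls(8) - 1 = 3 on
+ * iDMA 32-bit, which has an extra encoding step for burst size 2.
+ */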
+
+static void dwc_chan_pause(struct dw_dma_chan *dwc, bool drain)
+{
+	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+	unsigned int		count = 20;	/* timeout iterations */
+	u32			cfglo;
+
+	cfglo = channel_readl(dwc, CFG_LO);
+	if (dw->pdata->is_idma32) {
+		if (drain)
+			cfglo |= IDMA32C_CFGL_CH_DRAIN;
+		else
+			cfglo &= ~IDMA32C_CFGL_CH_DRAIN;
+	}
+	channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP);
+	while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY) && count--)
+		udelay(2);
+
+	set_bit(DW_DMA_IS_PAUSED, &dwc->flags);
+}
+
+static int dwc_pause(struct dma_chan *chan)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	unsigned long		flags;
+
+	spin_lock_irqsave(&dwc->lock, flags);
+	dwc_chan_pause(dwc, false);
+	spin_unlock_irqrestore(&dwc->lock, flags);
+
+	return 0;
+}
+
+static inline void dwc_chan_resume(struct dw_dma_chan *dwc)
+{
+	u32 cfglo = channel_readl(dwc, CFG_LO);
+
+	channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP);
+
+	clear_bit(DW_DMA_IS_PAUSED, &dwc->flags);
+}
+
+static int dwc_resume(struct dma_chan *chan)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	unsigned long		flags;
+
+	spin_lock_irqsave(&dwc->lock, flags);
+
+	if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags))
+		dwc_chan_resume(dwc);
+
+	spin_unlock_irqrestore(&dwc->lock, flags);
+
+	return 0;
+}
+
+static int dwc_terminate_all(struct dma_chan *chan)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_dma		*dw = to_dw_dma(chan->device);
+	struct dw_desc		*desc, *_desc;
+	unsigned long		flags;
+	LIST_HEAD(list);
+
+	spin_lock_irqsave(&dwc->lock, flags);
+
+	clear_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags);
+
+	dwc_chan_pause(dwc, true);
+
+	dwc_chan_disable(dw, dwc);
+
+	dwc_chan_resume(dwc);
+
+	/* active_list entries will end up before queued entries */
+	list_splice_init(&dwc->queue, &list);
+	list_splice_init(&dwc->active_list, &list);
+
+	spin_unlock_irqrestore(&dwc->lock, flags);
+
+	/* Flush all pending and queued descriptors */
+	list_for_each_entry_safe(desc, _desc, &list, desc_node)
+		dwc_descriptor_complete(dwc, desc, false);
+
+	return 0;
+}
+
+static struct dw_desc *dwc_find_desc(struct dw_dma_chan *dwc, dma_cookie_t c)
+{
+	struct dw_desc *desc;
+
+	list_for_each_entry(desc, &dwc->active_list, desc_node)
+		if (desc->txd.cookie == c)
+			return desc;
+
+	return NULL;
+}
+
+static u32 dwc_get_residue(struct dw_dma_chan *dwc, dma_cookie_t cookie)
+{
+	struct dw_desc *desc;
+	unsigned long flags;
+	u32 residue;
+
+	spin_lock_irqsave(&dwc->lock, flags);
+
+	desc = dwc_find_desc(dwc, cookie);
+	if (desc) {
+		if (desc == dwc_first_active(dwc)) {
+			residue = desc->residue;
+			if (test_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags) && residue)
+				residue -= dwc_get_sent(dwc);
+		} else {
+			residue = desc->total_len;
+		}
+	} else {
+		residue = 0;
+	}
+
+	spin_unlock_irqrestore(&dwc->lock, flags);
+	return residue;
+}
+
+static enum dma_status
+dwc_tx_status(struct dma_chan *chan,
+	      dma_cookie_t cookie,
+	      struct dma_tx_state *txstate)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	enum dma_status		ret;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	dma_set_residue(txstate, dwc_get_residue(dwc, cookie));
+
+	if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags) && ret == DMA_IN_PROGRESS)
+		return DMA_PAUSED;
+
+	return ret;
+}
+
+static void dwc_issue_pending(struct dma_chan *chan)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	unsigned long		flags;
+
+	spin_lock_irqsave(&dwc->lock, flags);
+	if (list_empty(&dwc->active_list))
+		dwc_dostart_first_queued(dwc);
+	spin_unlock_irqrestore(&dwc->lock, flags);
+}
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Program FIFO size of channels.
+ *
+ * By default the full FIFO (512 bytes) is assigned to channel 0. Here we
+ * slice the FIFO into equal parts among the channels.
+ */
+static void idma32_fifo_partition(struct dw_dma *dw)
+{
+	u64 value = IDMA32C_FP_PSIZE_CH0(64) | IDMA32C_FP_PSIZE_CH1(64) |
+		    IDMA32C_FP_UPDATE;
+	u64 fifo_partition = 0;
+
+	if (!dw->pdata->is_idma32)
+		return;
+
+	/* Fill FIFO_PARTITION low bits (Channels 0..1, 4..5) */
+	fifo_partition |= value << 0;
+
+	/* Fill FIFO_PARTITION high bits (Channels 2..3, 6..7) */
+	fifo_partition |= value << 32;
+
+	/* Program FIFO Partition registers - 64 bytes per channel */
+	idma32_writeq(dw, FIFO_PARTITION1, fifo_partition);
+	idma32_writeq(dw, FIFO_PARTITION0, fifo_partition);
+}
+
+static void dw_dma_off(struct dw_dma *dw)
+{
+	unsigned int i;
+
+	dma_writel(dw, CFG, 0);
+
+	channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
+
+	while (dma_readl(dw, CFG) & DW_CFG_DMA_EN)
+		cpu_relax();
+
+	for (i = 0; i < dw->dma.chancnt; i++)
+		clear_bit(DW_DMA_IS_INITIALIZED, &dw->chan[i].flags);
+}
+
+static void dw_dma_on(struct dw_dma *dw)
+{
+	dma_writel(dw, CFG, DW_CFG_DMA_EN);
+}
+
+static int dwc_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_dma		*dw = to_dw_dma(chan->device);
+
+	dev_vdbg(chan2dev(chan), "%s\n", __func__);
+
+	/* ASSERT:  channel is idle */
+	if (dma_readl(dw, CH_EN) & dwc->mask) {
+		dev_dbg(chan2dev(chan), "DMA channel not idle?\n");
+		return -EIO;
+	}
+
+	dma_cookie_init(chan);
+
+	/*
+	 * NOTE: some controllers may have additional features that we
+	 * need to initialize here, like "scatter-gather" (which
+	 * doesn't mean what you think it means), and status writeback.
+	 */
+
+	/*
+	 * We need controller-specific data to set up slave transfers.
+	 */
+	if (chan->private && !dw_dma_filter(chan, chan->private)) {
+		dev_warn(chan2dev(chan), "Wrong controller-specific data\n");
+		return -EINVAL;
+	}
+
+	/* Enable controller here if needed */
+	if (!dw->in_use)
+		dw_dma_on(dw);
+	dw->in_use |= dwc->mask;
+
+	return 0;
+}
+
+static void dwc_free_chan_resources(struct dma_chan *chan)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_dma		*dw = to_dw_dma(chan->device);
+	unsigned long		flags;
+	LIST_HEAD(list);
+
+	dev_dbg(chan2dev(chan), "%s: descs allocated=%u\n", __func__,
+			dwc->descs_allocated);
+
+	/* ASSERT:  channel is idle */
+	BUG_ON(!list_empty(&dwc->active_list));
+	BUG_ON(!list_empty(&dwc->queue));
+	BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask);
+
+	spin_lock_irqsave(&dwc->lock, flags);
+
+	/* Clear custom channel configuration */
+	memset(&dwc->dws, 0, sizeof(struct dw_dma_slave));
+
+	clear_bit(DW_DMA_IS_INITIALIZED, &dwc->flags);
+
+	/* Disable interrupts */
+	channel_clear_bit(dw, MASK.XFER, dwc->mask);
+	channel_clear_bit(dw, MASK.BLOCK, dwc->mask);
+	channel_clear_bit(dw, MASK.ERROR, dwc->mask);
+
+	spin_unlock_irqrestore(&dwc->lock, flags);
+
+	/* Disable the controller in case this was its last user */
+	dw->in_use &= ~dwc->mask;
+	if (!dw->in_use)
+		dw_dma_off(dw);
+
+	dev_vdbg(chan2dev(chan), "%s: done\n", __func__);
+}
+
+int dw_dma_probe(struct dw_dma_chip *chip)
+{
+	struct dw_dma_platform_data *pdata;
+	struct dw_dma		*dw;
+	bool			autocfg = false;
+	unsigned int		dw_params;
+	unsigned int		i;
+	int			err;
+
+	dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL);
+	if (!dw)
+		return -ENOMEM;
+
+	dw->pdata = devm_kzalloc(chip->dev, sizeof(*dw->pdata), GFP_KERNEL);
+	if (!dw->pdata)
+		return -ENOMEM;
+
+	dw->regs = chip->regs;
+	chip->dw = dw;
+
+	pm_runtime_get_sync(chip->dev);
+
+	if (!chip->pdata) {
+		dw_params = dma_readl(dw, DW_PARAMS);
+		dev_dbg(chip->dev, "DW_PARAMS: 0x%08x\n", dw_params);
+
+		autocfg = dw_params >> DW_PARAMS_EN & 1;
+		if (!autocfg) {
+			err = -EINVAL;
+			goto err_pdata;
+		}
+
+		/* Reassign the platform data pointer */
+		pdata = dw->pdata;
+
+		/* Get hardware configuration parameters */
+		pdata->nr_channels = (dw_params >> DW_PARAMS_NR_CHAN & 7) + 1;
+		pdata->nr_masters = (dw_params >> DW_PARAMS_NR_MASTER & 3) + 1;
+		for (i = 0; i < pdata->nr_masters; i++) {
+			pdata->data_width[i] =
+				4 << (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3);
+		}
+		pdata->block_size = dma_readl(dw, MAX_BLK_SIZE);
+
+		/* Fill platform data with the default values */
+		pdata->is_private = true;
+		pdata->is_memcpy = true;
+		pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING;
+		pdata->chan_priority = CHAN_PRIORITY_ASCENDING;
+	} else if (chip->pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) {
+		err = -EINVAL;
+		goto err_pdata;
+	} else {
+		memcpy(dw->pdata, chip->pdata, sizeof(*dw->pdata));
+
+		/* Reassign the platform data pointer */
+		pdata = dw->pdata;
+	}
+
+	dw->chan = devm_kcalloc(chip->dev, pdata->nr_channels, sizeof(*dw->chan),
+				GFP_KERNEL);
+	if (!dw->chan) {
+		err = -ENOMEM;
+		goto err_pdata;
+	}
+
+	/* Calculate all channel mask before DMA setup */
+	dw->all_chan_mask = (1 << pdata->nr_channels) - 1;
+
+	/* Force dma off, just in case */
+	dw_dma_off(dw);
+
+	idma32_fifo_partition(dw);
+
+	/* Device and instance ID for IRQ and DMA pool */
+	if (pdata->is_idma32)
+		snprintf(dw->name, sizeof(dw->name), "idma32:dmac%d", chip->id);
+	else
+		snprintf(dw->name, sizeof(dw->name), "dw:dmac%d", chip->id);
+
+	/* Create a pool of consistent memory blocks for hardware descriptors */
+	dw->desc_pool = dmam_pool_create(dw->name, chip->dev,
+					 sizeof(struct dw_desc), 4, 0);
+	if (!dw->desc_pool) {
+		dev_err(chip->dev, "No memory for descriptors dma pool\n");
+		err = -ENOMEM;
+		goto err_pdata;
+	}
+
+	tasklet_init(&dw->tasklet, dw_dma_tasklet, (unsigned long)dw);
+
+	err = request_irq(chip->irq, dw_dma_interrupt, IRQF_SHARED,
+			  dw->name, dw);
+	if (err)
+		goto err_pdata;
+
+	INIT_LIST_HEAD(&dw->dma.channels);
+	for (i = 0; i < pdata->nr_channels; i++) {
+		struct dw_dma_chan	*dwc = &dw->chan[i];
+
+		dwc->chan.device = &dw->dma;
+		dma_cookie_init(&dwc->chan);
+		if (pdata->chan_allocation_order == CHAN_ALLOCATION_ASCENDING)
+			list_add_tail(&dwc->chan.device_node,
+					&dw->dma.channels);
+		else
+			list_add(&dwc->chan.device_node, &dw->dma.channels);
+
+		/* 7 is highest priority & 0 is lowest. */
+		if (pdata->chan_priority == CHAN_PRIORITY_ASCENDING)
+			dwc->priority = pdata->nr_channels - i - 1;
+		else
+			dwc->priority = i;
+
+		dwc->ch_regs = &__dw_regs(dw)->CHAN[i];
+		spin_lock_init(&dwc->lock);
+		dwc->mask = 1 << i;
+
+		INIT_LIST_HEAD(&dwc->active_list);
+		INIT_LIST_HEAD(&dwc->queue);
+
+		channel_clear_bit(dw, CH_EN, dwc->mask);
+
+		dwc->direction = DMA_TRANS_NONE;
+
+		/* Hardware configuration */
+		if (autocfg) {
+			unsigned int r = DW_DMA_MAX_NR_CHANNELS - i - 1;
+			void __iomem *addr = &__dw_regs(dw)->DWC_PARAMS[r];
+			unsigned int dwc_params = readl(addr);
+
+			dev_dbg(chip->dev, "DWC_PARAMS[%d]: 0x%08x\n", i,
+					   dwc_params);
+
+			/*
+			 * Decode the maximum block size for the given channel.
+			 * The stored 4-bit value represents block sizes from
+			 * 3 (0x00) up to 4095 (0x0a).
+			 */
+			dwc->block_size =
+				(4 << ((pdata->block_size >> 4 * i) & 0xf)) - 1;
+			dwc->nollp =
+				(dwc_params >> DWC_PARAMS_MBLK_EN & 0x1) == 0;
+		} else {
+			dwc->block_size = pdata->block_size;
+			dwc->nollp = !pdata->multi_block[i];
+		}
+	}
+
+	/* Clear all interrupts on all channels. */
+	dma_writel(dw, CLEAR.XFER, dw->all_chan_mask);
+	dma_writel(dw, CLEAR.BLOCK, dw->all_chan_mask);
+	dma_writel(dw, CLEAR.SRC_TRAN, dw->all_chan_mask);
+	dma_writel(dw, CLEAR.DST_TRAN, dw->all_chan_mask);
+	dma_writel(dw, CLEAR.ERROR, dw->all_chan_mask);
+
+	/* Set capabilities */
+	dma_cap_set(DMA_SLAVE, dw->dma.cap_mask);
+	if (pdata->is_private)
+		dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask);
+	if (pdata->is_memcpy)
+		dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
+
+	dw->dma.dev = chip->dev;
+	dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources;
+	dw->dma.device_free_chan_resources = dwc_free_chan_resources;
+
+	dw->dma.device_prep_dma_memcpy = dwc_prep_dma_memcpy;
+	dw->dma.device_prep_slave_sg = dwc_prep_slave_sg;
+
+	dw->dma.device_config = dwc_config;
+	dw->dma.device_pause = dwc_pause;
+	dw->dma.device_resume = dwc_resume;
+	dw->dma.device_terminate_all = dwc_terminate_all;
+
+	dw->dma.device_tx_status = dwc_tx_status;
+	dw->dma.device_issue_pending = dwc_issue_pending;
+
+	/* DMA capabilities */
+	dw->dma.src_addr_widths = DW_DMA_BUSWIDTHS;
+	dw->dma.dst_addr_widths = DW_DMA_BUSWIDTHS;
+	dw->dma.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV) |
+			     BIT(DMA_MEM_TO_MEM);
+	dw->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+	err = dma_async_device_register(&dw->dma);
+	if (err)
+		goto err_dma_register;
+
+	dev_info(chip->dev, "DesignWare DMA Controller, %d channels\n",
+		 pdata->nr_channels);
+
+	pm_runtime_put_sync_suspend(chip->dev);
+
+	return 0;
+
+err_dma_register:
+	free_irq(chip->irq, dw);
+err_pdata:
+	pm_runtime_put_sync_suspend(chip->dev);
+	return err;
+}
+EXPORT_SYMBOL_GPL(dw_dma_probe);
+
+int dw_dma_remove(struct dw_dma_chip *chip)
+{
+	struct dw_dma		*dw = chip->dw;
+	struct dw_dma_chan	*dwc, *_dwc;
+
+	pm_runtime_get_sync(chip->dev);
+
+	dw_dma_off(dw);
+	dma_async_device_unregister(&dw->dma);
+
+	free_irq(chip->irq, dw);
+	tasklet_kill(&dw->tasklet);
+
+	list_for_each_entry_safe(dwc, _dwc, &dw->dma.channels,
+			chan.device_node) {
+		list_del(&dwc->chan.device_node);
+		channel_clear_bit(dw, CH_EN, dwc->mask);
+	}
+
+	pm_runtime_put_sync_suspend(chip->dev);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dw_dma_remove);
+
+int dw_dma_disable(struct dw_dma_chip *chip)
+{
+	struct dw_dma *dw = chip->dw;
+
+	dw_dma_off(dw);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dw_dma_disable);
+
+int dw_dma_enable(struct dw_dma_chip *chip)
+{
+	struct dw_dma *dw = chip->dw;
+
+	idma32_fifo_partition(dw);
+
+	dw_dma_on(dw);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dw_dma_enable);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller core driver");
+MODULE_AUTHOR("Haavard Skinnemoen (Atmel)");
+MODULE_AUTHOR("Viresh Kumar <vireshk@kernel.org>");
diff --git a/drivers/dma/dw/internal.h b/drivers/dma/dw/internal.h
new file mode 100644
index 0000000..4143973
--- /dev/null
+++ b/drivers/dma/dw/internal.h
@@ -0,0 +1,23 @@
+/*
+ * Driver for the Synopsys DesignWare DMA Controller
+ *
+ * Copyright (C) 2013 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _DMA_DW_INTERNAL_H
+#define _DMA_DW_INTERNAL_H
+
+#include <linux/dma/dw.h>
+
+#include "regs.h"
+
+int dw_dma_disable(struct dw_dma_chip *chip);
+int dw_dma_enable(struct dw_dma_chip *chip);
+
+extern bool dw_dma_filter(struct dma_chan *chan, void *param);
+
+#endif /* _DMA_DW_INTERNAL_H */
diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c
new file mode 100644
index 0000000..7778ed7
--- /dev/null
+++ b/drivers/dma/dw/pci.c
@@ -0,0 +1,149 @@
+/*
+ * PCI driver for the Synopsys DesignWare DMA Controller
+ *
+ * Copyright (C) 2013 Intel Corporation
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+
+#include "internal.h"
+
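+/*
+ * Intel Merrifield exposes the iDMA 32-bit variant of this controller; the
+ * 131071-byte block size below matches its 17-bit BLOCK_TS field (2^17 - 1).
+ */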
+static struct dw_dma_platform_data mrfld_pdata = {
+	.nr_channels = 8,
+	.is_private = true,
+	.is_memcpy = true,
+	.is_idma32 = true,
+	.chan_allocation_order = CHAN_ALLOCATION_ASCENDING,
+	.chan_priority = CHAN_PRIORITY_ASCENDING,
+	.block_size = 131071,
+	.nr_masters = 1,
+	.data_width = {4},
+};
+
+static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid)
+{
+	const struct dw_dma_platform_data *pdata = (void *)pid->driver_data;
+	struct dw_dma_chip *chip;
+	int ret;
+
+	ret = pcim_enable_device(pdev);
+	if (ret)
+		return ret;
+
+	ret = pcim_iomap_regions(pdev, 1 << 0, pci_name(pdev));
+	if (ret) {
+		dev_err(&pdev->dev, "I/O memory remapping failed\n");
+		return ret;
+	}
+
+	pci_set_master(pdev);
+	pci_try_set_mwi(pdev);
+
+	ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (ret)
+		return ret;
+
+	ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (ret)
+		return ret;
+
+	chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	chip->dev = &pdev->dev;
+	chip->id = pdev->devfn;
+	chip->regs = pcim_iomap_table(pdev)[0];
+	chip->irq = pdev->irq;
+	chip->pdata = pdata;
+
+	ret = dw_dma_probe(chip);
+	if (ret)
+		return ret;
+
+	pci_set_drvdata(pdev, chip);
+
+	return 0;
+}
+
+static void dw_pci_remove(struct pci_dev *pdev)
+{
+	struct dw_dma_chip *chip = pci_get_drvdata(pdev);
+	int ret;
+
+	ret = dw_dma_remove(chip);
+	if (ret)
+		dev_warn(&pdev->dev, "can't remove device properly: %d\n", ret);
+}
+
+#ifdef CONFIG_PM_SLEEP
+
+static int dw_pci_suspend_late(struct device *dev)
+{
+	struct pci_dev *pci = to_pci_dev(dev);
+	struct dw_dma_chip *chip = pci_get_drvdata(pci);
+
+	return dw_dma_disable(chip);
+}
+
+static int dw_pci_resume_early(struct device *dev)
+{
+	struct pci_dev *pci = to_pci_dev(dev);
+	struct dw_dma_chip *chip = pci_get_drvdata(pci);
+
+	return dw_dma_enable(chip);
+}
+
+#endif /* CONFIG_PM_SLEEP */
+
+static const struct dev_pm_ops dw_pci_dev_pm_ops = {
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(dw_pci_suspend_late, dw_pci_resume_early)
+};
+
+static const struct pci_device_id dw_pci_id_table[] = {
+	/* Medfield (GPDMA) */
+	{ PCI_VDEVICE(INTEL, 0x0827) },
+
+	/* BayTrail */
+	{ PCI_VDEVICE(INTEL, 0x0f06) },
+	{ PCI_VDEVICE(INTEL, 0x0f40) },
+
+	/* Merrifield iDMA 32-bit (GPDMA) */
+	{ PCI_VDEVICE(INTEL, 0x11a2), (kernel_ulong_t)&mrfld_pdata },
+
+	/* Braswell */
+	{ PCI_VDEVICE(INTEL, 0x2286) },
+	{ PCI_VDEVICE(INTEL, 0x22c0) },
+
+	/* Haswell */
+	{ PCI_VDEVICE(INTEL, 0x9c60) },
+
+	/* Broadwell */
+	{ PCI_VDEVICE(INTEL, 0x9ce0) },
+
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, dw_pci_id_table);
+
+static struct pci_driver dw_pci_driver = {
+	.name		= "dw_dmac_pci",
+	.id_table	= dw_pci_id_table,
+	.probe		= dw_pci_probe,
+	.remove		= dw_pci_remove,
+	.driver	= {
+		.pm	= &dw_pci_dev_pm_ops,
+	},
+};
+
+module_pci_driver(dw_pci_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller PCI driver");
+MODULE_AUTHOR("Andy Shevchenko <andriy.shevchenko@linux.intel.com>");
diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c
new file mode 100644
index 0000000..f62dd09
--- /dev/null
+++ b/drivers/dma/dw/platform.c
@@ -0,0 +1,348 @@
+/*
+ * Platform driver for the Synopsys DesignWare DMA Controller
+ *
+ * Copyright (C) 2007-2008 Atmel Corporation
+ * Copyright (C) 2010-2011 ST Microelectronics
+ * Copyright (C) 2013 Intel Corporation
+ *
+ * Some parts of this driver are derived from the original dw_dmac.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/clk.h>
+#include <linux/pm_runtime.h>
+#include <linux/platform_device.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/acpi.h>
+#include <linux/acpi_dma.h>
+
+#include "internal.h"
+
+#define DRV_NAME	"dw_dmac"
+
+static struct dma_chan *dw_dma_of_xlate(struct of_phandle_args *dma_spec,
+					struct of_dma *ofdma)
+{
+	struct dw_dma *dw = ofdma->of_dma_data;
+	struct dw_dma_slave slave = {
+		.dma_dev = dw->dma.dev,
+	};
+	dma_cap_mask_t cap;
+
+	if (dma_spec->args_count != 3)
+		return NULL;
+
+	slave.src_id = dma_spec->args[0];
+	slave.dst_id = dma_spec->args[0];
+	slave.m_master = dma_spec->args[1];
+	slave.p_master = dma_spec->args[2];
+
+	if (WARN_ON(slave.src_id >= DW_DMA_MAX_NR_REQUESTS ||
+		    slave.dst_id >= DW_DMA_MAX_NR_REQUESTS ||
+		    slave.m_master >= dw->pdata->nr_masters ||
+		    slave.p_master >= dw->pdata->nr_masters))
+		return NULL;
+
+	dma_cap_zero(cap);
+	dma_cap_set(DMA_SLAVE, cap);
+
+	/* TODO: there should be a simpler way to do this */
+	return dma_request_channel(cap, dw_dma_filter, &slave);
+}
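+
+/*
+ * For reference, a client node would use the three-cell specifier decoded
+ * above: request line, memory-side master, peripheral-side master. An
+ * illustrative consumer (values are examples only):
+ *
+ *	serial@e0000000 {
+ *		dmas = <&dmahost 12 0 1>, <&dmahost 13 1 0>;
+ *		dma-names = "rx", "tx";
+ *	};
+ */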
+
+#ifdef CONFIG_ACPI
+static bool dw_dma_acpi_filter(struct dma_chan *chan, void *param)
+{
+	struct acpi_dma_spec *dma_spec = param;
+	struct dw_dma_slave slave = {
+		.dma_dev = dma_spec->dev,
+		.src_id = dma_spec->slave_id,
+		.dst_id = dma_spec->slave_id,
+		.m_master = 0,
+		.p_master = 1,
+	};
+
+	return dw_dma_filter(chan, &slave);
+}
+
+static void dw_dma_acpi_controller_register(struct dw_dma *dw)
+{
+	struct device *dev = dw->dma.dev;
+	struct acpi_dma_filter_info *info;
+	int ret;
+
+	info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return;
+
+	dma_cap_zero(info->dma_cap);
+	dma_cap_set(DMA_SLAVE, info->dma_cap);
+	info->filter_fn = dw_dma_acpi_filter;
+
+	ret = devm_acpi_dma_controller_register(dev, acpi_dma_simple_xlate,
+						info);
+	if (ret)
+		dev_err(dev, "could not register acpi_dma_controller\n");
+}
+#else /* !CONFIG_ACPI */
+static inline void dw_dma_acpi_controller_register(struct dw_dma *dw) {}
+#endif /* !CONFIG_ACPI */
+
+#ifdef CONFIG_OF
+static struct dw_dma_platform_data *
+dw_dma_parse_dt(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct dw_dma_platform_data *pdata;
+	u32 tmp, arr[DW_DMA_MAX_NR_MASTERS], mb[DW_DMA_MAX_NR_CHANNELS];
+	u32 nr_masters;
+	u32 nr_channels;
+
+	if (!np) {
+		dev_err(&pdev->dev, "Missing DT data\n");
+		return NULL;
+	}
+
+	if (of_property_read_u32(np, "dma-masters", &nr_masters))
+		return NULL;
+	if (nr_masters < 1 || nr_masters > DW_DMA_MAX_NR_MASTERS)
+		return NULL;
+
+	if (of_property_read_u32(np, "dma-channels", &nr_channels))
+		return NULL;
+	if (nr_channels > DW_DMA_MAX_NR_CHANNELS)
+		return NULL;
+
+	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return NULL;
+
+	pdata->nr_masters = nr_masters;
+	pdata->nr_channels = nr_channels;
+
+	if (of_property_read_bool(np, "is_private"))
+		pdata->is_private = true;
+
+	/*
+	 * All known devices that use DT for configuration support
+	 * memory-to-memory transfers, so enable it by default.
+	 */
+	pdata->is_memcpy = true;
+
+	if (!of_property_read_u32(np, "chan_allocation_order", &tmp))
+		pdata->chan_allocation_order = (unsigned char)tmp;
+
+	if (!of_property_read_u32(np, "chan_priority", &tmp))
+		pdata->chan_priority = tmp;
+
+	if (!of_property_read_u32(np, "block_size", &tmp))
+		pdata->block_size = tmp;
+
+	if (!of_property_read_u32_array(np, "data-width", arr, nr_masters)) {
+		for (tmp = 0; tmp < nr_masters; tmp++)
+			pdata->data_width[tmp] = arr[tmp];
+	} else if (!of_property_read_u32_array(np, "data_width", arr, nr_masters)) {
+		for (tmp = 0; tmp < nr_masters; tmp++)
+			pdata->data_width[tmp] = BIT(arr[tmp] & 0x07);
+	}
+
+	if (!of_property_read_u32_array(np, "multi-block", mb, nr_channels)) {
+		for (tmp = 0; tmp < nr_channels; tmp++)
+			pdata->multi_block[tmp] = mb[tmp];
+	} else {
+		for (tmp = 0; tmp < nr_channels; tmp++)
+			pdata->multi_block[tmp] = 1;
+	}
+
+	return pdata;
+}
+#else
+static inline struct dw_dma_platform_data *
+dw_dma_parse_dt(struct platform_device *pdev)
+{
+	return NULL;
+}
+#endif
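+
+/*
+ * An illustrative controller node for the DT parser above (all values are
+ * examples only):
+ *
+ *	dmahost: dma@fc400000 {
+ *		compatible = "snps,dma-spear1340";
+ *		reg = <0xfc400000 0x1000>;
+ *		interrupts = <12>;
+ *		#dma-cells = <3>;
+ *		dma-channels = <8>;
+ *		dma-masters = <2>;
+ *		data-width = <8 8>;
+ *		block_size = <0xfff>;
+ *	};
+ */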
+
+static int dw_probe(struct platform_device *pdev)
+{
+	struct dw_dma_chip *chip;
+	struct device *dev = &pdev->dev;
+	struct resource *mem;
+	const struct dw_dma_platform_data *pdata;
+	int err;
+
+	chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	chip->irq = platform_get_irq(pdev, 0);
+	if (chip->irq < 0)
+		return chip->irq;
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	chip->regs = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(chip->regs))
+		return PTR_ERR(chip->regs);
+
+	err = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+	if (err)
+		return err;
+
+	pdata = dev_get_platdata(dev);
+	if (!pdata)
+		pdata = dw_dma_parse_dt(pdev);
+
+	chip->dev = dev;
+	chip->id = pdev->id;
+	chip->pdata = pdata;
+
+	chip->clk = devm_clk_get(chip->dev, "hclk");
+	if (IS_ERR(chip->clk))
+		return PTR_ERR(chip->clk);
+	err = clk_prepare_enable(chip->clk);
+	if (err)
+		return err;
+
+	pm_runtime_enable(&pdev->dev);
+
+	err = dw_dma_probe(chip);
+	if (err)
+		goto err_dw_dma_probe;
+
+	platform_set_drvdata(pdev, chip);
+
+	if (pdev->dev.of_node) {
+		err = of_dma_controller_register(pdev->dev.of_node,
+						 dw_dma_of_xlate, chip->dw);
+		if (err)
+			dev_err(&pdev->dev,
+				"could not register of_dma_controller\n");
+	}
+
+	if (ACPI_HANDLE(&pdev->dev))
+		dw_dma_acpi_controller_register(chip->dw);
+
+	return 0;
+
+err_dw_dma_probe:
+	pm_runtime_disable(&pdev->dev);
+	clk_disable_unprepare(chip->clk);
+	return err;
+}
+
+static int dw_remove(struct platform_device *pdev)
+{
+	struct dw_dma_chip *chip = platform_get_drvdata(pdev);
+
+	if (pdev->dev.of_node)
+		of_dma_controller_free(pdev->dev.of_node);
+
+	dw_dma_remove(chip);
+	pm_runtime_disable(&pdev->dev);
+	clk_disable_unprepare(chip->clk);
+
+	return 0;
+}
+
+static void dw_shutdown(struct platform_device *pdev)
+{
+	struct dw_dma_chip *chip = platform_get_drvdata(pdev);
+
+	/*
+	 * We have to call dw_dma_disable() to stop any ongoing transfer. On
+	 * some platforms we can't do that since the DMA device is powered off.
+	 * Moreover, we have no way to check whether the platform is affected
+	 * or not. That's why we call pm_runtime_get_sync() / pm_runtime_put()
+	 * unconditionally. On the other hand, we can't use
+	 * pm_runtime_suspended() because the runtime PM framework is not
+	 * fully used by the driver.
+	 */
+	pm_runtime_get_sync(chip->dev);
+	dw_dma_disable(chip);
+	pm_runtime_put_sync_suspend(chip->dev);
+
+	clk_disable_unprepare(chip->clk);
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id dw_dma_of_id_table[] = {
+	{ .compatible = "snps,dma-spear1340" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, dw_dma_of_id_table);
+#endif
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id dw_dma_acpi_id_table[] = {
+	{ "INTL9C60", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, dw_dma_acpi_id_table);
+#endif
+
+#ifdef CONFIG_PM_SLEEP
+
+static int dw_suspend_late(struct device *dev)
+{
+	struct dw_dma_chip *chip = dev_get_drvdata(dev);
+
+	dw_dma_disable(chip);
+	clk_disable_unprepare(chip->clk);
+
+	return 0;
+}
+
+static int dw_resume_early(struct device *dev)
+{
+	struct dw_dma_chip *chip = dev_get_drvdata(dev);
+	int ret;
+
+	ret = clk_prepare_enable(chip->clk);
+	if (ret)
+		return ret;
+
+	return dw_dma_enable(chip);
+}
+
+#endif /* CONFIG_PM_SLEEP */
+
+static const struct dev_pm_ops dw_dev_pm_ops = {
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(dw_suspend_late, dw_resume_early)
+};
+
+static struct platform_driver dw_driver = {
+	.probe		= dw_probe,
+	.remove		= dw_remove,
+	.shutdown       = dw_shutdown,
+	.driver = {
+		.name	= DRV_NAME,
+		.pm	= &dw_dev_pm_ops,
+		.of_match_table = of_match_ptr(dw_dma_of_id_table),
+		.acpi_match_table = ACPI_PTR(dw_dma_acpi_id_table),
+	},
+};
+
+static int __init dw_init(void)
+{
+	return platform_driver_register(&dw_driver);
+}
+subsys_initcall(dw_init);
+
+static void __exit dw_exit(void)
+{
+	platform_driver_unregister(&dw_driver);
+}
+module_exit(dw_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller platform driver");
+MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h
new file mode 100644
index 0000000..09e7dfd
--- /dev/null
+++ b/drivers/dma/dw/regs.h
@@ -0,0 +1,380 @@
+/*
+ * Driver for the Synopsys DesignWare AHB DMA Controller
+ *
+ * Copyright (C) 2005-2007 Atmel Corporation
+ * Copyright (C) 2010-2011 ST Microelectronics
+ * Copyright (C) 2016 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+
+#include <linux/io-64-nonatomic-hi-lo.h>
+
+#include "internal.h"
+
+#define DW_DMA_MAX_NR_REQUESTS	16
+
+/* flow controller */
+enum dw_dma_fc {
+	DW_DMA_FC_D_M2M,
+	DW_DMA_FC_D_M2P,
+	DW_DMA_FC_D_P2M,
+	DW_DMA_FC_D_P2P,
+	DW_DMA_FC_P_P2M,
+	DW_DMA_FC_SP_P2P,
+	DW_DMA_FC_P_M2P,
+	DW_DMA_FC_DP_P2P,
+};
+
+/*
+ * Redefine this macro to handle differences between 32- and 64-bit
+ * addressing, big vs. little endian, etc.
+ */
+#define DW_REG(name)		u32 name; u32 __pad_##name
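+/* Each DW_REG() occupies a 64-bit slot, matching the hardware register stride */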
+
+/* Hardware register definitions. */
+struct dw_dma_chan_regs {
+	DW_REG(SAR);		/* Source Address Register */
+	DW_REG(DAR);		/* Destination Address Register */
+	DW_REG(LLP);		/* Linked List Pointer */
+	u32	CTL_LO;		/* Control Register Low */
+	u32	CTL_HI;		/* Control Register High */
+	DW_REG(SSTAT);
+	DW_REG(DSTAT);
+	DW_REG(SSTATAR);
+	DW_REG(DSTATAR);
+	u32	CFG_LO;		/* Configuration Register Low */
+	u32	CFG_HI;		/* Configuration Register High */
+	DW_REG(SGR);
+	DW_REG(DSR);
+};
+
+struct dw_dma_irq_regs {
+	DW_REG(XFER);
+	DW_REG(BLOCK);
+	DW_REG(SRC_TRAN);
+	DW_REG(DST_TRAN);
+	DW_REG(ERROR);
+};
+
+struct dw_dma_regs {
+	/* per-channel registers */
+	struct dw_dma_chan_regs	CHAN[DW_DMA_MAX_NR_CHANNELS];
+
+	/* irq handling */
+	struct dw_dma_irq_regs	RAW;		/* r */
+	struct dw_dma_irq_regs	STATUS;		/* r (raw & mask) */
+	struct dw_dma_irq_regs	MASK;		/* rw (set = irq enabled) */
+	struct dw_dma_irq_regs	CLEAR;		/* w (ack, affects "raw") */
+
+	DW_REG(STATUS_INT);			/* r */
+
+	/* software handshaking */
+	DW_REG(REQ_SRC);
+	DW_REG(REQ_DST);
+	DW_REG(SGL_REQ_SRC);
+	DW_REG(SGL_REQ_DST);
+	DW_REG(LAST_SRC);
+	DW_REG(LAST_DST);
+
+	/* miscellaneous */
+	DW_REG(CFG);
+	DW_REG(CH_EN);
+	DW_REG(ID);
+	DW_REG(TEST);
+
+	/* iDMA 32-bit support */
+	DW_REG(CLASS_PRIORITY0);
+	DW_REG(CLASS_PRIORITY1);
+
+	/* optional encoded params, 0x3c8..0x3f7 */
+	u32	__reserved;
+
+	/* per-channel configuration registers */
+	u32	DWC_PARAMS[DW_DMA_MAX_NR_CHANNELS];
+	u32	MULTI_BLK_TYPE;
+	u32	MAX_BLK_SIZE;
+
+	/* top-level parameters */
+	u32	DW_PARAMS;
+
+	/* component ID */
+	u32	COMP_TYPE;
+	u32	COMP_VERSION;
+
+	/* iDMA 32-bit support */
+	DW_REG(FIFO_PARTITION0);
+	DW_REG(FIFO_PARTITION1);
+
+	DW_REG(SAI_ERR);
+	DW_REG(GLOBAL_CFG);
+};
+
+/* Bitfields in DW_PARAMS */
+#define DW_PARAMS_NR_CHAN	8		/* number of channels */
+#define DW_PARAMS_NR_MASTER	11		/* number of AHB masters */
+#define DW_PARAMS_DATA_WIDTH(n)	(15 + 2 * (n))
+#define DW_PARAMS_DATA_WIDTH1	15		/* master 1 data width */
+#define DW_PARAMS_DATA_WIDTH2	17		/* master 2 data width */
+#define DW_PARAMS_DATA_WIDTH3	19		/* master 3 data width */
+#define DW_PARAMS_DATA_WIDTH4	21		/* master 4 data width */
+#define DW_PARAMS_EN		28		/* encoded parameters */
+
+/* Bitfields in DWC_PARAMS */
+#define DWC_PARAMS_MBLK_EN	11		/* multi block transfer */
+
+/* bursts size */
+enum dw_dma_msize {
+	DW_DMA_MSIZE_1,
+	DW_DMA_MSIZE_4,
+	DW_DMA_MSIZE_8,
+	DW_DMA_MSIZE_16,
+	DW_DMA_MSIZE_32,
+	DW_DMA_MSIZE_64,
+	DW_DMA_MSIZE_128,
+	DW_DMA_MSIZE_256,
+};
+
+/* Bitfields in LLP */
+#define DWC_LLP_LMS(x)		((x) & 3)	/* list master select */
+#define DWC_LLP_LOC(x)		((x) & ~3)	/* next lli */
+
+/* Bitfields in CTL_LO */
+#define DWC_CTLL_INT_EN		(1 << 0)	/* irqs enabled? */
+#define DWC_CTLL_DST_WIDTH(n)	((n)<<1)	/* bytes per element */
+#define DWC_CTLL_SRC_WIDTH(n)	((n)<<4)
+#define DWC_CTLL_DST_INC	(0<<7)		/* DAR update/not */
+#define DWC_CTLL_DST_DEC	(1<<7)
+#define DWC_CTLL_DST_FIX	(2<<7)
+#define DWC_CTLL_SRC_INC	(0<<9)		/* SAR update/not */
+#define DWC_CTLL_SRC_DEC	(1<<9)
+#define DWC_CTLL_SRC_FIX	(2<<9)
+#define DWC_CTLL_DST_MSIZE(n)	((n)<<11)	/* burst, #elements */
+#define DWC_CTLL_SRC_MSIZE(n)	((n)<<14)
+#define DWC_CTLL_S_GATH_EN	(1 << 17)	/* src gather, !FIX */
+#define DWC_CTLL_D_SCAT_EN	(1 << 18)	/* dst scatter, !FIX */
+#define DWC_CTLL_FC(n)		((n) << 20)
+#define DWC_CTLL_FC_M2M		(0 << 20)	/* mem-to-mem */
+#define DWC_CTLL_FC_M2P		(1 << 20)	/* mem-to-periph */
+#define DWC_CTLL_FC_P2M		(2 << 20)	/* periph-to-mem */
+#define DWC_CTLL_FC_P2P		(3 << 20)	/* periph-to-periph */
+/* plus 4 transfer types for peripheral-as-flow-controller */
+#define DWC_CTLL_DMS(n)		((n)<<23)	/* dst master select */
+#define DWC_CTLL_SMS(n)		((n)<<25)	/* src master select */
+#define DWC_CTLL_LLP_D_EN	(1 << 27)	/* dest block chain */
+#define DWC_CTLL_LLP_S_EN	(1 << 28)	/* src block chain */
+
+/* Bitfields in CTL_HI */
+#define DWC_CTLH_BLOCK_TS_MASK	GENMASK(11, 0)
+#define DWC_CTLH_BLOCK_TS(x)	((x) & DWC_CTLH_BLOCK_TS_MASK)
+#define DWC_CTLH_DONE		(1 << 12)
+
+/* Bitfields in CFG_LO */
+#define DWC_CFGL_CH_PRIOR_MASK	(0x7 << 5)	/* priority mask */
+#define DWC_CFGL_CH_PRIOR(x)	((x) << 5)	/* priority */
+#define DWC_CFGL_CH_SUSP	(1 << 8)	/* pause xfer */
+#define DWC_CFGL_FIFO_EMPTY	(1 << 9)	/* pause xfer */
+#define DWC_CFGL_HS_DST		(1 << 10)	/* handshake w/dst */
+#define DWC_CFGL_HS_SRC		(1 << 11)	/* handshake w/src */
+#define DWC_CFGL_LOCK_CH_XFER	(0 << 12)	/* scope of LOCK_CH */
+#define DWC_CFGL_LOCK_CH_BLOCK	(1 << 12)
+#define DWC_CFGL_LOCK_CH_XACT	(2 << 12)
+#define DWC_CFGL_LOCK_BUS_XFER	(0 << 14)	/* scope of LOCK_BUS */
+#define DWC_CFGL_LOCK_BUS_BLOCK	(1 << 14)
+#define DWC_CFGL_LOCK_BUS_XACT	(2 << 14)
+#define DWC_CFGL_LOCK_CH	(1 << 15)	/* channel lockout */
+#define DWC_CFGL_LOCK_BUS	(1 << 16)	/* busmaster lockout */
+#define DWC_CFGL_HS_DST_POL	(1 << 18)	/* dst handshake active low */
+#define DWC_CFGL_HS_SRC_POL	(1 << 19)	/* src handshake active low */
+#define DWC_CFGL_MAX_BURST(x)	((x) << 20)
+#define DWC_CFGL_RELOAD_SAR	(1 << 30)
+#define DWC_CFGL_RELOAD_DAR	(1 << 31)
+
+/* Bitfields in CFG_HI */
+#define DWC_CFGH_FCMODE		(1 << 0)
+#define DWC_CFGH_FIFO_MODE	(1 << 1)
+#define DWC_CFGH_PROTCTL(x)	((x) << 2)
+#define DWC_CFGH_DS_UPD_EN	(1 << 5)
+#define DWC_CFGH_SS_UPD_EN	(1 << 6)
+#define DWC_CFGH_SRC_PER(x)	((x) << 7)
+#define DWC_CFGH_DST_PER(x)	((x) << 11)
+
+/* Bitfields in SGR */
+#define DWC_SGR_SGI(x)		((x) << 0)
+#define DWC_SGR_SGC(x)		((x) << 20)
+
+/* Bitfields in DSR */
+#define DWC_DSR_DSI(x)		((x) << 0)
+#define DWC_DSR_DSC(x)		((x) << 20)
+
+/* Bitfields in CFG */
+#define DW_CFG_DMA_EN		(1 << 0)
+
+/* iDMA 32-bit support */
+
+/* Bitfields in CTL_HI */
+#define IDMA32C_CTLH_BLOCK_TS_MASK	GENMASK(16, 0)
+#define IDMA32C_CTLH_BLOCK_TS(x)	((x) & IDMA32C_CTLH_BLOCK_TS_MASK)
+#define IDMA32C_CTLH_DONE		(1 << 17)
+
+/* Bitfields in CFG_LO */
+#define IDMA32C_CFGL_DST_BURST_ALIGN	(1 << 0)	/* dst burst align */
+#define IDMA32C_CFGL_SRC_BURST_ALIGN	(1 << 1)	/* src burst align */
+#define IDMA32C_CFGL_CH_DRAIN		(1 << 10)	/* drain FIFO */
+#define IDMA32C_CFGL_DST_OPT_BL		(1 << 20)	/* optimize dst burst length */
+#define IDMA32C_CFGL_SRC_OPT_BL		(1 << 21)	/* optimize src burst length */
+
+/* Bitfields in CFG_HI */
+#define IDMA32C_CFGH_SRC_PER(x)		((x) << 0)
+#define IDMA32C_CFGH_DST_PER(x)		((x) << 4)
+#define IDMA32C_CFGH_RD_ISSUE_THD(x)	((x) << 8)
+#define IDMA32C_CFGH_RW_ISSUE_THD(x)	((x) << 18)
+#define IDMA32C_CFGH_SRC_PER_EXT(x)	((x) << 28)	/* src peripheral extension */
+#define IDMA32C_CFGH_DST_PER_EXT(x)	((x) << 30)	/* dst peripheral extension */
+
+/* Bitfields in FIFO_PARTITION */
+#define IDMA32C_FP_PSIZE_CH0(x)		((x) << 0)
+#define IDMA32C_FP_PSIZE_CH1(x)		((x) << 13)
+#define IDMA32C_FP_UPDATE		(1 << 26)
+
+enum dw_dmac_flags {
+	DW_DMA_IS_CYCLIC = 0,
+	DW_DMA_IS_SOFT_LLP = 1,
+	DW_DMA_IS_PAUSED = 2,
+	DW_DMA_IS_INITIALIZED = 3,
+};
+
+struct dw_dma_chan {
+	struct dma_chan			chan;
+	void __iomem			*ch_regs;
+	u8				mask;
+	u8				priority;
+	enum dma_transfer_direction	direction;
+
+	/* software emulation of the LLP transfers */
+	struct list_head	*tx_node_active;
+
+	spinlock_t		lock;
+
+	/* these other elements are all protected by lock */
+	unsigned long		flags;
+	struct list_head	active_list;
+	struct list_head	queue;
+
+	unsigned int		descs_allocated;
+
+	/* hardware configuration */
+	unsigned int		block_size;
+	bool			nollp;
+
+	/* custom slave configuration */
+	struct dw_dma_slave	dws;
+
+	/* configuration passed via .device_config */
+	struct dma_slave_config dma_sconfig;
+};
+
+static inline struct dw_dma_chan_regs __iomem *
+__dwc_regs(struct dw_dma_chan *dwc)
+{
+	return dwc->ch_regs;
+}
+
+#define channel_readl(dwc, name) \
+	readl(&(__dwc_regs(dwc)->name))
+#define channel_writel(dwc, name, val) \
+	writel((val), &(__dwc_regs(dwc)->name))
+
+static inline struct dw_dma_chan *to_dw_dma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct dw_dma_chan, chan);
+}
+
+struct dw_dma {
+	struct dma_device	dma;
+	char			name[20];
+	void __iomem		*regs;
+	struct dma_pool		*desc_pool;
+	struct tasklet_struct	tasklet;
+
+	/* channels */
+	struct dw_dma_chan	*chan;
+	u8			all_chan_mask;
+	u8			in_use;
+
+	/* platform data */
+	struct dw_dma_platform_data	*pdata;
+};
+
+static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw)
+{
+	return dw->regs;
+}
+
+#define dma_readl(dw, name) \
+	readl(&(__dw_regs(dw)->name))
+#define dma_writel(dw, name, val) \
+	writel((val), &(__dw_regs(dw)->name))
+
+#define idma32_readq(dw, name)				\
+	hi_lo_readq(&(__dw_regs(dw)->name))
+#define idma32_writeq(dw, name, val)			\
+	hi_lo_writeq((val), &(__dw_regs(dw)->name))
+
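+/*
+ * The channel bit registers written through these helpers (CH_EN, MASK.*)
+ * carry a write-enable mask in their upper byte, so a single channel bit can
+ * be set or cleared without a read-modify-write cycle.
+ */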
+#define channel_set_bit(dw, reg, mask) \
+	dma_writel(dw, reg, ((mask) << 8) | (mask))
+#define channel_clear_bit(dw, reg, mask) \
+	dma_writel(dw, reg, ((mask) << 8) | 0)
+
+static inline struct dw_dma *to_dw_dma(struct dma_device *ddev)
+{
+	return container_of(ddev, struct dw_dma, dma);
+}
+
+/* LLI == Linked List Item; a.k.a. DMA block descriptor */
+struct dw_lli {
+	/* values that are not changed by hardware */
+	__le32		sar;
+	__le32		dar;
+	__le32		llp;		/* chain to next lli */
+	__le32		ctllo;
+	/* values that may get written back: */
+	__le32		ctlhi;
+	/*
+	 * sstat and dstat can snapshot peripheral register state;
+	 * the silicon configuration may discard either or both.
+	 */
+	__le32		sstat;
+	__le32		dstat;
+};
+
+struct dw_desc {
+	/* FIRST values the hardware uses */
+	struct dw_lli			lli;
+
+#define lli_set(d, reg, v)		((d)->lli.reg |= cpu_to_le32(v))
+#define lli_clear(d, reg, v)		((d)->lli.reg &= ~cpu_to_le32(v))
+#define lli_read(d, reg)		le32_to_cpu((d)->lli.reg)
+#define lli_write(d, reg, v)		((d)->lli.reg = cpu_to_le32(v))
+
+	/* THEN values for driver housekeeping */
+	struct list_head		desc_node;
+	struct list_head		tx_list;
+	struct dma_async_tx_descriptor	txd;
+	size_t				len;
+	size_t				total_len;
+	u32				residue;
+};
+
+#define to_dw_desc(h)	list_entry(h, struct dw_desc, desc_node)
+
+static inline struct dw_desc *
+txd_to_dw_desc(struct dma_async_tx_descriptor *txd)
+{
+	return container_of(txd, struct dw_desc, txd);
+}
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
new file mode 100644
index 0000000..a155923
--- /dev/null
+++ b/drivers/dma/ep93xx_dma.c
@@ -0,0 +1,1418 @@
+/*
+ * Driver for the Cirrus Logic EP93xx DMA Controller
+ *
+ * Copyright (C) 2011 Mika Westerberg
+ *
+ * DMA M2P implementation is based on the original
+ * arch/arm/mach-ep93xx/dma-m2p.c which has the following copyrights:
+ *
+ *   Copyright (C) 2006 Lennert Buytenhek <buytenh@wantstofly.org>
+ *   Copyright (C) 2006 Applied Data Systems
+ *   Copyright (C) 2009 Ryan Mallon <rmallon@gmail.com>
+ *
+ * This driver is based on dw_dmac and amba-pl08x drivers.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/clk.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <linux/platform_data/dma-ep93xx.h>
+
+#include "dmaengine.h"
+
+/* M2P registers */
+#define M2P_CONTROL			0x0000
+#define M2P_CONTROL_STALLINT		BIT(0)
+#define M2P_CONTROL_NFBINT		BIT(1)
+#define M2P_CONTROL_CH_ERROR_INT	BIT(3)
+#define M2P_CONTROL_ENABLE		BIT(4)
+#define M2P_CONTROL_ICE			BIT(6)
+
+#define M2P_INTERRUPT			0x0004
+#define M2P_INTERRUPT_STALL		BIT(0)
+#define M2P_INTERRUPT_NFB		BIT(1)
+#define M2P_INTERRUPT_ERROR		BIT(3)
+
+#define M2P_PPALLOC			0x0008
+#define M2P_STATUS			0x000c
+
+#define M2P_MAXCNT0			0x0020
+#define M2P_BASE0			0x0024
+#define M2P_MAXCNT1			0x0030
+#define M2P_BASE1			0x0034
+
+#define M2P_STATE_IDLE			0
+#define M2P_STATE_STALL			1
+#define M2P_STATE_ON			2
+#define M2P_STATE_NEXT			3
+
+/* M2M registers */
+#define M2M_CONTROL			0x0000
+#define M2M_CONTROL_DONEINT		BIT(2)
+#define M2M_CONTROL_ENABLE		BIT(3)
+#define M2M_CONTROL_START		BIT(4)
+#define M2M_CONTROL_DAH			BIT(11)
+#define M2M_CONTROL_SAH			BIT(12)
+#define M2M_CONTROL_PW_SHIFT		9
+#define M2M_CONTROL_PW_8		(0 << M2M_CONTROL_PW_SHIFT)
+#define M2M_CONTROL_PW_16		(1 << M2M_CONTROL_PW_SHIFT)
+#define M2M_CONTROL_PW_32		(2 << M2M_CONTROL_PW_SHIFT)
+#define M2M_CONTROL_PW_MASK		(3 << M2M_CONTROL_PW_SHIFT)
+#define M2M_CONTROL_TM_SHIFT		13
+#define M2M_CONTROL_TM_TX		(1 << M2M_CONTROL_TM_SHIFT)
+#define M2M_CONTROL_TM_RX		(2 << M2M_CONTROL_TM_SHIFT)
+#define M2M_CONTROL_NFBINT		BIT(21)
+#define M2M_CONTROL_RSS_SHIFT		22
+#define M2M_CONTROL_RSS_SSPRX		(1 << M2M_CONTROL_RSS_SHIFT)
+#define M2M_CONTROL_RSS_SSPTX		(2 << M2M_CONTROL_RSS_SHIFT)
+#define M2M_CONTROL_RSS_IDE		(3 << M2M_CONTROL_RSS_SHIFT)
+#define M2M_CONTROL_NO_HDSK		BIT(24)
+#define M2M_CONTROL_PWSC_SHIFT		25
+
+#define M2M_INTERRUPT			0x0004
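+/* covers the DONE and NFB status bits tested in m2m_hw_interrupt() */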
+#define M2M_INTERRUPT_MASK		6
+
+#define M2M_STATUS			0x000c
+#define M2M_STATUS_CTL_SHIFT		1
+#define M2M_STATUS_CTL_IDLE		(0 << M2M_STATUS_CTL_SHIFT)
+#define M2M_STATUS_CTL_STALL		(1 << M2M_STATUS_CTL_SHIFT)
+#define M2M_STATUS_CTL_MEMRD		(2 << M2M_STATUS_CTL_SHIFT)
+#define M2M_STATUS_CTL_MEMWR		(3 << M2M_STATUS_CTL_SHIFT)
+#define M2M_STATUS_CTL_BWCWAIT		(4 << M2M_STATUS_CTL_SHIFT)
+#define M2M_STATUS_CTL_MASK		(7 << M2M_STATUS_CTL_SHIFT)
+#define M2M_STATUS_BUF_SHIFT		4
+#define M2M_STATUS_BUF_NO		(0 << M2M_STATUS_BUF_SHIFT)
+#define M2M_STATUS_BUF_ON		(1 << M2M_STATUS_BUF_SHIFT)
+#define M2M_STATUS_BUF_NEXT		(2 << M2M_STATUS_BUF_SHIFT)
+#define M2M_STATUS_BUF_MASK		(3 << M2M_STATUS_BUF_SHIFT)
+#define M2M_STATUS_DONE			BIT(6)
+
+#define M2M_BCR0			0x0010
+#define M2M_BCR1			0x0014
+#define M2M_SAR_BASE0			0x0018
+#define M2M_SAR_BASE1			0x001c
+#define M2M_DAR_BASE0			0x002c
+#define M2M_DAR_BASE1			0x0030
+
+#define DMA_MAX_CHAN_BYTES		0xffff
+#define DMA_MAX_CHAN_DESCRIPTORS	32
+
+struct ep93xx_dma_engine;
+
+/**
+ * struct ep93xx_dma_desc - EP93xx specific transaction descriptor
+ * @src_addr: source address of the transaction
+ * @dst_addr: destination address of the transaction
+ * @size: size of the transaction (in bytes)
+ * @complete: this descriptor is completed
+ * @txd: dmaengine API descriptor
+ * @tx_list: list of linked descriptors
+ * @node: link used for putting this into a channel queue
+ */
+struct ep93xx_dma_desc {
+	u32				src_addr;
+	u32				dst_addr;
+	size_t				size;
+	bool				complete;
+	struct dma_async_tx_descriptor	txd;
+	struct list_head		tx_list;
+	struct list_head		node;
+};
+
+/**
+ * struct ep93xx_dma_chan - an EP93xx DMA M2P/M2M channel
+ * @chan: dmaengine API channel
+ * @edma: pointer to the engine device
+ * @regs: memory mapped registers
+ * @irq: interrupt number of the channel
+ * @clk: clock used by this channel
+ * @tasklet: channel specific tasklet used for callbacks
+ * @lock: lock protecting the fields following
+ * @flags: flags for the channel
+ * @buffer: which buffer to use next (0/1)
+ * @active: flattened chain of descriptors currently being processed
+ * @queue: pending descriptors which are handled next
+ * @free_list: list of free descriptors which can be used
+ * @runtime_addr: physical address currently used as dest/src (M2M only). This
+ *                is set via .device_config before slave operation is
+ *                prepared
+ * @runtime_ctrl: M2M runtime values for the control register.
+ *
+ * As the EP93xx DMA controller doesn't support real chained DMA descriptors,
+ * we use a slightly different scheme here: @active points to the head of a
+ * flattened DMA descriptor chain.
+ *
+ * @queue holds pending transactions. These are linked through the first
+ * descriptor in the chain. When a descriptor is moved to the @active queue,
+ * the first and chained descriptors are flattened into a single list.
+ *
+ * @chan.private holds pointer to &struct ep93xx_dma_data which contains
+ * necessary channel configuration information. For memcpy channels this must
+ * be %NULL.
+ */
+struct ep93xx_dma_chan {
+	struct dma_chan			chan;
+	const struct ep93xx_dma_engine	*edma;
+	void __iomem			*regs;
+	int				irq;
+	struct clk			*clk;
+	struct tasklet_struct		tasklet;
+	/* protects the fields following */
+	spinlock_t			lock;
+	unsigned long			flags;
+/* Channel is configured for cyclic transfers */
+#define EP93XX_DMA_IS_CYCLIC		0
+
+	int				buffer;
+	struct list_head		active;
+	struct list_head		queue;
+	struct list_head		free_list;
+	u32				runtime_addr;
+	u32				runtime_ctrl;
+};
+
+/**
+ * struct ep93xx_dma_engine - the EP93xx DMA engine instance
+ * @dma_dev: holds the dmaengine device
+ * @m2m: is this an M2M or M2P device
+ * @hw_setup: method which sets the channel up for operation
+ * @hw_shutdown: shuts the channel down and flushes whatever is left
+ * @hw_submit: pushes active descriptor(s) to the hardware
+ * @hw_interrupt: handle the interrupt
+ * @num_channels: number of channels for this instance
+ * @channels: array of channels
+ *
+ * There is one instance of this struct for the M2P channels and one for the
+ * M2M channels. hw_xxx() methods are used to perform operations which are
+ * different on M2M and M2P channels. These methods are called with the
+ * channel lock held and interrupts disabled, so they cannot sleep.
+ */
+struct ep93xx_dma_engine {
+	struct dma_device	dma_dev;
+	bool			m2m;
+	int			(*hw_setup)(struct ep93xx_dma_chan *);
+	void			(*hw_synchronize)(struct ep93xx_dma_chan *);
+	void			(*hw_shutdown)(struct ep93xx_dma_chan *);
+	void			(*hw_submit)(struct ep93xx_dma_chan *);
+	int			(*hw_interrupt)(struct ep93xx_dma_chan *);
+#define INTERRUPT_UNKNOWN	0
+#define INTERRUPT_DONE		1
+#define INTERRUPT_NEXT_BUFFER	2
+
+	size_t			num_channels;
+	struct ep93xx_dma_chan	channels[];
+};
+
+static inline struct device *chan2dev(struct ep93xx_dma_chan *edmac)
+{
+	return &edmac->chan.dev->device;
+}
+
+static struct ep93xx_dma_chan *to_ep93xx_dma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct ep93xx_dma_chan, chan);
+}
+
+/**
+ * ep93xx_dma_set_active - set new active descriptor chain
+ * @edmac: channel
+ * @desc: head of the new active descriptor chain
+ *
+ * Sets @desc to be the head of the new active descriptor chain. This is the
+ * chain which is processed next. The active list must be empty before calling
+ * this function.
+ *
+ * Called with @edmac->lock held and interrupts disabled.
+ */
+static void ep93xx_dma_set_active(struct ep93xx_dma_chan *edmac,
+				  struct ep93xx_dma_desc *desc)
+{
+	BUG_ON(!list_empty(&edmac->active));
+
+	list_add_tail(&desc->node, &edmac->active);
+
+	/* Flatten the @desc->tx_list chain into @edmac->active list */
+	while (!list_empty(&desc->tx_list)) {
+		struct ep93xx_dma_desc *d = list_first_entry(&desc->tx_list,
+			struct ep93xx_dma_desc, node);
+
+		/*
+		 * We copy the callback parameters from the first descriptor
+		 * to all the chained descriptors. This way we can call the
+		 * callback without having to find out the first descriptor in
+		 * the chain. Useful for cyclic transfers.
+		 */
+		d->txd.callback = desc->txd.callback;
+		d->txd.callback_param = desc->txd.callback_param;
+
+		list_move_tail(&d->node, &edmac->active);
+	}
+}
+
+/* Called with @edmac->lock held and interrupts disabled */
+static struct ep93xx_dma_desc *
+ep93xx_dma_get_active(struct ep93xx_dma_chan *edmac)
+{
+	return list_first_entry_or_null(&edmac->active,
+					struct ep93xx_dma_desc, node);
+}
+
+/**
+ * ep93xx_dma_advance_active - advances to the next active descriptor
+ * @edmac: channel
+ *
+ * Function advances the active descriptor to the next one in @edmac->active
+ * and returns %true if we still have descriptors in the chain to process.
+ * Otherwise returns %false.
+ *
+ * When the channel is in cyclic mode always returns %true.
+ *
+ * Called with @edmac->lock held and interrupts disabled.
+ */
+static bool ep93xx_dma_advance_active(struct ep93xx_dma_chan *edmac)
+{
+	struct ep93xx_dma_desc *desc;
+
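+	/* Rotate so that the next descriptor in the chain becomes the new head */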
+	list_rotate_left(&edmac->active);
+
+	if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags))
+		return true;
+
+	desc = ep93xx_dma_get_active(edmac);
+	if (!desc)
+		return false;
+
+	/*
+	 * If txd.cookie is set it means that we are back in the first
+	 * descriptor in the chain and hence done with it.
+	 */
+	return !desc->txd.cookie;
+}
+
+/*
+ * M2P DMA implementation
+ */
+
+static void m2p_set_control(struct ep93xx_dma_chan *edmac, u32 control)
+{
+	writel(control, edmac->regs + M2P_CONTROL);
+	/*
+	 * The EP93xx User's Guide states that we must perform a dummy read
+	 * after a write to the control register.
+	 */
+	readl(edmac->regs + M2P_CONTROL);
+}
+
+static int m2p_hw_setup(struct ep93xx_dma_chan *edmac)
+{
+	struct ep93xx_dma_data *data = edmac->chan.private;
+	u32 control;
+
+	writel(data->port & 0xf, edmac->regs + M2P_PPALLOC);
+
+	control = M2P_CONTROL_CH_ERROR_INT | M2P_CONTROL_ICE
+		| M2P_CONTROL_ENABLE;
+	m2p_set_control(edmac, control);
+
+	edmac->buffer = 0;
+
+	return 0;
+}
+
+static inline u32 m2p_channel_state(struct ep93xx_dma_chan *edmac)
+{
+	return (readl(edmac->regs + M2P_STATUS) >> 4) & 0x3;
+}
+
+static void m2p_hw_synchronize(struct ep93xx_dma_chan *edmac)
+{
+	unsigned long flags;
+	u32 control;
+
+	spin_lock_irqsave(&edmac->lock, flags);
+	control = readl(edmac->regs + M2P_CONTROL);
+	control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT);
+	m2p_set_control(edmac, control);
+	spin_unlock_irqrestore(&edmac->lock, flags);
+
+	while (m2p_channel_state(edmac) >= M2P_STATE_ON)
+		schedule();
+}
+
+static void m2p_hw_shutdown(struct ep93xx_dma_chan *edmac)
+{
+	m2p_set_control(edmac, 0);
+
+	while (m2p_channel_state(edmac) != M2P_STATE_IDLE)
+		dev_warn(chan2dev(edmac), "M2P: Not yet IDLE\n");
+}
+
+static void m2p_fill_desc(struct ep93xx_dma_chan *edmac)
+{
+	struct ep93xx_dma_desc *desc;
+	u32 bus_addr;
+
+	desc = ep93xx_dma_get_active(edmac);
+	if (!desc) {
+		dev_warn(chan2dev(edmac), "M2P: empty descriptor list\n");
+		return;
+	}
+
+	if (ep93xx_dma_chan_direction(&edmac->chan) == DMA_MEM_TO_DEV)
+		bus_addr = desc->src_addr;
+	else
+		bus_addr = desc->dst_addr;
+
+	if (edmac->buffer == 0) {
+		writel(desc->size, edmac->regs + M2P_MAXCNT0);
+		writel(bus_addr, edmac->regs + M2P_BASE0);
+	} else {
+		writel(desc->size, edmac->regs + M2P_MAXCNT1);
+		writel(bus_addr, edmac->regs + M2P_BASE1);
+	}
+
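+	/* Toggle to the other MAXCNT/BASE register pair for the next fill */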
+	edmac->buffer ^= 1;
+}
+
+static void m2p_hw_submit(struct ep93xx_dma_chan *edmac)
+{
+	u32 control = readl(edmac->regs + M2P_CONTROL);
+
+	m2p_fill_desc(edmac);
+	control |= M2P_CONTROL_STALLINT;
+
+	if (ep93xx_dma_advance_active(edmac)) {
+		m2p_fill_desc(edmac);
+		control |= M2P_CONTROL_NFBINT;
+	}
+
+	m2p_set_control(edmac, control);
+}
+
+static int m2p_hw_interrupt(struct ep93xx_dma_chan *edmac)
+{
+	u32 irq_status = readl(edmac->regs + M2P_INTERRUPT);
+	u32 control;
+
+	if (irq_status & M2P_INTERRUPT_ERROR) {
+		struct ep93xx_dma_desc *desc = ep93xx_dma_get_active(edmac);
+
+		/* Clear the error interrupt */
+		writel(1, edmac->regs + M2P_INTERRUPT);
+
+		/*
+		 * It seems that there is no easy way of reporting errors back
+		 * to the client, so we just report the error here and continue
+		 * as usual.
+		 *
+		 * Revisit this when there is a mechanism to report back the
+		 * errors.
+		 */
+		dev_err(chan2dev(edmac),
+			"DMA transfer failed! Details:\n"
+			"\tcookie	: %d\n"
+			"\tsrc_addr	: 0x%08x\n"
+			"\tdst_addr	: 0x%08x\n"
+			"\tsize		: %zu\n",
+			desc->txd.cookie, desc->src_addr, desc->dst_addr,
+			desc->size);
+	}
+
+	/*
+	 * Even the latest E2 silicon revision sometimes asserts the STALL
+	 * interrupt instead of NFB. Therefore we treat them equally, basing
+	 * the decision on the amount of data we still have to transfer.
+	 */
+	if (!(irq_status & (M2P_INTERRUPT_STALL | M2P_INTERRUPT_NFB)))
+		return INTERRUPT_UNKNOWN;
+
+	if (ep93xx_dma_advance_active(edmac)) {
+		m2p_fill_desc(edmac);
+		return INTERRUPT_NEXT_BUFFER;
+	}
+
+	/* Disable interrupts */
+	control = readl(edmac->regs + M2P_CONTROL);
+	control &= ~(M2P_CONTROL_STALLINT | M2P_CONTROL_NFBINT);
+	m2p_set_control(edmac, control);
+
+	return INTERRUPT_DONE;
+}
+
+/*
+ * M2M DMA implementation
+ */
+
+static int m2m_hw_setup(struct ep93xx_dma_chan *edmac)
+{
+	const struct ep93xx_dma_data *data = edmac->chan.private;
+	u32 control = 0;
+
+	if (!data) {
+		/* This is a memcpy channel, nothing to configure */
+		writel(control, edmac->regs + M2M_CONTROL);
+		return 0;
+	}
+
+	switch (data->port) {
+	case EP93XX_DMA_SSP:
+		/*
+		 * This was found by experimenting - anything less than 5
+		 * causes the channel to perform only a partial transfer, which
+		 * leads to problems since we don't get a DONE interrupt then.
+		 */
+		control = (5 << M2M_CONTROL_PWSC_SHIFT);
+		control |= M2M_CONTROL_NO_HDSK;
+
+		if (data->direction == DMA_MEM_TO_DEV) {
+			control |= M2M_CONTROL_DAH;
+			control |= M2M_CONTROL_TM_TX;
+			control |= M2M_CONTROL_RSS_SSPTX;
+		} else {
+			control |= M2M_CONTROL_SAH;
+			control |= M2M_CONTROL_TM_RX;
+			control |= M2M_CONTROL_RSS_SSPRX;
+		}
+		break;
+
+	case EP93XX_DMA_IDE:
+		/*
+		 * This IDE part is totally untested. Values below are taken
+		 * from the EP93xx User's Guide and might not be correct.
+		 */
+		if (data->direction == DMA_MEM_TO_DEV) {
+			/* Worst case from the UG */
+			control = (3 << M2M_CONTROL_PWSC_SHIFT);
+			control |= M2M_CONTROL_DAH;
+			control |= M2M_CONTROL_TM_TX;
+		} else {
+			control = (2 << M2M_CONTROL_PWSC_SHIFT);
+			control |= M2M_CONTROL_SAH;
+			control |= M2M_CONTROL_TM_RX;
+		}
+
+		control |= M2M_CONTROL_NO_HDSK;
+		control |= M2M_CONTROL_RSS_IDE;
+		control |= M2M_CONTROL_PW_16;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	writel(control, edmac->regs + M2M_CONTROL);
+	return 0;
+}
+
+static void m2m_hw_shutdown(struct ep93xx_dma_chan *edmac)
+{
+	/* Just disable the channel */
+	writel(0, edmac->regs + M2M_CONTROL);
+}
+
+static void m2m_fill_desc(struct ep93xx_dma_chan *edmac)
+{
+	struct ep93xx_dma_desc *desc;
+
+	desc = ep93xx_dma_get_active(edmac);
+	if (!desc) {
+		dev_warn(chan2dev(edmac), "M2M: empty descriptor list\n");
+		return;
+	}
+
+	if (edmac->buffer == 0) {
+		writel(desc->src_addr, edmac->regs + M2M_SAR_BASE0);
+		writel(desc->dst_addr, edmac->regs + M2M_DAR_BASE0);
+		writel(desc->size, edmac->regs + M2M_BCR0);
+	} else {
+		writel(desc->src_addr, edmac->regs + M2M_SAR_BASE1);
+		writel(desc->dst_addr, edmac->regs + M2M_DAR_BASE1);
+		writel(desc->size, edmac->regs + M2M_BCR1);
+	}
+
+	edmac->buffer ^= 1;
+}
+
+static void m2m_hw_submit(struct ep93xx_dma_chan *edmac)
+{
+	struct ep93xx_dma_data *data = edmac->chan.private;
+	u32 control = readl(edmac->regs + M2M_CONTROL);
+
+	/*
+	 * Since we allow clients to configure PW (peripheral width) we always
+	 * clear the PW bits here and then set them according to what is given
+	 * in the runtime configuration.
+	 */
+	control &= ~M2M_CONTROL_PW_MASK;
+	control |= edmac->runtime_ctrl;
+
+	m2m_fill_desc(edmac);
+	control |= M2M_CONTROL_DONEINT;
+
+	if (ep93xx_dma_advance_active(edmac)) {
+		m2m_fill_desc(edmac);
+		control |= M2M_CONTROL_NFBINT;
+	}
+
+	/*
+	 * Now we can finally enable the channel. For an M2M channel this must
+	 * be done _after_ the BCRx registers are programmed.
+	 */
+	control |= M2M_CONTROL_ENABLE;
+	writel(control, edmac->regs + M2M_CONTROL);
+
+	if (!data) {
+		/*
+		 * For memcpy channels the software trigger must be asserted
+		 * in order to start the memcpy operation.
+		 */
+		control |= M2M_CONTROL_START;
+		writel(control, edmac->regs + M2M_CONTROL);
+	}
+}
+
+/*
+ * According to the EP93xx User's Guide, we should receive a DONE interrupt
+ * when all M2M DMA controller transactions complete normally. This is not
+ * always the case - sometimes the EP93xx M2M DMA asserts the DONE interrupt
+ * while the DMA channel is still running (channel Buffer FSM in DMA_BUF_ON
+ * state and channel Control FSM in DMA_MEM_RD state, observed at least in
+ * IDE-DMA operation). In effect, disabling the channel when only the DONE
+ * bit is set could stop a currently running DMA transfer. To avoid this, we
+ * use the Buffer FSM and Control FSM to check the current state of the DMA
+ * channel.
+ */
+static int m2m_hw_interrupt(struct ep93xx_dma_chan *edmac)
+{
+	u32 status = readl(edmac->regs + M2M_STATUS);
+	u32 ctl_fsm = status & M2M_STATUS_CTL_MASK;
+	u32 buf_fsm = status & M2M_STATUS_BUF_MASK;
+	bool done = status & M2M_STATUS_DONE;
+	bool last_done;
+	u32 control;
+	struct ep93xx_dma_desc *desc;
+
+	/* Accept only DONE and NFB interrupts */
+	if (!(readl(edmac->regs + M2M_INTERRUPT) & M2M_INTERRUPT_MASK))
+		return INTERRUPT_UNKNOWN;
+
+	if (done) {
+		/* Clear the DONE bit */
+		writel(0, edmac->regs + M2M_INTERRUPT);
+	}
+
+	/*
+	 * Check whether we are done with the descriptors or not. This,
+	 * together with the DMA channel state, determines the action to take
+	 * in the interrupt handler.
+	 */
+	desc = ep93xx_dma_get_active(edmac);
+	last_done = !desc || desc->txd.cookie;
+
+	/*
+	 * Use M2M DMA Buffer FSM and Control FSM to check current state of
+	 * DMA channel. Using DONE and NFB bits from channel status register
+	 * or bits from channel interrupt register is not reliable.
+	 */
+	if (!last_done &&
+	    (buf_fsm == M2M_STATUS_BUF_NO ||
+	     buf_fsm == M2M_STATUS_BUF_ON)) {
+		/*
+		 * Two buffers are ready for update when Buffer FSM is in
+		 * DMA_NO_BUF state. Only one buffer can be prepared without
+		 * disabling the channel or polling the DONE bit.
+		 * To simplify things, always prepare only one buffer.
+		 */
+		if (ep93xx_dma_advance_active(edmac)) {
+			m2m_fill_desc(edmac);
+			if (done && !edmac->chan.private) {
+				/* Software trigger for memcpy channel */
+				control = readl(edmac->regs + M2M_CONTROL);
+				control |= M2M_CONTROL_START;
+				writel(control, edmac->regs + M2M_CONTROL);
+			}
+			return INTERRUPT_NEXT_BUFFER;
+		} else {
+			last_done = true;
+		}
+	}
+
+	/*
+	 * Disable the channel only when Buffer FSM is in DMA_NO_BUF state
+	 * and Control FSM is in DMA_STALL state.
+	 */
+	if (last_done &&
+	    buf_fsm == M2M_STATUS_BUF_NO &&
+	    ctl_fsm == M2M_STATUS_CTL_STALL) {
+		/* Disable interrupts and the channel */
+		control = readl(edmac->regs + M2M_CONTROL);
+		control &= ~(M2M_CONTROL_DONEINT | M2M_CONTROL_NFBINT
+			    | M2M_CONTROL_ENABLE);
+		writel(control, edmac->regs + M2M_CONTROL);
+		return INTERRUPT_DONE;
+	}
+
+	/*
+	 * Nothing to do this time.
+	 */
+	return INTERRUPT_NEXT_BUFFER;
+}
+
+/*
+ * DMA engine API implementation
+ */
+
+static struct ep93xx_dma_desc *
+ep93xx_dma_desc_get(struct ep93xx_dma_chan *edmac)
+{
+	struct ep93xx_dma_desc *desc, *_desc;
+	struct ep93xx_dma_desc *ret = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&edmac->lock, flags);
+	list_for_each_entry_safe(desc, _desc, &edmac->free_list, node) {
+		if (async_tx_test_ack(&desc->txd)) {
+			list_del_init(&desc->node);
+
+			/* Re-initialize the descriptor */
+			desc->src_addr = 0;
+			desc->dst_addr = 0;
+			desc->size = 0;
+			desc->complete = false;
+			desc->txd.cookie = 0;
+			desc->txd.callback = NULL;
+			desc->txd.callback_param = NULL;
+
+			ret = desc;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&edmac->lock, flags);
+	return ret;
+}
+
+static void ep93xx_dma_desc_put(struct ep93xx_dma_chan *edmac,
+				struct ep93xx_dma_desc *desc)
+{
+	if (desc) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&edmac->lock, flags);
+		list_splice_init(&desc->tx_list, &edmac->free_list);
+		list_add(&desc->node, &edmac->free_list);
+		spin_unlock_irqrestore(&edmac->lock, flags);
+	}
+}
+
+/**
+ * ep93xx_dma_advance_work - start processing the next pending transaction
+ * @edmac: channel
+ *
+ * If we have pending transactions queued and we are currently idling, this
+ * function takes the next queued transaction from the @edmac->queue and
+ * pushes it to the hardware for execution.
+ */
+static void ep93xx_dma_advance_work(struct ep93xx_dma_chan *edmac)
+{
+	struct ep93xx_dma_desc *new;
+	unsigned long flags;
+
+	spin_lock_irqsave(&edmac->lock, flags);
+	if (!list_empty(&edmac->active) || list_empty(&edmac->queue)) {
+		spin_unlock_irqrestore(&edmac->lock, flags);
+		return;
+	}
+
+	/* Take the next descriptor from the pending queue */
+	new = list_first_entry(&edmac->queue, struct ep93xx_dma_desc, node);
+	list_del_init(&new->node);
+
+	ep93xx_dma_set_active(edmac, new);
+
+	/* Push it to the hardware */
+	edmac->edma->hw_submit(edmac);
+	spin_unlock_irqrestore(&edmac->lock, flags);
+}
+
+static void ep93xx_dma_tasklet(unsigned long data)
+{
+	struct ep93xx_dma_chan *edmac = (struct ep93xx_dma_chan *)data;
+	struct ep93xx_dma_desc *desc, *d;
+	struct dmaengine_desc_callback cb;
+	LIST_HEAD(list);
+
+	memset(&cb, 0, sizeof(cb));
+	spin_lock_irq(&edmac->lock);
+	/*
+	 * If dma_terminate_all() was called before we get to run, the active
+	 * list has become empty. If that happens, we aren't supposed to do
+	 * anything more than call ep93xx_dma_advance_work().
+	 */
+	desc = ep93xx_dma_get_active(edmac);
+	if (desc) {
+		if (desc->complete) {
+			/* mark the descriptor complete for the non-cyclic case only */
+			if (!test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags))
+				dma_cookie_complete(&desc->txd);
+			list_splice_init(&edmac->active, &list);
+		}
+		dmaengine_desc_get_callback(&desc->txd, &cb);
+	}
+	spin_unlock_irq(&edmac->lock);
+
+	/* Pick up the next descriptor from the queue */
+	ep93xx_dma_advance_work(edmac);
+
+	/* Now we can release all the chained descriptors */
+	list_for_each_entry_safe(desc, d, &list, node) {
+		dma_descriptor_unmap(&desc->txd);
+		ep93xx_dma_desc_put(edmac, desc);
+	}
+
+	dmaengine_desc_callback_invoke(&cb, NULL);
+}
+
+static irqreturn_t ep93xx_dma_interrupt(int irq, void *dev_id)
+{
+	struct ep93xx_dma_chan *edmac = dev_id;
+	struct ep93xx_dma_desc *desc;
+	irqreturn_t ret = IRQ_HANDLED;
+
+	spin_lock(&edmac->lock);
+
+	desc = ep93xx_dma_get_active(edmac);
+	if (!desc) {
+		dev_warn(chan2dev(edmac),
+			 "got interrupt while active list is empty\n");
+		spin_unlock(&edmac->lock);
+		return IRQ_NONE;
+	}
+
+	switch (edmac->edma->hw_interrupt(edmac)) {
+	case INTERRUPT_DONE:
+		desc->complete = true;
+		tasklet_schedule(&edmac->tasklet);
+		break;
+
+	case INTERRUPT_NEXT_BUFFER:
+		if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags))
+			tasklet_schedule(&edmac->tasklet);
+		break;
+
+	default:
+		dev_warn(chan2dev(edmac), "unknown interrupt!\n");
+		ret = IRQ_NONE;
+		break;
+	}
+
+	spin_unlock(&edmac->lock);
+	return ret;
+}
+
+/**
+ * ep93xx_dma_tx_submit - set the prepared descriptor(s) to be executed
+ * @tx: descriptor to be executed
+ *
+ * Function will execute the given descriptor on the hardware or, if the
+ * hardware is busy, queue the descriptor to be executed later on. Returns a
+ * cookie which can be used to poll the status of the descriptor.
+ */
+static dma_cookie_t ep93xx_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(tx->chan);
+	struct ep93xx_dma_desc *desc;
+	dma_cookie_t cookie;
+	unsigned long flags;
+
+	spin_lock_irqsave(&edmac->lock, flags);
+	cookie = dma_cookie_assign(tx);
+
+	desc = container_of(tx, struct ep93xx_dma_desc, txd);
+
+	/*
+	 * If nothing is currently being processed, we push this descriptor
+	 * directly to the hardware. Otherwise we put the descriptor on the
+	 * pending queue.
+	 */
+	if (list_empty(&edmac->active)) {
+		ep93xx_dma_set_active(edmac, desc);
+		edmac->edma->hw_submit(edmac);
+	} else {
+		list_add_tail(&desc->node, &edmac->queue);
+	}
+
+	spin_unlock_irqrestore(&edmac->lock, flags);
+	return cookie;
+}
+
+/**
+ * ep93xx_dma_alloc_chan_resources - allocate resources for the channel
+ * @chan: channel to allocate resources
+ *
+ * Function allocates necessary resources for the given DMA channel and
+ * returns number of allocated descriptors for the channel. Negative errno
+ * is returned in case of failure.
+ */
+static int ep93xx_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+	struct ep93xx_dma_data *data = chan->private;
+	const char *name = dma_chan_name(chan);
+	int ret, i;
+
+	/* Sanity check the channel parameters */
+	if (!edmac->edma->m2m) {
+		if (!data)
+			return -EINVAL;
+		if (data->port < EP93XX_DMA_I2S1 ||
+		    data->port > EP93XX_DMA_IRDA)
+			return -EINVAL;
+		if (data->direction != ep93xx_dma_chan_direction(chan))
+			return -EINVAL;
+	} else {
+		if (data) {
+			switch (data->port) {
+			case EP93XX_DMA_SSP:
+			case EP93XX_DMA_IDE:
+				if (!is_slave_direction(data->direction))
+					return -EINVAL;
+				break;
+			default:
+				return -EINVAL;
+			}
+		}
+	}
+
+	if (data && data->name)
+		name = data->name;
+
+	ret = clk_enable(edmac->clk);
+	if (ret)
+		return ret;
+
+	ret = request_irq(edmac->irq, ep93xx_dma_interrupt, 0, name, edmac);
+	if (ret)
+		goto fail_clk_disable;
+
+	spin_lock_irq(&edmac->lock);
+	dma_cookie_init(&edmac->chan);
+	ret = edmac->edma->hw_setup(edmac);
+	spin_unlock_irq(&edmac->lock);
+
+	if (ret)
+		goto fail_free_irq;
+
+	for (i = 0; i < DMA_MAX_CHAN_DESCRIPTORS; i++) {
+		struct ep93xx_dma_desc *desc;
+
+		desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+		if (!desc) {
+			dev_warn(chan2dev(edmac), "not enough descriptors\n");
+			break;
+		}
+
+		INIT_LIST_HEAD(&desc->tx_list);
+
+		dma_async_tx_descriptor_init(&desc->txd, chan);
+		desc->txd.flags = DMA_CTRL_ACK;
+		desc->txd.tx_submit = ep93xx_dma_tx_submit;
+
+		ep93xx_dma_desc_put(edmac, desc);
+	}
+
+	return i;
+
+fail_free_irq:
+	free_irq(edmac->irq, edmac);
+fail_clk_disable:
+	clk_disable(edmac->clk);
+
+	return ret;
+}
+
+/**
+ * ep93xx_dma_free_chan_resources - release resources for the channel
+ * @chan: channel
+ *
+ * Function releases all the resources allocated for the given channel.
+ * The channel must be idle when this is called.
+ */
+static void ep93xx_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+	struct ep93xx_dma_desc *desc, *d;
+	unsigned long flags;
+	LIST_HEAD(list);
+
+	BUG_ON(!list_empty(&edmac->active));
+	BUG_ON(!list_empty(&edmac->queue));
+
+	spin_lock_irqsave(&edmac->lock, flags);
+	edmac->edma->hw_shutdown(edmac);
+	edmac->runtime_addr = 0;
+	edmac->runtime_ctrl = 0;
+	edmac->buffer = 0;
+	list_splice_init(&edmac->free_list, &list);
+	spin_unlock_irqrestore(&edmac->lock, flags);
+
+	list_for_each_entry_safe(desc, d, &list, node)
+		kfree(desc);
+
+	clk_disable(edmac->clk);
+	free_irq(edmac->irq, edmac);
+}
+
+/**
+ * ep93xx_dma_prep_dma_memcpy - prepare a memcpy DMA operation
+ * @chan: channel
+ * @dest: destination bus address
+ * @src: source bus address
+ * @len: size of the transaction
+ * @flags: flags for the descriptor
+ *
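+ * Transfers longer than DMA_MAX_CHAN_BYTES are split into a chain of
+ * descriptors, each covering at most DMA_MAX_CHAN_BYTES bytes.
+ *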
+ * Returns a valid DMA descriptor or %NULL in case of failure.
+ */
+static struct dma_async_tx_descriptor *
+ep93xx_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest,
+			   dma_addr_t src, size_t len, unsigned long flags)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+	struct ep93xx_dma_desc *desc, *first;
+	size_t bytes, offset;
+
+	first = NULL;
+	for (offset = 0; offset < len; offset += bytes) {
+		desc = ep93xx_dma_desc_get(edmac);
+		if (!desc) {
+			dev_warn(chan2dev(edmac), "couldn't get descriptor\n");
+			goto fail;
+		}
+
+		bytes = min_t(size_t, len - offset, DMA_MAX_CHAN_BYTES);
+
+		desc->src_addr = src + offset;
+		desc->dst_addr = dest + offset;
+		desc->size = bytes;
+
+		if (!first)
+			first = desc;
+		else
+			list_add_tail(&desc->node, &first->tx_list);
+	}
+
+	first->txd.cookie = -EBUSY;
+	first->txd.flags = flags;
+
+	return &first->txd;
+fail:
+	ep93xx_dma_desc_put(edmac, first);
+	return NULL;
+}
+
+/**
+ * ep93xx_dma_prep_slave_sg - prepare a slave DMA operation
+ * @chan: channel
+ * @sgl: list of buffers to transfer
+ * @sg_len: number of entries in @sgl
+ * @dir: direction of the DMA transfer
+ * @flags: flags for the descriptor
+ * @context: operation context (ignored)
+ *
+ * Returns a valid DMA descriptor or %NULL in case of failure.
+ */
+static struct dma_async_tx_descriptor *
+ep93xx_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+			 unsigned int sg_len, enum dma_transfer_direction dir,
+			 unsigned long flags, void *context)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+	struct ep93xx_dma_desc *desc, *first;
+	struct scatterlist *sg;
+	int i;
+
+	if (!edmac->edma->m2m && dir != ep93xx_dma_chan_direction(chan)) {
+		dev_warn(chan2dev(edmac),
+			 "channel was configured with different direction\n");
+		return NULL;
+	}
+
+	if (test_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) {
+		dev_warn(chan2dev(edmac),
+			 "channel is already used for cyclic transfers\n");
+		return NULL;
+	}
+
+	first = NULL;
+	for_each_sg(sgl, sg, sg_len, i) {
+		size_t len = sg_dma_len(sg);
+
+		if (len > DMA_MAX_CHAN_BYTES) {
+			dev_warn(chan2dev(edmac), "too big transfer size %zu\n",
+				 len);
+			goto fail;
+		}
+
+		desc = ep93xx_dma_desc_get(edmac);
+		if (!desc) {
+			dev_warn(chan2dev(edmac), "couldn't get descriptor\n");
+			goto fail;
+		}
+
+		if (dir == DMA_MEM_TO_DEV) {
+			desc->src_addr = sg_dma_address(sg);
+			desc->dst_addr = edmac->runtime_addr;
+		} else {
+			desc->src_addr = edmac->runtime_addr;
+			desc->dst_addr = sg_dma_address(sg);
+		}
+		desc->size = len;
+
+		if (!first)
+			first = desc;
+		else
+			list_add_tail(&desc->node, &first->tx_list);
+	}
+
+	first->txd.cookie = -EBUSY;
+	first->txd.flags = flags;
+
+	return &first->txd;
+
+fail:
+	ep93xx_dma_desc_put(edmac, first);
+	return NULL;
+}
+
+/**
+ * ep93xx_dma_prep_dma_cyclic - prepare a cyclic DMA operation
+ * @chan: channel
+ * @dma_addr: DMA mapped address of the buffer
+ * @buf_len: length of the buffer (in bytes)
+ * @period_len: length of a single period
+ * @dir: direction of the operation
+ * @flags: tx descriptor status flags
+ *
+ * Prepares a descriptor for a cyclic DMA operation. Once the descriptor is
+ * submitted, the hardware keeps transferring @period_len sized chunks and
+ * the callback is invoked after each completed period. The transfer
+ * terminates only when the client calls dmaengine_terminate_all() for this
+ * channel.
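+ *
+ * For example, an 8 KiB buffer with a 2 KiB @period_len (assuming it does
+ * not exceed DMA_MAX_CHAN_BYTES) is split into four chained descriptors and
+ * the callback runs after each completed 2 KiB period.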
+ *
+ * Returns a valid DMA descriptor or %NULL in case of failure.
+ */
+static struct dma_async_tx_descriptor *
+ep93xx_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
+			   size_t buf_len, size_t period_len,
+			   enum dma_transfer_direction dir, unsigned long flags)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+	struct ep93xx_dma_desc *desc, *first;
+	size_t offset = 0;
+
+	if (!edmac->edma->m2m && dir != ep93xx_dma_chan_direction(chan)) {
+		dev_warn(chan2dev(edmac),
+			 "channel was configured with different direction\n");
+		return NULL;
+	}
+
+	if (test_and_set_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags)) {
+		dev_warn(chan2dev(edmac),
+			 "channel is already used for cyclic transfers\n");
+		return NULL;
+	}
+
+	if (period_len > DMA_MAX_CHAN_BYTES) {
+		dev_warn(chan2dev(edmac), "too big period length %zu\n",
+			 period_len);
+		return NULL;
+	}
+
+	/* Split the buffer into period size chunks */
+	first = NULL;
+	for (offset = 0; offset < buf_len; offset += period_len) {
+		desc = ep93xx_dma_desc_get(edmac);
+		if (!desc) {
+			dev_warn(chan2dev(edmac), "couldn't get descriptor\n");
+			goto fail;
+		}
+
+		if (dir == DMA_MEM_TO_DEV) {
+			desc->src_addr = dma_addr + offset;
+			desc->dst_addr = edmac->runtime_addr;
+		} else {
+			desc->src_addr = edmac->runtime_addr;
+			desc->dst_addr = dma_addr + offset;
+		}
+
+		desc->size = period_len;
+
+		if (!first)
+			first = desc;
+		else
+			list_add_tail(&desc->node, &first->tx_list);
+	}
+
+	first->txd.cookie = -EBUSY;
+
+	return &first->txd;
+
+fail:
+	ep93xx_dma_desc_put(edmac, first);
+	return NULL;
+}
+
+/**
+ * ep93xx_dma_synchronize - synchronize transfer termination with the current context
+ * @chan: channel
+ *
+ * Synchronizes the DMA channel termination to the current context. When this
+ * function returns it is guaranteed that all transfers for previously issued
+ * descriptors have stopped and it is safe to free the memory associated
+ * with them. Furthermore it is guaranteed that all complete callback functions
+ * for a previously submitted descriptor have finished running and it is safe to
+ * free resources accessed from within the complete callbacks.
+ */
+static void ep93xx_dma_synchronize(struct dma_chan *chan)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+
+	if (edmac->edma->hw_synchronize)
+		edmac->edma->hw_synchronize(edmac);
+}
+
+/**
+ * ep93xx_dma_terminate_all - terminate all transactions
+ * @chan: channel
+ *
+ * Stops all DMA transactions. All descriptors are put back to the
+ * @edmac->free_list and callbacks are _not_ called.
+ */
+static int ep93xx_dma_terminate_all(struct dma_chan *chan)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+	struct ep93xx_dma_desc *desc, *_d;
+	unsigned long flags;
+	LIST_HEAD(list);
+
+	spin_lock_irqsave(&edmac->lock, flags);
+	/* First we disable and flush the DMA channel */
+	edmac->edma->hw_shutdown(edmac);
+	clear_bit(EP93XX_DMA_IS_CYCLIC, &edmac->flags);
+	list_splice_init(&edmac->active, &list);
+	list_splice_init(&edmac->queue, &list);
+	/*
+	 * We then re-enable the channel. This way we can continue submitting
+	 * the descriptors by just calling ->hw_submit() again.
+	 */
+	edmac->edma->hw_setup(edmac);
+	spin_unlock_irqrestore(&edmac->lock, flags);
+
+	list_for_each_entry_safe(desc, _d, &list, node)
+		ep93xx_dma_desc_put(edmac, desc);
+
+	return 0;
+}
+
+static int ep93xx_dma_slave_config(struct dma_chan *chan,
+				   struct dma_slave_config *config)
+{
+	struct ep93xx_dma_chan *edmac = to_ep93xx_dma_chan(chan);
+	enum dma_slave_buswidth width;
+	unsigned long flags;
+	u32 addr, ctrl;
+
+	if (!edmac->edma->m2m)
+		return -EINVAL;
+
+	switch (config->direction) {
+	case DMA_DEV_TO_MEM:
+		width = config->src_addr_width;
+		addr = config->src_addr;
+		break;
+
+	case DMA_MEM_TO_DEV:
+		width = config->dst_addr_width;
+		addr = config->dst_addr;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	switch (width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		ctrl = 0;
+		break;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		ctrl = M2M_CONTROL_PW_16;
+		break;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		ctrl = M2M_CONTROL_PW_32;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&edmac->lock, flags);
+	edmac->runtime_addr = addr;
+	edmac->runtime_ctrl = ctrl;
+	spin_unlock_irqrestore(&edmac->lock, flags);
+
+	return 0;
+}
+
+/**
+ * ep93xx_dma_tx_status - check if a transaction is completed
+ * @chan: channel
+ * @cookie: transaction specific cookie
+ * @state: state of the transaction is stored here if given
+ *
+ * This function can be used to query the state of a given transaction.
+ */
+static enum dma_status ep93xx_dma_tx_status(struct dma_chan *chan,
+					    dma_cookie_t cookie,
+					    struct dma_tx_state *state)
+{
+	return dma_cookie_status(chan, cookie, state);
+}
+
+/**
+ * ep93xx_dma_issue_pending - push pending transactions to the hardware
+ * @chan: channel
+ *
+ * When this function is called, all pending transactions are pushed to the
+ * hardware and executed.
+ */
+static void ep93xx_dma_issue_pending(struct dma_chan *chan)
+{
+	ep93xx_dma_advance_work(to_ep93xx_dma_chan(chan));
+}
+
+static int __init ep93xx_dma_probe(struct platform_device *pdev)
+{
+	struct ep93xx_dma_platform_data *pdata = dev_get_platdata(&pdev->dev);
+	struct ep93xx_dma_engine *edma;
+	struct dma_device *dma_dev;
+	size_t edma_size;
+	int ret, i;
+
+	edma_size = pdata->num_channels * sizeof(struct ep93xx_dma_chan);
+	edma = kzalloc(sizeof(*edma) + edma_size, GFP_KERNEL);
+	if (!edma)
+		return -ENOMEM;
+
+	dma_dev = &edma->dma_dev;
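+	/*
+	 * driver_data selects the channel type: 0 for M2P, 1 for M2M
+	 * (see ep93xx_dma_driver_ids below).
+	 */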
+	edma->m2m = platform_get_device_id(pdev)->driver_data;
+	edma->num_channels = pdata->num_channels;
+
+	INIT_LIST_HEAD(&dma_dev->channels);
+	for (i = 0; i < pdata->num_channels; i++) {
+		const struct ep93xx_dma_chan_data *cdata = &pdata->channels[i];
+		struct ep93xx_dma_chan *edmac = &edma->channels[i];
+
+		edmac->chan.device = dma_dev;
+		edmac->regs = cdata->base;
+		edmac->irq = cdata->irq;
+		edmac->edma = edma;
+
+		edmac->clk = clk_get(NULL, cdata->name);
+		if (IS_ERR(edmac->clk)) {
+			dev_warn(&pdev->dev, "failed to get clock for %s\n",
+				 cdata->name);
+			continue;
+		}
+
+		spin_lock_init(&edmac->lock);
+		INIT_LIST_HEAD(&edmac->active);
+		INIT_LIST_HEAD(&edmac->queue);
+		INIT_LIST_HEAD(&edmac->free_list);
+		tasklet_init(&edmac->tasklet, ep93xx_dma_tasklet,
+			     (unsigned long)edmac);
+
+		list_add_tail(&edmac->chan.device_node,
+			      &dma_dev->channels);
+	}
+
+	dma_cap_zero(dma_dev->cap_mask);
+	dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
+	dma_cap_set(DMA_CYCLIC, dma_dev->cap_mask);
+
+	dma_dev->dev = &pdev->dev;
+	dma_dev->device_alloc_chan_resources = ep93xx_dma_alloc_chan_resources;
+	dma_dev->device_free_chan_resources = ep93xx_dma_free_chan_resources;
+	dma_dev->device_prep_slave_sg = ep93xx_dma_prep_slave_sg;
+	dma_dev->device_prep_dma_cyclic = ep93xx_dma_prep_dma_cyclic;
+	dma_dev->device_config = ep93xx_dma_slave_config;
+	dma_dev->device_synchronize = ep93xx_dma_synchronize;
+	dma_dev->device_terminate_all = ep93xx_dma_terminate_all;
+	dma_dev->device_issue_pending = ep93xx_dma_issue_pending;
+	dma_dev->device_tx_status = ep93xx_dma_tx_status;
+
+	dma_set_max_seg_size(dma_dev->dev, DMA_MAX_CHAN_BYTES);
+
+	if (edma->m2m) {
+		dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+		dma_dev->device_prep_dma_memcpy = ep93xx_dma_prep_dma_memcpy;
+
+		edma->hw_setup = m2m_hw_setup;
+		edma->hw_shutdown = m2m_hw_shutdown;
+		edma->hw_submit = m2m_hw_submit;
+		edma->hw_interrupt = m2m_hw_interrupt;
+	} else {
+		dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);
+
+		edma->hw_synchronize = m2p_hw_synchronize;
+		edma->hw_setup = m2p_hw_setup;
+		edma->hw_shutdown = m2p_hw_shutdown;
+		edma->hw_submit = m2p_hw_submit;
+		edma->hw_interrupt = m2p_hw_interrupt;
+	}
+
+	ret = dma_async_device_register(dma_dev);
+	if (unlikely(ret)) {
+		for (i = 0; i < edma->num_channels; i++) {
+			struct ep93xx_dma_chan *edmac = &edma->channels[i];
+			if (!IS_ERR_OR_NULL(edmac->clk))
+				clk_put(edmac->clk);
+		}
+		kfree(edma);
+	} else {
+		dev_info(dma_dev->dev, "EP93xx M2%s DMA ready\n",
+			 edma->m2m ? "M" : "P");
+	}
+
+	return ret;
+}
+
+static const struct platform_device_id ep93xx_dma_driver_ids[] = {
+	{ "ep93xx-dma-m2p", 0 },
+	{ "ep93xx-dma-m2m", 1 },
+	{ },
+};
+
+static struct platform_driver ep93xx_dma_driver = {
+	.driver		= {
+		.name	= "ep93xx-dma",
+	},
+	.id_table	= ep93xx_dma_driver_ids,
+};
+
+static int __init ep93xx_dma_module_init(void)
+{
+	return platform_driver_probe(&ep93xx_dma_driver, ep93xx_dma_probe);
+}
+subsys_initcall(ep93xx_dma_module_init);
+
+MODULE_AUTHOR("Mika Westerberg <mika.westerberg@iki.fi>");
+MODULE_DESCRIPTION("EP93xx DMA driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c
new file mode 100644
index 0000000..c756886
--- /dev/null
+++ b/drivers/dma/fsl-edma.c
@@ -0,0 +1,1107 @@
+/*
+ * drivers/dma/fsl-edma.c
+ *
+ * Copyright 2013-2014 Freescale Semiconductor, Inc.
+ *
+ * Driver for the Freescale eDMA engine with flexible channel multiplexing
+ * capability for DMA request sources. The eDMA block can be found on some
+ * Vybrid and Layerscape SoCs.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_dma.h>
+
+#include "virt-dma.h"
+
+#define EDMA_CR			0x00
+#define EDMA_ES			0x04
+#define EDMA_ERQ		0x0C
+#define EDMA_EEI		0x14
+#define EDMA_SERQ		0x1B
+#define EDMA_CERQ		0x1A
+#define EDMA_SEEI		0x19
+#define EDMA_CEEI		0x18
+#define EDMA_CINT		0x1F
+#define EDMA_CERR		0x1E
+#define EDMA_SSRT		0x1D
+#define EDMA_CDNE		0x1C
+#define EDMA_INTR		0x24
+#define EDMA_ERR		0x2C
+
+#define EDMA_TCD_SADDR(x)	(0x1000 + 32 * (x))
+#define EDMA_TCD_SOFF(x)	(0x1004 + 32 * (x))
+#define EDMA_TCD_ATTR(x)	(0x1006 + 32 * (x))
+#define EDMA_TCD_NBYTES(x)	(0x1008 + 32 * (x))
+#define EDMA_TCD_SLAST(x)	(0x100C + 32 * (x))
+#define EDMA_TCD_DADDR(x)	(0x1010 + 32 * (x))
+#define EDMA_TCD_DOFF(x)	(0x1014 + 32 * (x))
+#define EDMA_TCD_CITER_ELINK(x)	(0x1016 + 32 * (x))
+#define EDMA_TCD_CITER(x)	(0x1016 + 32 * (x))
+#define EDMA_TCD_DLAST_SGA(x)	(0x1018 + 32 * (x))
+#define EDMA_TCD_CSR(x)		(0x101C + 32 * (x))
+#define EDMA_TCD_BITER_ELINK(x)	(0x101E + 32 * (x))
+#define EDMA_TCD_BITER(x)	(0x101E + 32 * (x))
+
+#define EDMA_CR_EDBG		BIT(1)
+#define EDMA_CR_ERCA		BIT(2)
+#define EDMA_CR_ERGA		BIT(3)
+#define EDMA_CR_HOE		BIT(4)
+#define EDMA_CR_HALT		BIT(5)
+#define EDMA_CR_CLM		BIT(6)
+#define EDMA_CR_EMLM		BIT(7)
+#define EDMA_CR_ECX		BIT(16)
+#define EDMA_CR_CX		BIT(17)
+
+#define EDMA_SEEI_SEEI(x)	((x) & 0x1F)
+#define EDMA_CEEI_CEEI(x)	((x) & 0x1F)
+#define EDMA_CINT_CINT(x)	((x) & 0x1F)
+#define EDMA_CERR_CERR(x)	((x) & 0x1F)
+
+#define EDMA_TCD_ATTR_DSIZE(x)		(((x) & 0x0007))
+#define EDMA_TCD_ATTR_DMOD(x)		(((x) & 0x001F) << 3)
+#define EDMA_TCD_ATTR_SSIZE(x)		(((x) & 0x0007) << 8)
+#define EDMA_TCD_ATTR_SMOD(x)		(((x) & 0x001F) << 11)
+#define EDMA_TCD_ATTR_SSIZE_8BIT	(0x0000)
+#define EDMA_TCD_ATTR_SSIZE_16BIT	(0x0100)
+#define EDMA_TCD_ATTR_SSIZE_32BIT	(0x0200)
+#define EDMA_TCD_ATTR_SSIZE_64BIT	(0x0300)
+#define EDMA_TCD_ATTR_SSIZE_32BYTE	(0x0500)
+#define EDMA_TCD_ATTR_DSIZE_8BIT	(0x0000)
+#define EDMA_TCD_ATTR_DSIZE_16BIT	(0x0001)
+#define EDMA_TCD_ATTR_DSIZE_32BIT	(0x0002)
+#define EDMA_TCD_ATTR_DSIZE_64BIT	(0x0003)
+#define EDMA_TCD_ATTR_DSIZE_32BYTE	(0x0005)
+
+#define EDMA_TCD_SOFF_SOFF(x)		(x)
+#define EDMA_TCD_NBYTES_NBYTES(x)	(x)
+#define EDMA_TCD_SLAST_SLAST(x)		(x)
+#define EDMA_TCD_DADDR_DADDR(x)		(x)
+#define EDMA_TCD_CITER_CITER(x)		((x) & 0x7FFF)
+#define EDMA_TCD_DOFF_DOFF(x)		(x)
+#define EDMA_TCD_DLAST_SGA_DLAST_SGA(x)	(x)
+#define EDMA_TCD_BITER_BITER(x)		((x) & 0x7FFF)
+
+#define EDMA_TCD_CSR_START		BIT(0)
+#define EDMA_TCD_CSR_INT_MAJOR		BIT(1)
+#define EDMA_TCD_CSR_INT_HALF		BIT(2)
+#define EDMA_TCD_CSR_D_REQ		BIT(3)
+#define EDMA_TCD_CSR_E_SG		BIT(4)
+#define EDMA_TCD_CSR_E_LINK		BIT(5)
+#define EDMA_TCD_CSR_ACTIVE		BIT(6)
+#define EDMA_TCD_CSR_DONE		BIT(7)
+
+#define EDMAMUX_CHCFG_DIS		0x0
+#define EDMAMUX_CHCFG_ENBL		0x80
+#define EDMAMUX_CHCFG_SOURCE(n)		((n) & 0x3F)
+
+#define DMAMUX_NR	2
+
+#define FSL_EDMA_BUSWIDTHS	(BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
+enum fsl_edma_pm_state {
+	RUNNING = 0,
+	SUSPENDED,
+};
+
+struct fsl_edma_hw_tcd {
+	__le32	saddr;
+	__le16	soff;
+	__le16	attr;
+	__le32	nbytes;
+	__le32	slast;
+	__le32	daddr;
+	__le16	doff;
+	__le16	citer;
+	__le32	dlast_sga;
+	__le16	csr;
+	__le16	biter;
+};
+
+struct fsl_edma_sw_tcd {
+	dma_addr_t			ptcd;
+	struct fsl_edma_hw_tcd		*vtcd;
+};
+
+struct fsl_edma_slave_config {
+	enum dma_transfer_direction	dir;
+	enum dma_slave_buswidth		addr_width;
+	u32				dev_addr;
+	u32				burst;
+	u32				attr;
+};
+
+struct fsl_edma_chan {
+	struct virt_dma_chan		vchan;
+	enum dma_status			status;
+	enum fsl_edma_pm_state		pm_state;
+	bool				idle;
+	u32				slave_id;
+	struct fsl_edma_engine		*edma;
+	struct fsl_edma_desc		*edesc;
+	struct fsl_edma_slave_config	fsc;
+	struct dma_pool			*tcd_pool;
+};
+
+struct fsl_edma_desc {
+	struct virt_dma_desc		vdesc;
+	struct fsl_edma_chan		*echan;
+	bool				iscyclic;
+	unsigned int			n_tcds;
+	struct fsl_edma_sw_tcd		tcd[];
+};
+
+struct fsl_edma_engine {
+	struct dma_device	dma_dev;
+	void __iomem		*membase;
+	void __iomem		*muxbase[DMAMUX_NR];
+	struct clk		*muxclk[DMAMUX_NR];
+	struct mutex		fsl_edma_mutex;
+	u32			n_chans;
+	int			txirq;
+	int			errirq;
+	bool			big_endian;
+	struct fsl_edma_chan	chans[];
+};
+
+/*
+ * R/W functions for big- or little-endian registers:
+ * The eDMA controller's endianness is independent of the CPU core's.
+ * For the big-endian IP module, the byte offsets of 8-bit and 16-bit
+ * registers must also be swapped relative to the little-endian layout.
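+ * For example, an 8-bit register placed at offset 0x1B in the little-endian
+ * map is accessed at 0x1B ^ 0x3 = 0x18 on a big-endian eDMA (edma_writeb()),
+ * and 16-bit register offsets are likewise XORed with 0x2 (edma_writew()).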
+ */
+
+static u32 edma_readl(struct fsl_edma_engine *edma, void __iomem *addr)
+{
+	if (edma->big_endian)
+		return ioread32be(addr);
+	else
+		return ioread32(addr);
+}
+
+static void edma_writeb(struct fsl_edma_engine *edma, u8 val, void __iomem *addr)
+{
+	/* swap the reg offset for these in big-endian mode */
+	if (edma->big_endian)
+		iowrite8(val, (void __iomem *)((unsigned long)addr ^ 0x3));
+	else
+		iowrite8(val, addr);
+}
+
+static void edma_writew(struct fsl_edma_engine *edma, u16 val, void __iomem *addr)
+{
+	/* swap the reg offset for these in big-endian mode */
+	if (edma->big_endian)
+		iowrite16be(val, (void __iomem *)((unsigned long)addr ^ 0x2));
+	else
+		iowrite16(val, addr);
+}
+
+static void edma_writel(struct fsl_edma_engine *edma, u32 val, void __iomem *addr)
+{
+	if (edma->big_endian)
+		iowrite32be(val, addr);
+	else
+		iowrite32(val, addr);
+}
+
+static struct fsl_edma_chan *to_fsl_edma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct fsl_edma_chan, vchan.chan);
+}
+
+static struct fsl_edma_desc *to_fsl_edma_desc(struct virt_dma_desc *vd)
+{
+	return container_of(vd, struct fsl_edma_desc, vdesc);
+}
+
+static void fsl_edma_enable_request(struct fsl_edma_chan *fsl_chan)
+{
+	void __iomem *addr = fsl_chan->edma->membase;
+	u32 ch = fsl_chan->vchan.chan.chan_id;
+
+	edma_writeb(fsl_chan->edma, EDMA_SEEI_SEEI(ch), addr + EDMA_SEEI);
+	edma_writeb(fsl_chan->edma, ch, addr + EDMA_SERQ);
+}
+
+static void fsl_edma_disable_request(struct fsl_edma_chan *fsl_chan)
+{
+	void __iomem *addr = fsl_chan->edma->membase;
+	u32 ch = fsl_chan->vchan.chan.chan_id;
+
+	edma_writeb(fsl_chan->edma, ch, addr + EDMA_CERQ);
+	edma_writeb(fsl_chan->edma, EDMA_CEEI_CEEI(ch), addr + EDMA_CEEI);
+}
+
+static void fsl_edma_chan_mux(struct fsl_edma_chan *fsl_chan,
+			unsigned int slot, bool enable)
+{
+	u32 ch = fsl_chan->vchan.chan.chan_id;
+	void __iomem *muxaddr;
+	unsigned chans_per_mux, ch_off;
+
+	chans_per_mux = fsl_chan->edma->n_chans / DMAMUX_NR;
+	ch_off = fsl_chan->vchan.chan.chan_id % chans_per_mux;
+	muxaddr = fsl_chan->edma->muxbase[ch / chans_per_mux];
+	slot = EDMAMUX_CHCFG_SOURCE(slot);
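+	/*
+	 * For example, with 32 channels and DMAMUX_NR == 2, channels 0-15 are
+	 * routed through mux 0 and channels 16-31 through mux 1, each at byte
+	 * offset chan_id % 16 within its mux block.
+	 */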
+
+	if (enable)
+		iowrite8(EDMAMUX_CHCFG_ENBL | slot, muxaddr + ch_off);
+	else
+		iowrite8(EDMAMUX_CHCFG_DIS, muxaddr + ch_off);
+}
+
+static unsigned int fsl_edma_get_tcd_attr(enum dma_slave_buswidth addr_width)
+{
+	switch (addr_width) {
+	case 1:
+		return EDMA_TCD_ATTR_SSIZE_8BIT | EDMA_TCD_ATTR_DSIZE_8BIT;
+	case 2:
+		return EDMA_TCD_ATTR_SSIZE_16BIT | EDMA_TCD_ATTR_DSIZE_16BIT;
+	case 4:
+		return EDMA_TCD_ATTR_SSIZE_32BIT | EDMA_TCD_ATTR_DSIZE_32BIT;
+	case 8:
+		return EDMA_TCD_ATTR_SSIZE_64BIT | EDMA_TCD_ATTR_DSIZE_64BIT;
+	default:
+		return EDMA_TCD_ATTR_SSIZE_32BIT | EDMA_TCD_ATTR_DSIZE_32BIT;
+	}
+}
+
+static void fsl_edma_free_desc(struct virt_dma_desc *vdesc)
+{
+	struct fsl_edma_desc *fsl_desc;
+	int i;
+
+	fsl_desc = to_fsl_edma_desc(vdesc);
+	for (i = 0; i < fsl_desc->n_tcds; i++)
+		dma_pool_free(fsl_desc->echan->tcd_pool, fsl_desc->tcd[i].vtcd,
+			      fsl_desc->tcd[i].ptcd);
+	kfree(fsl_desc);
+}
+
+static int fsl_edma_terminate_all(struct dma_chan *chan)
+{
+	struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
+	fsl_edma_disable_request(fsl_chan);
+	fsl_chan->edesc = NULL;
+	fsl_chan->idle = true;
+	vchan_get_all_descriptors(&fsl_chan->vchan, &head);
+	spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+	vchan_dma_desc_free_list(&fsl_chan->vchan, &head);
+	return 0;
+}
+
+static int fsl_edma_pause(struct dma_chan *chan)
+{
+	struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
+	if (fsl_chan->edesc) {
+		fsl_edma_disable_request(fsl_chan);
+		fsl_chan->status = DMA_PAUSED;
+		fsl_chan->idle = true;
+	}
+	spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+	return 0;
+}
+
+static int fsl_edma_resume(struct dma_chan *chan)
+{
+	struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
+	if (fsl_chan->edesc) {
+		fsl_edma_enable_request(fsl_chan);
+		fsl_chan->status = DMA_IN_PROGRESS;
+		fsl_chan->idle = false;
+	}
+	spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+	return 0;
+}
+
+static int fsl_edma_slave_config(struct dma_chan *chan,
+				 struct dma_slave_config *cfg)
+{
+	struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
+
+	fsl_chan->fsc.dir = cfg->direction;
+	if (cfg->direction == DMA_DEV_TO_MEM) {
+		fsl_chan->fsc.dev_addr = cfg->src_addr;
+		fsl_chan->fsc.addr_width = cfg->src_addr_width;
+		fsl_chan->fsc.burst = cfg->src_maxburst;
+		fsl_chan->fsc.attr = fsl_edma_get_tcd_attr(cfg->src_addr_width);
+	} else if (cfg->direction == DMA_MEM_TO_DEV) {
+		fsl_chan->fsc.dev_addr = cfg->dst_addr;
+		fsl_chan->fsc.addr_width = cfg->dst_addr_width;
+		fsl_chan->fsc.burst = cfg->dst_maxburst;
+		fsl_chan->fsc.attr = fsl_edma_get_tcd_attr(cfg->dst_addr_width);
+	} else {
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static size_t fsl_edma_desc_residue(struct fsl_edma_chan *fsl_chan,
+		struct virt_dma_desc *vdesc, bool in_progress)
+{
+	struct fsl_edma_desc *edesc = fsl_chan->edesc;
+	void __iomem *addr = fsl_chan->edma->membase;
+	u32 ch = fsl_chan->vchan.chan.chan_id;
+	enum dma_transfer_direction dir = fsl_chan->fsc.dir;
+	dma_addr_t cur_addr, dma_addr;
+	size_t len, size;
+	int i;
+
+	/* calculate the total size in this desc */
+	for (len = i = 0; i < fsl_chan->edesc->n_tcds; i++)
+		len += le32_to_cpu(edesc->tcd[i].vtcd->nbytes)
+			* le16_to_cpu(edesc->tcd[i].vtcd->biter);
+
+	if (!in_progress)
+		return len;
+
+	if (dir == DMA_MEM_TO_DEV)
+		cur_addr = edma_readl(fsl_chan->edma, addr + EDMA_TCD_SADDR(ch));
+	else
+		cur_addr = edma_readl(fsl_chan->edma, addr + EDMA_TCD_DADDR(ch));
+
+	/* figure out which TCDs have finished and calculate the residue */
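+	/*
+	 * For example, with two 4 KiB TCDs and the engine currently 1 KiB
+	 * into the second one, the loop subtracts both TCD sizes from the
+	 * total and adds back the 3 KiB that remain of the TCD containing
+	 * cur_addr, giving a residue of 3 KiB.
+	 */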
+	for (i = 0; i < fsl_chan->edesc->n_tcds; i++) {
+		size = le32_to_cpu(edesc->tcd[i].vtcd->nbytes)
+			* le16_to_cpu(edesc->tcd[i].vtcd->biter);
+		if (dir == DMA_MEM_TO_DEV)
+			dma_addr = le32_to_cpu(edesc->tcd[i].vtcd->saddr);
+		else
+			dma_addr = le32_to_cpu(edesc->tcd[i].vtcd->daddr);
+
+		len -= size;
+		if (cur_addr >= dma_addr && cur_addr < dma_addr + size) {
+			len += dma_addr + size - cur_addr;
+			break;
+		}
+	}
+
+	return len;
+}
+
+static enum dma_status fsl_edma_tx_status(struct dma_chan *chan,
+		dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
+	struct virt_dma_desc *vdesc;
+	enum dma_status status;
+	unsigned long flags;
+
+	status = dma_cookie_status(chan, cookie, txstate);
+	if (status == DMA_COMPLETE)
+		return status;
+
+	if (!txstate)
+		return fsl_chan->status;
+
+	spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
+	vdesc = vchan_find_desc(&fsl_chan->vchan, cookie);
+	if (fsl_chan->edesc && cookie == fsl_chan->edesc->vdesc.tx.cookie)
+		txstate->residue = fsl_edma_desc_residue(fsl_chan, vdesc, true);
+	else if (vdesc)
+		txstate->residue = fsl_edma_desc_residue(fsl_chan, vdesc, false);
+	else
+		txstate->residue = 0;
+
+	spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+
+	return fsl_chan->status;
+}
+
+static void fsl_edma_set_tcd_regs(struct fsl_edma_chan *fsl_chan,
+				  struct fsl_edma_hw_tcd *tcd)
+{
+	struct fsl_edma_engine *edma = fsl_chan->edma;
+	void __iomem *addr = fsl_chan->edma->membase;
+	u32 ch = fsl_chan->vchan.chan.chan_id;
+
+	/*
+	 * TCD parameters are stored in struct fsl_edma_hw_tcd in little-
+	 * endian format. However, the TCD registers must be written in
+	 * big- or little-endian format according to the eDMA engine's
+	 * register endianness.
+	 */
+	edma_writew(edma, 0, addr + EDMA_TCD_CSR(ch));
+	edma_writel(edma, le32_to_cpu(tcd->saddr), addr + EDMA_TCD_SADDR(ch));
+	edma_writel(edma, le32_to_cpu(tcd->daddr), addr + EDMA_TCD_DADDR(ch));
+
+	edma_writew(edma, le16_to_cpu(tcd->attr), addr + EDMA_TCD_ATTR(ch));
+	edma_writew(edma, le16_to_cpu(tcd->soff), addr + EDMA_TCD_SOFF(ch));
+
+	edma_writel(edma, le32_to_cpu(tcd->nbytes), addr + EDMA_TCD_NBYTES(ch));
+	edma_writel(edma, le32_to_cpu(tcd->slast), addr + EDMA_TCD_SLAST(ch));
+
+	edma_writew(edma, le16_to_cpu(tcd->citer), addr + EDMA_TCD_CITER(ch));
+	edma_writew(edma, le16_to_cpu(tcd->biter), addr + EDMA_TCD_BITER(ch));
+	edma_writew(edma, le16_to_cpu(tcd->doff), addr + EDMA_TCD_DOFF(ch));
+
+	edma_writel(edma, le32_to_cpu(tcd->dlast_sga), addr + EDMA_TCD_DLAST_SGA(ch));
+
+	edma_writew(edma, le16_to_cpu(tcd->csr), addr + EDMA_TCD_CSR(ch));
+}
+
+static inline
+void fsl_edma_fill_tcd(struct fsl_edma_hw_tcd *tcd, u32 src, u32 dst,
+		       u16 attr, u16 soff, u32 nbytes, u32 slast, u16 citer,
+		       u16 biter, u16 doff, u32 dlast_sga, bool major_int,
+		       bool disable_req, bool enable_sg)
+{
+	u16 csr = 0;
+
+	/*
+	 * eDMA hardware scatter/gather requires the TCDs to be stored in
+	 * little-endian format irrespective of the register endian model,
+	 * so we keep the values little-endian in memory and let
+	 * fsl_edma_set_tcd_regs() do any swapping when loading the registers.
+	 */
+	tcd->saddr = cpu_to_le32(src);
+	tcd->daddr = cpu_to_le32(dst);
+
+	tcd->attr = cpu_to_le16(attr);
+
+	tcd->soff = cpu_to_le16(EDMA_TCD_SOFF_SOFF(soff));
+
+	tcd->nbytes = cpu_to_le32(EDMA_TCD_NBYTES_NBYTES(nbytes));
+	tcd->slast = cpu_to_le32(EDMA_TCD_SLAST_SLAST(slast));
+
+	tcd->citer = cpu_to_le16(EDMA_TCD_CITER_CITER(citer));
+	tcd->doff = cpu_to_le16(EDMA_TCD_DOFF_DOFF(doff));
+
+	tcd->dlast_sga = cpu_to_le32(EDMA_TCD_DLAST_SGA_DLAST_SGA(dlast_sga));
+
+	tcd->biter = cpu_to_le16(EDMA_TCD_BITER_BITER(biter));
+	if (major_int)
+		csr |= EDMA_TCD_CSR_INT_MAJOR;
+
+	if (disable_req)
+		csr |= EDMA_TCD_CSR_D_REQ;
+
+	if (enable_sg)
+		csr |= EDMA_TCD_CSR_E_SG;
+
+	tcd->csr = cpu_to_le16(csr);
+}
+
+static struct fsl_edma_desc *fsl_edma_alloc_desc(struct fsl_edma_chan *fsl_chan,
+		int sg_len)
+{
+	struct fsl_edma_desc *fsl_desc;
+	int i;
+
+	fsl_desc = kzalloc(sizeof(*fsl_desc) + sizeof(struct fsl_edma_sw_tcd) * sg_len,
+				GFP_NOWAIT);
+	if (!fsl_desc)
+		return NULL;
+
+	fsl_desc->echan = fsl_chan;
+	fsl_desc->n_tcds = sg_len;
+	for (i = 0; i < sg_len; i++) {
+		fsl_desc->tcd[i].vtcd = dma_pool_alloc(fsl_chan->tcd_pool,
+					GFP_NOWAIT, &fsl_desc->tcd[i].ptcd);
+		if (!fsl_desc->tcd[i].vtcd)
+			goto err;
+	}
+	return fsl_desc;
+
+err:
+	while (--i >= 0)
+		dma_pool_free(fsl_chan->tcd_pool, fsl_desc->tcd[i].vtcd,
+				fsl_desc->tcd[i].ptcd);
+	kfree(fsl_desc);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *fsl_edma_prep_dma_cyclic(
+		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
+		size_t period_len, enum dma_transfer_direction direction,
+		unsigned long flags)
+{
+	struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
+	struct fsl_edma_desc *fsl_desc;
+	dma_addr_t dma_buf_next;
+	int sg_len, i;
+	u32 src_addr, dst_addr, last_sg, nbytes;
+	u16 soff, doff, iter;
+
+	if (!is_slave_direction(fsl_chan->fsc.dir))
+		return NULL;
+
+	sg_len = buf_len / period_len;
+	fsl_desc = fsl_edma_alloc_desc(fsl_chan, sg_len);
+	if (!fsl_desc)
+		return NULL;
+	fsl_desc->iscyclic = true;
+
+	dma_buf_next = dma_addr;
+	nbytes = fsl_chan->fsc.addr_width * fsl_chan->fsc.burst;
+	iter = period_len / nbytes;
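+	/*
+	 * Each minor loop moves fsc.addr_width * fsc.burst bytes; e.g. a
+	 * 4-byte bus width with a burst of 8 gives nbytes = 32, so a
+	 * 1024-byte period needs iter = 32 major-loop iterations.
+	 */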
+
+	for (i = 0; i < sg_len; i++) {
+		if (dma_buf_next >= dma_addr + buf_len)
+			dma_buf_next = dma_addr;
+
+		/* get next sg's physical address */
+		last_sg = fsl_desc->tcd[(i + 1) % sg_len].ptcd;
+
+		if (fsl_chan->fsc.dir == DMA_MEM_TO_DEV) {
+			src_addr = dma_buf_next;
+			dst_addr = fsl_chan->fsc.dev_addr;
+			soff = fsl_chan->fsc.addr_width;
+			doff = 0;
+		} else {
+			src_addr = fsl_chan->fsc.dev_addr;
+			dst_addr = dma_buf_next;
+			soff = 0;
+			doff = fsl_chan->fsc.addr_width;
+		}
+
+		fsl_edma_fill_tcd(fsl_desc->tcd[i].vtcd, src_addr, dst_addr,
+				  fsl_chan->fsc.attr, soff, nbytes, 0, iter,
+				  iter, doff, last_sg, true, false, true);
+		dma_buf_next += period_len;
+	}
+
+	return vchan_tx_prep(&fsl_chan->vchan, &fsl_desc->vdesc, flags);
+}
+
+static struct dma_async_tx_descriptor *fsl_edma_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
+	struct fsl_edma_desc *fsl_desc;
+	struct scatterlist *sg;
+	u32 src_addr, dst_addr, last_sg, nbytes;
+	u16 soff, doff, iter;
+	int i;
+
+	if (!is_slave_direction(fsl_chan->fsc.dir))
+		return NULL;
+
+	fsl_desc = fsl_edma_alloc_desc(fsl_chan, sg_len);
+	if (!fsl_desc)
+		return NULL;
+	fsl_desc->iscyclic = false;
+
+	nbytes = fsl_chan->fsc.addr_width * fsl_chan->fsc.burst;
+	for_each_sg(sgl, sg, sg_len, i) {
+		/* get next sg's physical address */
+		last_sg = fsl_desc->tcd[(i + 1) % sg_len].ptcd;
+
+		if (fsl_chan->fsc.dir == DMA_MEM_TO_DEV) {
+			src_addr = sg_dma_address(sg);
+			dst_addr = fsl_chan->fsc.dev_addr;
+			soff = fsl_chan->fsc.addr_width;
+			doff = 0;
+		} else {
+			src_addr = fsl_chan->fsc.dev_addr;
+			dst_addr = sg_dma_address(sg);
+			soff = 0;
+			doff = fsl_chan->fsc.addr_width;
+		}
+
+		iter = sg_dma_len(sg) / nbytes;
+		if (i < sg_len - 1) {
+			last_sg = fsl_desc->tcd[(i + 1)].ptcd;
+			fsl_edma_fill_tcd(fsl_desc->tcd[i].vtcd, src_addr,
+					  dst_addr, fsl_chan->fsc.attr, soff,
+					  nbytes, 0, iter, iter, doff, last_sg,
+					  false, false, true);
+		} else {
+			last_sg = 0;
+			fsl_edma_fill_tcd(fsl_desc->tcd[i].vtcd, src_addr,
+					  dst_addr, fsl_chan->fsc.attr, soff,
+					  nbytes, 0, iter, iter, doff, last_sg,
+					  true, true, false);
+		}
+	}
+
+	return vchan_tx_prep(&fsl_chan->vchan, &fsl_desc->vdesc, flags);
+}
+
+static void fsl_edma_xfer_desc(struct fsl_edma_chan *fsl_chan)
+{
+	struct virt_dma_desc *vdesc;
+
+	vdesc = vchan_next_desc(&fsl_chan->vchan);
+	if (!vdesc)
+		return;
+	fsl_chan->edesc = to_fsl_edma_desc(vdesc);
+	fsl_edma_set_tcd_regs(fsl_chan, fsl_chan->edesc->tcd[0].vtcd);
+	fsl_edma_enable_request(fsl_chan);
+	fsl_chan->status = DMA_IN_PROGRESS;
+	fsl_chan->idle = false;
+}
+
+static irqreturn_t fsl_edma_tx_handler(int irq, void *dev_id)
+{
+	struct fsl_edma_engine *fsl_edma = dev_id;
+	unsigned int intr, ch;
+	void __iomem *base_addr;
+	struct fsl_edma_chan *fsl_chan;
+
+	base_addr = fsl_edma->membase;
+
+	intr = edma_readl(fsl_edma, base_addr + EDMA_INTR);
+	if (!intr)
+		return IRQ_NONE;
+
+	for (ch = 0; ch < fsl_edma->n_chans; ch++) {
+		if (intr & (0x1 << ch)) {
+			edma_writeb(fsl_edma, EDMA_CINT_CINT(ch),
+				base_addr + EDMA_CINT);
+
+			fsl_chan = &fsl_edma->chans[ch];
+
+			spin_lock(&fsl_chan->vchan.lock);
+			if (!fsl_chan->edesc->iscyclic) {
+				list_del(&fsl_chan->edesc->vdesc.node);
+				vchan_cookie_complete(&fsl_chan->edesc->vdesc);
+				fsl_chan->edesc = NULL;
+				fsl_chan->status = DMA_COMPLETE;
+				fsl_chan->idle = true;
+			} else {
+				vchan_cyclic_callback(&fsl_chan->edesc->vdesc);
+			}
+
+			if (!fsl_chan->edesc)
+				fsl_edma_xfer_desc(fsl_chan);
+
+			spin_unlock(&fsl_chan->vchan.lock);
+		}
+	}
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t fsl_edma_err_handler(int irq, void *dev_id)
+{
+	struct fsl_edma_engine *fsl_edma = dev_id;
+	unsigned int err, ch;
+
+	err = edma_readl(fsl_edma, fsl_edma->membase + EDMA_ERR);
+	if (!err)
+		return IRQ_NONE;
+
+	for (ch = 0; ch < fsl_edma->n_chans; ch++) {
+		if (err & (0x1 << ch)) {
+			fsl_edma_disable_request(&fsl_edma->chans[ch]);
+			edma_writeb(fsl_edma, EDMA_CERR_CERR(ch),
+				fsl_edma->membase + EDMA_CERR);
+			fsl_edma->chans[ch].status = DMA_ERROR;
+			fsl_edma->chans[ch].idle = true;
+		}
+	}
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t fsl_edma_irq_handler(int irq, void *dev_id)
+{
+	if (fsl_edma_tx_handler(irq, dev_id) == IRQ_HANDLED)
+		return IRQ_HANDLED;
+
+	return fsl_edma_err_handler(irq, dev_id);
+}
+
+static void fsl_edma_issue_pending(struct dma_chan *chan)
+{
+	struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
+
+	if (unlikely(fsl_chan->pm_state != RUNNING)) {
+		spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+		/* cannot submit due to suspend */
+		return;
+	}
+
+	if (vchan_issue_pending(&fsl_chan->vchan) && !fsl_chan->edesc)
+		fsl_edma_xfer_desc(fsl_chan);
+
+	spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+}
+
+static struct dma_chan *fsl_edma_xlate(struct of_phandle_args *dma_spec,
+		struct of_dma *ofdma)
+{
+	struct fsl_edma_engine *fsl_edma = ofdma->of_dma_data;
+	struct dma_chan *chan, *_chan;
+	struct fsl_edma_chan *fsl_chan;
+	unsigned long chans_per_mux = fsl_edma->n_chans / DMAMUX_NR;
+
+	if (dma_spec->args_count != 2)
+		return NULL;
+
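+	/* args[0] selects the DMAMUX instance, args[1] the request source */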
+	mutex_lock(&fsl_edma->fsl_edma_mutex);
+	list_for_each_entry_safe(chan, _chan, &fsl_edma->dma_dev.channels, device_node) {
+		if (chan->client_count)
+			continue;
+		if ((chan->chan_id / chans_per_mux) == dma_spec->args[0]) {
+			chan = dma_get_slave_channel(chan);
+			if (chan) {
+				chan->device->privatecnt++;
+				fsl_chan = to_fsl_edma_chan(chan);
+				fsl_chan->slave_id = dma_spec->args[1];
+				fsl_edma_chan_mux(fsl_chan, fsl_chan->slave_id,
+						true);
+				mutex_unlock(&fsl_edma->fsl_edma_mutex);
+				return chan;
+			}
+		}
+	}
+	mutex_unlock(&fsl_edma->fsl_edma_mutex);
+	return NULL;
+}
+
+static int fsl_edma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
+
+	fsl_chan->tcd_pool = dma_pool_create("tcd_pool", chan->device->dev,
+				sizeof(struct fsl_edma_hw_tcd),
+				32, 0);
+	return 0;
+}
+
+static void fsl_edma_free_chan_resources(struct dma_chan *chan)
+{
+	struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
+	fsl_edma_disable_request(fsl_chan);
+	fsl_edma_chan_mux(fsl_chan, 0, false);
+	fsl_chan->edesc = NULL;
+	vchan_get_all_descriptors(&fsl_chan->vchan, &head);
+	spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+
+	vchan_dma_desc_free_list(&fsl_chan->vchan, &head);
+	dma_pool_destroy(fsl_chan->tcd_pool);
+	fsl_chan->tcd_pool = NULL;
+}
+
+static int
+fsl_edma_irq_init(struct platform_device *pdev, struct fsl_edma_engine *fsl_edma)
+{
+	int ret;
+
+	fsl_edma->txirq = platform_get_irq_byname(pdev, "edma-tx");
+	if (fsl_edma->txirq < 0) {
+		dev_err(&pdev->dev, "Can't get edma-tx irq.\n");
+		return fsl_edma->txirq;
+	}
+
+	fsl_edma->errirq = platform_get_irq_byname(pdev, "edma-err");
+	if (fsl_edma->errirq < 0) {
+		dev_err(&pdev->dev, "Can't get edma-err irq.\n");
+		return fsl_edma->errirq;
+	}
+
+	if (fsl_edma->txirq == fsl_edma->errirq) {
+		ret = devm_request_irq(&pdev->dev, fsl_edma->txirq,
+				fsl_edma_irq_handler, 0, "eDMA", fsl_edma);
+		if (ret) {
+			dev_err(&pdev->dev, "Can't register eDMA IRQ.\n");
+			return ret;
+		}
+	} else {
+		ret = devm_request_irq(&pdev->dev, fsl_edma->txirq,
+				fsl_edma_tx_handler, 0, "eDMA tx", fsl_edma);
+		if (ret) {
+			dev_err(&pdev->dev, "Can't register eDMA tx IRQ.\n");
+			return ret;
+		}
+
+		ret = devm_request_irq(&pdev->dev, fsl_edma->errirq,
+				fsl_edma_err_handler, 0, "eDMA err", fsl_edma);
+		if (ret) {
+			dev_err(&pdev->dev, "Can't register eDMA err IRQ.\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static void fsl_edma_irq_exit(
+		struct platform_device *pdev, struct fsl_edma_engine *fsl_edma)
+{
+	if (fsl_edma->txirq == fsl_edma->errirq) {
+		devm_free_irq(&pdev->dev, fsl_edma->txirq, fsl_edma);
+	} else {
+		devm_free_irq(&pdev->dev, fsl_edma->txirq, fsl_edma);
+		devm_free_irq(&pdev->dev, fsl_edma->errirq, fsl_edma);
+	}
+}
+
+static void fsl_disable_clocks(struct fsl_edma_engine *fsl_edma, int nr_clocks)
+{
+	int i;
+
+	for (i = 0; i < nr_clocks; i++)
+		clk_disable_unprepare(fsl_edma->muxclk[i]);
+}
+
+static int fsl_edma_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct fsl_edma_engine *fsl_edma;
+	struct fsl_edma_chan *fsl_chan;
+	struct resource *res;
+	int len, chans;
+	int ret, i;
+
+	ret = of_property_read_u32(np, "dma-channels", &chans);
+	if (ret) {
+		dev_err(&pdev->dev, "Can't get dma-channels.\n");
+		return ret;
+	}
+
+	len = sizeof(*fsl_edma) + sizeof(*fsl_chan) * chans;
+	fsl_edma = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
+	if (!fsl_edma)
+		return -ENOMEM;
+
+	fsl_edma->n_chans = chans;
+	mutex_init(&fsl_edma->fsl_edma_mutex);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	fsl_edma->membase = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(fsl_edma->membase))
+		return PTR_ERR(fsl_edma->membase);
+
+	for (i = 0; i < DMAMUX_NR; i++) {
+		char clkname[32];
+
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 1 + i);
+		fsl_edma->muxbase[i] = devm_ioremap_resource(&pdev->dev, res);
+		if (IS_ERR(fsl_edma->muxbase[i])) {
+			/* on error: disable all previously enabled clks */
+			fsl_disable_clocks(fsl_edma, i);
+			return PTR_ERR(fsl_edma->muxbase[i]);
+		}
+
+		sprintf(clkname, "dmamux%d", i);
+		fsl_edma->muxclk[i] = devm_clk_get(&pdev->dev, clkname);
+		if (IS_ERR(fsl_edma->muxclk[i])) {
+			dev_err(&pdev->dev, "Missing DMAMUX block clock.\n");
+			/* on error: disable all previously enabled clks */
+			fsl_disable_clocks(fsl_edma, i);
+			return PTR_ERR(fsl_edma->muxclk[i]);
+		}
+
+		ret = clk_prepare_enable(fsl_edma->muxclk[i]);
+		if (ret)
+			/* on error: disable all previously enabled clks */
+			fsl_disable_clocks(fsl_edma, i);
+
+	}
+
+	fsl_edma->big_endian = of_property_read_bool(np, "big-endian");
+
+	INIT_LIST_HEAD(&fsl_edma->dma_dev.channels);
+	for (i = 0; i < fsl_edma->n_chans; i++) {
+		struct fsl_edma_chan *fsl_chan = &fsl_edma->chans[i];
+
+		fsl_chan->edma = fsl_edma;
+		fsl_chan->pm_state = RUNNING;
+		fsl_chan->slave_id = 0;
+		fsl_chan->idle = true;
+		fsl_chan->vchan.desc_free = fsl_edma_free_desc;
+		vchan_init(&fsl_chan->vchan, &fsl_edma->dma_dev);
+
+		edma_writew(fsl_edma, 0x0, fsl_edma->membase + EDMA_TCD_CSR(i));
+		fsl_edma_chan_mux(fsl_chan, 0, false);
+	}
+
+	edma_writel(fsl_edma, ~0, fsl_edma->membase + EDMA_INTR);
+	ret = fsl_edma_irq_init(pdev, fsl_edma);
+	if (ret)
+		return ret;
+
+	dma_cap_set(DMA_PRIVATE, fsl_edma->dma_dev.cap_mask);
+	dma_cap_set(DMA_SLAVE, fsl_edma->dma_dev.cap_mask);
+	dma_cap_set(DMA_CYCLIC, fsl_edma->dma_dev.cap_mask);
+
+	fsl_edma->dma_dev.dev = &pdev->dev;
+	fsl_edma->dma_dev.device_alloc_chan_resources
+		= fsl_edma_alloc_chan_resources;
+	fsl_edma->dma_dev.device_free_chan_resources
+		= fsl_edma_free_chan_resources;
+	fsl_edma->dma_dev.device_tx_status = fsl_edma_tx_status;
+	fsl_edma->dma_dev.device_prep_slave_sg = fsl_edma_prep_slave_sg;
+	fsl_edma->dma_dev.device_prep_dma_cyclic = fsl_edma_prep_dma_cyclic;
+	fsl_edma->dma_dev.device_config = fsl_edma_slave_config;
+	fsl_edma->dma_dev.device_pause = fsl_edma_pause;
+	fsl_edma->dma_dev.device_resume = fsl_edma_resume;
+	fsl_edma->dma_dev.device_terminate_all = fsl_edma_terminate_all;
+	fsl_edma->dma_dev.device_issue_pending = fsl_edma_issue_pending;
+
+	fsl_edma->dma_dev.src_addr_widths = FSL_EDMA_BUSWIDTHS;
+	fsl_edma->dma_dev.dst_addr_widths = FSL_EDMA_BUSWIDTHS;
+	fsl_edma->dma_dev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+
+	platform_set_drvdata(pdev, fsl_edma);
+
+	ret = dma_async_device_register(&fsl_edma->dma_dev);
+	if (ret) {
+		dev_err(&pdev->dev,
+			"Can't register Freescale eDMA engine. (%d)\n", ret);
+		fsl_disable_clocks(fsl_edma, DMAMUX_NR);
+		return ret;
+	}
+
+	ret = of_dma_controller_register(np, fsl_edma_xlate, fsl_edma);
+	if (ret) {
+		dev_err(&pdev->dev,
+			"Can't register Freescale eDMA of_dma. (%d)\n", ret);
+		dma_async_device_unregister(&fsl_edma->dma_dev);
+		fsl_disable_clocks(fsl_edma, DMAMUX_NR);
+		return ret;
+	}
+
+	/* enable round robin arbitration */
+	edma_writel(fsl_edma, EDMA_CR_ERGA | EDMA_CR_ERCA, fsl_edma->membase + EDMA_CR);
+
+	return 0;
+}
+
+static void fsl_edma_cleanup_vchan(struct dma_device *dmadev)
+{
+	struct fsl_edma_chan *chan, *_chan;
+
+	list_for_each_entry_safe(chan, _chan,
+				&dmadev->channels, vchan.chan.device_node) {
+		list_del(&chan->vchan.chan.device_node);
+		tasklet_kill(&chan->vchan.task);
+	}
+}
+
+static int fsl_edma_remove(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct fsl_edma_engine *fsl_edma = platform_get_drvdata(pdev);
+
+	fsl_edma_irq_exit(pdev, fsl_edma);
+	fsl_edma_cleanup_vchan(&fsl_edma->dma_dev);
+	of_dma_controller_free(np);
+	dma_async_device_unregister(&fsl_edma->dma_dev);
+	fsl_disable_clocks(fsl_edma, DMAMUX_NR);
+
+	return 0;
+}
+
+static int fsl_edma_suspend_late(struct device *dev)
+{
+	struct fsl_edma_engine *fsl_edma = dev_get_drvdata(dev);
+	struct fsl_edma_chan *fsl_chan;
+	unsigned long flags;
+	int i;
+
+	for (i = 0; i < fsl_edma->n_chans; i++) {
+		fsl_chan = &fsl_edma->chans[i];
+		spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
+		/* Make sure the channel is idle, otherwise force-disable it. */
+		if (unlikely(!fsl_chan->idle)) {
+			dev_warn(dev, "WARN: There is a non-idle channel.");
+			fsl_edma_disable_request(fsl_chan);
+			fsl_edma_chan_mux(fsl_chan, 0, false);
+		}
+
+		fsl_chan->pm_state = SUSPENDED;
+		spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+	}
+
+	return 0;
+}
+
+static int fsl_edma_resume_early(struct device *dev)
+{
+	struct fsl_edma_engine *fsl_edma = dev_get_drvdata(dev);
+	struct fsl_edma_chan *fsl_chan;
+	int i;
+
+	for (i = 0; i < fsl_edma->n_chans; i++) {
+		fsl_chan = &fsl_edma->chans[i];
+		fsl_chan->pm_state = RUNNING;
+		edma_writew(fsl_edma, 0x0, fsl_edma->membase + EDMA_TCD_CSR(i));
+		if (fsl_chan->slave_id != 0)
+			fsl_edma_chan_mux(fsl_chan, fsl_chan->slave_id, true);
+	}
+
+	edma_writel(fsl_edma, EDMA_CR_ERGA | EDMA_CR_ERCA,
+			fsl_edma->membase + EDMA_CR);
+
+	return 0;
+}
+
+/*
+ * The eDMA provides services to other devices, so it should suspend late
+ * and resume early. By the time the eDMA suspends, all of its clients
+ * should have stopped their DMA transfers and left the channels idle.
+ */
+static const struct dev_pm_ops fsl_edma_pm_ops = {
+	.suspend_late   = fsl_edma_suspend_late,
+	.resume_early   = fsl_edma_resume_early,
+};
+
+static const struct of_device_id fsl_edma_dt_ids[] = {
+	{ .compatible = "fsl,vf610-edma", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, fsl_edma_dt_ids);
+
+static struct platform_driver fsl_edma_driver = {
+	.driver		= {
+		.name	= "fsl-edma",
+		.of_match_table = fsl_edma_dt_ids,
+		.pm     = &fsl_edma_pm_ops,
+	},
+	.probe          = fsl_edma_probe,
+	.remove		= fsl_edma_remove,
+};
+
+static int __init fsl_edma_init(void)
+{
+	return platform_driver_register(&fsl_edma_driver);
+}
+subsys_initcall(fsl_edma_init);
+
+static void __exit fsl_edma_exit(void)
+{
+	platform_driver_unregister(&fsl_edma_driver);
+}
+module_exit(fsl_edma_exit);
+
+MODULE_ALIAS("platform:fsl-edma");
+MODULE_DESCRIPTION("Freescale eDMA engine driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/fsl_raid.c b/drivers/dma/fsl_raid.c
new file mode 100644
index 0000000..493dc6c
--- /dev/null
+++ b/drivers/dma/fsl_raid.c
@@ -0,0 +1,899 @@
+/*
+ * drivers/dma/fsl_raid.c
+ *
+ * Freescale RAID Engine device driver
+ *
+ * Author:
+ *	Harninder Rai <harninder.rai@freescale.com>
+ *	Naveen Burmi <naveenburmi@freescale.com>
+ *
+ * Rewrite:
+ *	Xuelin Shi <xuelin.shi@freescale.com>
+ *
+ * Copyright (c) 2010-2014 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Theory of operation:
+ *
+ * General capabilities:
+ *	The RAID Engine (RE) block is capable of offloading the XOR, memcpy
+ *	and P/Q calculations required by RAID5 and RAID6 operations. The RE
+ *	driver registers with Linux's ASYNC layer as a DMA driver. The RE
+ *	hardware maintains strict ordering of requests through chained
+ *	command queueing.
+ *
+ * Data flow:
+ *	The software RAID layer of Linux (the MD layer) maintains RAID
+ *	partitions, strips, stripes etc. It sends requests to the underlying
+ *	ASYNC layer, which passes them on to the RE driver. The ASYNC layer
+ *	decides which request goes to which job ring of the RE hardware. For
+ *	every request processed by the RAID Engine the driver gets an
+ *	interrupt unless coalescing is set. The per-job-ring interrupt
+ *	handler checks the status register for errors, clears the interrupt
+ *	and leaves the post-interrupt processing to the irq thread.
+ */
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/dmaengine.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+
+#include "dmaengine.h"
+#include "fsl_raid.h"
+
+#define FSL_RE_MAX_XOR_SRCS	16
+#define FSL_RE_MAX_PQ_SRCS	16
+#define FSL_RE_MIN_DESCS	256
+#define FSL_RE_MAX_DESCS	(4 * FSL_RE_MIN_DESCS)
+#define FSL_RE_FRAME_FORMAT	0x1
+#define FSL_RE_MAX_DATA_LEN	(1024*1024)
+
+#define to_fsl_re_dma_desc(tx) container_of(tx, struct fsl_re_desc, async_tx)
+
+/* Add descriptors into per chan software queue - submit_q */
+static dma_cookie_t fsl_re_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct fsl_re_desc *desc;
+	struct fsl_re_chan *re_chan;
+	dma_cookie_t cookie;
+	unsigned long flags;
+
+	desc = to_fsl_re_dma_desc(tx);
+	re_chan = container_of(tx->chan, struct fsl_re_chan, chan);
+
+	spin_lock_irqsave(&re_chan->desc_lock, flags);
+	cookie = dma_cookie_assign(tx);
+	list_add_tail(&desc->node, &re_chan->submit_q);
+	spin_unlock_irqrestore(&re_chan->desc_lock, flags);
+
+	return cookie;
+}
+
+/* Copy descriptor from per chan software queue into hardware job ring */
+static void fsl_re_issue_pending(struct dma_chan *chan)
+{
+	struct fsl_re_chan *re_chan;
+	int avail;
+	struct fsl_re_desc *desc, *_desc;
+	unsigned long flags;
+
+	re_chan = container_of(chan, struct fsl_re_chan, chan);
+
+	spin_lock_irqsave(&re_chan->desc_lock, flags);
+	avail = FSL_RE_SLOT_AVAIL(
+		in_be32(&re_chan->jrregs->inbring_slot_avail));
+
+	list_for_each_entry_safe(desc, _desc, &re_chan->submit_q, node) {
+		if (!avail)
+			break;
+
+		list_move_tail(&desc->node, &re_chan->active_q);
+
+		memcpy(&re_chan->inb_ring_virt_addr[re_chan->inb_count],
+		       &desc->hwdesc, sizeof(struct fsl_re_hw_desc));
+
+		re_chan->inb_count = (re_chan->inb_count + 1) &
+						FSL_RE_RING_SIZE_MASK;
+		out_be32(&re_chan->jrregs->inbring_add_job, FSL_RE_ADD_JOB(1));
+		avail--;
+	}
+	spin_unlock_irqrestore(&re_chan->desc_lock, flags);
+}
+
+static void fsl_re_desc_done(struct fsl_re_desc *desc)
+{
+	dma_cookie_complete(&desc->async_tx);
+	dma_descriptor_unmap(&desc->async_tx);
+	dmaengine_desc_get_callback_invoke(&desc->async_tx, NULL);
+}
+
+static void fsl_re_cleanup_descs(struct fsl_re_chan *re_chan)
+{
+	struct fsl_re_desc *desc, *_desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&re_chan->desc_lock, flags);
+	list_for_each_entry_safe(desc, _desc, &re_chan->ack_q, node) {
+		if (async_tx_test_ack(&desc->async_tx))
+			list_move_tail(&desc->node, &re_chan->free_q);
+	}
+	spin_unlock_irqrestore(&re_chan->desc_lock, flags);
+
+	fsl_re_issue_pending(&re_chan->chan);
+}
+
+static void fsl_re_dequeue(unsigned long data)
+{
+	struct fsl_re_chan *re_chan;
+	struct fsl_re_desc *desc, *_desc;
+	struct fsl_re_hw_desc *hwdesc;
+	unsigned long flags;
+	unsigned int count, oub_count;
+	int found;
+
+	re_chan = dev_get_drvdata((struct device *)data);
+
+	fsl_re_cleanup_descs(re_chan);
+
+	spin_lock_irqsave(&re_chan->desc_lock, flags);
+	count =	FSL_RE_SLOT_FULL(in_be32(&re_chan->jrregs->oubring_slot_full));
+	while (count--) {
+		found = 0;
+		hwdesc = &re_chan->oub_ring_virt_addr[re_chan->oub_count];
+		list_for_each_entry_safe(desc, _desc, &re_chan->active_q,
+					 node) {
+			/* compare the hw dma addr to find the completed descriptor */
+			if (desc->hwdesc.lbea32 == hwdesc->lbea32 &&
+			    desc->hwdesc.addr_low == hwdesc->addr_low) {
+				found = 1;
+				break;
+			}
+		}
+
+		if (found) {
+			fsl_re_desc_done(desc);
+			list_move_tail(&desc->node, &re_chan->ack_q);
+		} else {
+			dev_err(re_chan->dev,
+				"found hwdesc not in sw queue, discard it\n");
+		}
+
+		oub_count = (re_chan->oub_count + 1) & FSL_RE_RING_SIZE_MASK;
+		re_chan->oub_count = oub_count;
+
+		out_be32(&re_chan->jrregs->oubring_job_rmvd,
+			 FSL_RE_RMVD_JOB(1));
+	}
+	spin_unlock_irqrestore(&re_chan->desc_lock, flags);
+}
+
+/* Per Job Ring interrupt handler */
+static irqreturn_t fsl_re_isr(int irq, void *data)
+{
+	struct fsl_re_chan *re_chan;
+	u32 irqstate, status;
+
+	re_chan = dev_get_drvdata((struct device *)data);
+
+	irqstate = in_be32(&re_chan->jrregs->jr_interrupt_status);
+	if (!irqstate)
+		return IRQ_NONE;
+
+	/*
+	 * There's no way in upper layer (read MD layer) to recover from
+	 * error conditions except restart everything. In long term we
+	 * need to do something more than just crashing
+	 */
+	if (irqstate & FSL_RE_ERROR) {
+		status = in_be32(&re_chan->jrregs->jr_status);
+		dev_err(re_chan->dev, "chan error irqstate: %x, status: %x\n",
+			irqstate, status);
+	}
+
+	/* Clear interrupt */
+	out_be32(&re_chan->jrregs->jr_interrupt_status, FSL_RE_CLR_INTR);
+
+	tasklet_schedule(&re_chan->irqtask);
+
+	return IRQ_HANDLED;
+}
+
+static enum dma_status fsl_re_tx_status(struct dma_chan *chan,
+					dma_cookie_t cookie,
+					struct dma_tx_state *txstate)
+{
+	return dma_cookie_status(chan, cookie, txstate);
+}
+
+static void fill_cfd_frame(struct fsl_re_cmpnd_frame *cf, u8 index,
+			   size_t length, dma_addr_t addr, bool final)
+{
+	u32 efrl = length & FSL_RE_CF_LENGTH_MASK;
+
+	efrl |= final << FSL_RE_CF_FINAL_SHIFT;
+	cf[index].efrl32 = efrl;
+	cf[index].addr_high = upper_32_bits(addr);
+	cf[index].addr_low = lower_32_bits(addr);
+}
+
+static struct fsl_re_desc *fsl_re_init_desc(struct fsl_re_chan *re_chan,
+					    struct fsl_re_desc *desc,
+					    void *cf, dma_addr_t paddr)
+{
+	desc->re_chan = re_chan;
+	desc->async_tx.tx_submit = fsl_re_tx_submit;
+	dma_async_tx_descriptor_init(&desc->async_tx, &re_chan->chan);
+	INIT_LIST_HEAD(&desc->node);
+
+	desc->hwdesc.fmt32 = FSL_RE_FRAME_FORMAT << FSL_RE_HWDESC_FMT_SHIFT;
+	desc->hwdesc.lbea32 = upper_32_bits(paddr);
+	desc->hwdesc.addr_low = lower_32_bits(paddr);
+	desc->cf_addr = cf;
+	desc->cf_paddr = paddr;
+
+	desc->cdb_addr = (void *)(cf + FSL_RE_CF_DESC_SIZE);
+	desc->cdb_paddr = paddr + FSL_RE_CF_DESC_SIZE;
+
+	return desc;
+}
+
+static struct fsl_re_desc *fsl_re_chan_alloc_desc(struct fsl_re_chan *re_chan,
+						  unsigned long flags)
+{
+	struct fsl_re_desc *desc = NULL;
+	void *cf;
+	dma_addr_t paddr;
+	unsigned long lock_flag;
+
+	fsl_re_cleanup_descs(re_chan);
+
+	spin_lock_irqsave(&re_chan->desc_lock, lock_flag);
+	if (!list_empty(&re_chan->free_q)) {
+		/* take one desc from free_q */
+		desc = list_first_entry(&re_chan->free_q,
+					struct fsl_re_desc, node);
+		list_del(&desc->node);
+
+		desc->async_tx.flags = flags;
+	}
+	spin_unlock_irqrestore(&re_chan->desc_lock, lock_flag);
+
+	if (!desc) {
+		desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+		if (!desc)
+			return NULL;
+
+		cf = dma_pool_alloc(re_chan->re_dev->cf_desc_pool, GFP_NOWAIT,
+				    &paddr);
+		if (!cf) {
+			kfree(desc);
+			return NULL;
+		}
+
+		desc = fsl_re_init_desc(re_chan, desc, cf, paddr);
+		desc->async_tx.flags = flags;
+
+		spin_lock_irqsave(&re_chan->desc_lock, lock_flag);
+		re_chan->alloc_count++;
+		spin_unlock_irqrestore(&re_chan->desc_lock, lock_flag);
+	}
+
+	return desc;
+}
+
+static struct dma_async_tx_descriptor *fsl_re_prep_dma_genq(
+		struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+		unsigned int src_cnt, const unsigned char *scf, size_t len,
+		unsigned long flags)
+{
+	struct fsl_re_chan *re_chan;
+	struct fsl_re_desc *desc;
+	struct fsl_re_xor_cdb *xor;
+	struct fsl_re_cmpnd_frame *cf;
+	u32 cdb;
+	unsigned int i, j;
+	unsigned int save_src_cnt = src_cnt;
+	int cont_q = 0;
+
+	re_chan = container_of(chan, struct fsl_re_chan, chan);
+	if (len > FSL_RE_MAX_DATA_LEN) {
+		dev_err(re_chan->dev, "genq tx length %zu, max length %d\n",
+			len, FSL_RE_MAX_DATA_LEN);
+		return NULL;
+	}
+
+	desc = fsl_re_chan_alloc_desc(re_chan, flags);
+	if (!desc)
+		return NULL;
+
+	if (scf && (flags & DMA_PREP_CONTINUE)) {
+		cont_q = 1;
+		src_cnt += 1;
+	}
+
+	/* Filling xor CDB */
+	cdb = FSL_RE_XOR_OPCODE << FSL_RE_CDB_OPCODE_SHIFT;
+	cdb |= (src_cnt - 1) << FSL_RE_CDB_NRCS_SHIFT;
+	cdb |= FSL_RE_BLOCK_SIZE << FSL_RE_CDB_BLKSIZE_SHIFT;
+	cdb |= FSL_RE_INTR_ON_ERROR << FSL_RE_CDB_ERROR_SHIFT;
+	cdb |= FSL_RE_DATA_DEP << FSL_RE_CDB_DEPEND_SHIFT;
+	xor = desc->cdb_addr;
+	xor->cdb32 = cdb;
+
+	if (scf) {
+		/* compute q = src0*coef0^src1*coef1^..., * is GF(2^8) mult */
+		for (i = 0; i < save_src_cnt; i++)
+			xor->gfm[i] = scf[i];
+		if (cont_q)
+			xor->gfm[i++] = 1;
+	} else {
+		/* compute P, that is XOR all srcs */
+		for (i = 0; i < src_cnt; i++)
+			xor->gfm[i] = 1;
+	}
+
+	/* Filling frame 0 of compound frame descriptor with CDB */
+	cf = desc->cf_addr;
+	fill_cfd_frame(cf, 0, sizeof(*xor), desc->cdb_paddr, 0);
+
+	/* Fill CFD's 1st frame with dest buffer */
+	fill_cfd_frame(cf, 1, len, dest, 0);
+
+	/* Fill CFD's rest of the frames with source buffers */
+	for (i = 2, j = 0; j < save_src_cnt; i++, j++)
+		fill_cfd_frame(cf, i, len, src[j], 0);
+
+	if (cont_q)
+		fill_cfd_frame(cf, i++, len, dest, 0);
+
+	/* Setting the final bit in the last source buffer frame in CFD */
+	cf[i - 1].efrl32 |= 1 << FSL_RE_CF_FINAL_SHIFT;
+
+	return &desc->async_tx;
+}
+
+/*
+ * Prep function for P parity calculation. In RAID Engine terminology,
+ * XOR calculation is called GenQ calculation and is done through the
+ * GenQ command.
+ */
+static struct dma_async_tx_descriptor *fsl_re_prep_dma_xor(
+		struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+		unsigned int src_cnt, size_t len, unsigned long flags)
+{
+	/* NULL let genq take all coef as 1 */
+	return fsl_re_prep_dma_genq(chan, dest, src, src_cnt, NULL, len, flags);
+}
+
+/*
+ * Prep function for P/Q parity calculation. In RAID Engine terminology,
+ * P/Q calculation is called GenQQ and is done through the GenQQ command.
+ */
+static struct dma_async_tx_descriptor *fsl_re_prep_dma_pq(
+		struct dma_chan *chan, dma_addr_t *dest, dma_addr_t *src,
+		unsigned int src_cnt, const unsigned char *scf, size_t len,
+		unsigned long flags)
+{
+	struct fsl_re_chan *re_chan;
+	struct fsl_re_desc *desc;
+	struct fsl_re_pq_cdb *pq;
+	struct fsl_re_cmpnd_frame *cf;
+	u32 cdb;
+	u8 *p;
+	int gfmq_len, i, j;
+	unsigned int save_src_cnt = src_cnt;
+
+	re_chan = container_of(chan, struct fsl_re_chan, chan);
+	if (len > FSL_RE_MAX_DATA_LEN) {
+		dev_err(re_chan->dev, "pq tx length is %zu, max length is %d\n",
+			len, FSL_RE_MAX_DATA_LEN);
+		return NULL;
+	}
+
+	/*
+	 * The RE requires at least 2 sources. If given only one source, we pass
+	 * the first source again as the second one.
+	 * With only one source, generating P is meaningless; only Q is generated.
+	 */
+	if (src_cnt == 1) {
+		struct dma_async_tx_descriptor *tx;
+		dma_addr_t dma_src[2];
+		unsigned char coef[2];
+
+		dma_src[0] = *src;
+		coef[0] = *scf;
+		dma_src[1] = *src;
+		coef[1] = 0;
+		tx = fsl_re_prep_dma_genq(chan, dest[1], dma_src, 2, coef, len,
+					  flags);
+		if (tx)
+			desc = to_fsl_re_dma_desc(tx);
+
+		return tx;
+	}
+
+	/*
+	 * During RAID6 array creation, Linux's MD layer gets P and Q
+	 * calculated separately in two steps. But our RAID Engine has
+	 * the capability to calculate both P and Q with a single command.
+	 * Hence, to merge well with the MD layer, we need to provide a hook
+	 * here and call fsl_re_prep_dma_genq() instead.
+	 */
+
+	if (flags & DMA_PREP_PQ_DISABLE_P)
+		return fsl_re_prep_dma_genq(chan, dest[1], src, src_cnt,
+				scf, len, flags);
+
+	if (flags & DMA_PREP_CONTINUE)
+		src_cnt += 3;
+
+	desc = fsl_re_chan_alloc_desc(re_chan, flags);
+	if (!desc)
+		return NULL;
+
+	/* Filling GenQQ CDB */
+	cdb = FSL_RE_PQ_OPCODE << FSL_RE_CDB_OPCODE_SHIFT;
+	cdb |= (src_cnt - 1) << FSL_RE_CDB_NRCS_SHIFT;
+	cdb |= FSL_RE_BLOCK_SIZE << FSL_RE_CDB_BLKSIZE_SHIFT;
+	cdb |= FSL_RE_BUFFER_OUTPUT << FSL_RE_CDB_BUFFER_SHIFT;
+	cdb |= FSL_RE_DATA_DEP << FSL_RE_CDB_DEPEND_SHIFT;
+
+	pq = desc->cdb_addr;
+	pq->cdb32 = cdb;
+
+	p = pq->gfm_q1;
+	/* Init gfm_q1[] */
+	for (i = 0; i < src_cnt; i++)
+		p[i] = 1;
+
+	/* Align gfm[] to 32bit */
+	gfmq_len = ALIGN(src_cnt, 4);
+
+	/* Init gfm_q2[] */
+	p += gfmq_len;
+	for (i = 0; i < src_cnt; i++)
+		p[i] = scf[i];
+
+	/* Filling frame 0 of compound frame descriptor with CDB */
+	cf = desc->cf_addr;
+	fill_cfd_frame(cf, 0, sizeof(struct fsl_re_pq_cdb), desc->cdb_paddr, 0);
+
+	/* Fill CFD's 1st & 2nd frame with dest buffers */
+	for (i = 1, j = 0; i < 3; i++, j++)
+		fill_cfd_frame(cf, i, len, dest[j], 0);
+
+	/* Fill CFD's rest of the frames with source buffers */
+	for (i = 3, j = 0; j < save_src_cnt; i++, j++)
+		fill_cfd_frame(cf, i, len, src[j], 0);
+
+	/* PQ computation continuation */
+	if (flags & DMA_PREP_CONTINUE) {
+		if (src_cnt - save_src_cnt == 3) {
+			p[save_src_cnt] = 0;
+			p[save_src_cnt + 1] = 0;
+			p[save_src_cnt + 2] = 1;
+			fill_cfd_frame(cf, i++, len, dest[0], 0);
+			fill_cfd_frame(cf, i++, len, dest[1], 0);
+			fill_cfd_frame(cf, i++, len, dest[1], 0);
+		} else {
+			dev_err(re_chan->dev, "PQ tx continuation error!\n");
+			return NULL;
+		}
+	}
+
+	/* Setting the final bit in the last source buffer frame in CFD */
+	cf[i - 1].efrl32 |= 1 << FSL_RE_CF_FINAL_SHIFT;
+
+	return &desc->async_tx;
+}
+
+/*
+ * Prep function for memcpy. In the RAID Engine, memcpy is done through the
+ * MOVE command. The logic of this function will need to be modified once
+ * multi-page support is added in Linux's MD/ASYNC layer.
+ */
+static struct dma_async_tx_descriptor *fsl_re_prep_dma_memcpy(
+		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct fsl_re_chan *re_chan;
+	struct fsl_re_desc *desc;
+	size_t length;
+	struct fsl_re_cmpnd_frame *cf;
+	struct fsl_re_move_cdb *move;
+	u32 cdb;
+
+	re_chan = container_of(chan, struct fsl_re_chan, chan);
+
+	if (len > FSL_RE_MAX_DATA_LEN) {
+		dev_err(re_chan->dev, "cp tx length is %zu, max length is %d\n",
+			len, FSL_RE_MAX_DATA_LEN);
+		return NULL;
+	}
+
+	desc = fsl_re_chan_alloc_desc(re_chan, flags);
+	if (!desc)
+		return NULL;
+
+	/* Filling move CDB */
+	cdb = FSL_RE_MOVE_OPCODE << FSL_RE_CDB_OPCODE_SHIFT;
+	cdb |= FSL_RE_BLOCK_SIZE << FSL_RE_CDB_BLKSIZE_SHIFT;
+	cdb |= FSL_RE_INTR_ON_ERROR << FSL_RE_CDB_ERROR_SHIFT;
+	cdb |= FSL_RE_DATA_DEP << FSL_RE_CDB_DEPEND_SHIFT;
+
+	move = desc->cdb_addr;
+	move->cdb32 = cdb;
+
+	/* Filling frame 0 of CFD with move CDB */
+	cf = desc->cf_addr;
+	fill_cfd_frame(cf, 0, sizeof(*move), desc->cdb_paddr, 0);
+
+	length = min_t(size_t, len, FSL_RE_MAX_DATA_LEN);
+
+	/* Fill CFD's 1st frame with dest buffer */
+	fill_cfd_frame(cf, 1, length, dest, 0);
+
+	/* Fill CFD's 2nd frame with src buffer */
+	fill_cfd_frame(cf, 2, length, src, 1);
+
+	return &desc->async_tx;
+}
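+
+/*
+ * Illustrative usage sketch (not part of this driver; channel and buffer
+ * names are hypothetical): a dmaengine client reaches
+ * fsl_re_prep_dma_memcpy() above through the generic API, roughly as:
+ *
+ *	chan = dma_request_channel(mask, NULL, NULL);
+ *	tx = dmaengine_prep_dma_memcpy(chan, dst, src, len, DMA_PREP_INTERRUPT);
+ *	cookie = dmaengine_submit(tx);
+ *	dma_async_issue_pending(chan);
+ */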
+
+static int fsl_re_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct fsl_re_chan *re_chan;
+	struct fsl_re_desc *desc;
+	void *cf;
+	dma_addr_t paddr;
+	int i;
+
+	re_chan = container_of(chan, struct fsl_re_chan, chan);
+	for (i = 0; i < FSL_RE_MIN_DESCS; i++) {
+		desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+		if (!desc)
+			break;
+
+		cf = dma_pool_alloc(re_chan->re_dev->cf_desc_pool, GFP_KERNEL,
+				    &paddr);
+		if (!cf) {
+			kfree(desc);
+			break;
+		}
+
+		INIT_LIST_HEAD(&desc->node);
+		fsl_re_init_desc(re_chan, desc, cf, paddr);
+
+		list_add_tail(&desc->node, &re_chan->free_q);
+		re_chan->alloc_count++;
+	}
+	return re_chan->alloc_count;
+}
+
+static void fsl_re_free_chan_resources(struct dma_chan *chan)
+{
+	struct fsl_re_chan *re_chan;
+	struct fsl_re_desc *desc;
+
+	re_chan = container_of(chan, struct fsl_re_chan, chan);
+	while (re_chan->alloc_count--) {
+		desc = list_first_entry(&re_chan->free_q,
+					struct fsl_re_desc,
+					node);
+
+		list_del(&desc->node);
+		dma_pool_free(re_chan->re_dev->cf_desc_pool, desc->cf_addr,
+			      desc->cf_paddr);
+		kfree(desc);
+	}
+
+	if (!list_empty(&re_chan->free_q))
+		dev_err(re_chan->dev, "chan resource cannot be cleaned!\n");
+}
+
+static int fsl_re_chan_probe(struct platform_device *ofdev,
+		      struct device_node *np, u8 q, u32 off)
+{
+	struct device *dev, *chandev;
+	struct fsl_re_drv_private *re_priv;
+	struct fsl_re_chan *chan;
+	struct dma_device *dma_dev;
+	u32 ptr;
+	u32 status;
+	int ret = 0, rc;
+	struct platform_device *chan_ofdev;
+
+	dev = &ofdev->dev;
+	re_priv = dev_get_drvdata(dev);
+	dma_dev = &re_priv->dma_dev;
+
+	chan = devm_kzalloc(dev, sizeof(*chan), GFP_KERNEL);
+	if (!chan)
+		return -ENOMEM;
+
+	/* create platform device for chan node */
+	chan_ofdev = of_platform_device_create(np, NULL, dev);
+	if (!chan_ofdev) {
+		dev_err(dev, "Not able to create ofdev for jr %d\n", q);
+		ret = -EINVAL;
+		goto err_free;
+	}
+
+	/* read reg property from dts */
+	rc = of_property_read_u32(np, "reg", &ptr);
+	if (rc) {
+		dev_err(dev, "Reg property not found in jr %d\n", q);
+		ret = -ENODEV;
+		goto err_free;
+	}
+
+	chan->jrregs = (struct fsl_re_chan_cfg *)((u8 *)re_priv->re_regs +
+			off + ptr);
+
+	/* read irq property from dts */
+	chan->irq = irq_of_parse_and_map(np, 0);
+	if (!chan->irq) {
+		dev_err(dev, "No IRQ defined for JR %d\n", q);
+		ret = -ENODEV;
+		goto err_free;
+	}
+
+	snprintf(chan->name, sizeof(chan->name), "re_jr%02d", q);
+
+	chandev = &chan_ofdev->dev;
+	tasklet_init(&chan->irqtask, fsl_re_dequeue, (unsigned long)chandev);
+
+	ret = request_irq(chan->irq, fsl_re_isr, 0, chan->name, chandev);
+	if (ret) {
+		dev_err(dev, "Unable to register interrupt for JR %d\n", q);
+		ret = -EINVAL;
+		goto err_free;
+	}
+
+	re_priv->re_jrs[q] = chan;
+	chan->chan.device = dma_dev;
+	chan->chan.private = chan;
+	chan->dev = chandev;
+	chan->re_dev = re_priv;
+
+	spin_lock_init(&chan->desc_lock);
+	INIT_LIST_HEAD(&chan->ack_q);
+	INIT_LIST_HEAD(&chan->active_q);
+	INIT_LIST_HEAD(&chan->submit_q);
+	INIT_LIST_HEAD(&chan->free_q);
+
+	chan->inb_ring_virt_addr = dma_pool_alloc(chan->re_dev->hw_desc_pool,
+		GFP_KERNEL, &chan->inb_phys_addr);
+	if (!chan->inb_ring_virt_addr) {
+		dev_err(dev, "No dma memory for inb_ring_virt_addr\n");
+		ret = -ENOMEM;
+		goto err_free;
+	}
+
+	chan->oub_ring_virt_addr = dma_pool_alloc(chan->re_dev->hw_desc_pool,
+		GFP_KERNEL, &chan->oub_phys_addr);
+	if (!chan->oub_ring_virt_addr) {
+		dev_err(dev, "No dma memory for oub_ring_virt_addr\n");
+		ret = -ENOMEM;
+		goto err_free_1;
+	}
+
+	/* Program the Inbound/Outbound ring base addresses and size */
+	out_be32(&chan->jrregs->inbring_base_h,
+		 chan->inb_phys_addr & FSL_RE_ADDR_BIT_MASK);
+	out_be32(&chan->jrregs->oubring_base_h,
+		 chan->oub_phys_addr & FSL_RE_ADDR_BIT_MASK);
+	out_be32(&chan->jrregs->inbring_base_l,
+		 chan->inb_phys_addr >> FSL_RE_ADDR_BIT_SHIFT);
+	out_be32(&chan->jrregs->oubring_base_l,
+		 chan->oub_phys_addr >> FSL_RE_ADDR_BIT_SHIFT);
+	out_be32(&chan->jrregs->inbring_size,
+		 FSL_RE_RING_SIZE << FSL_RE_RING_SIZE_SHIFT);
+	out_be32(&chan->jrregs->oubring_size,
+		 FSL_RE_RING_SIZE << FSL_RE_RING_SIZE_SHIFT);
+
+	/* Read LIODN value from u-boot */
+	status = in_be32(&chan->jrregs->jr_config_1) & FSL_RE_REG_LIODN_MASK;
+
+	/* Program the CFG reg */
+	out_be32(&chan->jrregs->jr_config_1,
+		 FSL_RE_CFG1_CBSI | FSL_RE_CFG1_CBS0 | status);
+
+	dev_set_drvdata(chandev, chan);
+
+	/* Enable RE/CHAN */
+	out_be32(&chan->jrregs->jr_command, FSL_RE_ENABLE);
+
+	return 0;
+
+err_free_1:
+	dma_pool_free(chan->re_dev->hw_desc_pool, chan->inb_ring_virt_addr,
+		      chan->inb_phys_addr);
+err_free:
+	return ret;
+}
+
+/* Probe function for RAID Engine */
+static int fsl_re_probe(struct platform_device *ofdev)
+{
+	struct fsl_re_drv_private *re_priv;
+	struct device_node *np;
+	struct device_node *child;
+	u32 off;
+	u8 ridx = 0;
+	struct dma_device *dma_dev;
+	struct resource *res;
+	int rc;
+	struct device *dev = &ofdev->dev;
+
+	re_priv = devm_kzalloc(dev, sizeof(*re_priv), GFP_KERNEL);
+	if (!re_priv)
+		return -ENOMEM;
+
+	res = platform_get_resource(ofdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	/* IOMAP the entire RAID Engine region */
+	re_priv->re_regs = devm_ioremap(dev, res->start, resource_size(res));
+	if (!re_priv->re_regs)
+		return -EBUSY;
+
+	/* Program the RE mode */
+	out_be32(&re_priv->re_regs->global_config, FSL_RE_NON_DPAA_MODE);
+
+	/* Program Galois Field polynomial */
+	out_be32(&re_priv->re_regs->galois_field_config, FSL_RE_GFM_POLY);
+
+	dev_info(dev, "version %x, mode %x, gfp %x\n",
+		 in_be32(&re_priv->re_regs->re_version_id),
+		 in_be32(&re_priv->re_regs->global_config),
+		 in_be32(&re_priv->re_regs->galois_field_config));
+
+	dma_dev = &re_priv->dma_dev;
+	dma_dev->dev = dev;
+	INIT_LIST_HEAD(&dma_dev->channels);
+	dma_set_mask(dev, DMA_BIT_MASK(40));
+
+	dma_dev->device_alloc_chan_resources = fsl_re_alloc_chan_resources;
+	dma_dev->device_tx_status = fsl_re_tx_status;
+	dma_dev->device_issue_pending = fsl_re_issue_pending;
+
+	dma_dev->max_xor = FSL_RE_MAX_XOR_SRCS;
+	dma_dev->device_prep_dma_xor = fsl_re_prep_dma_xor;
+	dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+
+	dma_dev->max_pq = FSL_RE_MAX_PQ_SRCS;
+	dma_dev->device_prep_dma_pq = fsl_re_prep_dma_pq;
+	dma_cap_set(DMA_PQ, dma_dev->cap_mask);
+
+	dma_dev->device_prep_dma_memcpy = fsl_re_prep_dma_memcpy;
+	dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+
+	dma_dev->device_free_chan_resources = fsl_re_free_chan_resources;
+
+	re_priv->total_chans = 0;
+
+	re_priv->cf_desc_pool = dmam_pool_create("fsl_re_cf_desc_pool", dev,
+					FSL_RE_CF_CDB_SIZE,
+					FSL_RE_CF_CDB_ALIGN, 0);
+
+	if (!re_priv->cf_desc_pool) {
+		dev_err(dev, "No memory for fsl re_cf desc pool\n");
+		return -ENOMEM;
+	}
+
+	re_priv->hw_desc_pool = dmam_pool_create("fsl_re_hw_desc_pool", dev,
+			sizeof(struct fsl_re_hw_desc) * FSL_RE_RING_SIZE,
+			FSL_RE_FRAME_ALIGN, 0);
+	if (!re_priv->hw_desc_pool) {
+		dev_err(dev, "No memory for fsl re_hw desc pool\n");
+		return -ENOMEM;
+	}
+
+	dev_set_drvdata(dev, re_priv);
+
+	/* Parse Device tree to find out the total number of JQs present */
+	for_each_compatible_node(np, NULL, "fsl,raideng-v1.0-job-queue") {
+		rc = of_property_read_u32(np, "reg", &off);
+		if (rc) {
+			dev_err(dev, "Reg property not found in JQ node\n");
+			of_node_put(np);
+			return -ENODEV;
+		}
+		/* Find out the Job Rings present under each JQ */
+		for_each_child_of_node(np, child) {
+			rc = of_device_is_compatible(child,
+					     "fsl,raideng-v1.0-job-ring");
+			if (rc) {
+				fsl_re_chan_probe(ofdev, child, ridx++, off);
+				re_priv->total_chans++;
+			}
+		}
+	}
+
+	dma_async_device_register(dma_dev);
+
+	return 0;
+}
+
+static void fsl_re_remove_chan(struct fsl_re_chan *chan)
+{
+	tasklet_kill(&chan->irqtask);
+
+	dma_pool_free(chan->re_dev->hw_desc_pool, chan->inb_ring_virt_addr,
+		      chan->inb_phys_addr);
+
+	dma_pool_free(chan->re_dev->hw_desc_pool, chan->oub_ring_virt_addr,
+		      chan->oub_phys_addr);
+}
+
+static int fsl_re_remove(struct platform_device *ofdev)
+{
+	struct fsl_re_drv_private *re_priv;
+	struct device *dev;
+	int i;
+
+	dev = &ofdev->dev;
+	re_priv = dev_get_drvdata(dev);
+
+	/* Cleanup chan related memory areas */
+	for (i = 0; i < re_priv->total_chans; i++)
+		fsl_re_remove_chan(re_priv->re_jrs[i]);
+
+	/* Unregister the driver */
+	dma_async_device_unregister(&re_priv->dma_dev);
+
+	return 0;
+}
+
+static const struct of_device_id fsl_re_ids[] = {
+	{ .compatible = "fsl,raideng-v1.0", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, fsl_re_ids);
+
+static struct platform_driver fsl_re_driver = {
+	.driver = {
+		.name = "fsl-raideng",
+		.of_match_table = fsl_re_ids,
+	},
+	.probe = fsl_re_probe,
+	.remove = fsl_re_remove,
+};
+
+module_platform_driver(fsl_re_driver);
+
+MODULE_AUTHOR("Harninder Rai <harninder.rai@freescale.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Freescale RAID Engine Device Driver");
diff --git a/drivers/dma/fsl_raid.h b/drivers/dma/fsl_raid.h
new file mode 100644
index 0000000..69d743c
--- /dev/null
+++ b/drivers/dma/fsl_raid.h
@@ -0,0 +1,306 @@
+/*
+ * drivers/dma/fsl_raid.h
+ *
+ * Freescale RAID Engine device driver
+ *
+ * Author:
+ *	Harninder Rai <harninder.rai@freescale.com>
+ *	Naveen Burmi <naveenburmi@freescale.com>
+ *
+ * Rewrite:
+ *	Xuelin Shi <xuelin.shi@freescale.com>
+ *
+ * Copyright (c) 2010-2012 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#define FSL_RE_MAX_CHANS		4
+#define FSL_RE_DPAA_MODE		BIT(30)
+#define FSL_RE_NON_DPAA_MODE		BIT(31)
+#define FSL_RE_GFM_POLY			0x1d000000
+#define FSL_RE_ADD_JOB(x)		((x) << 16)
+#define FSL_RE_RMVD_JOB(x)		((x) << 16)
+#define FSL_RE_CFG1_CBSI		0x08000000
+#define FSL_RE_CFG1_CBS0		0x00080000
+#define FSL_RE_SLOT_FULL_SHIFT		8
+#define FSL_RE_SLOT_FULL(x)		((x) >> FSL_RE_SLOT_FULL_SHIFT)
+#define FSL_RE_SLOT_AVAIL_SHIFT		8
+#define FSL_RE_SLOT_AVAIL(x)		((x) >> FSL_RE_SLOT_AVAIL_SHIFT)
+#define FSL_RE_PQ_OPCODE		0x1B
+#define FSL_RE_XOR_OPCODE		0x1A
+#define FSL_RE_MOVE_OPCODE		0x8
+#define FSL_RE_FRAME_ALIGN		16
+#define FSL_RE_BLOCK_SIZE		0x3 /* 4096 bytes */
+#define FSL_RE_CACHEABLE_IO		0x0
+#define FSL_RE_BUFFER_OUTPUT		0x0
+#define FSL_RE_INTR_ON_ERROR		0x1
+#define FSL_RE_DATA_DEP			0x1
+#define FSL_RE_ENABLE_DPI		0x0
+#define FSL_RE_RING_SIZE		0x400
+#define FSL_RE_RING_SIZE_MASK		(FSL_RE_RING_SIZE - 1)
+#define FSL_RE_RING_SIZE_SHIFT		8
+#define FSL_RE_ADDR_BIT_SHIFT		4
+#define FSL_RE_ADDR_BIT_MASK		(BIT(FSL_RE_ADDR_BIT_SHIFT) - 1)
+#define FSL_RE_ERROR			0x40000000
+#define FSL_RE_INTR			0x80000000
+#define FSL_RE_CLR_INTR			0x80000000
+#define FSL_RE_PAUSE			0x80000000
+#define FSL_RE_ENABLE			0x80000000
+#define FSL_RE_REG_LIODN_MASK		0x00000FFF
+
+#define FSL_RE_CDB_OPCODE_MASK		0xF8000000
+#define FSL_RE_CDB_OPCODE_SHIFT		27
+#define FSL_RE_CDB_EXCLEN_MASK		0x03000000
+#define FSL_RE_CDB_EXCLEN_SHIFT		24
+#define FSL_RE_CDB_EXCLQ1_MASK		0x00F00000
+#define FSL_RE_CDB_EXCLQ1_SHIFT		20
+#define FSL_RE_CDB_EXCLQ2_MASK		0x000F0000
+#define FSL_RE_CDB_EXCLQ2_SHIFT		16
+#define FSL_RE_CDB_BLKSIZE_MASK		0x0000C000
+#define FSL_RE_CDB_BLKSIZE_SHIFT	14
+#define FSL_RE_CDB_CACHE_MASK		0x00003000
+#define FSL_RE_CDB_CACHE_SHIFT		12
+#define FSL_RE_CDB_BUFFER_MASK		0x00000800
+#define FSL_RE_CDB_BUFFER_SHIFT		11
+#define FSL_RE_CDB_ERROR_MASK		0x00000400
+#define FSL_RE_CDB_ERROR_SHIFT		10
+#define FSL_RE_CDB_NRCS_MASK		0x0000003C
+#define FSL_RE_CDB_NRCS_SHIFT		6
+#define FSL_RE_CDB_DEPEND_MASK		0x00000008
+#define FSL_RE_CDB_DEPEND_SHIFT		3
+#define FSL_RE_CDB_DPI_MASK		0x00000004
+#define FSL_RE_CDB_DPI_SHIFT		2
+
+/*
+ * The largest CF block is 19 * sizeof(struct cmpnd_frame) = 304 bytes, where
+ * 19 = 1 (CDB) + 2 (dest) + 16 (src); aligned to 64 bytes, that is 320 bytes.
+ * The largest CDB block is struct pq_cdb at 180 bytes; added to the CF block,
+ * 320 + 180 = 500 bytes, which aligned to 64 bytes gives 512 bytes.
+ */
+#define FSL_RE_CF_DESC_SIZE		320
+#define FSL_RE_CF_CDB_SIZE		512
+#define FSL_RE_CF_CDB_ALIGN		64
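+
+/*
+ * A compile-time sanity check along the lines of the sizing comment above
+ * could look like this (illustrative only, not present in this header):
+ *
+ *	BUILD_BUG_ON(sizeof(struct fsl_re_pq_cdb) >
+ *		     FSL_RE_CF_CDB_SIZE - FSL_RE_CF_DESC_SIZE);
+ */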
+
+struct fsl_re_ctrl {
+	/* General Configuration Registers */
+	__be32 global_config;	/* Global Configuration Register */
+	u8     rsvd1[4];
+	__be32 galois_field_config; /* Galois Field Configuration Register */
+	u8     rsvd2[4];
+	__be32 jq_wrr_config;   /* WRR Configuration register */
+	u8     rsvd3[4];
+	__be32 crc_config;	/* CRC Configuration register */
+	u8     rsvd4[228];
+	__be32 system_reset;	/* System Reset Register */
+	u8     rsvd5[252];
+	__be32 global_status;	/* Global Status Register */
+	u8     rsvd6[832];
+	__be32 re_liodn_base;	/* LIODN Base Register */
+	u8     rsvd7[1712];
+	__be32 re_version_id;	/* Version ID register of RE */
+	__be32 re_version_id_2; /* Version ID 2 register of RE */
+	u8     rsvd8[512];
+	__be32 host_config;	/* Host I/F Configuration Register */
+};
+
+struct fsl_re_chan_cfg {
+	/* Registers for JR interface */
+	__be32 jr_config_0;	/* Job Queue Configuration 0 Register */
+	__be32 jr_config_1;	/* Job Queue Configuration 1 Register */
+	__be32 jr_interrupt_status; /* Job Queue Interrupt Status Register */
+	u8     rsvd1[4];
+	__be32 jr_command;	/* Job Queue Command Register */
+	u8     rsvd2[4];
+	__be32 jr_status;	/* Job Queue Status Register */
+	u8     rsvd3[228];
+
+	/* Input Ring */
+	__be32 inbring_base_h;	/* Inbound Ring Base Address Register - High */
+	__be32 inbring_base_l;	/* Inbound Ring Base Address Register - Low */
+	__be32 inbring_size;	/* Inbound Ring Size Register */
+	u8     rsvd4[4];
+	__be32 inbring_slot_avail; /* Inbound Ring Slot Available Register */
+	u8     rsvd5[4];
+	__be32 inbring_add_job;	/* Inbound Ring Add Job Register */
+	u8     rsvd6[4];
+	__be32 inbring_cnsmr_indx; /* Inbound Ring Consumer Index Register */
+	u8     rsvd7[220];
+
+	/* Output Ring */
+	__be32 oubring_base_h;	/* Outbound Ring Base Address Register - High */
+	__be32 oubring_base_l;	/* Outbound Ring Base Address Register - Low */
+	__be32 oubring_size;	/* Outbound Ring Size Register */
+	u8     rsvd8[4];
+	__be32 oubring_job_rmvd; /* Outbound Ring Job Removed Register */
+	u8     rsvd9[4];
+	__be32 oubring_slot_full; /* Outbound Ring Slot Full Register */
+	u8     rsvd10[4];
+	__be32 oubring_prdcr_indx; /* Outbound Ring Producer Index */
+};
+
+/*
+ * Command Descriptor Block (CDB) for unicast move command.
+ * In RAID Engine terms, memcpy is done through the move command.
+ */
+struct fsl_re_move_cdb {
+	__be32 cdb32;
+};
+
+/* Data protection/integrity related fields */
+#define FSL_RE_DPI_APPS_MASK		0xC0000000
+#define FSL_RE_DPI_APPS_SHIFT		30
+#define FSL_RE_DPI_REF_MASK		0x30000000
+#define FSL_RE_DPI_REF_SHIFT		28
+#define FSL_RE_DPI_GUARD_MASK		0x0C000000
+#define FSL_RE_DPI_GUARD_SHIFT		26
+#define FSL_RE_DPI_ATTR_MASK		0x03000000
+#define FSL_RE_DPI_ATTR_SHIFT		24
+#define FSL_RE_DPI_META_MASK		0x0000FFFF
+
+struct fsl_re_dpi {
+	__be32 dpi32;
+	__be32 ref;
+};
+
+/*
+ * CDB for GenQ command. In RAID Engine terminology, XOR is
+ * done through this command
+ */
+struct fsl_re_xor_cdb {
+	__be32 cdb32;
+	u8 gfm[16];
+	struct fsl_re_dpi dpi_dest_spec;
+	struct fsl_re_dpi dpi_src_spec[16];
+};
+
+/* CDB for no-op command */
+struct fsl_re_noop_cdb {
+	__be32 cdb32;
+};
+
+/*
+ * CDB for GenQQ command. In RAID Engine terminology, P/Q is
+ * done through this command
+ */
+struct fsl_re_pq_cdb {
+	__be32 cdb32;
+	u8 gfm_q1[16];
+	u8 gfm_q2[16];
+	struct fsl_re_dpi dpi_dest_spec[2];
+	struct fsl_re_dpi dpi_src_spec[16];
+};
+
+/* Compound frame */
+#define FSL_RE_CF_ADDR_HIGH_MASK	0x000000FF
+#define FSL_RE_CF_EXT_MASK		0x80000000
+#define FSL_RE_CF_EXT_SHIFT		31
+#define FSL_RE_CF_FINAL_MASK		0x40000000
+#define FSL_RE_CF_FINAL_SHIFT		30
+#define FSL_RE_CF_LENGTH_MASK		0x000FFFFF
+#define FSL_RE_CF_BPID_MASK		0x00FF0000
+#define FSL_RE_CF_BPID_SHIFT		16
+#define FSL_RE_CF_OFFSET_MASK		0x00001FFF
+
+struct fsl_re_cmpnd_frame {
+	__be32 addr_high;
+	__be32 addr_low;
+	__be32 efrl32;
+	__be32 rbro32;
+};
+
+/* Frame descriptor */
+#define FSL_RE_HWDESC_LIODN_MASK	0x3F000000
+#define FSL_RE_HWDESC_LIODN_SHIFT	24
+#define FSL_RE_HWDESC_BPID_MASK		0x00FF0000
+#define FSL_RE_HWDESC_BPID_SHIFT	16
+#define FSL_RE_HWDESC_ELIODN_MASK	0x0000F000
+#define FSL_RE_HWDESC_ELIODN_SHIFT	12
+#define FSL_RE_HWDESC_FMT_SHIFT		29
+#define FSL_RE_HWDESC_FMT_MASK		(0x3 << FSL_RE_HWDESC_FMT_SHIFT)
+
+struct fsl_re_hw_desc {
+	__be32 lbea32;
+	__be32 addr_low;
+	__be32 fmt32;
+	__be32 status;
+};
+
+/* Raid Engine device private data */
+struct fsl_re_drv_private {
+	u8 total_chans;
+	struct dma_device dma_dev;
+	struct fsl_re_ctrl *re_regs;
+	struct fsl_re_chan *re_jrs[FSL_RE_MAX_CHANS];
+	struct dma_pool *cf_desc_pool;
+	struct dma_pool *hw_desc_pool;
+};
+
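+/*
+ * Descriptor lifecycle across the per-channel queues below:
+ *   free_q -> submit_q (fsl_re_tx_submit) -> active_q (fsl_re_issue_pending)
+ *   -> ack_q (fsl_re_dequeue, on completion) -> free_q (fsl_re_cleanup_descs,
+ *   once the client has acked the transaction)
+ */
+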
+/* Per job ring data structure */
+struct fsl_re_chan {
+	char name[16];
+	spinlock_t desc_lock; /* queue lock */
+	struct list_head ack_q;  /* completed, waiting to be acked */
+	struct list_head active_q; /* already issued on hw, not completed */
+	struct list_head submit_q;
+	struct list_head free_q; /* free descriptors available for allocation */
+	struct device *dev;
+	struct fsl_re_drv_private *re_dev;
+	struct dma_chan chan;
+	struct fsl_re_chan_cfg *jrregs;
+	int irq;
+	struct tasklet_struct irqtask;
+	u32 alloc_count;
+
+	/* hw descriptor ring for inbound queue */
+	dma_addr_t inb_phys_addr;
+	struct fsl_re_hw_desc *inb_ring_virt_addr;
+	u32 inb_count;
+
+	/* hw descriptor ring for outbound queue */
+	dma_addr_t oub_phys_addr;
+	struct fsl_re_hw_desc *oub_ring_virt_addr;
+	u32 oub_count;
+};
+
+/* Async transaction descriptor */
+struct fsl_re_desc {
+	struct dma_async_tx_descriptor async_tx;
+	struct list_head node;
+	struct fsl_re_hw_desc hwdesc;
+	struct fsl_re_chan *re_chan;
+
+	/* hwdesc will point to cf_addr */
+	void *cf_addr;
+	dma_addr_t cf_paddr;
+
+	void *cdb_addr;
+	dma_addr_t cdb_paddr;
+	int status;
+};
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
new file mode 100644
index 0000000..1117b51
--- /dev/null
+++ b/drivers/dma/fsldma.c
@@ -0,0 +1,1428 @@
+/*
+ * Freescale MPC85xx, MPC83xx DMA Engine support
+ *
+ * Copyright (C) 2007-2010 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author:
+ *   Zhang Wei <wei.zhang@freescale.com>, Jul 2007
+ *   Ebony Zhu <ebony.zhu@freescale.com>, May 2007
+ *
+ * Description:
+ *   DMA engine driver for the Freescale MPC8540 DMA controller, which also
+ *   fits the MPC8560, MPC8555, MPC8548, MPC8641, etc.
+ *   Support for the MPC8349 DMA controller has also been added.
+ *
+ * This driver instructs the DMA controller to issue the PCI Read Multiple
+ * command for PCI read operations, instead of using the default PCI Read Line
+ * command. Please be aware that this setting may result in read pre-fetching
+ * on some platforms.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/fsldma.h>
+#include "dmaengine.h"
+#include "fsldma.h"
+
+#define chan_dbg(chan, fmt, arg...)					\
+	dev_dbg(chan->dev, "%s: " fmt, chan->name, ##arg)
+#define chan_err(chan, fmt, arg...)					\
+	dev_err(chan->dev, "%s: " fmt, chan->name, ##arg)
+
+static const char msg_ld_oom[] = "No free memory for link descriptor";
+
+/*
+ * Register Helpers
+ */
+
+static void set_sr(struct fsldma_chan *chan, u32 val)
+{
+	DMA_OUT(chan, &chan->regs->sr, val, 32);
+}
+
+static u32 get_sr(struct fsldma_chan *chan)
+{
+	return DMA_IN(chan, &chan->regs->sr, 32);
+}
+
+static void set_mr(struct fsldma_chan *chan, u32 val)
+{
+	DMA_OUT(chan, &chan->regs->mr, val, 32);
+}
+
+static u32 get_mr(struct fsldma_chan *chan)
+{
+	return DMA_IN(chan, &chan->regs->mr, 32);
+}
+
+static void set_cdar(struct fsldma_chan *chan, dma_addr_t addr)
+{
+	DMA_OUT(chan, &chan->regs->cdar, addr | FSL_DMA_SNEN, 64);
+}
+
+static dma_addr_t get_cdar(struct fsldma_chan *chan)
+{
+	return DMA_IN(chan, &chan->regs->cdar, 64) & ~FSL_DMA_SNEN;
+}
+
+static void set_bcr(struct fsldma_chan *chan, u32 val)
+{
+	DMA_OUT(chan, &chan->regs->bcr, val, 32);
+}
+
+static u32 get_bcr(struct fsldma_chan *chan)
+{
+	return DMA_IN(chan, &chan->regs->bcr, 32);
+}
+
+/*
+ * Descriptor Helpers
+ */
+
+static void set_desc_cnt(struct fsldma_chan *chan,
+				struct fsl_dma_ld_hw *hw, u32 count)
+{
+	hw->count = CPU_TO_DMA(chan, count, 32);
+}
+
+static void set_desc_src(struct fsldma_chan *chan,
+			 struct fsl_dma_ld_hw *hw, dma_addr_t src)
+{
+	u64 snoop_bits;
+
+	snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX)
+		? ((u64)FSL_DMA_SATR_SREADTYPE_SNOOP_READ << 32) : 0;
+	hw->src_addr = CPU_TO_DMA(chan, snoop_bits | src, 64);
+}
+
+static void set_desc_dst(struct fsldma_chan *chan,
+			 struct fsl_dma_ld_hw *hw, dma_addr_t dst)
+{
+	u64 snoop_bits;
+
+	snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX)
+		? ((u64)FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE << 32) : 0;
+	hw->dst_addr = CPU_TO_DMA(chan, snoop_bits | dst, 64);
+}
+
+static void set_desc_next(struct fsldma_chan *chan,
+			  struct fsl_dma_ld_hw *hw, dma_addr_t next)
+{
+	u64 snoop_bits;
+
+	snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX)
+		? FSL_DMA_SNEN : 0;
+	hw->next_ln_addr = CPU_TO_DMA(chan, snoop_bits | next, 64);
+}
+
+static void set_ld_eol(struct fsldma_chan *chan, struct fsl_desc_sw *desc)
+{
+	u64 snoop_bits;
+
+	snoop_bits = ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX)
+		? FSL_DMA_SNEN : 0;
+
+	desc->hw.next_ln_addr = CPU_TO_DMA(chan,
+		DMA_TO_CPU(chan, desc->hw.next_ln_addr, 64) | FSL_DMA_EOL
+			| snoop_bits, 64);
+}
+
+/*
+ * DMA Engine Hardware Control Helpers
+ */
+
+static void dma_init(struct fsldma_chan *chan)
+{
+	/* Reset the channel */
+	set_mr(chan, 0);
+
+	switch (chan->feature & FSL_DMA_IP_MASK) {
+	case FSL_DMA_IP_85XX:
+		/* Set the channel to the following modes:
+		 * EIE - Error interrupt enable
+		 * EOLNIE - End of links interrupt enable
+		 * BWC - Bandwidth sharing among channels
+		 */
+		set_mr(chan, FSL_DMA_MR_BWC | FSL_DMA_MR_EIE
+			| FSL_DMA_MR_EOLNIE);
+		break;
+	case FSL_DMA_IP_83XX:
+		/* Set the channel to the following modes:
+		 * EOTIE - End-of-transfer interrupt enable
+		 * PRC_RM - PCI read multiple
+		 */
+		set_mr(chan, FSL_DMA_MR_EOTIE | FSL_DMA_MR_PRC_RM);
+		break;
+	}
+}
+
+static int dma_is_idle(struct fsldma_chan *chan)
+{
+	u32 sr = get_sr(chan);
+	return (!(sr & FSL_DMA_SR_CB)) || (sr & FSL_DMA_SR_CH);
+}
+
+/*
+ * Start the DMA controller
+ *
+ * Preconditions:
+ * - the CDAR register must point to the start descriptor
+ * - the MRn[CS] bit must be cleared
+ */
+static void dma_start(struct fsldma_chan *chan)
+{
+	u32 mode;
+
+	mode = get_mr(chan);
+
+	if (chan->feature & FSL_DMA_CHAN_PAUSE_EXT) {
+		set_bcr(chan, 0);
+		mode |= FSL_DMA_MR_EMP_EN;
+	} else {
+		mode &= ~FSL_DMA_MR_EMP_EN;
+	}
+
+	if (chan->feature & FSL_DMA_CHAN_START_EXT) {
+		mode |= FSL_DMA_MR_EMS_EN;
+	} else {
+		mode &= ~FSL_DMA_MR_EMS_EN;
+		mode |= FSL_DMA_MR_CS;
+	}
+
+	set_mr(chan, mode);
+}
+
+static void dma_halt(struct fsldma_chan *chan)
+{
+	u32 mode;
+	int i;
+
+	/* read the mode register */
+	mode = get_mr(chan);
+
+	/*
+	 * The 85xx controller supports channel abort, which will stop
+	 * the current transfer. On 83xx, this bit is the transfer error
+	 * mask bit, which should not be changed.
+	 */
+	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
+		mode |= FSL_DMA_MR_CA;
+		set_mr(chan, mode);
+
+		mode &= ~FSL_DMA_MR_CA;
+	}
+
+	/* stop the DMA controller */
+	mode &= ~(FSL_DMA_MR_CS | FSL_DMA_MR_EMS_EN);
+	set_mr(chan, mode);
+
+	/* wait for the DMA controller to become idle */
+	for (i = 0; i < 100; i++) {
+		if (dma_is_idle(chan))
+			return;
+
+		udelay(10);
+	}
+
+	if (!dma_is_idle(chan))
+		chan_err(chan, "DMA halt timeout!\n");
+}
+
+/**
+ * fsl_chan_set_src_loop_size - Set source address hold transfer size
+ * @chan : Freescale DMA channel
+ * @size     : Address loop size, 0 to disable the loop
+ *
+ * Set the source address hold transfer size. The source address hold, or
+ * loop, transfer size limits the range the DMA reads from the source
+ * address (SA): if the loop size is 4, the DMA will read data from SA,
+ * SA + 1, SA + 2, SA + 3, then loop back to SA, SA + 1, and so on.
+ */
+static void fsl_chan_set_src_loop_size(struct fsldma_chan *chan, int size)
+{
+	u32 mode;
+
+	mode = get_mr(chan);
+
+	switch (size) {
+	case 0:
+		mode &= ~FSL_DMA_MR_SAHE;
+		break;
+	case 1:
+	case 2:
+	case 4:
+	case 8:
+		mode &= ~FSL_DMA_MR_SAHTS_MASK;
+		mode |= FSL_DMA_MR_SAHE | (__ilog2(size) << 14);
+		break;
+	}
+
+	set_mr(chan, mode);
+}
+
+/**
+ * fsl_chan_set_dst_loop_size - Set destination address hold transfer size
+ * @chan : Freescale DMA channel
+ * @size     : Address loop size, 0 to disable the loop
+ *
+ * Set the destination address hold transfer size. The destination address
+ * hold, or loop, transfer size limits the range the DMA writes to the
+ * destination address (TA): if the loop size is 4, the DMA will write data
+ * to TA, TA + 1, TA + 2, TA + 3, then loop back to TA, TA + 1, and so on.
+ */
+static void fsl_chan_set_dst_loop_size(struct fsldma_chan *chan, int size)
+{
+	u32 mode;
+
+	mode = get_mr(chan);
+
+	switch (size) {
+	case 0:
+		mode &= ~FSL_DMA_MR_DAHE;
+		break;
+	case 1:
+	case 2:
+	case 4:
+	case 8:
+		mode &= ~FSL_DMA_MR_DAHTS_MASK;
+		mode |= FSL_DMA_MR_DAHE | (__ilog2(size) << 16);
+		break;
+	}
+
+	set_mr(chan, mode);
+}
+
+/**
+ * fsl_chan_set_request_count - Set DMA Request Count for external control
+ * @chan : Freescale DMA channel
+ * @size     : Number of bytes to transfer in a single request
+ *
+ * The Freescale DMA channel can be controlled by the external signal DREQ#.
+ * The DMA request count is how many bytes are allowed to transfer before
+ * pausing the channel, after which a new assertion of DREQ# resumes channel
+ * operation.
+ *
+ * A size of 0 disables external pause control. The maximum size is 1024.
+ */
+static void fsl_chan_set_request_count(struct fsldma_chan *chan, int size)
+{
+	u32 mode;
+
+	BUG_ON(size > 1024);
+
+	mode = get_mr(chan);
+	mode &= ~FSL_DMA_MR_BWC_MASK;
+	mode |= (__ilog2(size) << 24) & FSL_DMA_MR_BWC_MASK;
+
+	set_mr(chan, mode);
+}
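+
+/*
+ * Worked example for fsl_chan_set_request_count() above: with size = 64,
+ * __ilog2(64) = 6 is shifted into the BWC field of the mode register, so
+ * the channel pauses after every 64 bytes transferred until DREQ# is
+ * asserted again.
+ */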
+
+/**
+ * fsl_chan_toggle_ext_pause - Toggle channel external pause status
+ * @chan : Freescale DMA channel
+ * @enable   : 0 is disabled, 1 is enabled.
+ *
+ * The Freescale DMA channel can be controlled by the external signal DREQ#.
+ * The DMA Request Count feature should be used in addition to this feature
+ * to set the number of bytes to transfer before pausing the channel.
+ */
+static void fsl_chan_toggle_ext_pause(struct fsldma_chan *chan, int enable)
+{
+	if (enable)
+		chan->feature |= FSL_DMA_CHAN_PAUSE_EXT;
+	else
+		chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT;
+}
+
+/**
+ * fsl_chan_toggle_ext_start - Toggle channel external start status
+ * @chan : Freescale DMA channel
+ * @enable   : 0 is disabled, 1 is enabled.
+ *
+ * If external start is enabled, the channel can be started by an
+ * external DMA start pin, so dma_start() does not start the
+ * transfer immediately. The DMA channel will wait until the
+ * control pin is asserted.
+ */
+static void fsl_chan_toggle_ext_start(struct fsldma_chan *chan, int enable)
+{
+	if (enable)
+		chan->feature |= FSL_DMA_CHAN_START_EXT;
+	else
+		chan->feature &= ~FSL_DMA_CHAN_START_EXT;
+}
+
+int fsl_dma_external_start(struct dma_chan *dchan, int enable)
+{
+	struct fsldma_chan *chan;
+
+	if (!dchan)
+		return -EINVAL;
+
+	chan = to_fsl_chan(dchan);
+
+	fsl_chan_toggle_ext_start(chan, enable);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(fsl_dma_external_start);
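+
+/*
+ * Illustrative usage (not part of this file): a client holding a channel
+ * "dchan" can arm externally triggered transfers before submitting work:
+ *
+ *	fsl_dma_external_start(dchan, 1);
+ *	... prepare and submit descriptors ...
+ *	dma_async_issue_pending(dchan);
+ *
+ * dma_start() will then wait for the external control pin instead of
+ * starting the transfer immediately.
+ */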
+
+static void append_ld_queue(struct fsldma_chan *chan, struct fsl_desc_sw *desc)
+{
+	struct fsl_desc_sw *tail = to_fsl_desc(chan->ld_pending.prev);
+
+	if (list_empty(&chan->ld_pending))
+		goto out_splice;
+
+	/*
+	 * Add the hardware descriptor to the chain of hardware descriptors
+	 * that already exists in memory.
+	 *
+	 * This will un-set the EOL bit of the existing transaction, and the
+	 * last link in this transaction will become the EOL descriptor.
+	 */
+	set_desc_next(chan, &tail->hw, desc->async_tx.phys);
+
+	/*
+	 * Add the software descriptor and all children to the list
+	 * of pending transactions
+	 */
+out_splice:
+	list_splice_tail_init(&desc->tx_list, &chan->ld_pending);
+}
+
+static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct fsldma_chan *chan = to_fsl_chan(tx->chan);
+	struct fsl_desc_sw *desc = tx_to_fsl_desc(tx);
+	struct fsl_desc_sw *child;
+	dma_cookie_t cookie = -EINVAL;
+
+	spin_lock_bh(&chan->desc_lock);
+
+#ifdef CONFIG_PM
+	if (unlikely(chan->pm_state != RUNNING)) {
+		chan_dbg(chan, "cannot submit due to suspend\n");
+		spin_unlock_bh(&chan->desc_lock);
+		return -1;
+	}
+#endif
+
+	/*
+	 * assign cookies to all of the software descriptors
+	 * that make up this transaction
+	 */
+	list_for_each_entry(child, &desc->tx_list, node) {
+		cookie = dma_cookie_assign(&child->async_tx);
+	}
+
+	/* put this transaction onto the tail of the pending queue */
+	append_ld_queue(chan, desc);
+
+	spin_unlock_bh(&chan->desc_lock);
+
+	return cookie;
+}
+
+/**
+ * fsl_dma_free_descriptor - Free descriptor from channel's DMA pool.
+ * @chan : Freescale DMA channel
+ * @desc: descriptor to be freed
+ */
+static void fsl_dma_free_descriptor(struct fsldma_chan *chan,
+		struct fsl_desc_sw *desc)
+{
+	list_del(&desc->node);
+	chan_dbg(chan, "LD %p free\n", desc);
+	dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
+}
+
+/**
+ * fsl_dma_alloc_descriptor - Allocate descriptor from channel's DMA pool.
+ * @chan : Freescale DMA channel
+ *
+ * Return - The allocated descriptor, or NULL on failure.
+ */
+static struct fsl_desc_sw *fsl_dma_alloc_descriptor(struct fsldma_chan *chan)
+{
+	struct fsl_desc_sw *desc;
+	dma_addr_t pdesc;
+
+	desc = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &pdesc);
+	if (!desc) {
+		chan_dbg(chan, "out of memory for link descriptor\n");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&desc->tx_list);
+	dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+	desc->async_tx.tx_submit = fsl_dma_tx_submit;
+	desc->async_tx.phys = pdesc;
+
+	chan_dbg(chan, "LD %p allocated\n", desc);
+
+	return desc;
+}
+
+/**
+ * fsldma_clean_completed_descriptor - free all descriptors which
+ * have been completed and acked
+ * @chan: Freescale DMA channel
+ *
+ * This function is used on all completed and acked descriptors.
+ * All descriptors should only be freed in this function.
+ */
+static void fsldma_clean_completed_descriptor(struct fsldma_chan *chan)
+{
+	struct fsl_desc_sw *desc, *_desc;
+
+	/* Run the callback for each descriptor, in order */
+	list_for_each_entry_safe(desc, _desc, &chan->ld_completed, node)
+		if (async_tx_test_ack(&desc->async_tx))
+			fsl_dma_free_descriptor(chan, desc);
+}
+
+/**
+ * fsldma_run_tx_complete_actions - cleanup a single link descriptor
+ * @chan: Freescale DMA channel
+ * @desc: descriptor to cleanup and free
+ * @cookie: Freescale DMA transaction identifier
+ *
+ * This function is used on a descriptor which has been executed by the DMA
+ * controller. It will run any callbacks, submit any dependencies.
+ */
+static dma_cookie_t fsldma_run_tx_complete_actions(struct fsldma_chan *chan,
+		struct fsl_desc_sw *desc, dma_cookie_t cookie)
+{
+	struct dma_async_tx_descriptor *txd = &desc->async_tx;
+	dma_cookie_t ret = cookie;
+
+	BUG_ON(txd->cookie < 0);
+
+	if (txd->cookie > 0) {
+		ret = txd->cookie;
+
+		dma_descriptor_unmap(txd);
+		/* Run the link descriptor callback function */
+		dmaengine_desc_get_callback_invoke(txd, NULL);
+	}
+
+	/* Run any dependencies */
+	dma_run_dependencies(txd);
+
+	return ret;
+}
+
+/**
+ * fsldma_clean_running_descriptor - move the completed descriptor from
+ * ld_running to ld_completed
+ * @chan: Freescale DMA channel
+ * @desc: the descriptor which is completed
+ *
+ * Free the descriptor directly if it has been acked by the async_tx API,
+ * or move it to the ld_completed queue otherwise.
+ */
+static void fsldma_clean_running_descriptor(struct fsldma_chan *chan,
+		struct fsl_desc_sw *desc)
+{
+	/* Remove from the list of transactions */
+	list_del(&desc->node);
+
+	/*
+	 * the client is allowed to attach dependent operations
+	 * until 'ack' is set
+	 */
+	if (!async_tx_test_ack(&desc->async_tx)) {
+		/*
+		 * Move this descriptor to the list of descriptors which is
+		 * completed, but still awaiting the 'ack' bit to be set.
+		 */
+		list_add_tail(&desc->node, &chan->ld_completed);
+		return;
+	}
+
+	dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
+}
+
+/**
+ * fsl_chan_xfer_ld_queue - transfer any pending transactions
+ * @chan : Freescale DMA channel
+ *
+ * HARDWARE STATE: idle
+ * LOCKING: must hold chan->desc_lock
+ */
+static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
+{
+	struct fsl_desc_sw *desc;
+
+	/*
+	 * If the list of pending descriptors is empty, then we
+	 * don't need to do any work at all
+	 */
+	if (list_empty(&chan->ld_pending)) {
+		chan_dbg(chan, "no pending LDs\n");
+		return;
+	}
+
+	/*
+	 * The DMA controller is not idle, which means that the interrupt
+	 * handler will start any queued transactions when it runs after
+	 * this transaction finishes
+	 */
+	if (!chan->idle) {
+		chan_dbg(chan, "DMA controller still busy\n");
+		return;
+	}
+
+	/*
+	 * If there are some link descriptors which have not been
+	 * transferred, we need to start the controller
+	 */
+
+	/*
+	 * Move all elements from the queue of pending transactions
+	 * onto the list of running transactions
+	 */
+	chan_dbg(chan, "idle, starting controller\n");
+	desc = list_first_entry(&chan->ld_pending, struct fsl_desc_sw, node);
+	list_splice_tail_init(&chan->ld_pending, &chan->ld_running);
+
+	/*
+	 * The 85xx DMA controller doesn't clear the channel start bit
+	 * automatically at the end of a transfer. Therefore we must clear
+	 * it in software before starting the transfer.
+	 */
+	if ((chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) {
+		u32 mode;
+
+		mode = get_mr(chan);
+		mode &= ~FSL_DMA_MR_CS;
+		set_mr(chan, mode);
+	}
+
+	/*
+	 * Program the descriptor's address into the DMA controller,
+	 * then start the DMA transaction
+	 */
+	set_cdar(chan, desc->async_tx.phys);
+	get_cdar(chan);
+
+	dma_start(chan);
+	chan->idle = false;
+}
+
+/**
+ * fsldma_cleanup_descriptors - clean up link descriptors which are completed
+ * and move them to ld_completed, where they are freed once the 'ack' flag is set
+ * @chan: Freescale DMA channel
+ *
+ * This function is used on descriptors which have been executed by the DMA
+ * controller. It will run any callbacks, submit any dependencies, then
+ * free these descriptors if flag 'ack' is set.
+ */
+static void fsldma_cleanup_descriptors(struct fsldma_chan *chan)
+{
+	struct fsl_desc_sw *desc, *_desc;
+	dma_cookie_t cookie = 0;
+	dma_addr_t curr_phys = get_cdar(chan);
+	int seen_current = 0;
+
+	fsldma_clean_completed_descriptor(chan);
+
+	/* Run the callback for each descriptor, in order */
+	list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) {
+		/*
+		 * do not advance past the current descriptor loaded into the
+		 * hardware channel, subsequent descriptors are either in
+		 * process or have not been submitted
+		 */
+		if (seen_current)
+			break;
+
+		/*
+		 * stop the search if we reach the current descriptor and the
+		 * channel is busy
+		 */
+		if (desc->async_tx.phys == curr_phys) {
+			seen_current = 1;
+			if (!dma_is_idle(chan))
+				break;
+		}
+
+		cookie = fsldma_run_tx_complete_actions(chan, desc, cookie);
+
+		fsldma_clean_running_descriptor(chan, desc);
+	}
+
+	/*
+	 * Start any pending transactions automatically
+	 *
+	 * In the ideal case, we keep the DMA controller busy while we go
+	 * ahead and free the descriptors below.
+	 */
+	fsl_chan_xfer_ld_queue(chan);
+
+	if (cookie > 0)
+		chan->common.completed_cookie = cookie;
+}
+
+/**
+ * fsl_dma_alloc_chan_resources - Allocate resources for DMA channel.
+ * @chan : Freescale DMA channel
+ *
+ * This function will create a dma pool for descriptor allocation.
+ *
+ * Return - The number of descriptors allocated.
+ */
+static int fsl_dma_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct fsldma_chan *chan = to_fsl_chan(dchan);
+
+	/* Has this channel already been allocated? */
+	if (chan->desc_pool)
+		return 1;
+
+	/*
+	 * We need the descriptor to be aligned to 32 bytes
+	 * to meet the FSL DMA specification requirement.
+	 */
+	chan->desc_pool = dma_pool_create(chan->name, chan->dev,
+					  sizeof(struct fsl_desc_sw),
+					  __alignof__(struct fsl_desc_sw), 0);
+	if (!chan->desc_pool) {
+		chan_err(chan, "unable to allocate descriptor pool\n");
+		return -ENOMEM;
+	}
+
+	/* there is at least one descriptor free to be allocated */
+	return 1;
+}
+
+/**
+ * fsldma_free_desc_list - Free all descriptors in a queue
+ * @chan: Freescale DMA channel
+ * @list: the list to free
+ *
+ * LOCKING: must hold chan->desc_lock
+ */
+static void fsldma_free_desc_list(struct fsldma_chan *chan,
+				  struct list_head *list)
+{
+	struct fsl_desc_sw *desc, *_desc;
+
+	list_for_each_entry_safe(desc, _desc, list, node)
+		fsl_dma_free_descriptor(chan, desc);
+}
+
+static void fsldma_free_desc_list_reverse(struct fsldma_chan *chan,
+					  struct list_head *list)
+{
+	struct fsl_desc_sw *desc, *_desc;
+
+	list_for_each_entry_safe_reverse(desc, _desc, list, node)
+		fsl_dma_free_descriptor(chan, desc);
+}
+
+/**
+ * fsl_dma_free_chan_resources - Free all resources of the channel.
+ * @chan : Freescale DMA channel
+ */
+static void fsl_dma_free_chan_resources(struct dma_chan *dchan)
+{
+	struct fsldma_chan *chan = to_fsl_chan(dchan);
+
+	chan_dbg(chan, "free all channel resources\n");
+	spin_lock_bh(&chan->desc_lock);
+	fsldma_cleanup_descriptors(chan);
+	fsldma_free_desc_list(chan, &chan->ld_pending);
+	fsldma_free_desc_list(chan, &chan->ld_running);
+	fsldma_free_desc_list(chan, &chan->ld_completed);
+	spin_unlock_bh(&chan->desc_lock);
+
+	dma_pool_destroy(chan->desc_pool);
+	chan->desc_pool = NULL;
+}
+
+static struct dma_async_tx_descriptor *
+fsl_dma_prep_memcpy(struct dma_chan *dchan,
+	dma_addr_t dma_dst, dma_addr_t dma_src,
+	size_t len, unsigned long flags)
+{
+	struct fsldma_chan *chan;
+	struct fsl_desc_sw *first = NULL, *prev = NULL, *new;
+	size_t copy;
+
+	if (!dchan)
+		return NULL;
+
+	if (!len)
+		return NULL;
+
+	chan = to_fsl_chan(dchan);
+
+	do {
+
+		/* Allocate the link descriptor from DMA pool */
+		new = fsl_dma_alloc_descriptor(chan);
+		if (!new) {
+			chan_err(chan, "%s\n", msg_ld_oom);
+			goto fail;
+		}
+
+		copy = min(len, (size_t)FSL_DMA_BCR_MAX_CNT);
+
+		set_desc_cnt(chan, &new->hw, copy);
+		set_desc_src(chan, &new->hw, dma_src);
+		set_desc_dst(chan, &new->hw, dma_dst);
+
+		if (!first)
+			first = new;
+		else
+			set_desc_next(chan, &prev->hw, new->async_tx.phys);
+
+		new->async_tx.cookie = 0;
+		async_tx_ack(&new->async_tx);
+
+		prev = new;
+		len -= copy;
+		dma_src += copy;
+		dma_dst += copy;
+
+		/* Insert the link descriptor to the LD ring */
+		list_add_tail(&new->node, &first->tx_list);
+	} while (len);
+
+	new->async_tx.flags = flags; /* client is in control of this ack */
+	new->async_tx.cookie = -EBUSY;
+
+	/* Set End-of-link to the last link descriptor of new list */
+	set_ld_eol(chan, new);
+
+	return &first->async_tx;
+
+fail:
+	if (!first)
+		return NULL;
+
+	fsldma_free_desc_list_reverse(chan, &first->tx_list);
+	return NULL;
+}
+
+static int fsl_dma_device_terminate_all(struct dma_chan *dchan)
+{
+	struct fsldma_chan *chan;
+
+	if (!dchan)
+		return -EINVAL;
+
+	chan = to_fsl_chan(dchan);
+
+	spin_lock_bh(&chan->desc_lock);
+
+	/* Halt the DMA engine */
+	dma_halt(chan);
+
+	/* Remove and free all of the descriptors in the LD queue */
+	fsldma_free_desc_list(chan, &chan->ld_pending);
+	fsldma_free_desc_list(chan, &chan->ld_running);
+	fsldma_free_desc_list(chan, &chan->ld_completed);
+	chan->idle = true;
+
+	spin_unlock_bh(&chan->desc_lock);
+	return 0;
+}
+
+static int fsl_dma_device_config(struct dma_chan *dchan,
+				 struct dma_slave_config *config)
+{
+	struct fsldma_chan *chan;
+	int size;
+
+	if (!dchan)
+		return -EINVAL;
+
+	chan = to_fsl_chan(dchan);
+
+	/* make sure the channel supports setting burst size */
+	if (!chan->set_request_count)
+		return -ENXIO;
+
+	/* we set the controller burst size depending on direction */
+	if (config->direction == DMA_MEM_TO_DEV)
+		size = config->dst_addr_width * config->dst_maxburst;
+	else
+		size = config->src_addr_width * config->src_maxburst;
+
+	chan->set_request_count(chan, size);
+	return 0;
+}
+
+
+/**
+ * fsl_dma_memcpy_issue_pending - Issue the DMA start command
+ * @chan : Freescale DMA channel
+ */
+static void fsl_dma_memcpy_issue_pending(struct dma_chan *dchan)
+{
+	struct fsldma_chan *chan = to_fsl_chan(dchan);
+
+	spin_lock_bh(&chan->desc_lock);
+	fsl_chan_xfer_ld_queue(chan);
+	spin_unlock_bh(&chan->desc_lock);
+}
+
+/**
+ * fsl_tx_status - Determine the DMA status
+ * @chan : Freescale DMA channel
+ */
+static enum dma_status fsl_tx_status(struct dma_chan *dchan,
+					dma_cookie_t cookie,
+					struct dma_tx_state *txstate)
+{
+	struct fsldma_chan *chan = to_fsl_chan(dchan);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(dchan, cookie, txstate);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	spin_lock_bh(&chan->desc_lock);
+	fsldma_cleanup_descriptors(chan);
+	spin_unlock_bh(&chan->desc_lock);
+
+	return dma_cookie_status(dchan, cookie, txstate);
+}
+
+/*----------------------------------------------------------------------------*/
+/* Interrupt Handling                                                         */
+/*----------------------------------------------------------------------------*/
+
+static irqreturn_t fsldma_chan_irq(int irq, void *data)
+{
+	struct fsldma_chan *chan = data;
+	u32 stat;
+
+	/* save and clear the status register */
+	stat = get_sr(chan);
+	set_sr(chan, stat);
+	chan_dbg(chan, "irq: stat = 0x%x\n", stat);
+
+	/* check that this was really our device */
+	stat &= ~(FSL_DMA_SR_CB | FSL_DMA_SR_CH);
+	if (!stat)
+		return IRQ_NONE;
+
+	if (stat & FSL_DMA_SR_TE)
+		chan_err(chan, "Transfer Error!\n");
+
+	/*
+	 * Programming Error
+	 * The DMA_INTERRUPT async_tx is a NULL transfer, which will
+	 * trigger a PE interrupt.
+	 */
+	if (stat & FSL_DMA_SR_PE) {
+		chan_dbg(chan, "irq: Programming Error INT\n");
+		stat &= ~FSL_DMA_SR_PE;
+		if (get_bcr(chan) != 0)
+			chan_err(chan, "Programming Error!\n");
+	}
+
+	/*
+	 * For the MPC8349, the EOCDI event needs to update the cookie
+	 * and start the next transfer if one exists.
+	 */
+	if (stat & FSL_DMA_SR_EOCDI) {
+		chan_dbg(chan, "irq: End-of-Chain link INT\n");
+		stat &= ~FSL_DMA_SR_EOCDI;
+	}
+
+	/*
+	 * If the current transfer is the end-of-transfer,
+	 * we should clear the Channel Start bit to
+	 * prepare for the next transfer.
+	 */
+	if (stat & FSL_DMA_SR_EOLNI) {
+		chan_dbg(chan, "irq: End-of-link INT\n");
+		stat &= ~FSL_DMA_SR_EOLNI;
+	}
+
+	/* check that the DMA controller is really idle */
+	if (!dma_is_idle(chan))
+		chan_err(chan, "irq: controller not idle!\n");
+
+	/* check that we handled all of the bits */
+	if (stat)
+		chan_err(chan, "irq: unhandled sr 0x%08x\n", stat);
+
+	/*
+	 * Schedule the tasklet to handle all cleanup of the current
+	 * transaction. It will start a new transaction if there is
+	 * one pending.
+	 */
+	tasklet_schedule(&chan->tasklet);
+	chan_dbg(chan, "irq: Exit\n");
+	return IRQ_HANDLED;
+}
+
+static void dma_do_tasklet(unsigned long data)
+{
+	struct fsldma_chan *chan = (struct fsldma_chan *)data;
+
+	chan_dbg(chan, "tasklet entry\n");
+
+	spin_lock_bh(&chan->desc_lock);
+
+	/* the hardware is now idle and ready for more */
+	chan->idle = true;
+
+	/* Run all cleanup for descriptors which have been completed */
+	fsldma_cleanup_descriptors(chan);
+
+	spin_unlock_bh(&chan->desc_lock);
+
+	chan_dbg(chan, "tasklet exit\n");
+}
+
+static irqreturn_t fsldma_ctrl_irq(int irq, void *data)
+{
+	struct fsldma_device *fdev = data;
+	struct fsldma_chan *chan;
+	unsigned int handled = 0;
+	u32 gsr, mask;
+	int i;
+
+	gsr = (fdev->feature & FSL_DMA_BIG_ENDIAN) ? in_be32(fdev->regs)
+						   : in_le32(fdev->regs);
+	mask = 0xff000000;
+	dev_dbg(fdev->dev, "IRQ: gsr 0x%.8x\n", gsr);
+
+	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+		chan = fdev->chan[i];
+		if (!chan)
+			continue;
+
+		if (gsr & mask) {
+			dev_dbg(fdev->dev, "IRQ: chan %d\n", chan->id);
+			fsldma_chan_irq(irq, chan);
+			handled++;
+		}
+
+		gsr &= ~mask;
+		mask >>= 8;
+	}
+
+	return IRQ_RETVAL(handled);
+}
+
+static void fsldma_free_irqs(struct fsldma_device *fdev)
+{
+	struct fsldma_chan *chan;
+	int i;
+
+	if (fdev->irq) {
+		dev_dbg(fdev->dev, "free per-controller IRQ\n");
+		free_irq(fdev->irq, fdev);
+		return;
+	}
+
+	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+		chan = fdev->chan[i];
+		if (chan && chan->irq) {
+			chan_dbg(chan, "free per-channel IRQ\n");
+			free_irq(chan->irq, chan);
+		}
+	}
+}
+
+static int fsldma_request_irqs(struct fsldma_device *fdev)
+{
+	struct fsldma_chan *chan;
+	int ret;
+	int i;
+
+	/* if we have a per-controller IRQ, use that */
+	if (fdev->irq) {
+		dev_dbg(fdev->dev, "request per-controller IRQ\n");
+		ret = request_irq(fdev->irq, fsldma_ctrl_irq, IRQF_SHARED,
+				  "fsldma-controller", fdev);
+		return ret;
+	}
+
+	/* no per-controller IRQ, use the per-channel IRQs */
+	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+		chan = fdev->chan[i];
+		if (!chan)
+			continue;
+
+		if (!chan->irq) {
+			chan_err(chan, "interrupts property missing in device tree\n");
+			ret = -ENODEV;
+			goto out_unwind;
+		}
+
+		chan_dbg(chan, "request per-channel IRQ\n");
+		ret = request_irq(chan->irq, fsldma_chan_irq, IRQF_SHARED,
+				  "fsldma-chan", chan);
+		if (ret) {
+			chan_err(chan, "unable to request per-channel IRQ\n");
+			goto out_unwind;
+		}
+	}
+
+	return 0;
+
+out_unwind:
+	for (/* none */; i >= 0; i--) {
+		chan = fdev->chan[i];
+		if (!chan)
+			continue;
+
+		if (!chan->irq)
+			continue;
+
+		free_irq(chan->irq, chan);
+	}
+
+	return ret;
+}
+
+/*----------------------------------------------------------------------------*/
+/* OpenFirmware Subsystem                                                     */
+/*----------------------------------------------------------------------------*/
+
+static int fsl_dma_chan_probe(struct fsldma_device *fdev,
+	struct device_node *node, u32 feature, const char *compatible)
+{
+	struct fsldma_chan *chan;
+	struct resource res;
+	int err;
+
+	/* alloc channel */
+	chan = kzalloc(sizeof(*chan), GFP_KERNEL);
+	if (!chan) {
+		err = -ENOMEM;
+		goto out_return;
+	}
+
+	/* ioremap registers for use */
+	chan->regs = of_iomap(node, 0);
+	if (!chan->regs) {
+		dev_err(fdev->dev, "unable to ioremap registers\n");
+		err = -ENOMEM;
+		goto out_free_chan;
+	}
+
+	err = of_address_to_resource(node, 0, &res);
+	if (err) {
+		dev_err(fdev->dev, "unable to find 'reg' property\n");
+		goto out_iounmap_regs;
+	}
+
+	chan->feature = feature;
+	if (!fdev->feature)
+		fdev->feature = chan->feature;
+
+	/*
+	 * If the DMA device's feature is different from the feature
+	 * of its channels, report the bug.
+	 */
+	WARN_ON(fdev->feature != chan->feature);
+
+	chan->dev = fdev->dev;
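+	/*
+	 * The channel id is derived from the offset of the channel's register
+	 * block: the blocks are 0x80 bytes apart, hence the shift by 7.
+	 */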
+	chan->id = (res.start & 0xfff) < 0x300 ?
+		   ((res.start - 0x100) & 0xfff) >> 7 :
+		   ((res.start - 0x200) & 0xfff) >> 7;
+	if (chan->id >= FSL_DMA_MAX_CHANS_PER_DEVICE) {
+		dev_err(fdev->dev, "too many channels for device\n");
+		err = -EINVAL;
+		goto out_iounmap_regs;
+	}
+
+	fdev->chan[chan->id] = chan;
+	tasklet_init(&chan->tasklet, dma_do_tasklet, (unsigned long)chan);
+	snprintf(chan->name, sizeof(chan->name), "chan%d", chan->id);
+
+	/* Initialize the channel */
+	dma_init(chan);
+
+	/* Clear cdar registers */
+	set_cdar(chan, 0);
+
+	switch (chan->feature & FSL_DMA_IP_MASK) {
+	case FSL_DMA_IP_85XX:
+		chan->toggle_ext_pause = fsl_chan_toggle_ext_pause;
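+		/* fall through: 85XX channels also get the 83XX callbacks */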
+	case FSL_DMA_IP_83XX:
+		chan->toggle_ext_start = fsl_chan_toggle_ext_start;
+		chan->set_src_loop_size = fsl_chan_set_src_loop_size;
+		chan->set_dst_loop_size = fsl_chan_set_dst_loop_size;
+		chan->set_request_count = fsl_chan_set_request_count;
+	}
+
+	spin_lock_init(&chan->desc_lock);
+	INIT_LIST_HEAD(&chan->ld_pending);
+	INIT_LIST_HEAD(&chan->ld_running);
+	INIT_LIST_HEAD(&chan->ld_completed);
+	chan->idle = true;
+#ifdef CONFIG_PM
+	chan->pm_state = RUNNING;
+#endif
+
+	chan->common.device = &fdev->common;
+	dma_cookie_init(&chan->common);
+
+	/* find the IRQ line, if it exists in the device tree */
+	chan->irq = irq_of_parse_and_map(node, 0);
+
+	/* Add the channel to DMA device channel list */
+	list_add_tail(&chan->common.device_node, &fdev->common.channels);
+
+	dev_info(fdev->dev, "#%d (%s), irq %d\n", chan->id, compatible,
+		 chan->irq ? chan->irq : fdev->irq);
+
+	return 0;
+
+out_iounmap_regs:
+	iounmap(chan->regs);
+out_free_chan:
+	kfree(chan);
+out_return:
+	return err;
+}
+
+static void fsl_dma_chan_remove(struct fsldma_chan *chan)
+{
+	irq_dispose_mapping(chan->irq);
+	list_del(&chan->common.device_node);
+	iounmap(chan->regs);
+	kfree(chan);
+}
+
+static int fsldma_of_probe(struct platform_device *op)
+{
+	struct fsldma_device *fdev;
+	struct device_node *child;
+	int err;
+
+	fdev = kzalloc(sizeof(*fdev), GFP_KERNEL);
+	if (!fdev) {
+		err = -ENOMEM;
+		goto out_return;
+	}
+
+	fdev->dev = &op->dev;
+	INIT_LIST_HEAD(&fdev->common.channels);
+
+	/* ioremap the registers for use */
+	fdev->regs = of_iomap(op->dev.of_node, 0);
+	if (!fdev->regs) {
+		dev_err(&op->dev, "unable to ioremap registers\n");
+		err = -ENOMEM;
+		goto out_free;
+	}
+
+	/* map the channel IRQ if it exists, but don't hook up the handler yet */
+	fdev->irq = irq_of_parse_and_map(op->dev.of_node, 0);
+
+	dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
+	dma_cap_set(DMA_SLAVE, fdev->common.cap_mask);
+	fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
+	fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
+	fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy;
+	fdev->common.device_tx_status = fsl_tx_status;
+	fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending;
+	fdev->common.device_config = fsl_dma_device_config;
+	fdev->common.device_terminate_all = fsl_dma_device_terminate_all;
+	fdev->common.dev = &op->dev;
+
+	fdev->common.src_addr_widths = FSL_DMA_BUSWIDTHS;
+	fdev->common.dst_addr_widths = FSL_DMA_BUSWIDTHS;
+	fdev->common.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	fdev->common.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+
+	dma_set_mask(&(op->dev), DMA_BIT_MASK(36));
+
+	platform_set_drvdata(op, fdev);
+
+	/*
+	 * We cannot use of_platform_bus_probe() because there is no
+	 * of_platform_bus_remove(). Instead, we manually instantiate every DMA
+	 * channel object.
+	 */
+	for_each_child_of_node(op->dev.of_node, child) {
+		if (of_device_is_compatible(child, "fsl,eloplus-dma-channel")) {
+			fsl_dma_chan_probe(fdev, child,
+				FSL_DMA_IP_85XX | FSL_DMA_BIG_ENDIAN,
+				"fsl,eloplus-dma-channel");
+		}
+
+		if (of_device_is_compatible(child, "fsl,elo-dma-channel")) {
+			fsl_dma_chan_probe(fdev, child,
+				FSL_DMA_IP_83XX | FSL_DMA_LITTLE_ENDIAN,
+				"fsl,elo-dma-channel");
+		}
+	}
+
+	/*
+	 * Hook up the IRQ handler(s).
+	 *
+	 * If we have a per-controller interrupt, we prefer that to the
+	 * per-channel interrupts to reduce the number of shared interrupt
+	 * handlers on the same IRQ line.
+	 */
+	err = fsldma_request_irqs(fdev);
+	if (err) {
+		dev_err(fdev->dev, "unable to request IRQs\n");
+		goto out_free_fdev;
+	}
+
+	dma_async_device_register(&fdev->common);
+	return 0;
+
+out_free_fdev:
+	irq_dispose_mapping(fdev->irq);
+	iounmap(fdev->regs);
+out_free:
+	kfree(fdev);
+out_return:
+	return err;
+}
+
+static int fsldma_of_remove(struct platform_device *op)
+{
+	struct fsldma_device *fdev;
+	unsigned int i;
+
+	fdev = platform_get_drvdata(op);
+	dma_async_device_unregister(&fdev->common);
+
+	fsldma_free_irqs(fdev);
+
+	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+		if (fdev->chan[i])
+			fsl_dma_chan_remove(fdev->chan[i]);
+	}
+
+	iounmap(fdev->regs);
+	kfree(fdev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int fsldma_suspend_late(struct device *dev)
+{
+	struct fsldma_device *fdev = dev_get_drvdata(dev);
+	struct fsldma_chan *chan;
+	int i;
+
+	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+		chan = fdev->chan[i];
+		if (!chan)
+			continue;
+
+		spin_lock_bh(&chan->desc_lock);
+		if (unlikely(!chan->idle))
+			goto out;
+		chan->regs_save.mr = get_mr(chan);
+		chan->pm_state = SUSPENDED;
+		spin_unlock_bh(&chan->desc_lock);
+	}
+	return 0;
+
+out:
+	for (; i >= 0; i--) {
+		chan = fdev->chan[i];
+		if (!chan)
+			continue;
+		chan->pm_state = RUNNING;
+		spin_unlock_bh(&chan->desc_lock);
+	}
+	return -EBUSY;
+}
+
+static int fsldma_resume_early(struct device *dev)
+{
+	struct fsldma_device *fdev = dev_get_drvdata(dev);
+	struct fsldma_chan *chan;
+	u32 mode;
+	int i;
+
+	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+		chan = fdev->chan[i];
+		if (!chan)
+			continue;
+
+		spin_lock_bh(&chan->desc_lock);
+		mode = chan->regs_save.mr
+			& ~FSL_DMA_MR_CS & ~FSL_DMA_MR_CC & ~FSL_DMA_MR_CA;
+		set_mr(chan, mode);
+		chan->pm_state = RUNNING;
+		spin_unlock_bh(&chan->desc_lock);
+	}
+
+	return 0;
+}
+
+static const struct dev_pm_ops fsldma_pm_ops = {
+	.suspend_late	= fsldma_suspend_late,
+	.resume_early	= fsldma_resume_early,
+};
+#endif
+
+static const struct of_device_id fsldma_of_ids[] = {
+	{ .compatible = "fsl,elo3-dma", },
+	{ .compatible = "fsl,eloplus-dma", },
+	{ .compatible = "fsl,elo-dma", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, fsldma_of_ids);
+
+static struct platform_driver fsldma_of_driver = {
+	.driver = {
+		.name = "fsl-elo-dma",
+		.of_match_table = fsldma_of_ids,
+#ifdef CONFIG_PM
+		.pm = &fsldma_pm_ops,
+#endif
+	},
+	.probe = fsldma_of_probe,
+	.remove = fsldma_of_remove,
+};
+
+/*----------------------------------------------------------------------------*/
+/* Module Init / Exit                                                         */
+/*----------------------------------------------------------------------------*/
+
+static __init int fsldma_init(void)
+{
+	pr_info("Freescale Elo series DMA driver\n");
+	return platform_driver_register(&fsldma_of_driver);
+}
+
+static void __exit fsldma_exit(void)
+{
+	platform_driver_unregister(&fsldma_of_driver);
+}
+
+subsys_initcall(fsldma_init);
+module_exit(fsldma_exit);
+
+MODULE_DESCRIPTION("Freescale Elo series DMA driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
new file mode 100644
index 0000000..4787d48
--- /dev/null
+++ b/drivers/dma/fsldma.h
@@ -0,0 +1,242 @@
+/*
+ * Copyright (C) 2007-2010 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author:
+ *   Zhang Wei <wei.zhang@freescale.com>, Jul 2007
+ *   Ebony Zhu <ebony.zhu@freescale.com>, May 2007
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+#ifndef __DMA_FSLDMA_H
+#define __DMA_FSLDMA_H
+
+#include <linux/device.h>
+#include <linux/dmapool.h>
+#include <linux/dmaengine.h>
+
+/*
+ * Define the data structures needed by the Freescale
+ * MPC8540 and MPC8349 DMA controllers.
+ */
+#define FSL_DMA_MR_CS		0x00000001
+#define FSL_DMA_MR_CC		0x00000002
+#define FSL_DMA_MR_CA		0x00000008
+#define FSL_DMA_MR_EIE		0x00000040
+#define FSL_DMA_MR_XFE		0x00000020
+#define FSL_DMA_MR_EOLNIE	0x00000100
+#define FSL_DMA_MR_EOLSIE	0x00000080
+#define FSL_DMA_MR_EOSIE	0x00000200
+#define FSL_DMA_MR_CDSM		0x00000010
+#define FSL_DMA_MR_CTM		0x00000004
+#define FSL_DMA_MR_EMP_EN	0x00200000
+#define FSL_DMA_MR_EMS_EN	0x00040000
+#define FSL_DMA_MR_DAHE		0x00002000
+#define FSL_DMA_MR_SAHE		0x00001000
+
+#define FSL_DMA_MR_SAHTS_MASK	0x0000C000
+#define FSL_DMA_MR_DAHTS_MASK	0x00030000
+#define FSL_DMA_MR_BWC_MASK	0x0f000000
+
+/*
+ * Bandwidth/pause control determines how many bytes a given
+ * channel is allowed to transfer before the DMA engine pauses
+ * the current channel and switches to the next channel
+ */
+#define FSL_DMA_MR_BWC         0x0A000000
+
+/* Special MR definition for MPC8349 */
+#define FSL_DMA_MR_EOTIE	0x00000080
+#define FSL_DMA_MR_PRC_RM	0x00000800
+
+#define FSL_DMA_SR_CH		0x00000020
+#define FSL_DMA_SR_PE		0x00000010
+#define FSL_DMA_SR_CB		0x00000004
+#define FSL_DMA_SR_TE		0x00000080
+#define FSL_DMA_SR_EOSI		0x00000002
+#define FSL_DMA_SR_EOLSI	0x00000001
+#define FSL_DMA_SR_EOCDI	0x00000001
+#define FSL_DMA_SR_EOLNI	0x00000008
+
+#define FSL_DMA_SATR_SBPATMU			0x20000000
+#define FSL_DMA_SATR_STRANSINT_RIO		0x00c00000
+#define FSL_DMA_SATR_SREADTYPE_SNOOP_READ	0x00050000
+#define FSL_DMA_SATR_SREADTYPE_BP_IORH		0x00020000
+#define FSL_DMA_SATR_SREADTYPE_BP_NREAD		0x00040000
+#define FSL_DMA_SATR_SREADTYPE_BP_MREAD		0x00070000
+
+#define FSL_DMA_DATR_DBPATMU			0x20000000
+#define FSL_DMA_DATR_DTRANSINT_RIO		0x00c00000
+#define FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE	0x00050000
+#define FSL_DMA_DATR_DWRITETYPE_BP_FLUSH	0x00010000
+
+#define FSL_DMA_EOL		((u64)0x1)
+#define FSL_DMA_SNEN		((u64)0x10)
+#define FSL_DMA_EOSIE		0x8
+#define FSL_DMA_NLDA_MASK	(~(u64)0x1f)
+
+#define FSL_DMA_BCR_MAX_CNT	0x03ffffffu
+
+#define FSL_DMA_DGSR_TE		0x80
+#define FSL_DMA_DGSR_CH		0x20
+#define FSL_DMA_DGSR_PE		0x10
+#define FSL_DMA_DGSR_EOLNI	0x08
+#define FSL_DMA_DGSR_CB		0x04
+#define FSL_DMA_DGSR_EOSI	0x02
+#define FSL_DMA_DGSR_EOLSI	0x01
+
+#define FSL_DMA_BUSWIDTHS	(BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+				BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+				BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
+				BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
+typedef u64 __bitwise v64;
+typedef u32 __bitwise v32;
+
+struct fsl_dma_ld_hw {
+	v64 src_addr;
+	v64 dst_addr;
+	v64 next_ln_addr;
+	v32 count;
+	v32 reserve;
+} __attribute__((aligned(32)));
+
+struct fsl_desc_sw {
+	struct fsl_dma_ld_hw hw;
+	struct list_head node;
+	struct list_head tx_list;
+	struct dma_async_tx_descriptor async_tx;
+} __attribute__((aligned(32)));
+
+struct fsldma_chan_regs {
+	u32 mr;		/* 0x00 - Mode Register */
+	u32 sr;		/* 0x04 - Status Register */
+	u64 cdar;	/* 0x08 - Current descriptor address register */
+	u64 sar;	/* 0x10 - Source Address Register */
+	u64 dar;	/* 0x18 - Destination Address Register */
+	u32 bcr;	/* 0x20 - Byte Count Register */
+	u64 ndar;	/* 0x24 - Next Descriptor Address Register */
+};
+
+struct fsldma_chan;
+#define FSL_DMA_MAX_CHANS_PER_DEVICE 8
+
+struct fsldma_device {
+	void __iomem *regs;	/* DGSR register base */
+	struct device *dev;
+	struct dma_device common;
+	struct fsldma_chan *chan[FSL_DMA_MAX_CHANS_PER_DEVICE];
+	u32 feature;		/* Same as the DMA channels' feature */
+	int irq;		/* Per-controller IRQ */
+};
+
+/* Define macros for fsldma_chan->feature property */
+#define FSL_DMA_LITTLE_ENDIAN	0x00000000
+#define FSL_DMA_BIG_ENDIAN	0x00000001
+
+#define FSL_DMA_IP_MASK		0x00000ff0
+#define FSL_DMA_IP_85XX		0x00000010
+#define FSL_DMA_IP_83XX		0x00000020
+
+#define FSL_DMA_CHAN_PAUSE_EXT	0x00001000
+#define FSL_DMA_CHAN_START_EXT	0x00002000
+
+#ifdef CONFIG_PM
+struct fsldma_chan_regs_save {
+	u32 mr;
+};
+
+enum fsldma_pm_state {
+	RUNNING = 0,
+	SUSPENDED,
+};
+#endif
+
+struct fsldma_chan {
+	char name[8];			/* Channel name */
+	struct fsldma_chan_regs __iomem *regs;
+	spinlock_t desc_lock;		/* Descriptor operation lock */
+	/*
+	 * Descriptors which are queued to run, but have not yet been
+	 * submitted to the hardware for execution
+	 */
+	struct list_head ld_pending;
+	/*
+	 * Descriptors which are currently being executed by the hardware
+	 */
+	struct list_head ld_running;
+	/*
+	 * Descriptors which have finished execution by the hardware. These
+	 * descriptors have already had their cleanup actions run. They are
+	 * waiting for the ACK bit to be set by the async_tx API.
+	 */
+	struct list_head ld_completed;	/* Link descriptors queue */
+	struct dma_chan common;		/* DMA common channel */
+	struct dma_pool *desc_pool;	/* Descriptors pool */
+	struct device *dev;		/* Channel device */
+	int irq;			/* Channel IRQ */
+	int id;				/* Raw id of this channel */
+	struct tasklet_struct tasklet;
+	u32 feature;
+	bool idle;			/* DMA controller is idle */
+#ifdef CONFIG_PM
+	struct fsldma_chan_regs_save regs_save;
+	enum fsldma_pm_state pm_state;
+#endif
+
+	void (*toggle_ext_pause)(struct fsldma_chan *fsl_chan, int enable);
+	void (*toggle_ext_start)(struct fsldma_chan *fsl_chan, int enable);
+	void (*set_src_loop_size)(struct fsldma_chan *fsl_chan, int size);
+	void (*set_dst_loop_size)(struct fsldma_chan *fsl_chan, int size);
+	void (*set_request_count)(struct fsldma_chan *fsl_chan, int size);
+};
+
+#define to_fsl_chan(chan) container_of(chan, struct fsldma_chan, common)
+#define to_fsl_desc(lh) container_of(lh, struct fsl_desc_sw, node)
+#define tx_to_fsl_desc(tx) container_of(tx, struct fsl_desc_sw, async_tx)
+
+#ifndef __powerpc64__
+static u64 in_be64(const u64 __iomem *addr)
+{
+	return ((u64)in_be32((u32 __iomem *)addr) << 32) |
+		(in_be32((u32 __iomem *)addr + 1));
+}
+
+static void out_be64(u64 __iomem *addr, u64 val)
+{
+	out_be32((u32 __iomem *)addr, val >> 32);
+	out_be32((u32 __iomem *)addr + 1, (u32)val);
+}
+
+/* There are no asm instructions for 64-bit reverse loads and stores */
+static u64 in_le64(const u64 __iomem *addr)
+{
+	return ((u64)in_le32((u32 __iomem *)addr + 1) << 32) |
+		(in_le32((u32 __iomem *)addr));
+}
+
+static void out_le64(u64 __iomem *addr, u64 val)
+{
+	out_le32((u32 __iomem *)addr + 1, val >> 32);
+	out_le32((u32 __iomem *)addr, (u32)val);
+}
+#endif
+
+#define DMA_IN(fsl_chan, addr, width)					\
+		(((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ?		\
+			in_be##width(addr) : in_le##width(addr))
+#define DMA_OUT(fsl_chan, addr, val, width)				\
+		(((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ?		\
+			out_be##width(addr, val) : out_le##width(addr, val))
+
+#define DMA_TO_CPU(fsl_chan, d, width)					\
+		(((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ?		\
+			be##width##_to_cpu((__force __be##width)(v##width)d) : \
+			le##width##_to_cpu((__force __le##width)(v##width)d))
+#define CPU_TO_DMA(fsl_chan, c, width)					\
+		(((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ?		\
+			(__force v##width)cpu_to_be##width(c) :		\
+			(__force v##width)cpu_to_le##width(c))
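+
+/*
+ * Illustrative example (not part of the original source):
+ *
+ *	DMA_OUT(chan, &chan->regs->mr, val, 32)
+ *
+ * ends up calling out_be32(&chan->regs->mr, val) when the channel's feature
+ * word has FSL_DMA_BIG_ENDIAN set, and out_le32() otherwise; DMA_IN works
+ * the same way for reads.
+ */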
+
+#endif	/* __DMA_FSLDMA_H */
diff --git a/drivers/dma/hsu/Kconfig b/drivers/dma/hsu/Kconfig
new file mode 100644
index 0000000..c708417
--- /dev/null
+++ b/drivers/dma/hsu/Kconfig
@@ -0,0 +1,9 @@
+# DMA engine configuration for hsu
+config HSU_DMA
+	tristate
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+
+config HSU_DMA_PCI
+	tristate
+	depends on HSU_DMA && PCI
diff --git a/drivers/dma/hsu/Makefile b/drivers/dma/hsu/Makefile
new file mode 100644
index 0000000..b8f9af0
--- /dev/null
+++ b/drivers/dma/hsu/Makefile
@@ -0,0 +1,5 @@
+obj-$(CONFIG_HSU_DMA)		+= hsu_dma.o
+hsu_dma-objs		:= hsu.o
+
+obj-$(CONFIG_HSU_DMA_PCI)	+= hsu_dma_pci.o
+hsu_dma_pci-objs	:= pci.o
diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c
new file mode 100644
index 0000000..202ffa9
--- /dev/null
+++ b/drivers/dma/hsu/hsu.c
@@ -0,0 +1,508 @@
+/*
+ * Core driver for the High Speed UART DMA
+ *
+ * Copyright (C) 2015 Intel Corporation
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ *
+ * Partially based on the bits found in drivers/tty/serial/mfd.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * DMA channel allocation:
+ * 1. Even-numbered chans are used for DMA Read (UART TX), odd chans for DMA
+ *    Write (UART RX).
+ * 2. Chans 0/1 are assigned to port 0, chans 2/3 to port 1, chans 4/5 to
+ *    port 3, and so on.
+ */
+
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include "hsu.h"
+
+#define HSU_DMA_BUSWIDTHS				\
+	BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED)	|	\
+	BIT(DMA_SLAVE_BUSWIDTH_1_BYTE)		|	\
+	BIT(DMA_SLAVE_BUSWIDTH_2_BYTES)		|	\
+	BIT(DMA_SLAVE_BUSWIDTH_3_BYTES)		|	\
+	BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)		|	\
+	BIT(DMA_SLAVE_BUSWIDTH_8_BYTES)		|	\
+	BIT(DMA_SLAVE_BUSWIDTH_16_BYTES)
+
+static inline void hsu_chan_disable(struct hsu_dma_chan *hsuc)
+{
+	hsu_chan_writel(hsuc, HSU_CH_CR, 0);
+}
+
+static inline void hsu_chan_enable(struct hsu_dma_chan *hsuc)
+{
+	u32 cr = HSU_CH_CR_CHA;
+
+	if (hsuc->direction == DMA_MEM_TO_DEV)
+		cr &= ~HSU_CH_CR_CHD;
+	else if (hsuc->direction == DMA_DEV_TO_MEM)
+		cr |= HSU_CH_CR_CHD;
+
+	hsu_chan_writel(hsuc, HSU_CH_CR, cr);
+}
+
+static void hsu_dma_chan_start(struct hsu_dma_chan *hsuc)
+{
+	struct dma_slave_config *config = &hsuc->config;
+	struct hsu_dma_desc *desc = hsuc->desc;
+	u32 bsr = 0, mtsr = 0;	/* to shut the compiler up */
+	u32 dcr = HSU_CH_DCR_CHSOE | HSU_CH_DCR_CHEI;
+	unsigned int i, count;
+
+	if (hsuc->direction == DMA_MEM_TO_DEV) {
+		bsr = config->dst_maxburst;
+		mtsr = config->src_addr_width;
+	} else if (hsuc->direction == DMA_DEV_TO_MEM) {
+		bsr = config->src_maxburst;
+		mtsr = config->dst_addr_width;
+	}
+
+	hsu_chan_disable(hsuc);
+
+	hsu_chan_writel(hsuc, HSU_CH_DCR, 0);
+	hsu_chan_writel(hsuc, HSU_CH_BSR, bsr);
+	hsu_chan_writel(hsuc, HSU_CH_MTSR, mtsr);
+
+	/* Set descriptors */
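+	/*
+	 * At most HSU_DMA_CHAN_NR_DESC descriptors can be programmed at once;
+	 * any remaining sg entries are submitted from the interrupt path once
+	 * these have completed (see hsu_dma_do_irq()).
+	 */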
+	count = desc->nents - desc->active;
+	for (i = 0; i < count && i < HSU_DMA_CHAN_NR_DESC; i++) {
+		hsu_chan_writel(hsuc, HSU_CH_DxSAR(i), desc->sg[i].addr);
+		hsu_chan_writel(hsuc, HSU_CH_DxTSR(i), desc->sg[i].len);
+
+		/* Prepare value for DCR */
+		dcr |= HSU_CH_DCR_DESCA(i);
+		dcr |= HSU_CH_DCR_CHTOI(i);	/* timeout bit, see HSU Errata 1 */
+
+		desc->active++;
+	}
+	/* Only for the last descriptor in the chain */
+	dcr |= HSU_CH_DCR_CHSOD(count - 1);
+	dcr |= HSU_CH_DCR_CHDI(count - 1);
+
+	hsu_chan_writel(hsuc, HSU_CH_DCR, dcr);
+
+	hsu_chan_enable(hsuc);
+}
+
+static void hsu_dma_stop_channel(struct hsu_dma_chan *hsuc)
+{
+	hsu_chan_disable(hsuc);
+	hsu_chan_writel(hsuc, HSU_CH_DCR, 0);
+}
+
+static void hsu_dma_start_channel(struct hsu_dma_chan *hsuc)
+{
+	hsu_dma_chan_start(hsuc);
+}
+
+static void hsu_dma_start_transfer(struct hsu_dma_chan *hsuc)
+{
+	struct virt_dma_desc *vdesc;
+
+	/* Get the next descriptor */
+	vdesc = vchan_next_desc(&hsuc->vchan);
+	if (!vdesc) {
+		hsuc->desc = NULL;
+		return;
+	}
+
+	list_del(&vdesc->node);
+	hsuc->desc = to_hsu_dma_desc(vdesc);
+
+	/* Start the channel with a new descriptor */
+	hsu_dma_start_channel(hsuc);
+}
+
+/*
+ *      hsu_dma_get_status() - get DMA channel status
+ *      @chip: HSUART DMA chip
+ *      @nr: DMA channel number
+ *      @status: pointer for DMA Channel Status Register value
+ *
+ *      Description:
+ *      The function reads and clears the DMA Channel Status Register, checks
+ *      if it was a timeout interrupt and returns a corresponding value.
+ *
+ *      Caller should provide a valid pointer for the DMA Channel Status
+ *      Register value that will be returned in @status.
+ *
+ *      Return:
+ *      1 for DMA timeout status, 0 for other DMA status, or error code for
+ *      invalid parameters or no interrupt pending.
+ */
+int hsu_dma_get_status(struct hsu_dma_chip *chip, unsigned short nr,
+		       u32 *status)
+{
+	struct hsu_dma_chan *hsuc;
+	unsigned long flags;
+	u32 sr;
+
+	/* Sanity check */
+	if (nr >= chip->hsu->nr_channels)
+		return -EINVAL;
+
+	hsuc = &chip->hsu->chan[nr];
+
+	/*
+	 * Regardless of the situation, we need to read and clear the IRQ
+	 * status; there is a bug, see Errata 5, HSD 2900918.
+	 */
+	spin_lock_irqsave(&hsuc->vchan.lock, flags);
+	sr = hsu_chan_readl(hsuc, HSU_CH_SR);
+	spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
+
+	/* Check if any interrupt is pending */
+	sr &= ~(HSU_CH_SR_DESCE_ANY | HSU_CH_SR_CDESC_ANY);
+	if (!sr)
+		return -EIO;
+
+	/* Timeout IRQ: we need to wait for some time, see Errata 2 */
+	if (sr & HSU_CH_SR_DESCTO_ANY)
+		udelay(2);
+
+	/*
+	 * At this point, at least one of the Descriptor Time Out, Channel
+	 * Error or Descriptor Done bits must be set. Clear the Descriptor
+	 * Time Out bits; if sr is still non-zero, it must be a channel error
+	 * or descriptor done, which have higher priority than a timeout and
+	 * are handled in hsu_dma_do_irq(). Otherwise, it must be a timeout.
+	 */
+	sr &= ~HSU_CH_SR_DESCTO_ANY;
+
+	*status = sr;
+
+	return sr ? 0 : 1;
+}
+EXPORT_SYMBOL_GPL(hsu_dma_get_status);
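+
+/*
+ * Illustrative usage only (not part of the original sources): a serial
+ * driver's interrupt path would typically consume the return values along
+ * these lines, where handle_uart_timeout() is a stand-in for the caller's
+ * own timeout handling:
+ *
+ *	u32 status;
+ *	int ret = hsu_dma_get_status(chip, nr, &status);
+ *
+ *	if (ret < 0)
+ *		return IRQ_NONE;
+ *	else if (ret == 0)
+ *		hsu_dma_do_irq(chip, nr, status);
+ *	else
+ *		handle_uart_timeout();
+ */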
+
+/*
+ *      hsu_dma_do_irq() - DMA interrupt handler
+ *      @chip: HSUART DMA chip
+ *      @nr: DMA channel number
+ *      @status: Channel Status Register value
+ *
+ *      Description:
+ *      This function handles Channel Error and Descriptor Done interrupts.
+ *      This function should be called after determining that the DMA interrupt
+ *      is not a normal timeout interrupt, i.e. hsu_dma_get_status() returned 0.
+ *
+ *      Return:
+ *      0 for invalid channel number, 1 otherwise.
+ */
+int hsu_dma_do_irq(struct hsu_dma_chip *chip, unsigned short nr, u32 status)
+{
+	struct hsu_dma_chan *hsuc;
+	struct hsu_dma_desc *desc;
+	unsigned long flags;
+
+	/* Sanity check */
+	if (nr >= chip->hsu->nr_channels)
+		return 0;
+
+	hsuc = &chip->hsu->chan[nr];
+
+	spin_lock_irqsave(&hsuc->vchan.lock, flags);
+	desc = hsuc->desc;
+	if (desc) {
+		if (status & HSU_CH_SR_CHE) {
+			desc->status = DMA_ERROR;
+		} else if (desc->active < desc->nents) {
+			hsu_dma_start_channel(hsuc);
+		} else {
+			vchan_cookie_complete(&desc->vdesc);
+			desc->status = DMA_COMPLETE;
+			hsu_dma_start_transfer(hsuc);
+		}
+	}
+	spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(hsu_dma_do_irq);
+
+static struct hsu_dma_desc *hsu_dma_alloc_desc(unsigned int nents)
+{
+	struct hsu_dma_desc *desc;
+
+	desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+	if (!desc)
+		return NULL;
+
+	desc->sg = kcalloc(nents, sizeof(*desc->sg), GFP_NOWAIT);
+	if (!desc->sg) {
+		kfree(desc);
+		return NULL;
+	}
+
+	return desc;
+}
+
+static void hsu_dma_desc_free(struct virt_dma_desc *vdesc)
+{
+	struct hsu_dma_desc *desc = to_hsu_dma_desc(vdesc);
+
+	kfree(desc->sg);
+	kfree(desc);
+}
+
+static struct dma_async_tx_descriptor *hsu_dma_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan);
+	struct hsu_dma_desc *desc;
+	struct scatterlist *sg;
+	unsigned int i;
+
+	desc = hsu_dma_alloc_desc(sg_len);
+	if (!desc)
+		return NULL;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		desc->sg[i].addr = sg_dma_address(sg);
+		desc->sg[i].len = sg_dma_len(sg);
+
+		desc->length += sg_dma_len(sg);
+	}
+
+	desc->nents = sg_len;
+	desc->direction = direction;
+	/* desc->active = 0 by kzalloc */
+	desc->status = DMA_IN_PROGRESS;
+
+	return vchan_tx_prep(&hsuc->vchan, &desc->vdesc, flags);
+}
+
+static void hsu_dma_issue_pending(struct dma_chan *chan)
+{
+	struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&hsuc->vchan.lock, flags);
+	if (vchan_issue_pending(&hsuc->vchan) && !hsuc->desc)
+		hsu_dma_start_transfer(hsuc);
+	spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
+}
+
+static size_t hsu_dma_active_desc_size(struct hsu_dma_chan *hsuc)
+{
+	struct hsu_dma_desc *desc = hsuc->desc;
+	size_t bytes = 0;
+	int i;
+
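+	/*
+	 * Residue is the total length of the sg entries that have not been
+	 * programmed yet plus whatever the hardware descriptor transfer size
+	 * registers still report for the ones currently in flight.
+	 */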
+	for (i = desc->active; i < desc->nents; i++)
+		bytes += desc->sg[i].len;
+
+	i = HSU_DMA_CHAN_NR_DESC - 1;
+	do {
+		bytes += hsu_chan_readl(hsuc, HSU_CH_DxTSR(i));
+	} while (--i >= 0);
+
+	return bytes;
+}
+
+static enum dma_status hsu_dma_tx_status(struct dma_chan *chan,
+	dma_cookie_t cookie, struct dma_tx_state *state)
+{
+	struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan);
+	struct virt_dma_desc *vdesc;
+	enum dma_status status;
+	size_t bytes;
+	unsigned long flags;
+
+	status = dma_cookie_status(chan, cookie, state);
+	if (status == DMA_COMPLETE)
+		return status;
+
+	spin_lock_irqsave(&hsuc->vchan.lock, flags);
+	vdesc = vchan_find_desc(&hsuc->vchan, cookie);
+	if (hsuc->desc && cookie == hsuc->desc->vdesc.tx.cookie) {
+		bytes = hsu_dma_active_desc_size(hsuc);
+		dma_set_residue(state, bytes);
+		status = hsuc->desc->status;
+	} else if (vdesc) {
+		bytes = to_hsu_dma_desc(vdesc)->length;
+		dma_set_residue(state, bytes);
+	}
+	spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
+
+	return status;
+}
+
+static int hsu_dma_slave_config(struct dma_chan *chan,
+				struct dma_slave_config *config)
+{
+	struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan);
+
+	/* Check if chan will be configured for slave transfers */
+	if (!is_slave_direction(config->direction))
+		return -EINVAL;
+
+	memcpy(&hsuc->config, config, sizeof(hsuc->config));
+
+	return 0;
+}
+
+static int hsu_dma_pause(struct dma_chan *chan)
+{
+	struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&hsuc->vchan.lock, flags);
+	if (hsuc->desc && hsuc->desc->status == DMA_IN_PROGRESS) {
+		hsu_chan_disable(hsuc);
+		hsuc->desc->status = DMA_PAUSED;
+	}
+	spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
+
+	return 0;
+}
+
+static int hsu_dma_resume(struct dma_chan *chan)
+{
+	struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&hsuc->vchan.lock, flags);
+	if (hsuc->desc && hsuc->desc->status == DMA_PAUSED) {
+		hsuc->desc->status = DMA_IN_PROGRESS;
+		hsu_chan_enable(hsuc);
+	}
+	spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
+
+	return 0;
+}
+
+static int hsu_dma_terminate_all(struct dma_chan *chan)
+{
+	struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&hsuc->vchan.lock, flags);
+
+	hsu_dma_stop_channel(hsuc);
+	if (hsuc->desc) {
+		hsu_dma_desc_free(&hsuc->desc->vdesc);
+		hsuc->desc = NULL;
+	}
+
+	vchan_get_all_descriptors(&hsuc->vchan, &head);
+	spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
+	vchan_dma_desc_free_list(&hsuc->vchan, &head);
+
+	return 0;
+}
+
+static void hsu_dma_free_chan_resources(struct dma_chan *chan)
+{
+	vchan_free_chan_resources(to_virt_chan(chan));
+}
+
+static void hsu_dma_synchronize(struct dma_chan *chan)
+{
+	struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan);
+
+	vchan_synchronize(&hsuc->vchan);
+}
+
+int hsu_dma_probe(struct hsu_dma_chip *chip)
+{
+	struct hsu_dma *hsu;
+	void __iomem *addr = chip->regs + chip->offset;
+	unsigned short i;
+	int ret;
+
+	hsu = devm_kzalloc(chip->dev, sizeof(*hsu), GFP_KERNEL);
+	if (!hsu)
+		return -ENOMEM;
+
+	chip->hsu = hsu;
+
+	/* Calculate nr_channels from the IO space length */
+	hsu->nr_channels = (chip->length - chip->offset) / HSU_DMA_CHAN_LENGTH;
+
+	hsu->chan = devm_kcalloc(chip->dev, hsu->nr_channels,
+				 sizeof(*hsu->chan), GFP_KERNEL);
+	if (!hsu->chan)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&hsu->dma.channels);
+	for (i = 0; i < hsu->nr_channels; i++) {
+		struct hsu_dma_chan *hsuc = &hsu->chan[i];
+
+		hsuc->vchan.desc_free = hsu_dma_desc_free;
+		vchan_init(&hsuc->vchan, &hsu->dma);
+
+		hsuc->direction = (i & 0x1) ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV;
+		hsuc->reg = addr + i * HSU_DMA_CHAN_LENGTH;
+	}
+
+	dma_cap_set(DMA_SLAVE, hsu->dma.cap_mask);
+	dma_cap_set(DMA_PRIVATE, hsu->dma.cap_mask);
+
+	hsu->dma.device_free_chan_resources = hsu_dma_free_chan_resources;
+
+	hsu->dma.device_prep_slave_sg = hsu_dma_prep_slave_sg;
+
+	hsu->dma.device_issue_pending = hsu_dma_issue_pending;
+	hsu->dma.device_tx_status = hsu_dma_tx_status;
+
+	hsu->dma.device_config = hsu_dma_slave_config;
+	hsu->dma.device_pause = hsu_dma_pause;
+	hsu->dma.device_resume = hsu_dma_resume;
+	hsu->dma.device_terminate_all = hsu_dma_terminate_all;
+	hsu->dma.device_synchronize = hsu_dma_synchronize;
+
+	hsu->dma.src_addr_widths = HSU_DMA_BUSWIDTHS;
+	hsu->dma.dst_addr_widths = HSU_DMA_BUSWIDTHS;
+	hsu->dma.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	hsu->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+	hsu->dma.dev = chip->dev;
+
+	dma_set_max_seg_size(hsu->dma.dev, HSU_CH_DxTSR_MASK);
+
+	ret = dma_async_device_register(&hsu->dma);
+	if (ret)
+		return ret;
+
+	dev_info(chip->dev, "Found HSU DMA, %d channels\n", hsu->nr_channels);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hsu_dma_probe);
+
+int hsu_dma_remove(struct hsu_dma_chip *chip)
+{
+	struct hsu_dma *hsu = chip->hsu;
+	unsigned short i;
+
+	dma_async_device_unregister(&hsu->dma);
+
+	for (i = 0; i < hsu->nr_channels; i++) {
+		struct hsu_dma_chan *hsuc = &hsu->chan[i];
+
+		tasklet_kill(&hsuc->vchan.task);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hsu_dma_remove);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("High Speed UART DMA core driver");
+MODULE_AUTHOR("Andy Shevchenko <andriy.shevchenko@linux.intel.com>");
diff --git a/drivers/dma/hsu/hsu.h b/drivers/dma/hsu/hsu.h
new file mode 100644
index 0000000..486b023
--- /dev/null
+++ b/drivers/dma/hsu/hsu.h
@@ -0,0 +1,126 @@
+/*
+ * Driver for the High Speed UART DMA
+ *
+ * Copyright (C) 2015 Intel Corporation
+ *
+ * Partially based on the bits found in drivers/tty/serial/mfd.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __DMA_HSU_H__
+#define __DMA_HSU_H__
+
+#include <linux/spinlock.h>
+#include <linux/dma/hsu.h>
+
+#include "../virt-dma.h"
+
+#define HSU_CH_SR		0x00			/* channel status */
+#define HSU_CH_CR		0x04			/* channel control */
+#define HSU_CH_DCR		0x08			/* descriptor control */
+#define HSU_CH_BSR		0x10			/* FIFO buffer size */
+#define HSU_CH_MTSR		0x14			/* minimum transfer size */
+#define HSU_CH_DxSAR(x)		(0x20 + 8 * (x))	/* desc start addr */
+#define HSU_CH_DxTSR(x)		(0x24 + 8 * (x))	/* desc transfer size */
+#define HSU_CH_D0SAR		0x20			/* desc 0 start addr */
+#define HSU_CH_D0TSR		0x24			/* desc 0 transfer size */
+#define HSU_CH_D1SAR		0x28
+#define HSU_CH_D1TSR		0x2c
+#define HSU_CH_D2SAR		0x30
+#define HSU_CH_D2TSR		0x34
+#define HSU_CH_D3SAR		0x38
+#define HSU_CH_D3TSR		0x3c
+
+#define HSU_DMA_CHAN_NR_DESC	4
+#define HSU_DMA_CHAN_LENGTH	0x40
+
+/* Bits in HSU_CH_SR */
+#define HSU_CH_SR_DESCTO(x)	BIT(8 + (x))
+#define HSU_CH_SR_DESCTO_ANY	(BIT(11) | BIT(10) | BIT(9) | BIT(8))
+#define HSU_CH_SR_CHE		BIT(15)
+#define HSU_CH_SR_DESCE(x)	BIT(16 + (x))
+#define HSU_CH_SR_DESCE_ANY	(BIT(19) | BIT(18) | BIT(17) | BIT(16))
+#define HSU_CH_SR_CDESC_ANY	(BIT(31) | BIT(30))
+
+/* Bits in HSU_CH_CR */
+#define HSU_CH_CR_CHA		BIT(0)
+#define HSU_CH_CR_CHD		BIT(1)
+
+/* Bits in HSU_CH_DCR */
+#define HSU_CH_DCR_DESCA(x)	BIT(0 + (x))
+#define HSU_CH_DCR_CHSOD(x)	BIT(8 + (x))
+#define HSU_CH_DCR_CHSOTO	BIT(14)
+#define HSU_CH_DCR_CHSOE	BIT(15)
+#define HSU_CH_DCR_CHDI(x)	BIT(16 + (x))
+#define HSU_CH_DCR_CHEI		BIT(23)
+#define HSU_CH_DCR_CHTOI(x)	BIT(24 + (x))
+
+/* Bits in HSU_CH_DxTSR */
+#define HSU_CH_DxTSR_MASK	GENMASK(15, 0)
+#define HSU_CH_DxTSR_TSR(x)	((x) & HSU_CH_DxTSR_MASK)
+
+struct hsu_dma_sg {
+	dma_addr_t addr;
+	unsigned int len;
+};
+
+struct hsu_dma_desc {
+	struct virt_dma_desc vdesc;
+	enum dma_transfer_direction direction;
+	struct hsu_dma_sg *sg;
+	unsigned int nents;
+	size_t length;
+	unsigned int active;
+	enum dma_status status;
+};
+
+static inline struct hsu_dma_desc *to_hsu_dma_desc(struct virt_dma_desc *vdesc)
+{
+	return container_of(vdesc, struct hsu_dma_desc, vdesc);
+}
+
+struct hsu_dma_chan {
+	struct virt_dma_chan vchan;
+
+	void __iomem *reg;
+
+	/* hardware configuration */
+	enum dma_transfer_direction direction;
+	struct dma_slave_config config;
+
+	struct hsu_dma_desc *desc;
+};
+
+static inline struct hsu_dma_chan *to_hsu_dma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct hsu_dma_chan, vchan.chan);
+}
+
+static inline u32 hsu_chan_readl(struct hsu_dma_chan *hsuc, int offset)
+{
+	return readl(hsuc->reg + offset);
+}
+
+static inline void hsu_chan_writel(struct hsu_dma_chan *hsuc, int offset,
+				   u32 value)
+{
+	writel(value, hsuc->reg + offset);
+}
+
+struct hsu_dma {
+	struct dma_device		dma;
+
+	/* channels */
+	struct hsu_dma_chan		*chan;
+	unsigned short			nr_channels;
+};
+
+static inline struct hsu_dma *to_hsu_dma(struct dma_device *ddev)
+{
+	return container_of(ddev, struct hsu_dma, dma);
+}
+
+#endif /* __DMA_HSU_H__ */
diff --git a/drivers/dma/hsu/pci.c b/drivers/dma/hsu/pci.c
new file mode 100644
index 0000000..ad45cd3
--- /dev/null
+++ b/drivers/dma/hsu/pci.c
@@ -0,0 +1,146 @@
+/*
+ * PCI driver for the High Speed UART DMA
+ *
+ * Copyright (C) 2015 Intel Corporation
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ *
+ * Partially based on the bits found in drivers/tty/serial/mfd.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "hsu.h"
+
+#define HSU_PCI_DMASR		0x00
+#define HSU_PCI_DMAISR		0x04
+
+#define HSU_PCI_CHAN_OFFSET	0x100
+
+#define PCI_DEVICE_ID_INTEL_MFLD_HSU_DMA	0x081e
+#define PCI_DEVICE_ID_INTEL_MRFLD_HSU_DMA	0x1192
+
+static irqreturn_t hsu_pci_irq(int irq, void *dev)
+{
+	struct hsu_dma_chip *chip = dev;
+	struct pci_dev *pdev = to_pci_dev(chip->dev);
+	u32 dmaisr;
+	u32 status;
+	unsigned short i;
+	int ret = 0;
+	int err;
+
+	/*
+	 * On Intel Tangier B0 and Anniedale the interrupt line, despite
+	 * having different numbers, is shared between the HSU DMA and UART
+	 * IPs. Thus on such SoCs we expect the IRQ handler to be called from
+	 * the UART driver only.
+	 */
+	if (pdev->device == PCI_DEVICE_ID_INTEL_MRFLD_HSU_DMA)
+		return IRQ_HANDLED;
+
+	dmaisr = readl(chip->regs + HSU_PCI_DMAISR);
+	for (i = 0; i < chip->hsu->nr_channels; i++) {
+		if (dmaisr & 0x1) {
+			err = hsu_dma_get_status(chip, i, &status);
+			if (err > 0)
+				ret |= 1;
+			else if (err == 0)
+				ret |= hsu_dma_do_irq(chip, i, status);
+		}
+		dmaisr >>= 1;
+	}
+
+	return IRQ_RETVAL(ret);
+}
+
+static int hsu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct hsu_dma_chip *chip;
+	int ret;
+
+	ret = pcim_enable_device(pdev);
+	if (ret)
+		return ret;
+
+	ret = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev));
+	if (ret) {
+		dev_err(&pdev->dev, "I/O memory remapping failed\n");
+		return ret;
+	}
+
+	pci_set_master(pdev);
+	pci_try_set_mwi(pdev);
+
+	ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (ret)
+		return ret;
+
+	ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (ret)
+		return ret;
+
+	chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+	if (ret < 0)
+		return ret;
+
+	chip->dev = &pdev->dev;
+	chip->regs = pcim_iomap_table(pdev)[0];
+	chip->length = pci_resource_len(pdev, 0);
+	chip->offset = HSU_PCI_CHAN_OFFSET;
+	chip->irq = pci_irq_vector(pdev, 0);
+
+	ret = hsu_dma_probe(chip);
+	if (ret)
+		return ret;
+
+	ret = request_irq(chip->irq, hsu_pci_irq, 0, "hsu_dma_pci", chip);
+	if (ret)
+		goto err_register_irq;
+
+	pci_set_drvdata(pdev, chip);
+
+	return 0;
+
+err_register_irq:
+	hsu_dma_remove(chip);
+	return ret;
+}
+
+static void hsu_pci_remove(struct pci_dev *pdev)
+{
+	struct hsu_dma_chip *chip = pci_get_drvdata(pdev);
+
+	free_irq(chip->irq, chip);
+	hsu_dma_remove(chip);
+}
+
+static const struct pci_device_id hsu_pci_id_table[] = {
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MFLD_HSU_DMA), 0 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MRFLD_HSU_DMA), 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(pci, hsu_pci_id_table);
+
+static struct pci_driver hsu_pci_driver = {
+	.name		= "hsu_dma_pci",
+	.id_table	= hsu_pci_id_table,
+	.probe		= hsu_pci_probe,
+	.remove		= hsu_pci_remove,
+};
+
+module_pci_driver(hsu_pci_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("High Speed UART DMA PCI driver");
+MODULE_AUTHOR("Andy Shevchenko <andriy.shevchenko@linux.intel.com>");
diff --git a/drivers/dma/idma64.c b/drivers/dma/idma64.c
new file mode 100644
index 0000000..1fbf9cb
--- /dev/null
+++ b/drivers/dma/idma64.c
@@ -0,0 +1,715 @@
+/*
+ * Core driver for the Intel integrated DMA 64-bit
+ *
+ * Copyright (C) 2015 Intel Corporation
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "idma64.h"
+
+/* Platform driver name */
+#define DRV_NAME		"idma64"
+
+/* For now we support only two channels */
+#define IDMA64_NR_CHAN		2
+
+/* ---------------------------------------------------------------------- */
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+	return &chan->dev->device;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void idma64_off(struct idma64 *idma64)
+{
+	unsigned short count = 100;
+
+	dma_writel(idma64, CFG, 0);
+
+	channel_clear_bit(idma64, MASK(XFER), idma64->all_chan_mask);
+	channel_clear_bit(idma64, MASK(BLOCK), idma64->all_chan_mask);
+	channel_clear_bit(idma64, MASK(SRC_TRAN), idma64->all_chan_mask);
+	channel_clear_bit(idma64, MASK(DST_TRAN), idma64->all_chan_mask);
+	channel_clear_bit(idma64, MASK(ERROR), idma64->all_chan_mask);
+
+	do {
+		cpu_relax();
+	} while (dma_readl(idma64, CFG) & IDMA64_CFG_DMA_EN && --count);
+}
+
+static void idma64_on(struct idma64 *idma64)
+{
+	dma_writel(idma64, CFG, IDMA64_CFG_DMA_EN);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void idma64_chan_init(struct idma64 *idma64, struct idma64_chan *idma64c)
+{
+	u32 cfghi = IDMA64C_CFGH_SRC_PER(1) | IDMA64C_CFGH_DST_PER(0);
+	u32 cfglo = 0;
+
+	/* Set default burst alignment */
+	cfglo |= IDMA64C_CFGL_DST_BURST_ALIGN | IDMA64C_CFGL_SRC_BURST_ALIGN;
+
+	channel_writel(idma64c, CFG_LO, cfglo);
+	channel_writel(idma64c, CFG_HI, cfghi);
+
+	/* Enable interrupts */
+	channel_set_bit(idma64, MASK(XFER), idma64c->mask);
+	channel_set_bit(idma64, MASK(ERROR), idma64c->mask);
+
+	/*
+	 * Force the controller to be turned on.
+	 *
+	 * The iDMA is turned off in ->probe() and loses context during the
+	 * system suspend / resume cycle. That's why we have to enable it each
+	 * time we use it.
+	 */
+	idma64_on(idma64);
+}
+
+static void idma64_chan_stop(struct idma64 *idma64, struct idma64_chan *idma64c)
+{
+	channel_clear_bit(idma64, CH_EN, idma64c->mask);
+}
+
+static void idma64_chan_start(struct idma64 *idma64, struct idma64_chan *idma64c)
+{
+	struct idma64_desc *desc = idma64c->desc;
+	struct idma64_hw_desc *hw = &desc->hw[0];
+
+	channel_writeq(idma64c, SAR, 0);
+	channel_writeq(idma64c, DAR, 0);
+
+	channel_writel(idma64c, CTL_HI, IDMA64C_CTLH_BLOCK_TS(~0UL));
+	channel_writel(idma64c, CTL_LO, IDMA64C_CTLL_LLP_S_EN | IDMA64C_CTLL_LLP_D_EN);
+
+	channel_writeq(idma64c, LLP, hw->llp);
+
+	channel_set_bit(idma64, CH_EN, idma64c->mask);
+}
+
+static void idma64_stop_transfer(struct idma64_chan *idma64c)
+{
+	struct idma64 *idma64 = to_idma64(idma64c->vchan.chan.device);
+
+	idma64_chan_stop(idma64, idma64c);
+}
+
+static void idma64_start_transfer(struct idma64_chan *idma64c)
+{
+	struct idma64 *idma64 = to_idma64(idma64c->vchan.chan.device);
+	struct virt_dma_desc *vdesc;
+
+	/* Get the next descriptor */
+	vdesc = vchan_next_desc(&idma64c->vchan);
+	if (!vdesc) {
+		idma64c->desc = NULL;
+		return;
+	}
+
+	list_del(&vdesc->node);
+	idma64c->desc = to_idma64_desc(vdesc);
+
+	/* Configure the channel */
+	idma64_chan_init(idma64, idma64c);
+
+	/* Start the channel with a new descriptor */
+	idma64_chan_start(idma64, idma64c);
+}
+
+/* ---------------------------------------------------------------------- */
+
+static void idma64_chan_irq(struct idma64 *idma64, unsigned short c,
+		u32 status_err, u32 status_xfer)
+{
+	struct idma64_chan *idma64c = &idma64->chan[c];
+	struct idma64_desc *desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&idma64c->vchan.lock, flags);
+	desc = idma64c->desc;
+	if (desc) {
+		if (status_err & (1 << c)) {
+			dma_writel(idma64, CLEAR(ERROR), idma64c->mask);
+			desc->status = DMA_ERROR;
+		} else if (status_xfer & (1 << c)) {
+			dma_writel(idma64, CLEAR(XFER), idma64c->mask);
+			desc->status = DMA_COMPLETE;
+			vchan_cookie_complete(&desc->vdesc);
+			idma64_start_transfer(idma64c);
+		}
+
+		/* idma64_start_transfer() updates idma64c->desc */
+		if (idma64c->desc == NULL || desc->status == DMA_ERROR)
+			idma64_stop_transfer(idma64c);
+	}
+	spin_unlock_irqrestore(&idma64c->vchan.lock, flags);
+}
+
+static irqreturn_t idma64_irq(int irq, void *dev)
+{
+	struct idma64 *idma64 = dev;
+	u32 status = dma_readl(idma64, STATUS_INT);
+	u32 status_xfer;
+	u32 status_err;
+	unsigned short i;
+
+	dev_vdbg(idma64->dma.dev, "%s: status=%#x\n", __func__, status);
+
+	/* Check if we have any interrupt from the DMA controller */
+	if (!status)
+		return IRQ_NONE;
+
+	status_xfer = dma_readl(idma64, RAW(XFER));
+	status_err = dma_readl(idma64, RAW(ERROR));
+
+	for (i = 0; i < idma64->dma.chancnt; i++)
+		idma64_chan_irq(idma64, i, status_err, status_xfer);
+
+	return IRQ_HANDLED;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static struct idma64_desc *idma64_alloc_desc(unsigned int ndesc)
+{
+	struct idma64_desc *desc;
+
+	desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+	if (!desc)
+		return NULL;
+
+	desc->hw = kcalloc(ndesc, sizeof(*desc->hw), GFP_NOWAIT);
+	if (!desc->hw) {
+		kfree(desc);
+		return NULL;
+	}
+
+	return desc;
+}
+
+static void idma64_desc_free(struct idma64_chan *idma64c,
+		struct idma64_desc *desc)
+{
+	struct idma64_hw_desc *hw;
+
+	if (desc->ndesc) {
+		unsigned int i = desc->ndesc;
+
+		do {
+			hw = &desc->hw[--i];
+			dma_pool_free(idma64c->pool, hw->lli, hw->llp);
+		} while (i);
+	}
+
+	kfree(desc->hw);
+	kfree(desc);
+}
+
+static void idma64_vdesc_free(struct virt_dma_desc *vdesc)
+{
+	struct idma64_chan *idma64c = to_idma64_chan(vdesc->tx.chan);
+
+	idma64_desc_free(idma64c, to_idma64_desc(vdesc));
+}
+
+static void idma64_hw_desc_fill(struct idma64_hw_desc *hw,
+		struct dma_slave_config *config,
+		enum dma_transfer_direction direction, u64 llp)
+{
+	struct idma64_lli *lli = hw->lli;
+	u64 sar, dar;
+	u32 ctlhi = IDMA64C_CTLH_BLOCK_TS(hw->len);
+	u32 ctllo = IDMA64C_CTLL_LLP_S_EN | IDMA64C_CTLL_LLP_D_EN;
+	u32 src_width, dst_width;
+
+	if (direction == DMA_MEM_TO_DEV) {
+		sar = hw->phys;
+		dar = config->dst_addr;
+		ctllo |= IDMA64C_CTLL_DST_FIX | IDMA64C_CTLL_SRC_INC |
+			 IDMA64C_CTLL_FC_M2P;
+		src_width = __ffs(sar | hw->len | 4);
+		dst_width = __ffs(config->dst_addr_width);
+	} else {	/* DMA_DEV_TO_MEM */
+		sar = config->src_addr;
+		dar = hw->phys;
+		ctllo |= IDMA64C_CTLL_DST_INC | IDMA64C_CTLL_SRC_FIX |
+			 IDMA64C_CTLL_FC_P2M;
+		src_width = __ffs(config->src_addr_width);
+		dst_width = __ffs(dar | hw->len | 4);
+	}
+
+	lli->sar = sar;
+	lli->dar = dar;
+
+	lli->ctlhi = ctlhi;
+	lli->ctllo = ctllo |
+		     IDMA64C_CTLL_SRC_MSIZE(config->src_maxburst) |
+		     IDMA64C_CTLL_DST_MSIZE(config->dst_maxburst) |
+		     IDMA64C_CTLL_DST_WIDTH(dst_width) |
+		     IDMA64C_CTLL_SRC_WIDTH(src_width);
+
+	lli->llp = llp;
+}
+
+static void idma64_desc_fill(struct idma64_chan *idma64c,
+		struct idma64_desc *desc)
+{
+	struct dma_slave_config *config = &idma64c->config;
+	unsigned int i = desc->ndesc;
+	struct idma64_hw_desc *hw = &desc->hw[i - 1];
+	struct idma64_lli *lli = hw->lli;
+	u64 llp = 0;
+
+	/* Fill the hardware descriptors and link them to a list */
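+	/*
+	 * The array is walked backwards so that each descriptor can point to
+	 * the already known LLP of its successor; the last descriptor (filled
+	 * first) terminates the chain with llp = 0.
+	 */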
+	do {
+		hw = &desc->hw[--i];
+		idma64_hw_desc_fill(hw, config, desc->direction, llp);
+		llp = hw->llp;
+		desc->length += hw->len;
+	} while (i);
+
+	/* Trigger an interrupt after the last block is transferred */
+	lli->ctllo |= IDMA64C_CTLL_INT_EN;
+
+	/* Disable LLP transfer in the last block */
+	lli->ctllo &= ~(IDMA64C_CTLL_LLP_S_EN | IDMA64C_CTLL_LLP_D_EN);
+}
+
+static struct dma_async_tx_descriptor *idma64_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct idma64_chan *idma64c = to_idma64_chan(chan);
+	struct idma64_desc *desc;
+	struct scatterlist *sg;
+	unsigned int i;
+
+	desc = idma64_alloc_desc(sg_len);
+	if (!desc)
+		return NULL;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		struct idma64_hw_desc *hw = &desc->hw[i];
+
+		/* Allocate DMA capable memory for hardware descriptor */
+		hw->lli = dma_pool_alloc(idma64c->pool, GFP_NOWAIT, &hw->llp);
+		if (!hw->lli) {
+			desc->ndesc = i;
+			idma64_desc_free(idma64c, desc);
+			return NULL;
+		}
+
+		hw->phys = sg_dma_address(sg);
+		hw->len = sg_dma_len(sg);
+	}
+
+	desc->ndesc = sg_len;
+	desc->direction = direction;
+	desc->status = DMA_IN_PROGRESS;
+
+	idma64_desc_fill(idma64c, desc);
+	return vchan_tx_prep(&idma64c->vchan, &desc->vdesc, flags);
+}
+
+static void idma64_issue_pending(struct dma_chan *chan)
+{
+	struct idma64_chan *idma64c = to_idma64_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&idma64c->vchan.lock, flags);
+	if (vchan_issue_pending(&idma64c->vchan) && !idma64c->desc)
+		idma64_start_transfer(idma64c);
+	spin_unlock_irqrestore(&idma64c->vchan.lock, flags);
+}
+
+static size_t idma64_active_desc_size(struct idma64_chan *idma64c)
+{
+	struct idma64_desc *desc = idma64c->desc;
+	struct idma64_hw_desc *hw;
+	size_t bytes = desc->length;
+	u64 llp = channel_readq(idma64c, LLP);
+	u32 ctlhi = channel_readl(idma64c, CTL_HI);
+	unsigned int i = 0;
+
+	do {
+		hw = &desc->hw[i];
+		if (hw->llp == llp)
+			break;
+		bytes -= hw->len;
+	} while (++i < desc->ndesc);
+
+	if (!i)
+		return bytes;
+
+	/* The current chunk is not fully transferred yet */
+	bytes += desc->hw[--i].len;
+
+	return bytes - IDMA64C_CTLH_BLOCK_TS(ctlhi);
+}
+
+static enum dma_status idma64_tx_status(struct dma_chan *chan,
+		dma_cookie_t cookie, struct dma_tx_state *state)
+{
+	struct idma64_chan *idma64c = to_idma64_chan(chan);
+	struct virt_dma_desc *vdesc;
+	enum dma_status status;
+	size_t bytes;
+	unsigned long flags;
+
+	status = dma_cookie_status(chan, cookie, state);
+	if (status == DMA_COMPLETE)
+		return status;
+
+	spin_lock_irqsave(&idma64c->vchan.lock, flags);
+	vdesc = vchan_find_desc(&idma64c->vchan, cookie);
+	if (idma64c->desc && cookie == idma64c->desc->vdesc.tx.cookie) {
+		bytes = idma64_active_desc_size(idma64c);
+		dma_set_residue(state, bytes);
+		status = idma64c->desc->status;
+	} else if (vdesc) {
+		bytes = to_idma64_desc(vdesc)->length;
+		dma_set_residue(state, bytes);
+	}
+	spin_unlock_irqrestore(&idma64c->vchan.lock, flags);
+
+	return status;
+}
+
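+/*
+ * Convert a burst size in elements into the value programmed into the MSIZE
+ * fields of CTL_LO: the index of the highest set bit, or 0 when no burst is
+ * configured.
+ */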
+static void convert_burst(u32 *maxburst)
+{
+	if (*maxburst)
+		*maxburst = __fls(*maxburst);
+	else
+		*maxburst = 0;
+}
+
+static int idma64_slave_config(struct dma_chan *chan,
+		struct dma_slave_config *config)
+{
+	struct idma64_chan *idma64c = to_idma64_chan(chan);
+
+	/* Check if chan will be configured for slave transfers */
+	if (!is_slave_direction(config->direction))
+		return -EINVAL;
+
+	memcpy(&idma64c->config, config, sizeof(idma64c->config));
+
+	convert_burst(&idma64c->config.src_maxburst);
+	convert_burst(&idma64c->config.dst_maxburst);
+
+	return 0;
+}
+
+static void idma64_chan_deactivate(struct idma64_chan *idma64c, bool drain)
+{
+	unsigned short count = 100;
+	u32 cfglo;
+
+	cfglo = channel_readl(idma64c, CFG_LO);
+	if (drain)
+		cfglo |= IDMA64C_CFGL_CH_DRAIN;
+	else
+		cfglo &= ~IDMA64C_CFGL_CH_DRAIN;
+
+	channel_writel(idma64c, CFG_LO, cfglo | IDMA64C_CFGL_CH_SUSP);
+	do {
+		udelay(1);
+		cfglo = channel_readl(idma64c, CFG_LO);
+	} while (!(cfglo & IDMA64C_CFGL_FIFO_EMPTY) && --count);
+}
+
+static void idma64_chan_activate(struct idma64_chan *idma64c)
+{
+	u32 cfglo;
+
+	cfglo = channel_readl(idma64c, CFG_LO);
+	channel_writel(idma64c, CFG_LO, cfglo & ~IDMA64C_CFGL_CH_SUSP);
+}
+
+static int idma64_pause(struct dma_chan *chan)
+{
+	struct idma64_chan *idma64c = to_idma64_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&idma64c->vchan.lock, flags);
+	if (idma64c->desc && idma64c->desc->status == DMA_IN_PROGRESS) {
+		idma64_chan_deactivate(idma64c, false);
+		idma64c->desc->status = DMA_PAUSED;
+	}
+	spin_unlock_irqrestore(&idma64c->vchan.lock, flags);
+
+	return 0;
+}
+
+static int idma64_resume(struct dma_chan *chan)
+{
+	struct idma64_chan *idma64c = to_idma64_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&idma64c->vchan.lock, flags);
+	if (idma64c->desc && idma64c->desc->status == DMA_PAUSED) {
+		idma64c->desc->status = DMA_IN_PROGRESS;
+		idma64_chan_activate(idma64c);
+	}
+	spin_unlock_irqrestore(&idma64c->vchan.lock, flags);
+
+	return 0;
+}
+
+static int idma64_terminate_all(struct dma_chan *chan)
+{
+	struct idma64_chan *idma64c = to_idma64_chan(chan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&idma64c->vchan.lock, flags);
+	idma64_chan_deactivate(idma64c, true);
+	idma64_stop_transfer(idma64c);
+	if (idma64c->desc) {
+		idma64_vdesc_free(&idma64c->desc->vdesc);
+		idma64c->desc = NULL;
+	}
+	vchan_get_all_descriptors(&idma64c->vchan, &head);
+	spin_unlock_irqrestore(&idma64c->vchan.lock, flags);
+
+	vchan_dma_desc_free_list(&idma64c->vchan, &head);
+	return 0;
+}
+
+static void idma64_synchronize(struct dma_chan *chan)
+{
+	struct idma64_chan *idma64c = to_idma64_chan(chan);
+
+	vchan_synchronize(&idma64c->vchan);
+}
+
+static int idma64_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct idma64_chan *idma64c = to_idma64_chan(chan);
+
+	/* Create a pool of consistent memory blocks for hardware descriptors */
+	idma64c->pool = dma_pool_create(dev_name(chan2dev(chan)),
+					chan->device->dev,
+					sizeof(struct idma64_lli), 8, 0);
+	if (!idma64c->pool) {
+		dev_err(chan2dev(chan), "No memory for descriptors\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void idma64_free_chan_resources(struct dma_chan *chan)
+{
+	struct idma64_chan *idma64c = to_idma64_chan(chan);
+
+	vchan_free_chan_resources(to_virt_chan(chan));
+	dma_pool_destroy(idma64c->pool);
+	idma64c->pool = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+#define IDMA64_BUSWIDTHS				\
+	BIT(DMA_SLAVE_BUSWIDTH_1_BYTE)		|	\
+	BIT(DMA_SLAVE_BUSWIDTH_2_BYTES)		|	\
+	BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)
+
+static int idma64_probe(struct idma64_chip *chip)
+{
+	struct idma64 *idma64;
+	unsigned short nr_chan = IDMA64_NR_CHAN;
+	unsigned short i;
+	int ret;
+
+	idma64 = devm_kzalloc(chip->dev, sizeof(*idma64), GFP_KERNEL);
+	if (!idma64)
+		return -ENOMEM;
+
+	idma64->regs = chip->regs;
+	chip->idma64 = idma64;
+
+	idma64->chan = devm_kcalloc(chip->dev, nr_chan, sizeof(*idma64->chan),
+				    GFP_KERNEL);
+	if (!idma64->chan)
+		return -ENOMEM;
+
+	idma64->all_chan_mask = (1 << nr_chan) - 1;
+
+	/* Turn off iDMA controller */
+	idma64_off(idma64);
+
+	ret = devm_request_irq(chip->dev, chip->irq, idma64_irq, IRQF_SHARED,
+			       dev_name(chip->dev), idma64);
+	if (ret)
+		return ret;
+
+	INIT_LIST_HEAD(&idma64->dma.channels);
+	for (i = 0; i < nr_chan; i++) {
+		struct idma64_chan *idma64c = &idma64->chan[i];
+
+		idma64c->vchan.desc_free = idma64_vdesc_free;
+		vchan_init(&idma64c->vchan, &idma64->dma);
+
+		idma64c->regs = idma64->regs + i * IDMA64_CH_LENGTH;
+		idma64c->mask = BIT(i);
+	}
+
+	dma_cap_set(DMA_SLAVE, idma64->dma.cap_mask);
+	dma_cap_set(DMA_PRIVATE, idma64->dma.cap_mask);
+
+	idma64->dma.device_alloc_chan_resources = idma64_alloc_chan_resources;
+	idma64->dma.device_free_chan_resources = idma64_free_chan_resources;
+
+	idma64->dma.device_prep_slave_sg = idma64_prep_slave_sg;
+
+	idma64->dma.device_issue_pending = idma64_issue_pending;
+	idma64->dma.device_tx_status = idma64_tx_status;
+
+	idma64->dma.device_config = idma64_slave_config;
+	idma64->dma.device_pause = idma64_pause;
+	idma64->dma.device_resume = idma64_resume;
+	idma64->dma.device_terminate_all = idma64_terminate_all;
+	idma64->dma.device_synchronize = idma64_synchronize;
+
+	idma64->dma.src_addr_widths = IDMA64_BUSWIDTHS;
+	idma64->dma.dst_addr_widths = IDMA64_BUSWIDTHS;
+	idma64->dma.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	idma64->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+	idma64->dma.dev = chip->dev;
+
+	dma_set_max_seg_size(idma64->dma.dev, IDMA64C_CTLH_BLOCK_TS_MASK);
+
+	ret = dma_async_device_register(&idma64->dma);
+	if (ret)
+		return ret;
+
+	dev_info(chip->dev, "Found Intel integrated DMA 64-bit\n");
+	return 0;
+}
+
+static int idma64_remove(struct idma64_chip *chip)
+{
+	struct idma64 *idma64 = chip->idma64;
+	unsigned short i;
+
+	dma_async_device_unregister(&idma64->dma);
+
+	/*
+	 * Explicitly call devm_free_irq() here to avoid the side effects with
+	 * the scheduled tasklets.
+	 */
+	devm_free_irq(chip->dev, chip->irq, idma64);
+
+	for (i = 0; i < idma64->dma.chancnt; i++) {
+		struct idma64_chan *idma64c = &idma64->chan[i];
+
+		tasklet_kill(&idma64c->vchan.task);
+	}
+
+	return 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+static int idma64_platform_probe(struct platform_device *pdev)
+{
+	struct idma64_chip *chip;
+	struct device *dev = &pdev->dev;
+	struct resource *mem;
+	int ret;
+
+	chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
+	if (!chip)
+		return -ENOMEM;
+
+	chip->irq = platform_get_irq(pdev, 0);
+	if (chip->irq < 0)
+		return chip->irq;
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	chip->regs = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(chip->regs))
+		return PTR_ERR(chip->regs);
+
+	ret = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (ret)
+		return ret;
+
+	chip->dev = dev;
+
+	ret = idma64_probe(chip);
+	if (ret)
+		return ret;
+
+	platform_set_drvdata(pdev, chip);
+	return 0;
+}
+
+static int idma64_platform_remove(struct platform_device *pdev)
+{
+	struct idma64_chip *chip = platform_get_drvdata(pdev);
+
+	return idma64_remove(chip);
+}
+
+#ifdef CONFIG_PM_SLEEP
+
+static int idma64_pm_suspend(struct device *dev)
+{
+	struct idma64_chip *chip = dev_get_drvdata(dev);
+
+	idma64_off(chip->idma64);
+	return 0;
+}
+
+static int idma64_pm_resume(struct device *dev)
+{
+	struct idma64_chip *chip = dev_get_drvdata(dev);
+
+	idma64_on(chip->idma64);
+	return 0;
+}
+
+#endif /* CONFIG_PM_SLEEP */
+
+static const struct dev_pm_ops idma64_dev_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(idma64_pm_suspend, idma64_pm_resume)
+};
+
+static struct platform_driver idma64_platform_driver = {
+	.probe		= idma64_platform_probe,
+	.remove		= idma64_platform_remove,
+	.driver = {
+		.name	= DRV_NAME,
+		.pm	= &idma64_dev_pm_ops,
+	},
+};
+
+module_platform_driver(idma64_platform_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("iDMA64 core driver");
+MODULE_AUTHOR("Andy Shevchenko <andriy.shevchenko@linux.intel.com>");
+MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/drivers/dma/idma64.h b/drivers/dma/idma64.h
new file mode 100644
index 0000000..6b81687
--- /dev/null
+++ b/drivers/dma/idma64.h
@@ -0,0 +1,230 @@
+/*
+ * Driver for the Intel integrated DMA 64-bit
+ *
+ * Copyright (C) 2015 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __DMA_IDMA64_H__
+#define __DMA_IDMA64_H__
+
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include <linux/io-64-nonatomic-lo-hi.h>
+
+#include "virt-dma.h"
+
+/* Channel registers */
+
+#define IDMA64_CH_SAR		0x00	/* Source Address Register */
+#define IDMA64_CH_DAR		0x08	/* Destination Address Register */
+#define IDMA64_CH_LLP		0x10	/* Linked List Pointer */
+#define IDMA64_CH_CTL_LO	0x18	/* Control Register Low */
+#define IDMA64_CH_CTL_HI	0x1c	/* Control Register High */
+#define IDMA64_CH_SSTAT		0x20
+#define IDMA64_CH_DSTAT		0x28
+#define IDMA64_CH_SSTATAR	0x30
+#define IDMA64_CH_DSTATAR	0x38
+#define IDMA64_CH_CFG_LO	0x40	/* Configuration Register Low */
+#define IDMA64_CH_CFG_HI	0x44	/* Configuration Register High */
+#define IDMA64_CH_SGR		0x48
+#define IDMA64_CH_DSR		0x50
+
+#define IDMA64_CH_LENGTH	0x58
+
+/* Bitfields in CTL_LO */
+#define IDMA64C_CTLL_INT_EN		(1 << 0)	/* irqs enabled? */
+#define IDMA64C_CTLL_DST_WIDTH(x)	((x) << 1)	/* bytes per element */
+#define IDMA64C_CTLL_SRC_WIDTH(x)	((x) << 4)
+#define IDMA64C_CTLL_DST_INC		(0 << 8)	/* DAR update/not */
+#define IDMA64C_CTLL_DST_FIX		(1 << 8)
+#define IDMA64C_CTLL_SRC_INC		(0 << 10)	/* SAR update/not */
+#define IDMA64C_CTLL_SRC_FIX		(1 << 10)
+#define IDMA64C_CTLL_DST_MSIZE(x)	((x) << 11)	/* burst, #elements */
+#define IDMA64C_CTLL_SRC_MSIZE(x)	((x) << 14)
+#define IDMA64C_CTLL_FC_M2P		(1 << 20)	/* mem-to-periph */
+#define IDMA64C_CTLL_FC_P2M		(2 << 20)	/* periph-to-mem */
+#define IDMA64C_CTLL_LLP_D_EN		(1 << 27)	/* dest block chain */
+#define IDMA64C_CTLL_LLP_S_EN		(1 << 28)	/* src block chain */
+
+/* Bitfields in CTL_HI */
+#define IDMA64C_CTLH_BLOCK_TS_MASK	((1 << 17) - 1)
+#define IDMA64C_CTLH_BLOCK_TS(x)	((x) & IDMA64C_CTLH_BLOCK_TS_MASK)
+#define IDMA64C_CTLH_DONE		(1 << 17)
+
+/* Bitfields in CFG_LO */
+#define IDMA64C_CFGL_DST_BURST_ALIGN	(1 << 0)	/* dst burst align */
+#define IDMA64C_CFGL_SRC_BURST_ALIGN	(1 << 1)	/* src burst align */
+#define IDMA64C_CFGL_CH_SUSP		(1 << 8)
+#define IDMA64C_CFGL_FIFO_EMPTY		(1 << 9)
+#define IDMA64C_CFGL_CH_DRAIN		(1 << 10)	/* drain FIFO */
+#define IDMA64C_CFGL_DST_OPT_BL		(1 << 20)	/* optimize dst burst length */
+#define IDMA64C_CFGL_SRC_OPT_BL		(1 << 21)	/* optimize src burst length */
+
+/* Bitfields in CFG_HI */
+#define IDMA64C_CFGH_SRC_PER(x)		((x) << 0)	/* src peripheral */
+#define IDMA64C_CFGH_DST_PER(x)		((x) << 4)	/* dst peripheral */
+#define IDMA64C_CFGH_RD_ISSUE_THD(x)	((x) << 8)
+#define IDMA64C_CFGH_WR_ISSUE_THD(x)	((x) << 18)
+
+/* Interrupt registers */
+
+#define IDMA64_INT_XFER		0x00
+#define IDMA64_INT_BLOCK	0x08
+#define IDMA64_INT_SRC_TRAN	0x10
+#define IDMA64_INT_DST_TRAN	0x18
+#define IDMA64_INT_ERROR	0x20
+
+#define IDMA64_RAW(x)		(0x2c0 + IDMA64_INT_##x)	/* r */
+#define IDMA64_STATUS(x)	(0x2e8 + IDMA64_INT_##x)	/* r (raw & mask) */
+#define IDMA64_MASK(x)		(0x310 + IDMA64_INT_##x)	/* rw (set = irq enabled) */
+#define IDMA64_CLEAR(x)		(0x338 + IDMA64_INT_##x)	/* w (ack, affects "raw") */
+
+/* Common registers */
+
+#define IDMA64_STATUS_INT	0x360	/* r */
+#define IDMA64_CFG		0x398
+#define IDMA64_CH_EN		0x3a0
+
+/* Bitfields in CFG */
+#define IDMA64_CFG_DMA_EN		(1 << 0)
+
+/* Hardware descriptor for Linked List transfers */
+struct idma64_lli {
+	u64		sar;
+	u64		dar;
+	u64		llp;
+	u32		ctllo;
+	u32		ctlhi;
+	u32		sstat;
+	u32		dstat;
+};
+
+struct idma64_hw_desc {
+	struct idma64_lli *lli;
+	dma_addr_t llp;
+	dma_addr_t phys;
+	unsigned int len;
+};
+
+struct idma64_desc {
+	struct virt_dma_desc vdesc;
+	enum dma_transfer_direction direction;
+	struct idma64_hw_desc *hw;
+	unsigned int ndesc;
+	size_t length;
+	enum dma_status status;
+};
+
+static inline struct idma64_desc *to_idma64_desc(struct virt_dma_desc *vdesc)
+{
+	return container_of(vdesc, struct idma64_desc, vdesc);
+}
+
+struct idma64_chan {
+	struct virt_dma_chan vchan;
+
+	void __iomem *regs;
+
+	/* hardware configuration */
+	enum dma_transfer_direction direction;
+	unsigned int mask;
+	struct dma_slave_config config;
+
+	void *pool;
+	struct idma64_desc *desc;
+};
+
+static inline struct idma64_chan *to_idma64_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct idma64_chan, vchan.chan);
+}
+
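+/*
+ * Note: both helpers appear to write a channel-select bitmap in bits [15:8]
+ * alongside the new channel bits in [7:0], which is why the mask is also
+ * shifted left by 8.
+ */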
+#define channel_set_bit(idma64, reg, mask)	\
+	dma_writel(idma64, reg, ((mask) << 8) | (mask))
+#define channel_clear_bit(idma64, reg, mask)	\
+	dma_writel(idma64, reg, ((mask) << 8) | 0)
+
+static inline u32 idma64c_readl(struct idma64_chan *idma64c, int offset)
+{
+	return readl(idma64c->regs + offset);
+}
+
+static inline void idma64c_writel(struct idma64_chan *idma64c, int offset,
+				  u32 value)
+{
+	writel(value, idma64c->regs + offset);
+}
+
+#define channel_readl(idma64c, reg)		\
+	idma64c_readl(idma64c, IDMA64_CH_##reg)
+#define channel_writel(idma64c, reg, value)	\
+	idma64c_writel(idma64c, IDMA64_CH_##reg, (value))
+
+static inline u64 idma64c_readq(struct idma64_chan *idma64c, int offset)
+{
+	return lo_hi_readq(idma64c->regs + offset);
+}
+
+static inline void idma64c_writeq(struct idma64_chan *idma64c, int offset,
+				  u64 value)
+{
+	lo_hi_writeq(value, idma64c->regs + offset);
+}
+
+#define channel_readq(idma64c, reg)		\
+	idma64c_readq(idma64c, IDMA64_CH_##reg)
+#define channel_writeq(idma64c, reg, value)	\
+	idma64c_writeq(idma64c, IDMA64_CH_##reg, (value))
+
+struct idma64 {
+	struct dma_device dma;
+
+	void __iomem *regs;
+
+	/* channels */
+	unsigned short all_chan_mask;
+	struct idma64_chan *chan;
+};
+
+static inline struct idma64 *to_idma64(struct dma_device *ddev)
+{
+	return container_of(ddev, struct idma64, dma);
+}
+
+static inline u32 idma64_readl(struct idma64 *idma64, int offset)
+{
+	return readl(idma64->regs + offset);
+}
+
+static inline void idma64_writel(struct idma64 *idma64, int offset, u32 value)
+{
+	writel(value, idma64->regs + offset);
+}
+
+#define dma_readl(idma64, reg)			\
+	idma64_readl(idma64, IDMA64_##reg)
+#define dma_writel(idma64, reg, value)		\
+	idma64_writel(idma64, IDMA64_##reg, (value))
+
+/**
+ * struct idma64_chip - representation of iDMA 64-bit controller hardware
+ * @dev:		struct device of the DMA controller
+ * @irq:		irq line
+ * @regs:		memory mapped I/O space
+ * @idma64:		struct idma64 that is filled in by idma64_probe()
+ */
+struct idma64_chip {
+	struct device	*dev;
+	int		irq;
+	void __iomem	*regs;
+	struct idma64	*idma64;
+};
+
+#endif /* __DMA_IDMA64_H__ */
diff --git a/drivers/dma/img-mdc-dma.c b/drivers/dma/img-mdc-dma.c
new file mode 100644
index 0000000..25cec9c
--- /dev/null
+++ b/drivers/dma/img-mdc-dma.c
@@ -0,0 +1,1093 @@
+/*
+ * IMG Multi-threaded DMA Controller (MDC)
+ *
+ * Copyright (C) 2009,2012,2013 Imagination Technologies Ltd.
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+#define MDC_MAX_DMA_CHANNELS			32
+
+#define MDC_GENERAL_CONFIG			0x000
+#define MDC_GENERAL_CONFIG_LIST_IEN		BIT(31)
+#define MDC_GENERAL_CONFIG_IEN			BIT(29)
+#define MDC_GENERAL_CONFIG_LEVEL_INT		BIT(28)
+#define MDC_GENERAL_CONFIG_INC_W		BIT(12)
+#define MDC_GENERAL_CONFIG_INC_R		BIT(8)
+#define MDC_GENERAL_CONFIG_PHYSICAL_W		BIT(7)
+#define MDC_GENERAL_CONFIG_WIDTH_W_SHIFT	4
+#define MDC_GENERAL_CONFIG_WIDTH_W_MASK		0x7
+#define MDC_GENERAL_CONFIG_PHYSICAL_R		BIT(3)
+#define MDC_GENERAL_CONFIG_WIDTH_R_SHIFT	0
+#define MDC_GENERAL_CONFIG_WIDTH_R_MASK		0x7
+
+#define MDC_READ_PORT_CONFIG			0x004
+#define MDC_READ_PORT_CONFIG_STHREAD_SHIFT	28
+#define MDC_READ_PORT_CONFIG_STHREAD_MASK	0xf
+#define MDC_READ_PORT_CONFIG_RTHREAD_SHIFT	24
+#define MDC_READ_PORT_CONFIG_RTHREAD_MASK	0xf
+#define MDC_READ_PORT_CONFIG_WTHREAD_SHIFT	16
+#define MDC_READ_PORT_CONFIG_WTHREAD_MASK	0xf
+#define MDC_READ_PORT_CONFIG_BURST_SIZE_SHIFT	4
+#define MDC_READ_PORT_CONFIG_BURST_SIZE_MASK	0xff
+#define MDC_READ_PORT_CONFIG_DREQ_ENABLE	BIT(1)
+
+#define MDC_READ_ADDRESS			0x008
+
+#define MDC_WRITE_ADDRESS			0x00c
+
+#define MDC_TRANSFER_SIZE			0x010
+#define MDC_TRANSFER_SIZE_MASK			0xffffff
+
+#define MDC_LIST_NODE_ADDRESS			0x014
+
+#define MDC_CMDS_PROCESSED			0x018
+#define MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT	16
+#define MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK	0x3f
+#define MDC_CMDS_PROCESSED_INT_ACTIVE		BIT(8)
+#define MDC_CMDS_PROCESSED_CMDS_DONE_SHIFT	0
+#define MDC_CMDS_PROCESSED_CMDS_DONE_MASK	0x3f
+
+#define MDC_CONTROL_AND_STATUS			0x01c
+#define MDC_CONTROL_AND_STATUS_CANCEL		BIT(20)
+#define MDC_CONTROL_AND_STATUS_LIST_EN		BIT(4)
+#define MDC_CONTROL_AND_STATUS_EN		BIT(0)
+
+#define MDC_ACTIVE_TRANSFER_SIZE		0x030
+
+#define MDC_GLOBAL_CONFIG_A				0x900
+#define MDC_GLOBAL_CONFIG_A_THREAD_ID_WIDTH_SHIFT	16
+#define MDC_GLOBAL_CONFIG_A_THREAD_ID_WIDTH_MASK	0xff
+#define MDC_GLOBAL_CONFIG_A_DMA_CONTEXTS_SHIFT		8
+#define MDC_GLOBAL_CONFIG_A_DMA_CONTEXTS_MASK		0xff
+#define MDC_GLOBAL_CONFIG_A_SYS_DAT_WIDTH_SHIFT		0
+#define MDC_GLOBAL_CONFIG_A_SYS_DAT_WIDTH_MASK		0xff
+
+struct mdc_hw_list_desc {
+	u32 gen_conf;
+	u32 readport_conf;
+	u32 read_addr;
+	u32 write_addr;
+	u32 xfer_size;
+	u32 node_addr;
+	u32 cmds_done;
+	u32 ctrl_status;
+	/*
+	 * Not part of the list descriptor, but instead used by the CPU to
+	 * traverse the list.
+	 */
+	struct mdc_hw_list_desc *next_desc;
+};
+
+struct mdc_tx_desc {
+	struct mdc_chan *chan;
+	struct virt_dma_desc vd;
+	dma_addr_t list_phys;
+	struct mdc_hw_list_desc *list;
+	bool cyclic;
+	bool cmd_loaded;
+	unsigned int list_len;
+	unsigned int list_period_len;
+	size_t list_xfer_size;
+	unsigned int list_cmds_done;
+};
+
+struct mdc_chan {
+	struct mdc_dma *mdma;
+	struct virt_dma_chan vc;
+	struct dma_slave_config config;
+	struct mdc_tx_desc *desc;
+	int irq;
+	unsigned int periph;
+	unsigned int thread;
+	unsigned int chan_nr;
+};
+
+struct mdc_dma_soc_data {
+	void (*enable_chan)(struct mdc_chan *mchan);
+	void (*disable_chan)(struct mdc_chan *mchan);
+};
+
+struct mdc_dma {
+	struct dma_device dma_dev;
+	void __iomem *regs;
+	struct clk *clk;
+	struct dma_pool *desc_pool;
+	struct regmap *periph_regs;
+	spinlock_t lock;
+	unsigned int nr_threads;
+	unsigned int nr_channels;
+	unsigned int bus_width;
+	unsigned int max_burst_mult;
+	unsigned int max_xfer_size;
+	const struct mdc_dma_soc_data *soc;
+	struct mdc_chan channels[MDC_MAX_DMA_CHANNELS];
+};
+
+static inline u32 mdc_readl(struct mdc_dma *mdma, u32 reg)
+{
+	return readl(mdma->regs + reg);
+}
+
+static inline void mdc_writel(struct mdc_dma *mdma, u32 val, u32 reg)
+{
+	writel(val, mdma->regs + reg);
+}
+
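+/* Per-channel register blocks are laid out at a 0x040 stride from the base. */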
+static inline u32 mdc_chan_readl(struct mdc_chan *mchan, u32 reg)
+{
+	return mdc_readl(mchan->mdma, mchan->chan_nr * 0x040 + reg);
+}
+
+static inline void mdc_chan_writel(struct mdc_chan *mchan, u32 val, u32 reg)
+{
+	mdc_writel(mchan->mdma, val, mchan->chan_nr * 0x040 + reg);
+}
+
+static inline struct mdc_chan *to_mdc_chan(struct dma_chan *c)
+{
+	return container_of(to_virt_chan(c), struct mdc_chan, vc);
+}
+
+static inline struct mdc_tx_desc *to_mdc_desc(struct dma_async_tx_descriptor *t)
+{
+	struct virt_dma_desc *vdesc = container_of(t, struct virt_dma_desc, tx);
+
+	return container_of(vdesc, struct mdc_tx_desc, vd);
+}
+
+static inline struct device *mdma2dev(struct mdc_dma *mdma)
+{
+	return mdma->dma_dev.dev;
+}
+
+static inline unsigned int to_mdc_width(unsigned int bytes)
+{
+	return ffs(bytes) - 1;
+}
+
+static inline void mdc_set_read_width(struct mdc_hw_list_desc *ldesc,
+				      unsigned int bytes)
+{
+	ldesc->gen_conf |= to_mdc_width(bytes) <<
+		MDC_GENERAL_CONFIG_WIDTH_R_SHIFT;
+}
+
+static inline void mdc_set_write_width(struct mdc_hw_list_desc *ldesc,
+				       unsigned int bytes)
+{
+	ldesc->gen_conf |= to_mdc_width(bytes) <<
+		MDC_GENERAL_CONFIG_WIDTH_W_SHIFT;
+}
+
+static void mdc_list_desc_config(struct mdc_chan *mchan,
+				 struct mdc_hw_list_desc *ldesc,
+				 enum dma_transfer_direction dir,
+				 dma_addr_t src, dma_addr_t dst, size_t len)
+{
+	struct mdc_dma *mdma = mchan->mdma;
+	unsigned int max_burst, burst_size;
+
+	ldesc->gen_conf = MDC_GENERAL_CONFIG_IEN | MDC_GENERAL_CONFIG_LIST_IEN |
+		MDC_GENERAL_CONFIG_LEVEL_INT | MDC_GENERAL_CONFIG_PHYSICAL_W |
+		MDC_GENERAL_CONFIG_PHYSICAL_R;
+	ldesc->readport_conf =
+		(mchan->thread << MDC_READ_PORT_CONFIG_STHREAD_SHIFT) |
+		(mchan->thread << MDC_READ_PORT_CONFIG_RTHREAD_SHIFT) |
+		(mchan->thread << MDC_READ_PORT_CONFIG_WTHREAD_SHIFT);
+	ldesc->read_addr = src;
+	ldesc->write_addr = dst;
+	ldesc->xfer_size = len - 1;
+	ldesc->node_addr = 0;
+	ldesc->cmds_done = 0;
+	ldesc->ctrl_status = MDC_CONTROL_AND_STATUS_LIST_EN |
+		MDC_CONTROL_AND_STATUS_EN;
+	ldesc->next_desc = NULL;
+
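+	/*
+	 * Bus-width-aligned transfers may use the full burst multiplier;
+	 * otherwise back the burst off by one multiple.
+	 */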
+	if (IS_ALIGNED(dst, mdma->bus_width) &&
+	    IS_ALIGNED(src, mdma->bus_width))
+		max_burst = mdma->bus_width * mdma->max_burst_mult;
+	else
+		max_burst = mdma->bus_width * (mdma->max_burst_mult - 1);
+
+	if (dir == DMA_MEM_TO_DEV) {
+		ldesc->gen_conf |= MDC_GENERAL_CONFIG_INC_R;
+		ldesc->readport_conf |= MDC_READ_PORT_CONFIG_DREQ_ENABLE;
+		mdc_set_read_width(ldesc, mdma->bus_width);
+		mdc_set_write_width(ldesc, mchan->config.dst_addr_width);
+		burst_size = min(max_burst, mchan->config.dst_maxburst *
+				 mchan->config.dst_addr_width);
+	} else if (dir == DMA_DEV_TO_MEM) {
+		ldesc->gen_conf |= MDC_GENERAL_CONFIG_INC_W;
+		ldesc->readport_conf |= MDC_READ_PORT_CONFIG_DREQ_ENABLE;
+		mdc_set_read_width(ldesc, mchan->config.src_addr_width);
+		mdc_set_write_width(ldesc, mdma->bus_width);
+		burst_size = min(max_burst, mchan->config.src_maxburst *
+				 mchan->config.src_addr_width);
+	} else {
+		ldesc->gen_conf |= MDC_GENERAL_CONFIG_INC_R |
+			MDC_GENERAL_CONFIG_INC_W;
+		mdc_set_read_width(ldesc, mdma->bus_width);
+		mdc_set_write_width(ldesc, mdma->bus_width);
+		burst_size = max_burst;
+	}
+	ldesc->readport_conf |= (burst_size - 1) <<
+		MDC_READ_PORT_CONFIG_BURST_SIZE_SHIFT;
+}
+
+static void mdc_list_desc_free(struct mdc_tx_desc *mdesc)
+{
+	struct mdc_dma *mdma = mdesc->chan->mdma;
+	struct mdc_hw_list_desc *curr, *next;
+	dma_addr_t curr_phys, next_phys;
+
+	curr = mdesc->list;
+	curr_phys = mdesc->list_phys;
+	while (curr) {
+		next = curr->next_desc;
+		next_phys = curr->node_addr;
+		dma_pool_free(mdma->desc_pool, curr, curr_phys);
+		curr = next;
+		curr_phys = next_phys;
+	}
+}
+
+static void mdc_desc_free(struct virt_dma_desc *vd)
+{
+	struct mdc_tx_desc *mdesc = to_mdc_desc(&vd->tx);
+
+	mdc_list_desc_free(mdesc);
+	kfree(mdesc);
+}
+
+static struct dma_async_tx_descriptor *mdc_prep_dma_memcpy(
+	struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, size_t len,
+	unsigned long flags)
+{
+	struct mdc_chan *mchan = to_mdc_chan(chan);
+	struct mdc_dma *mdma = mchan->mdma;
+	struct mdc_tx_desc *mdesc;
+	struct mdc_hw_list_desc *curr, *prev = NULL;
+	dma_addr_t curr_phys;
+
+	if (!len)
+		return NULL;
+
+	mdesc = kzalloc(sizeof(*mdesc), GFP_NOWAIT);
+	if (!mdesc)
+		return NULL;
+	mdesc->chan = mchan;
+	mdesc->list_xfer_size = len;
+
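+	/* Split the copy into list descriptors of at most max_xfer_size bytes. */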
+	while (len > 0) {
+		size_t xfer_size;
+
+		curr = dma_pool_alloc(mdma->desc_pool, GFP_NOWAIT, &curr_phys);
+		if (!curr)
+			goto free_desc;
+
+		if (prev) {
+			prev->node_addr = curr_phys;
+			prev->next_desc = curr;
+		} else {
+			mdesc->list_phys = curr_phys;
+			mdesc->list = curr;
+		}
+
+		xfer_size = min_t(size_t, mdma->max_xfer_size, len);
+
+		mdc_list_desc_config(mchan, curr, DMA_MEM_TO_MEM, src, dest,
+				     xfer_size);
+
+		prev = curr;
+
+		mdesc->list_len++;
+		src += xfer_size;
+		dest += xfer_size;
+		len -= xfer_size;
+	}
+
+	return vchan_tx_prep(&mchan->vc, &mdesc->vd, flags);
+
+free_desc:
+	mdc_desc_free(&mdesc->vd);
+
+	return NULL;
+}
+
+static int mdc_check_slave_width(struct mdc_chan *mchan,
+				 enum dma_transfer_direction dir)
+{
+	enum dma_slave_buswidth width;
+
+	if (dir == DMA_MEM_TO_DEV)
+		width = mchan->config.dst_addr_width;
+	else
+		width = mchan->config.src_addr_width;
+
+	switch (width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+	case DMA_SLAVE_BUSWIDTH_8_BYTES:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (width > mchan->mdma->bus_width)
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct dma_async_tx_descriptor *mdc_prep_dma_cyclic(
+	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+	size_t period_len, enum dma_transfer_direction dir,
+	unsigned long flags)
+{
+	struct mdc_chan *mchan = to_mdc_chan(chan);
+	struct mdc_dma *mdma = mchan->mdma;
+	struct mdc_tx_desc *mdesc;
+	struct mdc_hw_list_desc *curr, *prev = NULL;
+	dma_addr_t curr_phys;
+
+	if (!buf_len && !period_len)
+		return NULL;
+
+	if (!is_slave_direction(dir))
+		return NULL;
+
+	if (mdc_check_slave_width(mchan, dir) < 0)
+		return NULL;
+
+	mdesc = kzalloc(sizeof(*mdesc), GFP_NOWAIT);
+	if (!mdesc)
+		return NULL;
+	mdesc->chan = mchan;
+	mdesc->cyclic = true;
+	mdesc->list_xfer_size = buf_len;
+	mdesc->list_period_len = DIV_ROUND_UP(period_len,
+					      mdma->max_xfer_size);
+
+	while (buf_len > 0) {
+		size_t remainder = min(period_len, buf_len);
+
+		while (remainder > 0) {
+			size_t xfer_size;
+
+			curr = dma_pool_alloc(mdma->desc_pool, GFP_NOWAIT,
+					      &curr_phys);
+			if (!curr)
+				goto free_desc;
+
+			if (!prev) {
+				mdesc->list_phys = curr_phys;
+				mdesc->list = curr;
+			} else {
+				prev->node_addr = curr_phys;
+				prev->next_desc = curr;
+			}
+
+			xfer_size = min_t(size_t, mdma->max_xfer_size,
+					  remainder);
+
+			if (dir == DMA_MEM_TO_DEV) {
+				mdc_list_desc_config(mchan, curr, dir,
+						     buf_addr,
+						     mchan->config.dst_addr,
+						     xfer_size);
+			} else {
+				mdc_list_desc_config(mchan, curr, dir,
+						     mchan->config.src_addr,
+						     buf_addr,
+						     xfer_size);
+			}
+
+			prev = curr;
+
+			mdesc->list_len++;
+			buf_addr += xfer_size;
+			buf_len -= xfer_size;
+			remainder -= xfer_size;
+		}
+	}
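+	/* Link the last descriptor back to the first to form a ring. */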
+	prev->node_addr = mdesc->list_phys;
+
+	return vchan_tx_prep(&mchan->vc, &mdesc->vd, flags);
+
+free_desc:
+	mdc_desc_free(&mdesc->vd);
+
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *mdc_prep_slave_sg(
+	struct dma_chan *chan, struct scatterlist *sgl,
+	unsigned int sg_len, enum dma_transfer_direction dir,
+	unsigned long flags, void *context)
+{
+	struct mdc_chan *mchan = to_mdc_chan(chan);
+	struct mdc_dma *mdma = mchan->mdma;
+	struct mdc_tx_desc *mdesc;
+	struct scatterlist *sg;
+	struct mdc_hw_list_desc *curr, *prev = NULL;
+	dma_addr_t curr_phys;
+	unsigned int i;
+
+	if (!sgl)
+		return NULL;
+
+	if (!is_slave_direction(dir))
+		return NULL;
+
+	if (mdc_check_slave_width(mchan, dir) < 0)
+		return NULL;
+
+	mdesc = kzalloc(sizeof(*mdesc), GFP_NOWAIT);
+	if (!mdesc)
+		return NULL;
+	mdesc->chan = mchan;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		dma_addr_t buf = sg_dma_address(sg);
+		size_t buf_len = sg_dma_len(sg);
+
+		while (buf_len > 0) {
+			size_t xfer_size;
+
+			curr = dma_pool_alloc(mdma->desc_pool, GFP_NOWAIT,
+					      &curr_phys);
+			if (!curr)
+				goto free_desc;
+
+			if (!prev) {
+				mdesc->list_phys = curr_phys;
+				mdesc->list = curr;
+			} else {
+				prev->node_addr = curr_phys;
+				prev->next_desc = curr;
+			}
+
+			xfer_size = min_t(size_t, mdma->max_xfer_size,
+					  buf_len);
+
+			if (dir == DMA_MEM_TO_DEV) {
+				mdc_list_desc_config(mchan, curr, dir, buf,
+						     mchan->config.dst_addr,
+						     xfer_size);
+			} else {
+				mdc_list_desc_config(mchan, curr, dir,
+						     mchan->config.src_addr,
+						     buf, xfer_size);
+			}
+
+			prev = curr;
+
+			mdesc->list_len++;
+			mdesc->list_xfer_size += xfer_size;
+			buf += xfer_size;
+			buf_len -= xfer_size;
+		}
+	}
+
+	return vchan_tx_prep(&mchan->vc, &mdesc->vd, flags);
+
+free_desc:
+	mdc_desc_free(&mdesc->vd);
+
+	return NULL;
+}
+
+static void mdc_issue_desc(struct mdc_chan *mchan)
+{
+	struct mdc_dma *mdma = mchan->mdma;
+	struct virt_dma_desc *vd;
+	struct mdc_tx_desc *mdesc;
+	u32 val;
+
+	vd = vchan_next_desc(&mchan->vc);
+	if (!vd)
+		return;
+
+	list_del(&vd->node);
+
+	mdesc = to_mdc_desc(&vd->tx);
+	mchan->desc = mdesc;
+
+	dev_dbg(mdma2dev(mdma), "Issuing descriptor on channel %d\n",
+		mchan->chan_nr);
+
+	mdma->soc->enable_chan(mchan);
+
+	val = mdc_chan_readl(mchan, MDC_GENERAL_CONFIG);
+	val |= MDC_GENERAL_CONFIG_LIST_IEN | MDC_GENERAL_CONFIG_IEN |
+		MDC_GENERAL_CONFIG_LEVEL_INT | MDC_GENERAL_CONFIG_PHYSICAL_W |
+		MDC_GENERAL_CONFIG_PHYSICAL_R;
+	mdc_chan_writel(mchan, val, MDC_GENERAL_CONFIG);
+	val = (mchan->thread << MDC_READ_PORT_CONFIG_STHREAD_SHIFT) |
+		(mchan->thread << MDC_READ_PORT_CONFIG_RTHREAD_SHIFT) |
+		(mchan->thread << MDC_READ_PORT_CONFIG_WTHREAD_SHIFT);
+	mdc_chan_writel(mchan, val, MDC_READ_PORT_CONFIG);
+	mdc_chan_writel(mchan, mdesc->list_phys, MDC_LIST_NODE_ADDRESS);
+	val = mdc_chan_readl(mchan, MDC_CONTROL_AND_STATUS);
+	val |= MDC_CONTROL_AND_STATUS_LIST_EN;
+	mdc_chan_writel(mchan, val, MDC_CONTROL_AND_STATUS);
+}
+
+static void mdc_issue_pending(struct dma_chan *chan)
+{
+	struct mdc_chan *mchan = to_mdc_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&mchan->vc.lock, flags);
+	if (vchan_issue_pending(&mchan->vc) && !mchan->desc)
+		mdc_issue_desc(mchan);
+	spin_unlock_irqrestore(&mchan->vc.lock, flags);
+}
+
+static enum dma_status mdc_tx_status(struct dma_chan *chan,
+	dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct mdc_chan *mchan = to_mdc_chan(chan);
+	struct mdc_tx_desc *mdesc;
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+	size_t bytes = 0;
+	int ret;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	if (!txstate)
+		return ret;
+
+	spin_lock_irqsave(&mchan->vc.lock, flags);
+	vd = vchan_find_desc(&mchan->vc, cookie);
+	if (vd) {
+		mdesc = to_mdc_desc(&vd->tx);
+		bytes = mdesc->list_xfer_size;
+	} else if (mchan->desc && mchan->desc->vd.tx.cookie == cookie) {
+		struct mdc_hw_list_desc *ldesc;
+		u32 val1, val2, done, processed, residue;
+		int i, cmds;
+
+		mdesc = mchan->desc;
+
+		/*
+		 * Determine the number of commands that haven't been
+		 * processed (handled by the IRQ handler) yet.
+		 */
+		do {
+			val1 = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED) &
+				~MDC_CMDS_PROCESSED_INT_ACTIVE;
+			residue = mdc_chan_readl(mchan,
+						 MDC_ACTIVE_TRANSFER_SIZE);
+			val2 = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED) &
+				~MDC_CMDS_PROCESSED_INT_ACTIVE;
+		} while (val1 != val2);
+
+		done = (val1 >> MDC_CMDS_PROCESSED_CMDS_DONE_SHIFT) &
+			MDC_CMDS_PROCESSED_CMDS_DONE_MASK;
+		processed = (val1 >> MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT) &
+			MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK;
+		cmds = (done - processed) %
+			(MDC_CMDS_PROCESSED_CMDS_DONE_MASK + 1);
+
+		/*
+		 * If the command loaded event hasn't been processed yet, then
+		 * the difference above includes an extra command.
+		 */
+		if (!mdesc->cmd_loaded)
+			cmds--;
+		else
+			cmds += mdesc->list_cmds_done;
+
+		bytes = mdesc->list_xfer_size;
+		ldesc = mdesc->list;
+		for (i = 0; i < cmds; i++) {
+			bytes -= ldesc->xfer_size + 1;
+			ldesc = ldesc->next_desc;
+		}
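+		/*
+		 * Account for the command currently in flight: a residue equal
+		 * to the full mask means the command has fully completed (see
+		 * the max_xfer_size note in mdc_dma_probe()), otherwise
+		 * subtract only the bytes it has already transferred.
+		 */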
+		if (ldesc) {
+			if (residue != MDC_TRANSFER_SIZE_MASK)
+				bytes -= ldesc->xfer_size - residue;
+			else
+				bytes -= ldesc->xfer_size + 1;
+		}
+	}
+	spin_unlock_irqrestore(&mchan->vc.lock, flags);
+
+	dma_set_residue(txstate, bytes);
+
+	return ret;
+}
+
+static unsigned int mdc_get_new_events(struct mdc_chan *mchan)
+{
+	u32 val, processed, done1, done2;
+	unsigned int ret;
+
+	val = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED);
+	processed = (val >> MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT) &
+				MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK;
+	/*
+	 * CMDS_DONE may have incremented between reading CMDS_PROCESSED
+	 * and clearing INT_ACTIVE.  Re-read CMDS_PROCESSED to ensure we
+	 * didn't miss a command completion.
+	 */
+	do {
+		val = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED);
+
+		done1 = (val >> MDC_CMDS_PROCESSED_CMDS_DONE_SHIFT) &
+			MDC_CMDS_PROCESSED_CMDS_DONE_MASK;
+
+		val &= ~((MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK <<
+			  MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT) |
+			 MDC_CMDS_PROCESSED_INT_ACTIVE);
+
+		val |= done1 << MDC_CMDS_PROCESSED_CMDS_PROCESSED_SHIFT;
+
+		mdc_chan_writel(mchan, val, MDC_CMDS_PROCESSED);
+
+		val = mdc_chan_readl(mchan, MDC_CMDS_PROCESSED);
+
+		done2 = (val >> MDC_CMDS_PROCESSED_CMDS_DONE_SHIFT) &
+			MDC_CMDS_PROCESSED_CMDS_DONE_MASK;
+	} while (done1 != done2);
+
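+	/* done1/processed are modulo (MASK + 1) counters, so handle wraparound. */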
+	if (done1 >= processed)
+		ret = done1 - processed;
+	else
+		ret = ((MDC_CMDS_PROCESSED_CMDS_PROCESSED_MASK + 1) -
+			processed) + done1;
+
+	return ret;
+}
+
+static int mdc_terminate_all(struct dma_chan *chan)
+{
+	struct mdc_chan *mchan = to_mdc_chan(chan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&mchan->vc.lock, flags);
+
+	mdc_chan_writel(mchan, MDC_CONTROL_AND_STATUS_CANCEL,
+			MDC_CONTROL_AND_STATUS);
+
+	if (mchan->desc) {
+		vchan_terminate_vdesc(&mchan->desc->vd);
+		mchan->desc = NULL;
+	}
+	vchan_get_all_descriptors(&mchan->vc, &head);
+
+	mdc_get_new_events(mchan);
+
+	spin_unlock_irqrestore(&mchan->vc.lock, flags);
+
+	vchan_dma_desc_free_list(&mchan->vc, &head);
+
+	return 0;
+}
+
+static void mdc_synchronize(struct dma_chan *chan)
+{
+	struct mdc_chan *mchan = to_mdc_chan(chan);
+
+	vchan_synchronize(&mchan->vc);
+}
+
+static int mdc_slave_config(struct dma_chan *chan,
+			    struct dma_slave_config *config)
+{
+	struct mdc_chan *mchan = to_mdc_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&mchan->vc.lock, flags);
+	mchan->config = *config;
+	spin_unlock_irqrestore(&mchan->vc.lock, flags);
+
+	return 0;
+}
+
+static int mdc_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct mdc_chan *mchan = to_mdc_chan(chan);
+	struct device *dev = mdma2dev(mchan->mdma);
+
+	return pm_runtime_get_sync(dev);
+}
+
+static void mdc_free_chan_resources(struct dma_chan *chan)
+{
+	struct mdc_chan *mchan = to_mdc_chan(chan);
+	struct mdc_dma *mdma = mchan->mdma;
+	struct device *dev = mdma2dev(mdma);
+
+	mdc_terminate_all(chan);
+	mdma->soc->disable_chan(mchan);
+	pm_runtime_put(dev);
+}
+
+static irqreturn_t mdc_chan_irq(int irq, void *dev_id)
+{
+	struct mdc_chan *mchan = (struct mdc_chan *)dev_id;
+	struct mdc_tx_desc *mdesc;
+	unsigned int i, new_events;
+
+	spin_lock(&mchan->vc.lock);
+
+	dev_dbg(mdma2dev(mchan->mdma), "IRQ on channel %d\n", mchan->chan_nr);
+
+	new_events = mdc_get_new_events(mchan);
+
+	if (!new_events)
+		goto out;
+
+	mdesc = mchan->desc;
+	if (!mdesc) {
+		dev_warn(mdma2dev(mchan->mdma),
+			 "IRQ with no active descriptor on channel %d\n",
+			 mchan->chan_nr);
+		goto out;
+	}
+
+	for (i = 0; i < new_events; i++) {
+		/*
+		 * The first interrupt in a transfer indicates that the
+		 * command list has been loaded, not that a command has
+		 * been completed.
+		 */
+		if (!mdesc->cmd_loaded) {
+			mdesc->cmd_loaded = true;
+			continue;
+		}
+
+		mdesc->list_cmds_done++;
+		if (mdesc->cyclic) {
+			mdesc->list_cmds_done %= mdesc->list_len;
+			if (mdesc->list_cmds_done % mdesc->list_period_len == 0)
+				vchan_cyclic_callback(&mdesc->vd);
+		} else if (mdesc->list_cmds_done == mdesc->list_len) {
+			mchan->desc = NULL;
+			vchan_cookie_complete(&mdesc->vd);
+			mdc_issue_desc(mchan);
+			break;
+		}
+	}
+out:
+	spin_unlock(&mchan->vc.lock);
+
+	return IRQ_HANDLED;
+}
+
+static struct dma_chan *mdc_of_xlate(struct of_phandle_args *dma_spec,
+				     struct of_dma *ofdma)
+{
+	struct mdc_dma *mdma = ofdma->of_dma_data;
+	struct dma_chan *chan;
+
+	if (dma_spec->args_count != 3)
+		return NULL;
+
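+	/*
+	 * The three cells are consumed as: args[0] peripheral id, args[1]
+	 * bitmask of usable channels, args[2] thread id.
+	 */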
+	list_for_each_entry(chan, &mdma->dma_dev.channels, device_node) {
+		struct mdc_chan *mchan = to_mdc_chan(chan);
+
+		if (!(dma_spec->args[1] & BIT(mchan->chan_nr)))
+			continue;
+		if (dma_get_slave_channel(chan)) {
+			mchan->periph = dma_spec->args[0];
+			mchan->thread = dma_spec->args[2];
+			return chan;
+		}
+	}
+
+	return NULL;
+}
+
+#define PISTACHIO_CR_PERIPH_DMA_ROUTE(ch)	(0x120 + 0x4 * ((ch) / 4))
+#define PISTACHIO_CR_PERIPH_DMA_ROUTE_SHIFT(ch) (8 * ((ch) % 4))
+#define PISTACHIO_CR_PERIPH_DMA_ROUTE_MASK	0x3f
+
+static void pistachio_mdc_enable_chan(struct mdc_chan *mchan)
+{
+	struct mdc_dma *mdma = mchan->mdma;
+
+	regmap_update_bits(mdma->periph_regs,
+			   PISTACHIO_CR_PERIPH_DMA_ROUTE(mchan->chan_nr),
+			   PISTACHIO_CR_PERIPH_DMA_ROUTE_MASK <<
+			   PISTACHIO_CR_PERIPH_DMA_ROUTE_SHIFT(mchan->chan_nr),
+			   mchan->periph <<
+			   PISTACHIO_CR_PERIPH_DMA_ROUTE_SHIFT(mchan->chan_nr));
+}
+
+static void pistachio_mdc_disable_chan(struct mdc_chan *mchan)
+{
+	struct mdc_dma *mdma = mchan->mdma;
+
+	regmap_update_bits(mdma->periph_regs,
+			   PISTACHIO_CR_PERIPH_DMA_ROUTE(mchan->chan_nr),
+			   PISTACHIO_CR_PERIPH_DMA_ROUTE_MASK <<
+			   PISTACHIO_CR_PERIPH_DMA_ROUTE_SHIFT(mchan->chan_nr),
+			   0);
+}
+
+static const struct mdc_dma_soc_data pistachio_mdc_data = {
+	.enable_chan = pistachio_mdc_enable_chan,
+	.disable_chan = pistachio_mdc_disable_chan,
+};
+
+static const struct of_device_id mdc_dma_of_match[] = {
+	{ .compatible = "img,pistachio-mdc-dma", .data = &pistachio_mdc_data, },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, mdc_dma_of_match);
+
+static int img_mdc_runtime_suspend(struct device *dev)
+{
+	struct mdc_dma *mdma = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(mdma->clk);
+
+	return 0;
+}
+
+static int img_mdc_runtime_resume(struct device *dev)
+{
+	struct mdc_dma *mdma = dev_get_drvdata(dev);
+
+	return clk_prepare_enable(mdma->clk);
+}
+
+static int mdc_dma_probe(struct platform_device *pdev)
+{
+	struct mdc_dma *mdma;
+	struct resource *res;
+	unsigned int i;
+	u32 val;
+	int ret;
+
+	mdma = devm_kzalloc(&pdev->dev, sizeof(*mdma), GFP_KERNEL);
+	if (!mdma)
+		return -ENOMEM;
+	platform_set_drvdata(pdev, mdma);
+
+	mdma->soc = of_device_get_match_data(&pdev->dev);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	mdma->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(mdma->regs))
+		return PTR_ERR(mdma->regs);
+
+	mdma->periph_regs = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+							    "img,cr-periph");
+	if (IS_ERR(mdma->periph_regs))
+		return PTR_ERR(mdma->periph_regs);
+
+	mdma->clk = devm_clk_get(&pdev->dev, "sys");
+	if (IS_ERR(mdma->clk))
+		return PTR_ERR(mdma->clk);
+
+	dma_cap_zero(mdma->dma_dev.cap_mask);
+	dma_cap_set(DMA_SLAVE, mdma->dma_dev.cap_mask);
+	dma_cap_set(DMA_PRIVATE, mdma->dma_dev.cap_mask);
+	dma_cap_set(DMA_CYCLIC, mdma->dma_dev.cap_mask);
+	dma_cap_set(DMA_MEMCPY, mdma->dma_dev.cap_mask);
+
+	val = mdc_readl(mdma, MDC_GLOBAL_CONFIG_A);
+	mdma->nr_channels = (val >> MDC_GLOBAL_CONFIG_A_DMA_CONTEXTS_SHIFT) &
+		MDC_GLOBAL_CONFIG_A_DMA_CONTEXTS_MASK;
+	mdma->nr_threads =
+		1 << ((val >> MDC_GLOBAL_CONFIG_A_THREAD_ID_WIDTH_SHIFT) &
+		      MDC_GLOBAL_CONFIG_A_THREAD_ID_WIDTH_MASK);
+	mdma->bus_width =
+		(1 << ((val >> MDC_GLOBAL_CONFIG_A_SYS_DAT_WIDTH_SHIFT) &
+		       MDC_GLOBAL_CONFIG_A_SYS_DAT_WIDTH_MASK)) / 8;
+	/*
+	 * Although transfer sizes of up to MDC_TRANSFER_SIZE_MASK + 1 bytes
+	 * are supported, this makes it possible for the value reported in
+	 * MDC_ACTIVE_TRANSFER_SIZE to be ambiguous - an active transfer size
+	 * of MDC_TRANSFER_SIZE_MASK may indicate either that 0 bytes or
+	 * MDC_TRANSFER_SIZE_MASK + 1 bytes are remaining.  To eliminate this
+	 * ambiguity, restrict transfer sizes to one bus-width less than the
+	 * actual maximum.
+	 */
+	mdma->max_xfer_size = MDC_TRANSFER_SIZE_MASK + 1 - mdma->bus_width;
+
+	of_property_read_u32(pdev->dev.of_node, "dma-channels",
+			     &mdma->nr_channels);
+	ret = of_property_read_u32(pdev->dev.of_node,
+				   "img,max-burst-multiplier",
+				   &mdma->max_burst_mult);
+	if (ret)
+		return ret;
+
+	mdma->dma_dev.dev = &pdev->dev;
+	mdma->dma_dev.device_prep_slave_sg = mdc_prep_slave_sg;
+	mdma->dma_dev.device_prep_dma_cyclic = mdc_prep_dma_cyclic;
+	mdma->dma_dev.device_prep_dma_memcpy = mdc_prep_dma_memcpy;
+	mdma->dma_dev.device_alloc_chan_resources = mdc_alloc_chan_resources;
+	mdma->dma_dev.device_free_chan_resources = mdc_free_chan_resources;
+	mdma->dma_dev.device_tx_status = mdc_tx_status;
+	mdma->dma_dev.device_issue_pending = mdc_issue_pending;
+	mdma->dma_dev.device_terminate_all = mdc_terminate_all;
+	mdma->dma_dev.device_synchronize = mdc_synchronize;
+	mdma->dma_dev.device_config = mdc_slave_config;
+
+	mdma->dma_dev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	mdma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
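+	/* Advertise every power-of-two bus width up to the system data width. */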
+	for (i = 1; i <= mdma->bus_width; i <<= 1) {
+		mdma->dma_dev.src_addr_widths |= BIT(i);
+		mdma->dma_dev.dst_addr_widths |= BIT(i);
+	}
+
+	INIT_LIST_HEAD(&mdma->dma_dev.channels);
+	for (i = 0; i < mdma->nr_channels; i++) {
+		struct mdc_chan *mchan = &mdma->channels[i];
+
+		mchan->mdma = mdma;
+		mchan->chan_nr = i;
+		mchan->irq = platform_get_irq(pdev, i);
+		if (mchan->irq < 0)
+			return mchan->irq;
+
+		ret = devm_request_irq(&pdev->dev, mchan->irq, mdc_chan_irq,
+				       IRQ_TYPE_LEVEL_HIGH,
+				       dev_name(&pdev->dev), mchan);
+		if (ret < 0)
+			return ret;
+
+		mchan->vc.desc_free = mdc_desc_free;
+		vchan_init(&mchan->vc, &mdma->dma_dev);
+	}
+
+	mdma->desc_pool = dmam_pool_create(dev_name(&pdev->dev), &pdev->dev,
+					   sizeof(struct mdc_hw_list_desc),
+					   4, 0);
+	if (!mdma->desc_pool)
+		return -ENOMEM;
+
+	pm_runtime_enable(&pdev->dev);
+	if (!pm_runtime_enabled(&pdev->dev)) {
+		ret = img_mdc_runtime_resume(&pdev->dev);
+		if (ret)
+			return ret;
+	}
+
+	ret = dma_async_device_register(&mdma->dma_dev);
+	if (ret)
+		goto suspend;
+
+	ret = of_dma_controller_register(pdev->dev.of_node, mdc_of_xlate, mdma);
+	if (ret)
+		goto unregister;
+
+	dev_info(&pdev->dev, "MDC with %u channels and %u threads\n",
+		 mdma->nr_channels, mdma->nr_threads);
+
+	return 0;
+
+unregister:
+	dma_async_device_unregister(&mdma->dma_dev);
+suspend:
+	if (!pm_runtime_enabled(&pdev->dev))
+		img_mdc_runtime_suspend(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+	return ret;
+}
+
+static int mdc_dma_remove(struct platform_device *pdev)
+{
+	struct mdc_dma *mdma = platform_get_drvdata(pdev);
+	struct mdc_chan *mchan, *next;
+
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&mdma->dma_dev);
+
+	list_for_each_entry_safe(mchan, next, &mdma->dma_dev.channels,
+				 vc.chan.device_node) {
+		list_del(&mchan->vc.chan.device_node);
+
+		devm_free_irq(&pdev->dev, mchan->irq, mchan);
+
+		tasklet_kill(&mchan->vc.task);
+	}
+
+	pm_runtime_disable(&pdev->dev);
+	if (!pm_runtime_status_suspended(&pdev->dev))
+		img_mdc_runtime_suspend(&pdev->dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int img_mdc_suspend_late(struct device *dev)
+{
+	struct mdc_dma *mdma = dev_get_drvdata(dev);
+	int i;
+
+	/* Check that all channels are idle */
+	for (i = 0; i < mdma->nr_channels; i++) {
+		struct mdc_chan *mchan = &mdma->channels[i];
+
+		if (unlikely(mchan->desc))
+			return -EBUSY;
+	}
+
+	return pm_runtime_force_suspend(dev);
+}
+
+static int img_mdc_resume_early(struct device *dev)
+{
+	return pm_runtime_force_resume(dev);
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static const struct dev_pm_ops img_mdc_pm_ops = {
+	SET_RUNTIME_PM_OPS(img_mdc_runtime_suspend,
+			   img_mdc_runtime_resume, NULL)
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(img_mdc_suspend_late,
+				     img_mdc_resume_early)
+};
+
+static struct platform_driver mdc_dma_driver = {
+	.driver = {
+		.name = "img-mdc-dma",
+		.pm = &img_mdc_pm_ops,
+		.of_match_table = of_match_ptr(mdc_dma_of_match),
+	},
+	.probe = mdc_dma_probe,
+	.remove = mdc_dma_remove,
+};
+module_platform_driver(mdc_dma_driver);
+
+MODULE_DESCRIPTION("IMG Multi-threaded DMA Controller (MDC) driver");
+MODULE_AUTHOR("Andrew Bresticker <abrestic@chromium.org>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
new file mode 100644
index 0000000..75b6ff0
--- /dev/null
+++ b/drivers/dma/imx-dma.c
@@ -0,0 +1,1268 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// drivers/dma/imx-dma.c
+//
+// This file contains a driver for the Freescale i.MX DMA engine
+// found on i.MX1/21/27
+//
+// Copyright 2010 Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>
+// Copyright 2012 Javier Martin, Vista Silicon <javier.martin@vista-silicon.com>
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+
+#include <asm/irq.h>
+#include <linux/platform_data/dma-imx.h>
+
+#include "dmaengine.h"
+#define IMXDMA_MAX_CHAN_DESCRIPTORS	16
+#define IMX_DMA_CHANNELS  16
+
+#define IMX_DMA_2D_SLOTS	2
+#define IMX_DMA_2D_SLOT_A	0
+#define IMX_DMA_2D_SLOT_B	1
+
+#define IMX_DMA_LENGTH_LOOP	((unsigned int)-1)
+#define IMX_DMA_MEMSIZE_32	(0 << 4)
+#define IMX_DMA_MEMSIZE_8	(1 << 4)
+#define IMX_DMA_MEMSIZE_16	(2 << 4)
+#define IMX_DMA_TYPE_LINEAR	(0 << 10)
+#define IMX_DMA_TYPE_2D		(1 << 10)
+#define IMX_DMA_TYPE_FIFO	(2 << 10)
+
+#define IMX_DMA_ERR_BURST     (1 << 0)
+#define IMX_DMA_ERR_REQUEST   (1 << 1)
+#define IMX_DMA_ERR_TRANSFER  (1 << 2)
+#define IMX_DMA_ERR_BUFFER    (1 << 3)
+#define IMX_DMA_ERR_TIMEOUT   (1 << 4)
+
+#define DMA_DCR     0x00		/* Control Register */
+#define DMA_DISR    0x04		/* Interrupt status Register */
+#define DMA_DIMR    0x08		/* Interrupt mask Register */
+#define DMA_DBTOSR  0x0c		/* Burst timeout status Register */
+#define DMA_DRTOSR  0x10		/* Request timeout Register */
+#define DMA_DSESR   0x14		/* Transfer Error Status Register */
+#define DMA_DBOSR   0x18		/* Buffer overflow status Register */
+#define DMA_DBTOCR  0x1c		/* Burst timeout control Register */
+#define DMA_WSRA    0x40		/* W-Size Register A */
+#define DMA_XSRA    0x44		/* X-Size Register A */
+#define DMA_YSRA    0x48		/* Y-Size Register A */
+#define DMA_WSRB    0x4c		/* W-Size Register B */
+#define DMA_XSRB    0x50		/* X-Size Register B */
+#define DMA_YSRB    0x54		/* Y-Size Register B */
+#define DMA_SAR(x)  (0x80 + ((x) << 6))	/* Source Address Registers */
+#define DMA_DAR(x)  (0x84 + ((x) << 6))	/* Destination Address Registers */
+#define DMA_CNTR(x) (0x88 + ((x) << 6))	/* Count Registers */
+#define DMA_CCR(x)  (0x8c + ((x) << 6))	/* Control Registers */
+#define DMA_RSSR(x) (0x90 + ((x) << 6))	/* Request source select Registers */
+#define DMA_BLR(x)  (0x94 + ((x) << 6))	/* Burst length Registers */
+#define DMA_RTOR(x) (0x98 + ((x) << 6))	/* Request timeout Registers */
+#define DMA_BUCR(x) (0x98 + ((x) << 6))	/* Bus Utilization Registers */
+#define DMA_CCNR(x) (0x9C + ((x) << 6))	/* Channel counter Registers */
+
+#define DCR_DRST           (1<<1)
+#define DCR_DEN            (1<<0)
+#define DBTOCR_EN          (1<<15)
+#define DBTOCR_CNT(x)      ((x) & 0x7fff)
+#define CNTR_CNT(x)        ((x) & 0xffffff)
+#define CCR_ACRPT          (1<<14)
+#define CCR_DMOD_LINEAR    (0x0 << 12)
+#define CCR_DMOD_2D        (0x1 << 12)
+#define CCR_DMOD_FIFO      (0x2 << 12)
+#define CCR_DMOD_EOBFIFO   (0x3 << 12)
+#define CCR_SMOD_LINEAR    (0x0 << 10)
+#define CCR_SMOD_2D        (0x1 << 10)
+#define CCR_SMOD_FIFO      (0x2 << 10)
+#define CCR_SMOD_EOBFIFO   (0x3 << 10)
+#define CCR_MDIR_DEC       (1<<9)
+#define CCR_MSEL_B         (1<<8)
+#define CCR_DSIZ_32        (0x0 << 6)
+#define CCR_DSIZ_8         (0x1 << 6)
+#define CCR_DSIZ_16        (0x2 << 6)
+#define CCR_SSIZ_32        (0x0 << 4)
+#define CCR_SSIZ_8         (0x1 << 4)
+#define CCR_SSIZ_16        (0x2 << 4)
+#define CCR_REN            (1<<3)
+#define CCR_RPT            (1<<2)
+#define CCR_FRC            (1<<1)
+#define CCR_CEN            (1<<0)
+#define RTOR_EN            (1<<15)
+#define RTOR_CLK           (1<<14)
+#define RTOR_PSC           (1<<13)
+
+enum  imxdma_prep_type {
+	IMXDMA_DESC_MEMCPY,
+	IMXDMA_DESC_INTERLEAVED,
+	IMXDMA_DESC_SLAVE_SG,
+	IMXDMA_DESC_CYCLIC,
+};
+
+struct imx_dma_2d_config {
+	u16		xsr;
+	u16		ysr;
+	u16		wsr;
+	int		count;
+};
+
+struct imxdma_desc {
+	struct list_head		node;
+	struct dma_async_tx_descriptor	desc;
+	enum dma_status			status;
+	dma_addr_t			src;
+	dma_addr_t			dest;
+	size_t				len;
+	enum dma_transfer_direction	direction;
+	enum imxdma_prep_type		type;
+	/* For memcpy and interleaved */
+	unsigned int			config_port;
+	unsigned int			config_mem;
+	/* For interleaved transfers */
+	unsigned int			x;
+	unsigned int			y;
+	unsigned int			w;
+	/* For slave sg and cyclic */
+	struct scatterlist		*sg;
+	unsigned int			sgcount;
+};
+
+struct imxdma_channel {
+	int				hw_chaining;
+	struct timer_list		watchdog;
+	struct imxdma_engine		*imxdma;
+	unsigned int			channel;
+
+	struct tasklet_struct		dma_tasklet;
+	struct list_head		ld_free;
+	struct list_head		ld_queue;
+	struct list_head		ld_active;
+	int				descs_allocated;
+	enum dma_slave_buswidth		word_size;
+	dma_addr_t			per_address;
+	u32				watermark_level;
+	struct dma_chan			chan;
+	struct dma_async_tx_descriptor	desc;
+	enum dma_status			status;
+	int				dma_request;
+	struct scatterlist		*sg_list;
+	u32				ccr_from_device;
+	u32				ccr_to_device;
+	bool				enabled_2d;
+	int				slot_2d;
+	unsigned int			irq;
+};
+
+enum imx_dma_type {
+	IMX1_DMA,
+	IMX21_DMA,
+	IMX27_DMA,
+};
+
+struct imxdma_engine {
+	struct device			*dev;
+	struct device_dma_parameters	dma_parms;
+	struct dma_device		dma_device;
+	void __iomem			*base;
+	struct clk			*dma_ahb;
+	struct clk			*dma_ipg;
+	spinlock_t			lock;
+	struct imx_dma_2d_config	slots_2d[IMX_DMA_2D_SLOTS];
+	struct imxdma_channel		channel[IMX_DMA_CHANNELS];
+	enum imx_dma_type		devtype;
+	unsigned int			irq;
+	unsigned int			irq_err;
+
+};
+
+struct imxdma_filter_data {
+	struct imxdma_engine	*imxdma;
+	int			 request;
+};
+
+static const struct platform_device_id imx_dma_devtype[] = {
+	{
+		.name = "imx1-dma",
+		.driver_data = IMX1_DMA,
+	}, {
+		.name = "imx21-dma",
+		.driver_data = IMX21_DMA,
+	}, {
+		.name = "imx27-dma",
+		.driver_data = IMX27_DMA,
+	}, {
+		/* sentinel */
+	}
+};
+MODULE_DEVICE_TABLE(platform, imx_dma_devtype);
+
+static const struct of_device_id imx_dma_of_dev_id[] = {
+	{
+		.compatible = "fsl,imx1-dma",
+		.data = &imx_dma_devtype[IMX1_DMA],
+	}, {
+		.compatible = "fsl,imx21-dma",
+		.data = &imx_dma_devtype[IMX21_DMA],
+	}, {
+		.compatible = "fsl,imx27-dma",
+		.data = &imx_dma_devtype[IMX27_DMA],
+	}, {
+		/* sentinel */
+	}
+};
+MODULE_DEVICE_TABLE(of, imx_dma_of_dev_id);
+
+static inline int is_imx1_dma(struct imxdma_engine *imxdma)
+{
+	return imxdma->devtype == IMX1_DMA;
+}
+
+static inline int is_imx27_dma(struct imxdma_engine *imxdma)
+{
+	return imxdma->devtype == IMX27_DMA;
+}
+
+static struct imxdma_channel *to_imxdma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct imxdma_channel, chan);
+}
+
+static inline bool imxdma_chan_is_doing_cyclic(struct imxdma_channel *imxdmac)
+{
+	struct imxdma_desc *desc;
+
+	if (!list_empty(&imxdmac->ld_active)) {
+		desc = list_first_entry(&imxdmac->ld_active, struct imxdma_desc,
+					node);
+		if (desc->type == IMXDMA_DESC_CYCLIC)
+			return true;
+	}
+	return false;
+}
+
+
+
+static void imx_dmav1_writel(struct imxdma_engine *imxdma, unsigned val,
+			     unsigned offset)
+{
+	__raw_writel(val, imxdma->base + offset);
+}
+
+static unsigned imx_dmav1_readl(struct imxdma_engine *imxdma, unsigned offset)
+{
+	return __raw_readl(imxdma->base + offset);
+}
+
+static int imxdma_hw_chain(struct imxdma_channel *imxdmac)
+{
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+
+	if (is_imx27_dma(imxdma))
+		return imxdmac->hw_chaining;
+	else
+		return 0;
+}
+
+/*
+ * imxdma_sg_next - prepare next chunk for scatter-gather DMA emulation
+ */
+static inline int imxdma_sg_next(struct imxdma_desc *d)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct scatterlist *sg = d->sg;
+	unsigned long now;
+
+	now = min(d->len, sg_dma_len(sg));
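+	/* A length of IMX_DMA_LENGTH_LOOP marks an endless transfer and is never decremented. */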
+	if (d->len != IMX_DMA_LENGTH_LOOP)
+		d->len -= now;
+
+	if (d->direction == DMA_DEV_TO_MEM)
+		imx_dmav1_writel(imxdma, sg->dma_address,
+				 DMA_DAR(imxdmac->channel));
+	else
+		imx_dmav1_writel(imxdma, sg->dma_address,
+				 DMA_SAR(imxdmac->channel));
+
+	imx_dmav1_writel(imxdma, now, DMA_CNTR(imxdmac->channel));
+
+	dev_dbg(imxdma->dev, " %s channel: %d dst 0x%08x, src 0x%08x, "
+		"size 0x%08x\n", __func__, imxdmac->channel,
+		 imx_dmav1_readl(imxdma, DMA_DAR(imxdmac->channel)),
+		 imx_dmav1_readl(imxdma, DMA_SAR(imxdmac->channel)),
+		 imx_dmav1_readl(imxdma, DMA_CNTR(imxdmac->channel)));
+
+	return now;
+}
+
+static void imxdma_enable_hw(struct imxdma_desc *d)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	int channel = imxdmac->channel;
+	unsigned long flags;
+
+	dev_dbg(imxdma->dev, "%s channel %d\n", __func__, channel);
+
+	local_irq_save(flags);
+
+	imx_dmav1_writel(imxdma, 1 << channel, DMA_DISR);
+	imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_DIMR) &
+			 ~(1 << channel), DMA_DIMR);
+	imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_CCR(channel)) |
+			 CCR_CEN | CCR_ACRPT, DMA_CCR(channel));
+
+	if (!is_imx1_dma(imxdma) &&
+			d->sg && imxdma_hw_chain(imxdmac)) {
+		d->sg = sg_next(d->sg);
+		if (d->sg) {
+			u32 tmp;
+			imxdma_sg_next(d);
+			tmp = imx_dmav1_readl(imxdma, DMA_CCR(channel));
+			imx_dmav1_writel(imxdma, tmp | CCR_RPT | CCR_ACRPT,
+					 DMA_CCR(channel));
+		}
+	}
+
+	local_irq_restore(flags);
+}
+
+static void imxdma_disable_hw(struct imxdma_channel *imxdmac)
+{
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	int channel = imxdmac->channel;
+	unsigned long flags;
+
+	dev_dbg(imxdma->dev, "%s channel %d\n", __func__, channel);
+
+	if (imxdma_hw_chain(imxdmac))
+		del_timer(&imxdmac->watchdog);
+
+	local_irq_save(flags);
+	imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_DIMR) |
+			 (1 << channel), DMA_DIMR);
+	imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_CCR(channel)) &
+			 ~CCR_CEN, DMA_CCR(channel));
+	imx_dmav1_writel(imxdma, 1 << channel, DMA_DISR);
+	local_irq_restore(flags);
+}
+
+static void imxdma_watchdog(struct timer_list *t)
+{
+	struct imxdma_channel *imxdmac = from_timer(imxdmac, t, watchdog);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	int channel = imxdmac->channel;
+
+	imx_dmav1_writel(imxdma, 0, DMA_CCR(channel));
+
+	/* Tasklet watchdog error handler */
+	tasklet_schedule(&imxdmac->dma_tasklet);
+	dev_dbg(imxdma->dev, "channel %d: watchdog timeout!\n",
+		imxdmac->channel);
+}
+
+static irqreturn_t imxdma_err_handler(int irq, void *dev_id)
+{
+	struct imxdma_engine *imxdma = dev_id;
+	unsigned int err_mask;
+	int i, disr;
+	int errcode;
+
+	disr = imx_dmav1_readl(imxdma, DMA_DISR);
+
+	err_mask = imx_dmav1_readl(imxdma, DMA_DBTOSR) |
+		   imx_dmav1_readl(imxdma, DMA_DRTOSR) |
+		   imx_dmav1_readl(imxdma, DMA_DSESR)  |
+		   imx_dmav1_readl(imxdma, DMA_DBOSR);
+
+	if (!err_mask)
+		return IRQ_HANDLED;
+
+	imx_dmav1_writel(imxdma, disr & err_mask, DMA_DISR);
+
+	for (i = 0; i < IMX_DMA_CHANNELS; i++) {
+		if (!(err_mask & (1 << i)))
+			continue;
+		errcode = 0;
+
+		if (imx_dmav1_readl(imxdma, DMA_DBTOSR) & (1 << i)) {
+			imx_dmav1_writel(imxdma, 1 << i, DMA_DBTOSR);
+			errcode |= IMX_DMA_ERR_BURST;
+		}
+		if (imx_dmav1_readl(imxdma, DMA_DRTOSR) & (1 << i)) {
+			imx_dmav1_writel(imxdma, 1 << i, DMA_DRTOSR);
+			errcode |= IMX_DMA_ERR_REQUEST;
+		}
+		if (imx_dmav1_readl(imxdma, DMA_DSESR) & (1 << i)) {
+			imx_dmav1_writel(imxdma, 1 << i, DMA_DSESR);
+			errcode |= IMX_DMA_ERR_TRANSFER;
+		}
+		if (imx_dmav1_readl(imxdma, DMA_DBOSR) & (1 << i)) {
+			imx_dmav1_writel(imxdma, 1 << i, DMA_DBOSR);
+			errcode |= IMX_DMA_ERR_BUFFER;
+		}
+		/* Tasklet error handler */
+		tasklet_schedule(&imxdma->channel[i].dma_tasklet);
+
+		dev_warn(imxdma->dev,
+			 "DMA timeout on channel %d -%s%s%s%s\n", i,
+			 errcode & IMX_DMA_ERR_BURST ?    " burst" : "",
+			 errcode & IMX_DMA_ERR_REQUEST ?  " request" : "",
+			 errcode & IMX_DMA_ERR_TRANSFER ? " transfer" : "",
+			 errcode & IMX_DMA_ERR_BUFFER ?   " buffer" : "");
+	}
+	return IRQ_HANDLED;
+}
+
+static void dma_irq_handle_channel(struct imxdma_channel *imxdmac)
+{
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	int chno = imxdmac->channel;
+	struct imxdma_desc *desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&imxdma->lock, flags);
+	if (list_empty(&imxdmac->ld_active)) {
+		spin_unlock_irqrestore(&imxdma->lock, flags);
+		goto out;
+	}
+
+	desc = list_first_entry(&imxdmac->ld_active,
+				struct imxdma_desc,
+				node);
+	spin_unlock_irqrestore(&imxdma->lock, flags);
+
+	if (desc->sg) {
+		u32 tmp;
+		desc->sg = sg_next(desc->sg);
+
+		if (desc->sg) {
+			imxdma_sg_next(desc);
+
+			tmp = imx_dmav1_readl(imxdma, DMA_CCR(chno));
+
+			if (imxdma_hw_chain(imxdmac)) {
+				/* FIXME: The timeout should probably be
+				 * configurable
+				 */
+				mod_timer(&imxdmac->watchdog,
+					jiffies + msecs_to_jiffies(500));
+
+				tmp |= CCR_CEN | CCR_RPT | CCR_ACRPT;
+				imx_dmav1_writel(imxdma, tmp, DMA_CCR(chno));
+			} else {
+				imx_dmav1_writel(imxdma, tmp & ~CCR_CEN,
+						 DMA_CCR(chno));
+				tmp |= CCR_CEN;
+			}
+
+			imx_dmav1_writel(imxdma, tmp, DMA_CCR(chno));
+
+			if (imxdma_chan_is_doing_cyclic(imxdmac))
+				/* Tasklet progression */
+				tasklet_schedule(&imxdmac->dma_tasklet);
+
+			return;
+		}
+
+		if (imxdma_hw_chain(imxdmac)) {
+			del_timer(&imxdmac->watchdog);
+			return;
+		}
+	}
+
+out:
+	imx_dmav1_writel(imxdma, 0, DMA_CCR(chno));
+	/* Tasklet irq */
+	tasklet_schedule(&imxdmac->dma_tasklet);
+}
+
+static irqreturn_t dma_irq_handler(int irq, void *dev_id)
+{
+	struct imxdma_engine *imxdma = dev_id;
+	int i, disr;
+
+	if (!is_imx1_dma(imxdma))
+		imxdma_err_handler(irq, dev_id);
+
+	disr = imx_dmav1_readl(imxdma, DMA_DISR);
+
+	dev_dbg(imxdma->dev, "%s called, disr=0x%08x\n", __func__, disr);
+
+	imx_dmav1_writel(imxdma, disr, DMA_DISR);
+	for (i = 0; i < IMX_DMA_CHANNELS; i++) {
+		if (disr & (1 << i))
+			dma_irq_handle_channel(&imxdma->channel[i]);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int imxdma_xfer_desc(struct imxdma_desc *d)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	int slot = -1;
+	int i;
+
+	/* Configure and enable */
+	switch (d->type) {
+	case IMXDMA_DESC_INTERLEAVED:
+		/* Try to get a free 2D slot */
+		for (i = 0; i < IMX_DMA_2D_SLOTS; i++) {
+			if ((imxdma->slots_2d[i].count > 0) &&
+			((imxdma->slots_2d[i].xsr != d->x) ||
+			(imxdma->slots_2d[i].ysr != d->y) ||
+			(imxdma->slots_2d[i].wsr != d->w)))
+				continue;
+			slot = i;
+			break;
+		}
+		if (slot < 0)
+			return -EBUSY;
+
+		imxdma->slots_2d[slot].xsr = d->x;
+		imxdma->slots_2d[slot].ysr = d->y;
+		imxdma->slots_2d[slot].wsr = d->w;
+		imxdma->slots_2d[slot].count++;
+
+		imxdmac->slot_2d = slot;
+		imxdmac->enabled_2d = true;
+
+		if (slot == IMX_DMA_2D_SLOT_A) {
+			d->config_mem &= ~CCR_MSEL_B;
+			d->config_port &= ~CCR_MSEL_B;
+			imx_dmav1_writel(imxdma, d->x, DMA_XSRA);
+			imx_dmav1_writel(imxdma, d->y, DMA_YSRA);
+			imx_dmav1_writel(imxdma, d->w, DMA_WSRA);
+		} else {
+			d->config_mem |= CCR_MSEL_B;
+			d->config_port |= CCR_MSEL_B;
+			imx_dmav1_writel(imxdma, d->x, DMA_XSRB);
+			imx_dmav1_writel(imxdma, d->y, DMA_YSRB);
+			imx_dmav1_writel(imxdma, d->w, DMA_WSRB);
+		}
+		/*
+		 * We fall through here intentionally, since a 2D transfer is
+		 * just a MEMCPY with the 2D slot configuration added on top.
+		 */
+	case IMXDMA_DESC_MEMCPY:
+		imx_dmav1_writel(imxdma, d->src, DMA_SAR(imxdmac->channel));
+		imx_dmav1_writel(imxdma, d->dest, DMA_DAR(imxdmac->channel));
+		imx_dmav1_writel(imxdma, d->config_mem | (d->config_port << 2),
+			 DMA_CCR(imxdmac->channel));
+
+		imx_dmav1_writel(imxdma, d->len, DMA_CNTR(imxdmac->channel));
+
+		dev_dbg(imxdma->dev,
+			"%s channel: %d dest=0x%08llx src=0x%08llx dma_length=%zu\n",
+			__func__, imxdmac->channel,
+			(unsigned long long)d->dest,
+			(unsigned long long)d->src, d->len);
+
+		break;
+	/* Cyclic transfer is the same as slave_sg with special sg configuration. */
+	case IMXDMA_DESC_CYCLIC:
+	case IMXDMA_DESC_SLAVE_SG:
+		if (d->direction == DMA_DEV_TO_MEM) {
+			imx_dmav1_writel(imxdma, imxdmac->per_address,
+					 DMA_SAR(imxdmac->channel));
+			imx_dmav1_writel(imxdma, imxdmac->ccr_from_device,
+					 DMA_CCR(imxdmac->channel));
+
+			dev_dbg(imxdma->dev,
+				"%s channel: %d sg=%p sgcount=%d total length=%zu dev_addr=0x%08llx (dev2mem)\n",
+				__func__, imxdmac->channel,
+				d->sg, d->sgcount, d->len,
+				(unsigned long long)imxdmac->per_address);
+		} else if (d->direction == DMA_MEM_TO_DEV) {
+			imx_dmav1_writel(imxdma, imxdmac->per_address,
+					 DMA_DAR(imxdmac->channel));
+			imx_dmav1_writel(imxdma, imxdmac->ccr_to_device,
+					 DMA_CCR(imxdmac->channel));
+
+			dev_dbg(imxdma->dev,
+				"%s channel: %d sg=%p sgcount=%d total length=%zu dev_addr=0x%08llx (mem2dev)\n",
+				__func__, imxdmac->channel,
+				d->sg, d->sgcount, d->len,
+				(unsigned long long)imxdmac->per_address);
+		} else {
+			dev_err(imxdma->dev, "%s channel: %d bad dma mode\n",
+				__func__, imxdmac->channel);
+			return -EINVAL;
+		}
+
+		imxdma_sg_next(d);
+
+		break;
+	default:
+		return -EINVAL;
+	}
+	imxdma_enable_hw(d);
+	return 0;
+}
+
+static void imxdma_tasklet(unsigned long data)
+{
+	struct imxdma_channel *imxdmac = (void *)data;
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&imxdma->lock, flags);
+
+	if (list_empty(&imxdmac->ld_active)) {
+		/* Someone might have called terminate all */
+		spin_unlock_irqrestore(&imxdma->lock, flags);
+		return;
+	}
+	desc = list_first_entry(&imxdmac->ld_active, struct imxdma_desc, node);
+
+	/*
+	 * If we are dealing with a cyclic descriptor, keep it on ld_active
+	 * and don't mark it as complete; only non-cyclic descriptors are
+	 * marked as complete here.
+	 */
+	if (imxdma_chan_is_doing_cyclic(imxdmac))
+		goto out;
+	else
+		dma_cookie_complete(&desc->desc);
+
+	/* Free 2D slot if it was an interleaved transfer */
+	if (imxdmac->enabled_2d) {
+		imxdma->slots_2d[imxdmac->slot_2d].count--;
+		imxdmac->enabled_2d = false;
+	}
+
+	list_move_tail(imxdmac->ld_active.next, &imxdmac->ld_free);
+
+	if (!list_empty(&imxdmac->ld_queue)) {
+		desc = list_first_entry(&imxdmac->ld_queue, struct imxdma_desc,
+					node);
+		list_move_tail(imxdmac->ld_queue.next, &imxdmac->ld_active);
+		if (imxdma_xfer_desc(desc) < 0)
+			dev_warn(imxdma->dev, "%s: channel: %d couldn't xfer desc\n",
+				 __func__, imxdmac->channel);
+	}
+out:
+	spin_unlock_irqrestore(&imxdma->lock, flags);
+
+	dmaengine_desc_get_callback_invoke(&desc->desc, NULL);
+}
+
+static int imxdma_terminate_all(struct dma_chan *chan)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	unsigned long flags;
+
+	imxdma_disable_hw(imxdmac);
+
+	spin_lock_irqsave(&imxdma->lock, flags);
+	list_splice_tail_init(&imxdmac->ld_active, &imxdmac->ld_free);
+	list_splice_tail_init(&imxdmac->ld_queue, &imxdmac->ld_free);
+	spin_unlock_irqrestore(&imxdma->lock, flags);
+	return 0;
+}
+
+static int imxdma_config(struct dma_chan *chan,
+			 struct dma_slave_config *dmaengine_cfg)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	unsigned int mode = 0;
+
+	if (dmaengine_cfg->direction == DMA_DEV_TO_MEM) {
+		imxdmac->per_address = dmaengine_cfg->src_addr;
+		imxdmac->watermark_level = dmaengine_cfg->src_maxburst;
+		imxdmac->word_size = dmaengine_cfg->src_addr_width;
+	} else {
+		imxdmac->per_address = dmaengine_cfg->dst_addr;
+		imxdmac->watermark_level = dmaengine_cfg->dst_maxburst;
+		imxdmac->word_size = dmaengine_cfg->dst_addr_width;
+	}
+
+	switch (imxdmac->word_size) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		mode = IMX_DMA_MEMSIZE_8;
+		break;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		mode = IMX_DMA_MEMSIZE_16;
+		break;
+	default:
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		mode = IMX_DMA_MEMSIZE_32;
+		break;
+	}
+
+	imxdmac->hw_chaining = 0;
+
+	imxdmac->ccr_from_device = (mode | IMX_DMA_TYPE_FIFO) |
+		((IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR) << 2) |
+		CCR_REN;
+	imxdmac->ccr_to_device =
+		(IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR) |
+		((mode | IMX_DMA_TYPE_FIFO) << 2) | CCR_REN;
+	imx_dmav1_writel(imxdma, imxdmac->dma_request,
+			 DMA_RSSR(imxdmac->channel));
+
+	/* Set burst length */
+	imx_dmav1_writel(imxdma, imxdmac->watermark_level *
+			 imxdmac->word_size, DMA_BLR(imxdmac->channel));
+
+	return 0;
+}
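+
+/*
+ * Worked example for the encoding above (values chosen purely for
+ * illustration): a MEM_TO_DEV channel configured with
+ * dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES and dst_maxburst = 8 gets
+ *
+ *	mode          = IMX_DMA_MEMSIZE_16;
+ *	ccr_to_device = (IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR) |
+ *			((IMX_DMA_MEMSIZE_16 | IMX_DMA_TYPE_FIFO) << 2) |
+ *			CCR_REN;
+ *	BLR           = 8 * 2 = 16 bytes per burst;
+ *
+ * i.e. 32-bit linear accesses on the memory side and 16-bit FIFO accesses
+ * on the peripheral side.
+ */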
+
+static enum dma_status imxdma_tx_status(struct dma_chan *chan,
+					    dma_cookie_t cookie,
+					    struct dma_tx_state *txstate)
+{
+	return dma_cookie_status(chan, cookie, txstate);
+}
+
+static dma_cookie_t imxdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(tx->chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	dma_cookie_t cookie;
+	unsigned long flags;
+
+	spin_lock_irqsave(&imxdma->lock, flags);
+	list_move_tail(imxdmac->ld_free.next, &imxdmac->ld_queue);
+	cookie = dma_cookie_assign(tx);
+	spin_unlock_irqrestore(&imxdma->lock, flags);
+
+	return cookie;
+}
+
+static int imxdma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imx_dma_data *data = chan->private;
+
+	if (data != NULL)
+		imxdmac->dma_request = data->dma_request;
+
+	while (imxdmac->descs_allocated < IMXDMA_MAX_CHAN_DESCRIPTORS) {
+		struct imxdma_desc *desc;
+
+		desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+		if (!desc)
+			break;
+		memset(&desc->desc, 0, sizeof(struct dma_async_tx_descriptor));
+		dma_async_tx_descriptor_init(&desc->desc, chan);
+		desc->desc.tx_submit = imxdma_tx_submit;
+		/* txd.flags will be overwritten in prep funcs */
+		desc->desc.flags = DMA_CTRL_ACK;
+		desc->status = DMA_COMPLETE;
+
+		list_add_tail(&desc->node, &imxdmac->ld_free);
+		imxdmac->descs_allocated++;
+	}
+
+	if (!imxdmac->descs_allocated)
+		return -ENOMEM;
+
+	return imxdmac->descs_allocated;
+}
+
+static void imxdma_free_chan_resources(struct dma_chan *chan)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc, *_desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&imxdma->lock, flags);
+
+	imxdma_disable_hw(imxdmac);
+	list_splice_tail_init(&imxdmac->ld_active, &imxdmac->ld_free);
+	list_splice_tail_init(&imxdmac->ld_queue, &imxdmac->ld_free);
+
+	spin_unlock_irqrestore(&imxdma->lock, flags);
+
+	list_for_each_entry_safe(desc, _desc, &imxdmac->ld_free, node) {
+		kfree(desc);
+		imxdmac->descs_allocated--;
+	}
+	INIT_LIST_HEAD(&imxdmac->ld_free);
+
+	kfree(imxdmac->sg_list);
+	imxdmac->sg_list = NULL;
+}
+
+static struct dma_async_tx_descriptor *imxdma_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct scatterlist *sg;
+	int i, dma_length = 0;
+	struct imxdma_desc *desc;
+
+	if (list_empty(&imxdmac->ld_free) ||
+	    imxdma_chan_is_doing_cyclic(imxdmac))
+		return NULL;
+
+	desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node);
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		dma_length += sg_dma_len(sg);
+	}
+
+	switch (imxdmac->word_size) {
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		if (sg_dma_len(sgl) & 3 || sgl->dma_address & 3)
+			return NULL;
+		break;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		if (sg_dma_len(sgl) & 1 || sgl->dma_address & 1)
+			return NULL;
+		break;
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		break;
+	default:
+		return NULL;
+	}
+
+	desc->type = IMXDMA_DESC_SLAVE_SG;
+	desc->sg = sgl;
+	desc->sgcount = sg_len;
+	desc->len = dma_length;
+	desc->direction = direction;
+	if (direction == DMA_DEV_TO_MEM) {
+		desc->src = imxdmac->per_address;
+	} else {
+		desc->dest = imxdmac->per_address;
+	}
+	desc->desc.callback = NULL;
+	desc->desc.callback_param = NULL;
+
+	return &desc->desc;
+}
+
+static struct dma_async_tx_descriptor *imxdma_prep_dma_cyclic(
+		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
+		size_t period_len, enum dma_transfer_direction direction,
+		unsigned long flags)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc;
+	int i;
+	unsigned int periods = buf_len / period_len;
+
+	dev_dbg(imxdma->dev, "%s channel: %d buf_len=%zu period_len=%zu\n",
+			__func__, imxdmac->channel, buf_len, period_len);
+
+	if (list_empty(&imxdmac->ld_free) ||
+	    imxdma_chan_is_doing_cyclic(imxdmac))
+		return NULL;
+
+	desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node);
+
+	kfree(imxdmac->sg_list);
+
+	imxdmac->sg_list = kcalloc(periods + 1,
+			sizeof(struct scatterlist), GFP_ATOMIC);
+	if (!imxdmac->sg_list)
+		return NULL;
+
+	sg_init_table(imxdmac->sg_list, periods);
+
+	for (i = 0; i < periods; i++) {
+		sg_assign_page(&imxdmac->sg_list[i], NULL);
+		imxdmac->sg_list[i].offset = 0;
+		imxdmac->sg_list[i].dma_address = dma_addr;
+		sg_dma_len(&imxdmac->sg_list[i]) = period_len;
+		dma_addr += period_len;
+	}
+
+	/* close the loop */
+	sg_chain(imxdmac->sg_list, periods + 1, imxdmac->sg_list);
+
+	desc->type = IMXDMA_DESC_CYCLIC;
+	desc->sg = imxdmac->sg_list;
+	desc->sgcount = periods;
+	desc->len = IMX_DMA_LENGTH_LOOP;
+	desc->direction = direction;
+	if (direction == DMA_DEV_TO_MEM) {
+		desc->src = imxdmac->per_address;
+	} else {
+		desc->dest = imxdmac->per_address;
+	}
+	desc->desc.callback = NULL;
+	desc->desc.callback_param = NULL;
+
+	return &desc->desc;
+}
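+
+/*
+ * Rough sketch of how a client (an audio driver, say) would drive the
+ * cyclic path above through the generic dmaengine API. The channel, the
+ * buffer, the period size and the callback names are assumptions made
+ * purely for illustration:
+ *
+ *	struct dma_async_tx_descriptor *txd;
+ *
+ *	dmaengine_slave_config(chan, &cfg);
+ *	txd = dmaengine_prep_dma_cyclic(chan, buf_phys, 4 * period_bytes,
+ *					period_bytes, DMA_MEM_TO_DEV,
+ *					DMA_PREP_INTERRUPT);
+ *	txd->callback = period_done;
+ *	txd->callback_param = client;
+ *	dmaengine_submit(txd);
+ *	dma_async_issue_pending(chan);
+ *
+ * The transfer then cycles over the four periods, invoking the callback
+ * once per completed period, until the client calls
+ * dmaengine_terminate_all().
+ */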
+
+static struct dma_async_tx_descriptor *imxdma_prep_dma_memcpy(
+	struct dma_chan *chan, dma_addr_t dest,
+	dma_addr_t src, size_t len, unsigned long flags)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc;
+
+	dev_dbg(imxdma->dev, "%s channel: %d src=0x%llx dst=0x%llx len=%zu\n",
+		__func__, imxdmac->channel, (unsigned long long)src,
+		(unsigned long long)dest, len);
+
+	if (list_empty(&imxdmac->ld_free) ||
+	    imxdma_chan_is_doing_cyclic(imxdmac))
+		return NULL;
+
+	desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node);
+
+	desc->type = IMXDMA_DESC_MEMCPY;
+	desc->src = src;
+	desc->dest = dest;
+	desc->len = len;
+	desc->direction = DMA_MEM_TO_MEM;
+	desc->config_port = IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR;
+	desc->config_mem = IMX_DMA_MEMSIZE_32 | IMX_DMA_TYPE_LINEAR;
+	desc->desc.callback = NULL;
+	desc->desc.callback_param = NULL;
+
+	return &desc->desc;
+}
+
+static struct dma_async_tx_descriptor *imxdma_prep_dma_interleaved(
+	struct dma_chan *chan, struct dma_interleaved_template *xt,
+	unsigned long flags)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc;
+
+	dev_dbg(imxdma->dev, "%s channel: %d src_start=0x%llx dst_start=0x%llx\n"
+		"   src_sgl=%s dst_sgl=%s numf=%zu frame_size=%zu\n", __func__,
+		imxdmac->channel, (unsigned long long)xt->src_start,
+		(unsigned long long) xt->dst_start,
+		xt->src_sgl ? "true" : "false", xt->dst_sgl ? "true" : "false",
+		xt->numf, xt->frame_size);
+
+	if (list_empty(&imxdmac->ld_free) ||
+	    imxdma_chan_is_doing_cyclic(imxdmac))
+		return NULL;
+
+	if (xt->frame_size != 1 || xt->numf <= 0 || xt->dir != DMA_MEM_TO_MEM)
+		return NULL;
+
+	desc = list_first_entry(&imxdmac->ld_free, struct imxdma_desc, node);
+
+	desc->type = IMXDMA_DESC_INTERLEAVED;
+	desc->src = xt->src_start;
+	desc->dest = xt->dst_start;
+	desc->x = xt->sgl[0].size;
+	desc->y = xt->numf;
+	desc->w = xt->sgl[0].icg + desc->x;
+	desc->len = desc->x * desc->y;
+	desc->direction = DMA_MEM_TO_MEM;
+	desc->config_port = IMX_DMA_MEMSIZE_32;
+	desc->config_mem = IMX_DMA_MEMSIZE_32;
+	if (xt->src_sgl)
+		desc->config_mem |= IMX_DMA_TYPE_2D;
+	if (xt->dst_sgl)
+		desc->config_port |= IMX_DMA_TYPE_2D;
+	desc->desc.callback = NULL;
+	desc->desc.callback_param = NULL;
+
+	return &desc->desc;
+}
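+
+/*
+ * Worked example for the 2D parameters above (numbers chosen purely for
+ * illustration): copying 240 rows of 320 bytes out of a buffer that has a
+ * 64-byte gap after each row, i.e.
+ *
+ *	xt->sgl[0].size = 320, xt->sgl[0].icg = 64, xt->numf = 240
+ *
+ * yields x = 320 (row length), w = 320 + 64 = 384 (row-to-row stride),
+ * y = 240 (row count) and len = x * y = 76800 bytes actually transferred.
+ */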
+
+static void imxdma_issue_pending(struct dma_chan *chan)
+{
+	struct imxdma_channel *imxdmac = to_imxdma_chan(chan);
+	struct imxdma_engine *imxdma = imxdmac->imxdma;
+	struct imxdma_desc *desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&imxdma->lock, flags);
+	if (list_empty(&imxdmac->ld_active) &&
+	    !list_empty(&imxdmac->ld_queue)) {
+		desc = list_first_entry(&imxdmac->ld_queue,
+					struct imxdma_desc, node);
+
+		if (imxdma_xfer_desc(desc) < 0) {
+			dev_warn(imxdma->dev,
+				 "%s: channel: %d couldn't issue DMA xfer\n",
+				 __func__, imxdmac->channel);
+		} else {
+			list_move_tail(imxdmac->ld_queue.next,
+				       &imxdmac->ld_active);
+		}
+	}
+	spin_unlock_irqrestore(&imxdma->lock, flags);
+}
+
+static bool imxdma_filter_fn(struct dma_chan *chan, void *param)
+{
+	struct imxdma_filter_data *fdata = param;
+	struct imxdma_channel *imxdma_chan = to_imxdma_chan(chan);
+
+	if (chan->device->dev != fdata->imxdma->dev)
+		return false;
+
+	imxdma_chan->dma_request = fdata->request;
+	chan->private = NULL;
+
+	return true;
+}
+
+static struct dma_chan *imxdma_xlate(struct of_phandle_args *dma_spec,
+						struct of_dma *ofdma)
+{
+	int count = dma_spec->args_count;
+	struct imxdma_engine *imxdma = ofdma->of_dma_data;
+	struct imxdma_filter_data fdata = {
+		.imxdma = imxdma,
+	};
+
+	if (count != 1)
+		return NULL;
+
+	fdata.request = dma_spec->args[0];
+
+	return dma_request_channel(imxdma->dma_device.cap_mask,
+					imxdma_filter_fn, &fdata);
+}
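+
+/*
+ * Since the translation above expects exactly one specifier cell holding
+ * the DMA request number, a client device-tree node would reference the
+ * controller along these lines (the phandle and request number are
+ * illustrative only):
+ *
+ *	dmas = <&dma 8>;
+ *	dma-names = "rx";
+ */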
+
+static int __init imxdma_probe(struct platform_device *pdev)
+{
+	struct imxdma_engine *imxdma;
+	struct resource *res;
+	const struct of_device_id *of_id;
+	int ret, i;
+	int irq, irq_err;
+
+	of_id = of_match_device(imx_dma_of_dev_id, &pdev->dev);
+	if (of_id)
+		pdev->id_entry = of_id->data;
+
+	imxdma = devm_kzalloc(&pdev->dev, sizeof(*imxdma), GFP_KERNEL);
+	if (!imxdma)
+		return -ENOMEM;
+
+	imxdma->dev = &pdev->dev;
+	imxdma->devtype = pdev->id_entry->driver_data;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	imxdma->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(imxdma->base))
+		return PTR_ERR(imxdma->base);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	imxdma->dma_ipg = devm_clk_get(&pdev->dev, "ipg");
+	if (IS_ERR(imxdma->dma_ipg))
+		return PTR_ERR(imxdma->dma_ipg);
+
+	imxdma->dma_ahb = devm_clk_get(&pdev->dev, "ahb");
+	if (IS_ERR(imxdma->dma_ahb))
+		return PTR_ERR(imxdma->dma_ahb);
+
+	ret = clk_prepare_enable(imxdma->dma_ipg);
+	if (ret)
+		return ret;
+	ret = clk_prepare_enable(imxdma->dma_ahb);
+	if (ret)
+		goto disable_dma_ipg_clk;
+
+	/* reset DMA module */
+	imx_dmav1_writel(imxdma, DCR_DRST, DMA_DCR);
+
+	if (is_imx1_dma(imxdma)) {
+		ret = devm_request_irq(&pdev->dev, irq,
+				       dma_irq_handler, 0, "DMA", imxdma);
+		if (ret) {
+			dev_warn(imxdma->dev, "Can't register IRQ for DMA\n");
+			goto disable_dma_ahb_clk;
+		}
+		imxdma->irq = irq;
+
+		irq_err = platform_get_irq(pdev, 1);
+		if (irq_err < 0) {
+			ret = irq_err;
+			goto disable_dma_ahb_clk;
+		}
+
+		ret = devm_request_irq(&pdev->dev, irq_err,
+				       imxdma_err_handler, 0, "DMA", imxdma);
+		if (ret) {
+			dev_warn(imxdma->dev, "Can't register ERRIRQ for DMA\n");
+			goto disable_dma_ahb_clk;
+		}
+		imxdma->irq_err = irq_err;
+	}
+
+	/* enable DMA module */
+	imx_dmav1_writel(imxdma, DCR_DEN, DMA_DCR);
+
+	/* clear all interrupts */
+	imx_dmav1_writel(imxdma, (1 << IMX_DMA_CHANNELS) - 1, DMA_DISR);
+
+	/* disable interrupts */
+	imx_dmav1_writel(imxdma, (1 << IMX_DMA_CHANNELS) - 1, DMA_DIMR);
+
+	INIT_LIST_HEAD(&imxdma->dma_device.channels);
+
+	dma_cap_set(DMA_SLAVE, imxdma->dma_device.cap_mask);
+	dma_cap_set(DMA_CYCLIC, imxdma->dma_device.cap_mask);
+	dma_cap_set(DMA_MEMCPY, imxdma->dma_device.cap_mask);
+	dma_cap_set(DMA_INTERLEAVE, imxdma->dma_device.cap_mask);
+
+	/* Initialize 2D global parameters */
+	for (i = 0; i < IMX_DMA_2D_SLOTS; i++)
+		imxdma->slots_2d[i].count = 0;
+
+	spin_lock_init(&imxdma->lock);
+
+	/* Initialize channel parameters */
+	for (i = 0; i < IMX_DMA_CHANNELS; i++) {
+		struct imxdma_channel *imxdmac = &imxdma->channel[i];
+
+		if (!is_imx1_dma(imxdma)) {
+			ret = devm_request_irq(&pdev->dev, irq + i,
+					dma_irq_handler, 0, "DMA", imxdma);
+			if (ret) {
+				dev_warn(imxdma->dev,
+					 "Can't register IRQ %d for DMA channel %d\n",
+					 irq + i, i);
+				goto disable_dma_ahb_clk;
+			}
+
+			imxdmac->irq = irq + i;
+			timer_setup(&imxdmac->watchdog, imxdma_watchdog, 0);
+		}
+
+		imxdmac->imxdma = imxdma;
+
+		INIT_LIST_HEAD(&imxdmac->ld_queue);
+		INIT_LIST_HEAD(&imxdmac->ld_free);
+		INIT_LIST_HEAD(&imxdmac->ld_active);
+
+		tasklet_init(&imxdmac->dma_tasklet, imxdma_tasklet,
+			     (unsigned long)imxdmac);
+		imxdmac->chan.device = &imxdma->dma_device;
+		dma_cookie_init(&imxdmac->chan);
+		imxdmac->channel = i;
+
+		/* Add the channel to the DMAC list */
+		list_add_tail(&imxdmac->chan.device_node,
+			      &imxdma->dma_device.channels);
+	}
+
+	imxdma->dma_device.dev = &pdev->dev;
+
+	imxdma->dma_device.device_alloc_chan_resources = imxdma_alloc_chan_resources;
+	imxdma->dma_device.device_free_chan_resources = imxdma_free_chan_resources;
+	imxdma->dma_device.device_tx_status = imxdma_tx_status;
+	imxdma->dma_device.device_prep_slave_sg = imxdma_prep_slave_sg;
+	imxdma->dma_device.device_prep_dma_cyclic = imxdma_prep_dma_cyclic;
+	imxdma->dma_device.device_prep_dma_memcpy = imxdma_prep_dma_memcpy;
+	imxdma->dma_device.device_prep_interleaved_dma = imxdma_prep_dma_interleaved;
+	imxdma->dma_device.device_config = imxdma_config;
+	imxdma->dma_device.device_terminate_all = imxdma_terminate_all;
+	imxdma->dma_device.device_issue_pending = imxdma_issue_pending;
+
+	platform_set_drvdata(pdev, imxdma);
+
+	imxdma->dma_device.copy_align = DMAENGINE_ALIGN_4_BYTES;
+	imxdma->dma_device.dev->dma_parms = &imxdma->dma_parms;
+	dma_set_max_seg_size(imxdma->dma_device.dev, 0xffffff);
+
+	ret = dma_async_device_register(&imxdma->dma_device);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to register\n");
+		goto disable_dma_ahb_clk;
+	}
+
+	if (pdev->dev.of_node) {
+		ret = of_dma_controller_register(pdev->dev.of_node,
+				imxdma_xlate, imxdma);
+		if (ret) {
+			dev_err(&pdev->dev, "unable to register of_dma_controller\n");
+			goto err_of_dma_controller;
+		}
+	}
+
+	return 0;
+
+err_of_dma_controller:
+	dma_async_device_unregister(&imxdma->dma_device);
+disable_dma_ahb_clk:
+	clk_disable_unprepare(imxdma->dma_ahb);
+disable_dma_ipg_clk:
+	clk_disable_unprepare(imxdma->dma_ipg);
+	return ret;
+}
+
+static void imxdma_free_irq(struct platform_device *pdev, struct imxdma_engine *imxdma)
+{
+	int i;
+
+	if (is_imx1_dma(imxdma)) {
+		disable_irq(imxdma->irq);
+		disable_irq(imxdma->irq_err);
+	}
+
+	for (i = 0; i < IMX_DMA_CHANNELS; i++) {
+		struct imxdma_channel *imxdmac = &imxdma->channel[i];
+
+		if (!is_imx1_dma(imxdma))
+			disable_irq(imxdmac->irq);
+
+		tasklet_kill(&imxdmac->dma_tasklet);
+	}
+}
+
+static int imxdma_remove(struct platform_device *pdev)
+{
+	struct imxdma_engine *imxdma = platform_get_drvdata(pdev);
+
+	imxdma_free_irq(pdev, imxdma);
+
+	dma_async_device_unregister(&imxdma->dma_device);
+
+	if (pdev->dev.of_node)
+		of_dma_controller_free(pdev->dev.of_node);
+
+	clk_disable_unprepare(imxdma->dma_ipg);
+	clk_disable_unprepare(imxdma->dma_ahb);
+
+	return 0;
+}
+
+static struct platform_driver imxdma_driver = {
+	.driver		= {
+		.name	= "imx-dma",
+		.of_match_table = imx_dma_of_dev_id,
+	},
+	.id_table	= imx_dma_devtype,
+	.remove		= imxdma_remove,
+};
+
+static int __init imxdma_module_init(void)
+{
+	return platform_driver_probe(&imxdma_driver, imxdma_probe);
+}
+subsys_initcall(imxdma_module_init);
+
+MODULE_AUTHOR("Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>");
+MODULE_DESCRIPTION("i.MX dma driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
new file mode 100644
index 0000000..cb1b44d
--- /dev/null
+++ b/drivers/dma/imx-sdma.c
@@ -0,0 +1,2161 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// drivers/dma/imx-sdma.c
+//
+// This file contains a driver for the Freescale Smart DMA engine
+//
+// Copyright 2010 Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>
+//
+// Based on code from Freescale:
+//
+// Copyright 2004-2009 Freescale Semiconductor, Inc. All Rights Reserved.
+
+#include <linux/init.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/spinlock.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/dmaengine.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/workqueue.h>
+
+#include <asm/irq.h>
+#include <linux/platform_data/dma-imx-sdma.h>
+#include <linux/platform_data/dma-imx.h>
+#include <linux/regmap.h>
+#include <linux/mfd/syscon.h>
+#include <linux/mfd/syscon/imx6q-iomuxc-gpr.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+/* SDMA registers */
+#define SDMA_H_C0PTR		0x000
+#define SDMA_H_INTR		0x004
+#define SDMA_H_STATSTOP		0x008
+#define SDMA_H_START		0x00c
+#define SDMA_H_EVTOVR		0x010
+#define SDMA_H_DSPOVR		0x014
+#define SDMA_H_HOSTOVR		0x018
+#define SDMA_H_EVTPEND		0x01c
+#define SDMA_H_DSPENBL		0x020
+#define SDMA_H_RESET		0x024
+#define SDMA_H_EVTERR		0x028
+#define SDMA_H_INTRMSK		0x02c
+#define SDMA_H_PSW		0x030
+#define SDMA_H_EVTERRDBG	0x034
+#define SDMA_H_CONFIG		0x038
+#define SDMA_ONCE_ENB		0x040
+#define SDMA_ONCE_DATA		0x044
+#define SDMA_ONCE_INSTR		0x048
+#define SDMA_ONCE_STAT		0x04c
+#define SDMA_ONCE_CMD		0x050
+#define SDMA_EVT_MIRROR		0x054
+#define SDMA_ILLINSTADDR	0x058
+#define SDMA_CHN0ADDR		0x05c
+#define SDMA_ONCE_RTB		0x060
+#define SDMA_XTRIG_CONF1	0x070
+#define SDMA_XTRIG_CONF2	0x074
+#define SDMA_CHNENBL0_IMX35	0x200
+#define SDMA_CHNENBL0_IMX31	0x080
+#define SDMA_CHNPRI_0		0x100
+
+/*
+ * Buffer descriptor status values.
+ */
+#define BD_DONE  0x01
+#define BD_WRAP  0x02
+#define BD_CONT  0x04
+#define BD_INTR  0x08
+#define BD_RROR  0x10
+#define BD_LAST  0x20
+#define BD_EXTD  0x80
+
+/*
+ * Data Node descriptor status values.
+ */
+#define DND_END_OF_FRAME  0x80
+#define DND_END_OF_XFER   0x40
+#define DND_DONE          0x20
+#define DND_UNUSED        0x01
+
+/*
+ * IPCV2 descriptor status values.
+ */
+#define BD_IPCV2_END_OF_FRAME  0x40
+
+#define IPCV2_MAX_NODES        50
+/*
+ * Error bit set in the CCB status field by the SDMA,
+ * in setbd routine, in case of a transfer error
+ */
+#define DATA_ERROR  0x10000000
+
+/*
+ * Buffer descriptor commands.
+ */
+#define C0_ADDR             0x01
+#define C0_LOAD             0x02
+#define C0_DUMP             0x03
+#define C0_SETCTX           0x07
+#define C0_GETCTX           0x03
+#define C0_SETDM            0x01
+#define C0_SETPM            0x04
+#define C0_GETDM            0x02
+#define C0_GETPM            0x08
+/*
+ * Change endianness indicator in the BD command field
+ */
+#define CHANGE_ENDIANNESS   0x80
+
+/*
+ *  p_2_p watermark_level description
+ *	Bits		Name			Description
+ *	0-7		Lower WML		Lower watermark level
+ *	8		PS			1: Pad Swallowing
+ *						0: No Pad Swallowing
+ *	9		PA			1: Pad Adding
+ *						0: No Pad Adding
+ *	10		SPDIF			If this bit is set both source
+ *						and destination are on SPBA
+ *	11		Source Bit(SP)		1: Source on SPBA
+ *						0: Source on AIPS
+ *	12		Destination Bit(DP)	1: Destination on SPBA
+ *						0: Destination on AIPS
+ *	13-15		---------		MUST BE 0
+ *	16-23		Higher WML		HWML
+ *	24-27		N			Total number of samples after
+ *						which Pad adding/Swallowing
+ *						must be done. It must be odd.
+ *	28		Lower WML Event(LWE)	SDMA events reg to check for
+ *						LWML event mask
+ *						0: LWE in EVENTS register
+ *						1: LWE in EVENTS2 register
+ *	29		Higher WML Event(HWE)	SDMA events reg to check for
+ *						HWML event mask
+ *						0: HWE in EVENTS register
+ *						1: HWE in EVENTS2 register
+ *	30		---------		MUST BE 0
+ *	31		CONT			1: Amount of samples to be
+ *						transferred is unknown and
+ *						script will keep on
+ *						transferring samples as long as
+ *						both events are detected and
+ *						script must be manually stopped
+ *						by the application
+ *						0: The amount of samples to be
+ *						transferred is equal to the
+ *						count field of mode word
+ */
+#define SDMA_WATERMARK_LEVEL_LWML	0xFF
+#define SDMA_WATERMARK_LEVEL_PS		BIT(8)
+#define SDMA_WATERMARK_LEVEL_PA		BIT(9)
+#define SDMA_WATERMARK_LEVEL_SPDIF	BIT(10)
+#define SDMA_WATERMARK_LEVEL_SP		BIT(11)
+#define SDMA_WATERMARK_LEVEL_DP		BIT(12)
+#define SDMA_WATERMARK_LEVEL_HWML	(0xFF << 16)
+#define SDMA_WATERMARK_LEVEL_LWE	BIT(28)
+#define SDMA_WATERMARK_LEVEL_HWE	BIT(29)
+#define SDMA_WATERMARK_LEVEL_CONT	BIT(31)
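+
+/*
+ * Encoding example (values chosen purely for illustration): a p_2_p
+ * transfer with a lower watermark of 4, a higher watermark of 8 and both
+ * peripherals on the SPBA bus corresponds to a watermark_level of
+ *
+ *	4 | (8 << 16) | SDMA_WATERMARK_LEVEL_SP |
+ *	SDMA_WATERMARK_LEVEL_DP | SDMA_WATERMARK_LEVEL_CONT
+ *
+ * following the bit layout documented above; sdma_set_watermarklevel_for_p2p()
+ * below derives this from the src/dst maxburst values and addresses.
+ */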
+
+#define SDMA_DMA_BUSWIDTHS	(BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
+#define SDMA_DMA_DIRECTIONS	(BIT(DMA_DEV_TO_MEM) | \
+				 BIT(DMA_MEM_TO_DEV) | \
+				 BIT(DMA_DEV_TO_DEV))
+
+/*
+ * Mode/Count of data node descriptors - IPCv2
+ */
+struct sdma_mode_count {
+#define SDMA_BD_MAX_CNT	0xffff
+	u32 count   : 16; /* size of the buffer pointed by this BD */
+	u32 status  :  8; /* E,R,I,C,W,D status bits stored here */
+	u32 command :  8; /* command mostly used for channel 0 */
+};
+
+/*
+ * Buffer descriptor
+ */
+struct sdma_buffer_descriptor {
+	struct sdma_mode_count  mode;
+	u32 buffer_addr;	/* address of the buffer described */
+	u32 ext_buffer_addr;	/* extended buffer address */
+} __attribute__ ((packed));
+
+/**
+ * struct sdma_channel_control - Channel control Block
+ *
+ * @current_bd_ptr:	current buffer descriptor processed
+ * @base_bd_ptr:	first element of buffer descriptor array
+ * @unused:		padding. The SDMA engine expects an array of 128-bit
+ *			control blocks
+ */
+struct sdma_channel_control {
+	u32 current_bd_ptr;
+	u32 base_bd_ptr;
+	u32 unused[2];
+} __attribute__ ((packed));
+
+/**
+ * struct sdma_state_registers - SDMA context for a channel
+ *
+ * @pc:		program counter
+ * @unused1:	unused
+ * @t:		test bit: status of arithmetic & test instruction
+ * @rpc:	return program counter
+ * @unused0:	unused
+ * @sf:		source fault while loading data
+ * @spc:	loop start program counter
+ * @unused2:	unused
+ * @df:		destination fault while storing data
+ * @epc:	loop end program counter
+ * @lm:		loop mode
+ */
+struct sdma_state_registers {
+	u32 pc     :14;
+	u32 unused1: 1;
+	u32 t      : 1;
+	u32 rpc    :14;
+	u32 unused0: 1;
+	u32 sf     : 1;
+	u32 spc    :14;
+	u32 unused2: 1;
+	u32 df     : 1;
+	u32 epc    :14;
+	u32 lm     : 2;
+} __attribute__ ((packed));
+
+/**
+ * struct sdma_context_data - sdma context specific to a channel
+ *
+ * @channel_state:	channel state bits
+ * @gReg:		general registers
+ * @mda:		burst dma destination address register
+ * @msa:		burst dma source address register
+ * @ms:			burst dma status register
+ * @md:			burst dma data register
+ * @pda:		peripheral dma destination address register
+ * @psa:		peripheral dma source address register
+ * @ps:			peripheral dma status register
+ * @pd:			peripheral dma data register
+ * @ca:			CRC polynomial register
+ * @cs:			CRC accumulator register
+ * @dda:		dedicated core destination address register
+ * @dsa:		dedicated core source address register
+ * @ds:			dedicated core status register
+ * @dd:			dedicated core data register
+ * @scratch0:		1st word of dedicated ram for context switch
+ * @scratch1:		2nd word of dedicated ram for context switch
+ * @scratch2:		3rd word of dedicated ram for context switch
+ * @scratch3:		4th word of dedicated ram for context switch
+ * @scratch4:		5th word of dedicated ram for context switch
+ * @scratch5:		6th word of dedicated ram for context switch
+ * @scratch6:		7th word of dedicated ram for context switch
+ * @scratch7:		8th word of dedicated ram for context switch
+ */
+struct sdma_context_data {
+	struct sdma_state_registers  channel_state;
+	u32  gReg[8];
+	u32  mda;
+	u32  msa;
+	u32  ms;
+	u32  md;
+	u32  pda;
+	u32  psa;
+	u32  ps;
+	u32  pd;
+	u32  ca;
+	u32  cs;
+	u32  dda;
+	u32  dsa;
+	u32  ds;
+	u32  dd;
+	u32  scratch0;
+	u32  scratch1;
+	u32  scratch2;
+	u32  scratch3;
+	u32  scratch4;
+	u32  scratch5;
+	u32  scratch6;
+	u32  scratch7;
+} __attribute__ ((packed));
+
+
+struct sdma_engine;
+
+/**
+ * struct sdma_desc - descriptor structure for one transfer
+ * @vd:			descriptor for virt dma
+ * @num_bd:		number of buffer descriptors used for this transfer
+ * @bd_phys:		physical address of bd
+ * @buf_tail:		ID of the buffer that was processed
+ * @buf_ptail:		ID of the previous buffer that was processed
+ * @period_len:		period length, used in cyclic.
+ * @chn_real_count:	the real count updated from bd->mode.count
+ * @chn_count:		the transfer count set
+ * @sdmac:		sdma_channel pointer
+ * @bd:			pointer to the allocated buffer descriptors
+ */
+struct sdma_desc {
+	struct virt_dma_desc	vd;
+	unsigned int		num_bd;
+	dma_addr_t		bd_phys;
+	unsigned int		buf_tail;
+	unsigned int		buf_ptail;
+	unsigned int		period_len;
+	unsigned int		chn_real_count;
+	unsigned int		chn_count;
+	struct sdma_channel	*sdmac;
+	struct sdma_buffer_descriptor *bd;
+};
+
+/**
+ * struct sdma_channel - housekeeping for a SDMA channel
+ *
+ * @vc:			virt_dma base structure
+ * @desc:		sdma descriptor, including vd and other transfer-specific members
+ * @sdma:		pointer to the SDMA engine for this channel
+ * @channel:		the channel number, matches dmaengine chan_id + 1
+ * @direction:		transfer type. Needed for setting SDMA script
+ * @peripheral_type:	Peripheral type. Needed for setting SDMA script
+ * @event_id0:		aka dma request line
+ * @event_id1:		for channels that use 2 events
+ * @word_size:		peripheral access size
+ * @pc_from_device:	script address for the device_2_memory case
+ * @pc_to_device:	script address for the memory_2_device case
+ * @device_to_device:	script address for the device_2_device case
+ * @pc_to_pc:		script address for the memory_2_memory case
+ * @flags:		loop mode or not
+ * @per_address:	peripheral source or destination address in the common
+ *			case; destination address in the p_2_p case
+ * @per_address2:	peripheral source address in p_2_p case
+ * @event_mask:		event mask used in p_2_p script
+ * @watermark_level:	value for gReg[7]; some scripts (e.g. p_2_p) extend it
+ *			beyond the basic watermark
+ * @shp_addr:		value for gReg[6]
+ * @per_addr:		value for gReg[2]
+ * @status:		status of dma channel
+ * @data:		specific sdma interface structure
+ * @terminate_worker:	worker used to defer descriptor cleanup after the
+ *			channel has been terminated
+ */
+struct sdma_channel {
+	struct virt_dma_chan		vc;
+	struct sdma_desc		*desc;
+	struct sdma_engine		*sdma;
+	unsigned int			channel;
+	enum dma_transfer_direction		direction;
+	enum sdma_peripheral_type	peripheral_type;
+	unsigned int			event_id0;
+	unsigned int			event_id1;
+	enum dma_slave_buswidth		word_size;
+	unsigned int			pc_from_device, pc_to_device;
+	unsigned int			device_to_device;
+	unsigned int                    pc_to_pc;
+	unsigned long			flags;
+	dma_addr_t			per_address, per_address2;
+	unsigned long			event_mask[2];
+	unsigned long			watermark_level;
+	u32				shp_addr, per_addr;
+	enum dma_status			status;
+	struct imx_dma_data		data;
+	struct work_struct		terminate_worker;
+};
+
+#define IMX_DMA_SG_LOOP		BIT(0)
+
+#define MAX_DMA_CHANNELS 32
+#define MXC_SDMA_DEFAULT_PRIORITY 1
+#define MXC_SDMA_MIN_PRIORITY 1
+#define MXC_SDMA_MAX_PRIORITY 7
+
+#define SDMA_FIRMWARE_MAGIC 0x414d4453
+
+/**
+ * struct sdma_firmware_header - Layout of the firmware image
+ *
+ * @magic:		"SDMA"
+ * @version_major:	increased whenever layout of struct
+ *			sdma_script_start_addrs changes.
+ * @version_minor:	firmware minor version (for binary compatible changes)
+ * @script_addrs_start:	offset of struct sdma_script_start_addrs in this image
+ * @num_script_addrs:	Number of script addresses in this image
+ * @ram_code_start:	offset of SDMA ram image in this firmware image
+ * @ram_code_size:	size of SDMA ram image
+ * @script_addrs:	Stores the start address of the SDMA scripts
+ *			(in SDMA memory space)
+ */
+struct sdma_firmware_header {
+	u32	magic;
+	u32	version_major;
+	u32	version_minor;
+	u32	script_addrs_start;
+	u32	num_script_addrs;
+	u32	ram_code_start;
+	u32	ram_code_size;
+};
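+
+/*
+ * Sketch of how a loader walks this image, mirroring the layout described
+ * above (fw is the struct firmware obtained from request_firmware(); this
+ * is an illustration, not a quote of the actual loader):
+ *
+ *	struct sdma_firmware_header *header = (void *)fw->data;
+ *	struct sdma_script_start_addrs *addr;
+ *	const void *ram_code;
+ *
+ *	if (header->magic != SDMA_FIRMWARE_MAGIC)
+ *		return;
+ *	addr = (void *)header + header->script_addrs_start;
+ *	ram_code = (void *)header + header->ram_code_start;
+ *
+ * after which addr points at num_script_addrs script start addresses and
+ * ram_code at ram_code_size bytes of SDMA RAM image.
+ */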
+
+struct sdma_driver_data {
+	int chnenbl0;
+	int num_events;
+	struct sdma_script_start_addrs	*script_addrs;
+};
+
+struct sdma_engine {
+	struct device			*dev;
+	struct device_dma_parameters	dma_parms;
+	struct sdma_channel		channel[MAX_DMA_CHANNELS];
+	struct sdma_channel_control	*channel_control;
+	void __iomem			*regs;
+	struct sdma_context_data	*context;
+	dma_addr_t			context_phys;
+	struct dma_device		dma_device;
+	struct clk			*clk_ipg;
+	struct clk			*clk_ahb;
+	spinlock_t			channel_0_lock;
+	u32				script_number;
+	struct sdma_script_start_addrs	*script_addrs;
+	const struct sdma_driver_data	*drvdata;
+	u32				spba_start_addr;
+	u32				spba_end_addr;
+	unsigned int			irq;
+	dma_addr_t			bd0_phys;
+	struct sdma_buffer_descriptor	*bd0;
+};
+
+static struct sdma_driver_data sdma_imx31 = {
+	.chnenbl0 = SDMA_CHNENBL0_IMX31,
+	.num_events = 32,
+};
+
+static struct sdma_script_start_addrs sdma_script_imx25 = {
+	.ap_2_ap_addr = 729,
+	.uart_2_mcu_addr = 904,
+	.per_2_app_addr = 1255,
+	.mcu_2_app_addr = 834,
+	.uartsh_2_mcu_addr = 1120,
+	.per_2_shp_addr = 1329,
+	.mcu_2_shp_addr = 1048,
+	.ata_2_mcu_addr = 1560,
+	.mcu_2_ata_addr = 1479,
+	.app_2_per_addr = 1189,
+	.app_2_mcu_addr = 770,
+	.shp_2_per_addr = 1407,
+	.shp_2_mcu_addr = 979,
+};
+
+static struct sdma_driver_data sdma_imx25 = {
+	.chnenbl0 = SDMA_CHNENBL0_IMX35,
+	.num_events = 48,
+	.script_addrs = &sdma_script_imx25,
+};
+
+static struct sdma_driver_data sdma_imx35 = {
+	.chnenbl0 = SDMA_CHNENBL0_IMX35,
+	.num_events = 48,
+};
+
+static struct sdma_script_start_addrs sdma_script_imx51 = {
+	.ap_2_ap_addr = 642,
+	.uart_2_mcu_addr = 817,
+	.mcu_2_app_addr = 747,
+	.mcu_2_shp_addr = 961,
+	.ata_2_mcu_addr = 1473,
+	.mcu_2_ata_addr = 1392,
+	.app_2_per_addr = 1033,
+	.app_2_mcu_addr = 683,
+	.shp_2_per_addr = 1251,
+	.shp_2_mcu_addr = 892,
+};
+
+static struct sdma_driver_data sdma_imx51 = {
+	.chnenbl0 = SDMA_CHNENBL0_IMX35,
+	.num_events = 48,
+	.script_addrs = &sdma_script_imx51,
+};
+
+static struct sdma_script_start_addrs sdma_script_imx53 = {
+	.ap_2_ap_addr = 642,
+	.app_2_mcu_addr = 683,
+	.mcu_2_app_addr = 747,
+	.uart_2_mcu_addr = 817,
+	.shp_2_mcu_addr = 891,
+	.mcu_2_shp_addr = 960,
+	.uartsh_2_mcu_addr = 1032,
+	.spdif_2_mcu_addr = 1100,
+	.mcu_2_spdif_addr = 1134,
+	.firi_2_mcu_addr = 1193,
+	.mcu_2_firi_addr = 1290,
+};
+
+static struct sdma_driver_data sdma_imx53 = {
+	.chnenbl0 = SDMA_CHNENBL0_IMX35,
+	.num_events = 48,
+	.script_addrs = &sdma_script_imx53,
+};
+
+static struct sdma_script_start_addrs sdma_script_imx6q = {
+	.ap_2_ap_addr = 642,
+	.uart_2_mcu_addr = 817,
+	.mcu_2_app_addr = 747,
+	.per_2_per_addr = 6331,
+	.uartsh_2_mcu_addr = 1032,
+	.mcu_2_shp_addr = 960,
+	.app_2_mcu_addr = 683,
+	.shp_2_mcu_addr = 891,
+	.spdif_2_mcu_addr = 1100,
+	.mcu_2_spdif_addr = 1134,
+};
+
+static struct sdma_driver_data sdma_imx6q = {
+	.chnenbl0 = SDMA_CHNENBL0_IMX35,
+	.num_events = 48,
+	.script_addrs = &sdma_script_imx6q,
+};
+
+static struct sdma_script_start_addrs sdma_script_imx7d = {
+	.ap_2_ap_addr = 644,
+	.uart_2_mcu_addr = 819,
+	.mcu_2_app_addr = 749,
+	.uartsh_2_mcu_addr = 1034,
+	.mcu_2_shp_addr = 962,
+	.app_2_mcu_addr = 685,
+	.shp_2_mcu_addr = 893,
+	.spdif_2_mcu_addr = 1102,
+	.mcu_2_spdif_addr = 1136,
+};
+
+static struct sdma_driver_data sdma_imx7d = {
+	.chnenbl0 = SDMA_CHNENBL0_IMX35,
+	.num_events = 48,
+	.script_addrs = &sdma_script_imx7d,
+};
+
+static const struct platform_device_id sdma_devtypes[] = {
+	{
+		.name = "imx25-sdma",
+		.driver_data = (unsigned long)&sdma_imx25,
+	}, {
+		.name = "imx31-sdma",
+		.driver_data = (unsigned long)&sdma_imx31,
+	}, {
+		.name = "imx35-sdma",
+		.driver_data = (unsigned long)&sdma_imx35,
+	}, {
+		.name = "imx51-sdma",
+		.driver_data = (unsigned long)&sdma_imx51,
+	}, {
+		.name = "imx53-sdma",
+		.driver_data = (unsigned long)&sdma_imx53,
+	}, {
+		.name = "imx6q-sdma",
+		.driver_data = (unsigned long)&sdma_imx6q,
+	}, {
+		.name = "imx7d-sdma",
+		.driver_data = (unsigned long)&sdma_imx7d,
+	}, {
+		/* sentinel */
+	}
+};
+MODULE_DEVICE_TABLE(platform, sdma_devtypes);
+
+static const struct of_device_id sdma_dt_ids[] = {
+	{ .compatible = "fsl,imx6q-sdma", .data = &sdma_imx6q, },
+	{ .compatible = "fsl,imx53-sdma", .data = &sdma_imx53, },
+	{ .compatible = "fsl,imx51-sdma", .data = &sdma_imx51, },
+	{ .compatible = "fsl,imx35-sdma", .data = &sdma_imx35, },
+	{ .compatible = "fsl,imx31-sdma", .data = &sdma_imx31, },
+	{ .compatible = "fsl,imx25-sdma", .data = &sdma_imx25, },
+	{ .compatible = "fsl,imx7d-sdma", .data = &sdma_imx7d, },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, sdma_dt_ids);
+
+#define SDMA_H_CONFIG_DSPDMA	BIT(12) /* indicates if the DSPDMA is used */
+#define SDMA_H_CONFIG_RTD_PINS	BIT(11) /* indicates if Real-Time Debug pins are enabled */
+#define SDMA_H_CONFIG_ACR	BIT(4)  /* indicates if AHB freq /core freq = 2 or 1 */
+#define SDMA_H_CONFIG_CSM	(3)       /* indicates which context switch mode is selected */
+
+static inline u32 chnenbl_ofs(struct sdma_engine *sdma, unsigned int event)
+{
+	u32 chnenbl0 = sdma->drvdata->chnenbl0;
+	return chnenbl0 + event * 4;
+}
+
+static int sdma_config_ownership(struct sdma_channel *sdmac,
+		bool event_override, bool mcu_override, bool dsp_override)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+	unsigned long evt, mcu, dsp;
+
+	if (event_override && mcu_override && dsp_override)
+		return -EINVAL;
+
+	evt = readl_relaxed(sdma->regs + SDMA_H_EVTOVR);
+	mcu = readl_relaxed(sdma->regs + SDMA_H_HOSTOVR);
+	dsp = readl_relaxed(sdma->regs + SDMA_H_DSPOVR);
+
+	if (dsp_override)
+		__clear_bit(channel, &dsp);
+	else
+		__set_bit(channel, &dsp);
+
+	if (event_override)
+		__clear_bit(channel, &evt);
+	else
+		__set_bit(channel, &evt);
+
+	if (mcu_override)
+		__clear_bit(channel, &mcu);
+	else
+		__set_bit(channel, &mcu);
+
+	writel_relaxed(evt, sdma->regs + SDMA_H_EVTOVR);
+	writel_relaxed(mcu, sdma->regs + SDMA_H_HOSTOVR);
+	writel_relaxed(dsp, sdma->regs + SDMA_H_DSPOVR);
+
+	return 0;
+}
+
+static void sdma_enable_channel(struct sdma_engine *sdma, int channel)
+{
+	writel(BIT(channel), sdma->regs + SDMA_H_START);
+}
+
+/*
+ * sdma_run_channel0 - run a channel and wait till it's done
+ */
+static int sdma_run_channel0(struct sdma_engine *sdma)
+{
+	int ret;
+	u32 reg;
+
+	sdma_enable_channel(sdma, 0);
+
+	ret = readl_relaxed_poll_timeout_atomic(sdma->regs + SDMA_H_STATSTOP,
+						reg, !(reg & 1), 1, 500);
+	if (ret)
+		dev_err(sdma->dev, "Timeout waiting for CH0 ready\n");
+
+	/* Set bits of CONFIG register with dynamic context switching */
+	if (readl(sdma->regs + SDMA_H_CONFIG) == 0)
+		writel_relaxed(SDMA_H_CONFIG_CSM, sdma->regs + SDMA_H_CONFIG);
+
+	return ret;
+}
+
+static int sdma_load_script(struct sdma_engine *sdma, void *buf, int size,
+		u32 address)
+{
+	struct sdma_buffer_descriptor *bd0 = sdma->bd0;
+	void *buf_virt;
+	dma_addr_t buf_phys;
+	int ret;
+	unsigned long flags;
+
+	buf_virt = dma_alloc_coherent(NULL,
+			size,
+			&buf_phys, GFP_KERNEL);
+	if (!buf_virt) {
+		return -ENOMEM;
+	}
+
+	spin_lock_irqsave(&sdma->channel_0_lock, flags);
+
+	bd0->mode.command = C0_SETPM;
+	bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD;
+	bd0->mode.count = size / 2;
+	bd0->buffer_addr = buf_phys;
+	bd0->ext_buffer_addr = address;
+
+	memcpy(buf_virt, buf, size);
+
+	ret = sdma_run_channel0(sdma);
+
+	spin_unlock_irqrestore(&sdma->channel_0_lock, flags);
+
+	dma_free_coherent(NULL, size, buf_virt, buf_phys);
+
+	return ret;
+}
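+
+/*
+ * Sizing note for the C0_SETPM command above: the count is given as
+ * size / 2, presumably because SDMA program space is addressed in 16-bit
+ * instruction words. Loading a 1024-byte RAM image at SDMA address 6144
+ * (numbers purely illustrative) would therefore give
+ *
+ *	bd0->mode.count      = 1024 / 2 = 512;
+ *	bd0->ext_buffer_addr = 6144;
+ *
+ * with bd0->buffer_addr pointing at the DMA-coherent copy of the image.
+ */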
+
+static void sdma_event_enable(struct sdma_channel *sdmac, unsigned int event)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+	unsigned long val;
+	u32 chnenbl = chnenbl_ofs(sdma, event);
+
+	val = readl_relaxed(sdma->regs + chnenbl);
+	__set_bit(channel, &val);
+	writel_relaxed(val, sdma->regs + chnenbl);
+}
+
+static void sdma_event_disable(struct sdma_channel *sdmac, unsigned int event)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+	u32 chnenbl = chnenbl_ofs(sdma, event);
+	unsigned long val;
+
+	val = readl_relaxed(sdma->regs + chnenbl);
+	__clear_bit(channel, &val);
+	writel_relaxed(val, sdma->regs + chnenbl);
+}
+
+static struct sdma_desc *to_sdma_desc(struct dma_async_tx_descriptor *t)
+{
+	return container_of(t, struct sdma_desc, vd.tx);
+}
+
+static void sdma_start_desc(struct sdma_channel *sdmac)
+{
+	struct virt_dma_desc *vd = vchan_next_desc(&sdmac->vc);
+	struct sdma_desc *desc;
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+
+	if (!vd) {
+		sdmac->desc = NULL;
+		return;
+	}
+	sdmac->desc = desc = to_sdma_desc(&vd->tx);
+	/*
+	 * Do not delete the node in desc_issued list in cyclic mode, otherwise
+	 * the desc allocated will never be freed in vchan_dma_desc_free_list
+	 */
+	if (!(sdmac->flags & IMX_DMA_SG_LOOP))
+		list_del(&vd->node);
+
+	sdma->channel_control[channel].base_bd_ptr = desc->bd_phys;
+	sdma->channel_control[channel].current_bd_ptr = desc->bd_phys;
+	sdma_enable_channel(sdma, sdmac->channel);
+}
+
+static void sdma_update_channel_loop(struct sdma_channel *sdmac)
+{
+	struct sdma_buffer_descriptor *bd;
+	int error = 0;
+	enum dma_status	old_status = sdmac->status;
+
+	/*
+	 * Loop mode: iterate over the completed buffer descriptors, re-arm
+	 * them and invoke the callback for each one.
+	 */
+	while (sdmac->desc) {
+		struct sdma_desc *desc = sdmac->desc;
+
+		bd = &desc->bd[desc->buf_tail];
+
+		if (bd->mode.status & BD_DONE)
+			break;
+
+		if (bd->mode.status & BD_RROR) {
+			bd->mode.status &= ~BD_RROR;
+			sdmac->status = DMA_ERROR;
+			error = -EIO;
+		}
+
+		/*
+		 * We use bd->mode.count to calculate the residue, since it
+		 * contains the number of bytes present in the current buffer
+		 * descriptor.
+		 */
+
+		desc->chn_real_count = bd->mode.count;
+		bd->mode.status |= BD_DONE;
+		bd->mode.count = desc->period_len;
+		desc->buf_ptail = desc->buf_tail;
+		desc->buf_tail = (desc->buf_tail + 1) % desc->num_bd;
+
+		/*
+		 * The callback is called from the interrupt context in order
+		 * to reduce latency and to avoid the risk of altering the
+		 * SDMA transaction status by the time the client tasklet is
+		 * executed.
+		 */
+		spin_unlock(&sdmac->vc.lock);
+		dmaengine_desc_get_callback_invoke(&desc->vd.tx, NULL);
+		spin_lock(&sdmac->vc.lock);
+
+		if (error)
+			sdmac->status = old_status;
+	}
+}
+
+static void mxc_sdma_handle_channel_normal(struct sdma_channel *sdmac)
+{
+	struct sdma_buffer_descriptor *bd;
+	int i, error = 0;
+
+	sdmac->desc->chn_real_count = 0;
+	/*
+	 * Non-loop mode: iterate over all descriptors, collect errors and
+	 * set the final channel status; the interrupt handler then completes
+	 * the cookie.
+	 */
+	for (i = 0; i < sdmac->desc->num_bd; i++) {
+		bd = &sdmac->desc->bd[i];
+
+		if (bd->mode.status & (BD_DONE | BD_RROR))
+			error = -EIO;
+		sdmac->desc->chn_real_count += bd->mode.count;
+	}
+
+	if (error)
+		sdmac->status = DMA_ERROR;
+	else
+		sdmac->status = DMA_COMPLETE;
+}
+
+static irqreturn_t sdma_int_handler(int irq, void *dev_id)
+{
+	struct sdma_engine *sdma = dev_id;
+	unsigned long stat;
+
+	stat = readl_relaxed(sdma->regs + SDMA_H_INTR);
+	writel_relaxed(stat, sdma->regs + SDMA_H_INTR);
+	/* channel 0 is special and not handled here, see sdma_run_channel0() */
+	stat &= ~1;
+
+	while (stat) {
+		int channel = fls(stat) - 1;
+		struct sdma_channel *sdmac = &sdma->channel[channel];
+		struct sdma_desc *desc;
+
+		spin_lock(&sdmac->vc.lock);
+		desc = sdmac->desc;
+		if (desc) {
+			if (sdmac->flags & IMX_DMA_SG_LOOP) {
+				sdma_update_channel_loop(sdmac);
+			} else {
+				mxc_sdma_handle_channel_normal(sdmac);
+				vchan_cookie_complete(&desc->vd);
+				sdma_start_desc(sdmac);
+			}
+		}
+
+		spin_unlock(&sdmac->vc.lock);
+		__clear_bit(channel, &stat);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * sets the pc of SDMA script according to the peripheral type
+ */
+static void sdma_get_pc(struct sdma_channel *sdmac,
+		enum sdma_peripheral_type peripheral_type)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int per_2_emi = 0, emi_2_per = 0;
+	/*
+	 * These are needed once we start to support transfers between
+	 * two peripherals or memory-to-memory transfers
+	 */
+	int per_2_per = 0, emi_2_emi = 0;
+
+	sdmac->pc_from_device = 0;
+	sdmac->pc_to_device = 0;
+	sdmac->device_to_device = 0;
+	sdmac->pc_to_pc = 0;
+
+	switch (peripheral_type) {
+	case IMX_DMATYPE_MEMORY:
+		emi_2_emi = sdma->script_addrs->ap_2_ap_addr;
+		break;
+	case IMX_DMATYPE_DSP:
+		emi_2_per = sdma->script_addrs->bp_2_ap_addr;
+		per_2_emi = sdma->script_addrs->ap_2_bp_addr;
+		break;
+	case IMX_DMATYPE_FIRI:
+		per_2_emi = sdma->script_addrs->firi_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_firi_addr;
+		break;
+	case IMX_DMATYPE_UART:
+		per_2_emi = sdma->script_addrs->uart_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_app_addr;
+		break;
+	case IMX_DMATYPE_UART_SP:
+		per_2_emi = sdma->script_addrs->uartsh_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_shp_addr;
+		break;
+	case IMX_DMATYPE_ATA:
+		per_2_emi = sdma->script_addrs->ata_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_ata_addr;
+		break;
+	case IMX_DMATYPE_CSPI:
+	case IMX_DMATYPE_EXT:
+	case IMX_DMATYPE_SSI:
+	case IMX_DMATYPE_SAI:
+		per_2_emi = sdma->script_addrs->app_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_app_addr;
+		break;
+	case IMX_DMATYPE_SSI_DUAL:
+		per_2_emi = sdma->script_addrs->ssish_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_ssish_addr;
+		break;
+	case IMX_DMATYPE_SSI_SP:
+	case IMX_DMATYPE_MMC:
+	case IMX_DMATYPE_SDHC:
+	case IMX_DMATYPE_CSPI_SP:
+	case IMX_DMATYPE_ESAI:
+	case IMX_DMATYPE_MSHC_SP:
+		per_2_emi = sdma->script_addrs->shp_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_shp_addr;
+		break;
+	case IMX_DMATYPE_ASRC:
+		per_2_emi = sdma->script_addrs->asrc_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->asrc_2_mcu_addr;
+		per_2_per = sdma->script_addrs->per_2_per_addr;
+		break;
+	case IMX_DMATYPE_ASRC_SP:
+		per_2_emi = sdma->script_addrs->shp_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_shp_addr;
+		per_2_per = sdma->script_addrs->per_2_per_addr;
+		break;
+	case IMX_DMATYPE_MSHC:
+		per_2_emi = sdma->script_addrs->mshc_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_mshc_addr;
+		break;
+	case IMX_DMATYPE_CCM:
+		per_2_emi = sdma->script_addrs->dptc_dvfs_addr;
+		break;
+	case IMX_DMATYPE_SPDIF:
+		per_2_emi = sdma->script_addrs->spdif_2_mcu_addr;
+		emi_2_per = sdma->script_addrs->mcu_2_spdif_addr;
+		break;
+	case IMX_DMATYPE_IPU_MEMORY:
+		emi_2_per = sdma->script_addrs->ext_mem_2_ipu_addr;
+		break;
+	default:
+		break;
+	}
+
+	sdmac->pc_from_device = per_2_emi;
+	sdmac->pc_to_device = emi_2_per;
+	sdmac->device_to_device = per_2_per;
+	sdmac->pc_to_pc = emi_2_emi;
+}
+
+static int sdma_load_context(struct sdma_channel *sdmac)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+	int load_address;
+	struct sdma_context_data *context = sdma->context;
+	struct sdma_buffer_descriptor *bd0 = sdma->bd0;
+	int ret;
+	unsigned long flags;
+
+	if (sdmac->direction == DMA_DEV_TO_MEM)
+		load_address = sdmac->pc_from_device;
+	else if (sdmac->direction == DMA_DEV_TO_DEV)
+		load_address = sdmac->device_to_device;
+	else if (sdmac->direction == DMA_MEM_TO_MEM)
+		load_address = sdmac->pc_to_pc;
+	else
+		load_address = sdmac->pc_to_device;
+
+	if (load_address < 0)
+		return load_address;
+
+	dev_dbg(sdma->dev, "load_address = %d\n", load_address);
+	dev_dbg(sdma->dev, "wml = 0x%08x\n", (u32)sdmac->watermark_level);
+	dev_dbg(sdma->dev, "shp_addr = 0x%08x\n", sdmac->shp_addr);
+	dev_dbg(sdma->dev, "per_addr = 0x%08x\n", sdmac->per_addr);
+	dev_dbg(sdma->dev, "event_mask0 = 0x%08x\n", (u32)sdmac->event_mask[0]);
+	dev_dbg(sdma->dev, "event_mask1 = 0x%08x\n", (u32)sdmac->event_mask[1]);
+
+	spin_lock_irqsave(&sdma->channel_0_lock, flags);
+
+	memset(context, 0, sizeof(*context));
+	context->channel_state.pc = load_address;
+
+	/* Pass the event mask, the peripheral base address and the watermark
+	 * level to the script via the channel context
+	 */
+	context->gReg[0] = sdmac->event_mask[1];
+	context->gReg[1] = sdmac->event_mask[0];
+	context->gReg[2] = sdmac->per_addr;
+	context->gReg[6] = sdmac->shp_addr;
+	context->gReg[7] = sdmac->watermark_level;
+
+	bd0->mode.command = C0_SETDM;
+	bd0->mode.status = BD_DONE | BD_INTR | BD_WRAP | BD_EXTD;
+	bd0->mode.count = sizeof(*context) / 4;
+	bd0->buffer_addr = sdma->context_phys;
+	bd0->ext_buffer_addr = 2048 + (sizeof(*context) / 4) * channel;
+	ret = sdma_run_channel0(sdma);
+
+	spin_unlock_irqrestore(&sdma->channel_0_lock, flags);
+
+	return ret;
+}
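+
+/*
+ * Worked example for the context destination above: the context is
+ * sizeof(struct sdma_context_data) = 128 bytes, i.e. 32 words, so
+ * ext_buffer_addr = 2048 + 32 * channel; channel 3's context, for
+ * instance, is written to SDMA address 2048 + 96 = 2144.
+ */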
+
+static struct sdma_channel *to_sdma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct sdma_channel, vc.chan);
+}
+
+static int sdma_disable_channel(struct dma_chan *chan)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+
+	writel_relaxed(BIT(channel), sdma->regs + SDMA_H_STATSTOP);
+	sdmac->status = DMA_ERROR;
+
+	return 0;
+}
+
+static void sdma_channel_terminate_work(struct work_struct *work)
+{
+	struct sdma_channel *sdmac = container_of(work, struct sdma_channel,
+						  terminate_worker);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	/*
+	 * According to the NXP R&D team, a delay of one BD transfer time
+	 * (at most 1 ms) should be added after clearing the channel enable
+	 * bit, to ensure the SDMA core has really stopped after SDMA
+	 * clients call .device_terminate_all.
+	 */
+	usleep_range(1000, 2000);
+
+	spin_lock_irqsave(&sdmac->vc.lock, flags);
+	vchan_get_all_descriptors(&sdmac->vc, &head);
+	sdmac->desc = NULL;
+	spin_unlock_irqrestore(&sdmac->vc.lock, flags);
+	vchan_dma_desc_free_list(&sdmac->vc, &head);
+}
+
+static int sdma_disable_channel_async(struct dma_chan *chan)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+
+	sdma_disable_channel(chan);
+
+	if (sdmac->desc)
+		schedule_work(&sdmac->terminate_worker);
+
+	return 0;
+}
+
+static void sdma_channel_synchronize(struct dma_chan *chan)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+
+	vchan_synchronize(&sdmac->vc);
+
+	flush_work(&sdmac->terminate_worker);
+}
+
+static void sdma_set_watermarklevel_for_p2p(struct sdma_channel *sdmac)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+
+	int lwml = sdmac->watermark_level & SDMA_WATERMARK_LEVEL_LWML;
+	int hwml = (sdmac->watermark_level & SDMA_WATERMARK_LEVEL_HWML) >> 16;
+
+	set_bit(sdmac->event_id0 % 32, &sdmac->event_mask[1]);
+	set_bit(sdmac->event_id1 % 32, &sdmac->event_mask[0]);
+
+	if (sdmac->event_id0 > 31)
+		sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_LWE;
+
+	if (sdmac->event_id1 > 31)
+		sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_HWE;
+
+	/*
+	 * If LWML (src_maxburst) > HWML (dst_maxburst), we need to swap
+	 * LWML and HWML in INFO (A.3.2.5.1), and also swap
+	 * r0 (event_mask[1]) and r1 (event_mask[0]).
+	 */
+	if (lwml > hwml) {
+		sdmac->watermark_level &= ~(SDMA_WATERMARK_LEVEL_LWML |
+						SDMA_WATERMARK_LEVEL_HWML);
+		sdmac->watermark_level |= hwml;
+		sdmac->watermark_level |= lwml << 16;
+		swap(sdmac->event_mask[0], sdmac->event_mask[1]);
+	}
+
+	if (sdmac->per_address2 >= sdma->spba_start_addr &&
+			sdmac->per_address2 <= sdma->spba_end_addr)
+		sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_SP;
+
+	if (sdmac->per_address >= sdma->spba_start_addr &&
+			sdmac->per_address <= sdma->spba_end_addr)
+		sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_DP;
+
+	sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_CONT;
+}
+
+static int sdma_config_channel(struct dma_chan *chan)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	int ret;
+
+	sdma_disable_channel(chan);
+
+	sdmac->event_mask[0] = 0;
+	sdmac->event_mask[1] = 0;
+	sdmac->shp_addr = 0;
+	sdmac->per_addr = 0;
+
+	if (sdmac->event_id0) {
+		if (sdmac->event_id0 >= sdmac->sdma->drvdata->num_events)
+			return -EINVAL;
+		sdma_event_enable(sdmac, sdmac->event_id0);
+	}
+
+	if (sdmac->event_id1) {
+		if (sdmac->event_id1 >= sdmac->sdma->drvdata->num_events)
+			return -EINVAL;
+		sdma_event_enable(sdmac, sdmac->event_id1);
+	}
+
+	switch (sdmac->peripheral_type) {
+	case IMX_DMATYPE_DSP:
+		sdma_config_ownership(sdmac, false, true, true);
+		break;
+	case IMX_DMATYPE_MEMORY:
+		sdma_config_ownership(sdmac, false, true, false);
+		break;
+	default:
+		sdma_config_ownership(sdmac, true, true, false);
+		break;
+	}
+
+	sdma_get_pc(sdmac, sdmac->peripheral_type);
+
+	if ((sdmac->peripheral_type != IMX_DMATYPE_MEMORY) &&
+			(sdmac->peripheral_type != IMX_DMATYPE_DSP)) {
+		/* Handle multiple event channels differently */
+		if (sdmac->event_id1) {
+			if (sdmac->peripheral_type == IMX_DMATYPE_ASRC_SP ||
+			    sdmac->peripheral_type == IMX_DMATYPE_ASRC)
+				sdma_set_watermarklevel_for_p2p(sdmac);
+		} else
+			__set_bit(sdmac->event_id0, sdmac->event_mask);
+
+		/* Address */
+		sdmac->shp_addr = sdmac->per_address;
+		sdmac->per_addr = sdmac->per_address2;
+	} else {
+		sdmac->watermark_level = 0; /* FIXME: M3_BASE_ADDRESS */
+	}
+
+	ret = sdma_load_context(sdmac);
+
+	return ret;
+}
+
+static int sdma_set_channel_priority(struct sdma_channel *sdmac,
+		unsigned int priority)
+{
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+
+	if (priority < MXC_SDMA_MIN_PRIORITY
+	    || priority > MXC_SDMA_MAX_PRIORITY) {
+		return -EINVAL;
+	}
+
+	writel_relaxed(priority, sdma->regs + SDMA_CHNPRI_0 + 4 * channel);
+
+	return 0;
+}
+
+static int sdma_request_channel0(struct sdma_engine *sdma)
+{
+	int ret = -EBUSY;
+
+	sdma->bd0 = dma_zalloc_coherent(NULL, PAGE_SIZE, &sdma->bd0_phys,
+					GFP_NOWAIT);
+	if (!sdma->bd0) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	sdma->channel_control[0].base_bd_ptr = sdma->bd0_phys;
+	sdma->channel_control[0].current_bd_ptr = sdma->bd0_phys;
+
+	sdma_set_channel_priority(&sdma->channel[0], MXC_SDMA_DEFAULT_PRIORITY);
+	return 0;
+out:
+	return ret;
+}
+
+
+static int sdma_alloc_bd(struct sdma_desc *desc)
+{
+	u32 bd_size = desc->num_bd * sizeof(struct sdma_buffer_descriptor);
+	int ret = 0;
+
+	desc->bd = dma_zalloc_coherent(NULL, bd_size, &desc->bd_phys,
+					GFP_NOWAIT);
+	if (!desc->bd) {
+		ret = -ENOMEM;
+		goto out;
+	}
+out:
+	return ret;
+}
+
+static void sdma_free_bd(struct sdma_desc *desc)
+{
+	u32 bd_size = desc->num_bd * sizeof(struct sdma_buffer_descriptor);
+
+	dma_free_coherent(NULL, bd_size, desc->bd, desc->bd_phys);
+}
+
+static void sdma_desc_free(struct virt_dma_desc *vd)
+{
+	struct sdma_desc *desc = container_of(vd, struct sdma_desc, vd);
+
+	sdma_free_bd(desc);
+	kfree(desc);
+}
+
+static int sdma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct imx_dma_data *data = chan->private;
+	struct imx_dma_data mem_data;
+	int prio, ret;
+
+	/*
+	 * MEMCPY users such as dmatest may never set up chan->private via a
+	 * filter function, so create a local 'struct imx_dma_data mem_data'
+	 * for that case. Note that in any other slave case you have to set
+	 * chan->private to a 'struct imx_dma_data' in your own filter
+	 * function if you request the channel with dma_request_channel()
+	 * rather than dma_request_slave_channel(). Otherwise the
+	 * 'MEMCPY in case?' message below will appear to warn you to fix
+	 * your filter function.
+	 */
+	if (!data) {
+		dev_dbg(sdmac->sdma->dev, "MEMCPY in case?\n");
+		mem_data.priority = 2;
+		mem_data.peripheral_type = IMX_DMATYPE_MEMORY;
+		mem_data.dma_request = 0;
+		mem_data.dma_request2 = 0;
+		data = &mem_data;
+
+		sdma_get_pc(sdmac, IMX_DMATYPE_MEMORY);
+	}
+
+	switch (data->priority) {
+	case DMA_PRIO_HIGH:
+		prio = 3;
+		break;
+	case DMA_PRIO_MEDIUM:
+		prio = 2;
+		break;
+	case DMA_PRIO_LOW:
+	default:
+		prio = 1;
+		break;
+	}
+
+	sdmac->peripheral_type = data->peripheral_type;
+	sdmac->event_id0 = data->dma_request;
+	sdmac->event_id1 = data->dma_request2;
+
+	ret = clk_enable(sdmac->sdma->clk_ipg);
+	if (ret)
+		return ret;
+	ret = clk_enable(sdmac->sdma->clk_ahb);
+	if (ret)
+		goto disable_clk_ipg;
+
+	ret = sdma_set_channel_priority(sdmac, prio);
+	if (ret)
+		goto disable_clk_ahb;
+
+	return 0;
+
+disable_clk_ahb:
+	clk_disable(sdmac->sdma->clk_ahb);
+disable_clk_ipg:
+	clk_disable(sdmac->sdma->clk_ipg);
+	return ret;
+}
+
+static void sdma_free_chan_resources(struct dma_chan *chan)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_engine *sdma = sdmac->sdma;
+
+	sdma_disable_channel_async(chan);
+
+	sdma_channel_synchronize(chan);
+
+	if (sdmac->event_id0)
+		sdma_event_disable(sdmac, sdmac->event_id0);
+	if (sdmac->event_id1)
+		sdma_event_disable(sdmac, sdmac->event_id1);
+
+	sdmac->event_id0 = 0;
+	sdmac->event_id1 = 0;
+
+	sdma_set_channel_priority(sdmac, 0);
+
+	clk_disable(sdma->clk_ipg);
+	clk_disable(sdma->clk_ahb);
+}
+
+static struct sdma_desc *sdma_transfer_init(struct sdma_channel *sdmac,
+				enum dma_transfer_direction direction, u32 bds)
+{
+	struct sdma_desc *desc;
+
+	desc = kzalloc((sizeof(*desc)), GFP_NOWAIT);
+	if (!desc)
+		goto err_out;
+
+	sdmac->status = DMA_IN_PROGRESS;
+	sdmac->direction = direction;
+	sdmac->flags = 0;
+
+	desc->chn_count = 0;
+	desc->chn_real_count = 0;
+	desc->buf_tail = 0;
+	desc->buf_ptail = 0;
+	desc->sdmac = sdmac;
+	desc->num_bd = bds;
+
+	if (sdma_alloc_bd(desc))
+		goto err_desc_out;
+
+	/* No slave_config is called in the MEMCPY case, so do the setup here */
+	if (direction == DMA_MEM_TO_MEM)
+		sdma_config_ownership(sdmac, false, true, false);
+
+	if (sdma_load_context(sdmac))
+		goto err_desc_out;
+
+	return desc;
+
+err_desc_out:
+	kfree(desc);
+err_out:
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *sdma_prep_memcpy(
+		struct dma_chan *chan, dma_addr_t dma_dst,
+		dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_engine *sdma = sdmac->sdma;
+	int channel = sdmac->channel;
+	size_t count;
+	int i = 0, param;
+	struct sdma_buffer_descriptor *bd;
+	struct sdma_desc *desc;
+
+	if (!chan || !len)
+		return NULL;
+
+	dev_dbg(sdma->dev, "memcpy: %pad->%pad, len=%zu, channel=%d.\n",
+		&dma_src, &dma_dst, len, channel);
+
+	desc = sdma_transfer_init(sdmac, DMA_MEM_TO_MEM,
+					len / SDMA_BD_MAX_CNT + 1);
+	if (!desc)
+		return NULL;
+
+	do {
+		count = min_t(size_t, len, SDMA_BD_MAX_CNT);
+		bd = &desc->bd[i];
+		bd->buffer_addr = dma_src;
+		bd->ext_buffer_addr = dma_dst;
+		bd->mode.count = count;
+		desc->chn_count += count;
+		bd->mode.command = 0;
+
+		dma_src += count;
+		dma_dst += count;
+		len -= count;
+		i++;
+
+		param = BD_DONE | BD_EXTD | BD_CONT;
+		/* last bd */
+		if (!len) {
+			param |= BD_INTR;
+			param |= BD_LAST;
+			param &= ~BD_CONT;
+		}
+
+		dev_dbg(sdma->dev, "entry %d: count: %zd dma: 0x%x %s%s\n",
+				i, count, bd->buffer_addr,
+				param & BD_WRAP ? "wrap" : "",
+				param & BD_INTR ? " intr" : "");
+
+		bd->mode.status = param;
+	} while (len);
+
+	return vchan_tx_prep(&sdmac->vc, &desc->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *sdma_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_engine *sdma = sdmac->sdma;
+	int i, count;
+	int channel = sdmac->channel;
+	struct scatterlist *sg;
+	struct sdma_desc *desc;
+
+	desc = sdma_transfer_init(sdmac, direction, sg_len);
+	if (!desc)
+		goto err_out;
+
+	dev_dbg(sdma->dev, "setting up %d entries for channel %d.\n",
+			sg_len, channel);
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		struct sdma_buffer_descriptor *bd = &desc->bd[i];
+		int param;
+
+		bd->buffer_addr = sg->dma_address;
+
+		count = sg_dma_len(sg);
+
+		if (count > SDMA_BD_MAX_CNT) {
+			dev_err(sdma->dev, "SDMA channel %d: maximum bytes for sg entry exceeded: %d > %d\n",
+					channel, count, SDMA_BD_MAX_CNT);
+			goto err_bd_out;
+		}
+
+		bd->mode.count = count;
+		desc->chn_count += count;
+
+		if (sdmac->word_size > DMA_SLAVE_BUSWIDTH_4_BYTES)
+			goto err_bd_out;
+
+		switch (sdmac->word_size) {
+		case DMA_SLAVE_BUSWIDTH_4_BYTES:
+			bd->mode.command = 0;
+			if (count & 3 || sg->dma_address & 3)
+				goto err_bd_out;
+			break;
+		case DMA_SLAVE_BUSWIDTH_2_BYTES:
+			bd->mode.command = 2;
+			if (count & 1 || sg->dma_address & 1)
+				goto err_bd_out;
+			break;
+		case DMA_SLAVE_BUSWIDTH_1_BYTE:
+			bd->mode.command = 1;
+			break;
+		default:
+			goto err_bd_out;
+		}
+
+		param = BD_DONE | BD_EXTD | BD_CONT;
+
+		if (i + 1 == sg_len) {
+			param |= BD_INTR;
+			param |= BD_LAST;
+			param &= ~BD_CONT;
+		}
+
+		dev_dbg(sdma->dev, "entry %d: count: %d dma: %#llx %s%s\n",
+				i, count, (u64)sg->dma_address,
+				param & BD_WRAP ? "wrap" : "",
+				param & BD_INTR ? " intr" : "");
+
+		bd->mode.status = param;
+	}
+
+	return vchan_tx_prep(&sdmac->vc, &desc->vd, flags);
+err_bd_out:
+	sdma_free_bd(desc);
+	kfree(desc);
+err_out:
+	sdmac->status = DMA_ERROR;
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *sdma_prep_dma_cyclic(
+		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
+		size_t period_len, enum dma_transfer_direction direction,
+		unsigned long flags)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_engine *sdma = sdmac->sdma;
+	int num_periods = buf_len / period_len;
+	int channel = sdmac->channel;
+	int i = 0, buf = 0;
+	struct sdma_desc *desc;
+
+	dev_dbg(sdma->dev, "%s channel: %d\n", __func__, channel);
+
+	desc = sdma_transfer_init(sdmac, direction, num_periods);
+	if (!desc)
+		goto err_out;
+
+	desc->period_len = period_len;
+
+	sdmac->flags |= IMX_DMA_SG_LOOP;
+
+	if (period_len > SDMA_BD_MAX_CNT) {
+		dev_err(sdma->dev, "SDMA channel %d: maximum period size exceeded: %zu > %d\n",
+				channel, period_len, SDMA_BD_MAX_CNT);
+		goto err_bd_out;
+	}
+
+	while (buf < buf_len) {
+		struct sdma_buffer_descriptor *bd = &desc->bd[i];
+		int param;
+
+		bd->buffer_addr = dma_addr;
+
+		bd->mode.count = period_len;
+
+		if (sdmac->word_size > DMA_SLAVE_BUSWIDTH_4_BYTES)
+			goto err_bd_out;
+		if (sdmac->word_size == DMA_SLAVE_BUSWIDTH_4_BYTES)
+			bd->mode.command = 0;
+		else
+			bd->mode.command = sdmac->word_size;
+
+		param = BD_DONE | BD_EXTD | BD_CONT | BD_INTR;
+		if (i + 1 == num_periods)
+			param |= BD_WRAP;
+
+		dev_dbg(sdma->dev, "entry %d: count: %zu dma: %#llx %s%s\n",
+				i, period_len, (u64)dma_addr,
+				param & BD_WRAP ? "wrap" : "",
+				param & BD_INTR ? " intr" : "");
+
+		bd->mode.status = param;
+
+		dma_addr += period_len;
+		buf += period_len;
+
+		i++;
+	}
+
+	return vchan_tx_prep(&sdmac->vc, &desc->vd, flags);
+err_bd_out:
+	sdma_free_bd(desc);
+	kfree(desc);
+err_out:
+	sdmac->status = DMA_ERROR;
+	return NULL;
+}
+
+static int sdma_config(struct dma_chan *chan,
+		       struct dma_slave_config *dmaengine_cfg)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+
+	if (dmaengine_cfg->direction == DMA_DEV_TO_MEM) {
+		sdmac->per_address = dmaengine_cfg->src_addr;
+		sdmac->watermark_level = dmaengine_cfg->src_maxburst *
+			dmaengine_cfg->src_addr_width;
+		sdmac->word_size = dmaengine_cfg->src_addr_width;
+	} else if (dmaengine_cfg->direction == DMA_DEV_TO_DEV) {
+		sdmac->per_address2 = dmaengine_cfg->src_addr;
+		sdmac->per_address = dmaengine_cfg->dst_addr;
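+		/*
+		 * For device-to-device transfers both burst sizes are packed
+		 * into a single watermark word: the source burst into the
+		 * lower (LWML) field and the destination burst into the
+		 * upper (HWML) field.
+		 */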
+		sdmac->watermark_level = dmaengine_cfg->src_maxburst &
+			SDMA_WATERMARK_LEVEL_LWML;
+		sdmac->watermark_level |= (dmaengine_cfg->dst_maxburst << 16) &
+			SDMA_WATERMARK_LEVEL_HWML;
+		sdmac->word_size = dmaengine_cfg->dst_addr_width;
+	} else {
+		sdmac->per_address = dmaengine_cfg->dst_addr;
+		sdmac->watermark_level = dmaengine_cfg->dst_maxburst *
+			dmaengine_cfg->dst_addr_width;
+		sdmac->word_size = dmaengine_cfg->dst_addr_width;
+	}
+	sdmac->direction = dmaengine_cfg->direction;
+	return sdma_config_channel(chan);
+}
+
+static enum dma_status sdma_tx_status(struct dma_chan *chan,
+				      dma_cookie_t cookie,
+				      struct dma_tx_state *txstate)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_desc *desc;
+	u32 residue;
+	struct virt_dma_desc *vd;
+	enum dma_status ret;
+	unsigned long flags;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE || !txstate)
+		return ret;
+
+	spin_lock_irqsave(&sdmac->vc.lock, flags);
+	vd = vchan_find_desc(&sdmac->vc, cookie);
+	if (vd) {
+		desc = to_sdma_desc(&vd->tx);
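+		/*
+		 * Cyclic transfers: residue is the data left in buffer
+		 * descriptors that have not completed yet; other transfers:
+		 * requested length minus the count reported back by SDMA.
+		 */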
+		if (sdmac->flags & IMX_DMA_SG_LOOP)
+			residue = (desc->num_bd - desc->buf_ptail) *
+				desc->period_len - desc->chn_real_count;
+		else
+			residue = desc->chn_count - desc->chn_real_count;
+	} else if (sdmac->desc && sdmac->desc->vd.tx.cookie == cookie) {
+		residue = sdmac->desc->chn_count - sdmac->desc->chn_real_count;
+	} else {
+		residue = 0;
+	}
+	spin_unlock_irqrestore(&sdmac->vc.lock, flags);
+
+	dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie,
+			 residue);
+
+	return sdmac->status;
+}
+
+static void sdma_issue_pending(struct dma_chan *chan)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&sdmac->vc.lock, flags);
+	if (vchan_issue_pending(&sdmac->vc) && !sdmac->desc)
+		sdma_start_desc(sdmac);
+	spin_unlock_irqrestore(&sdmac->vc.lock, flags);
+}
+
+#define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1	34
+#define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V2	38
+#define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V3	41
+#define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V4	42
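+/*
+ * Number of script address entries that are valid for a given firmware
+ * header version_major; see sdma_load_firmware().
+ */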
+
+static void sdma_add_scripts(struct sdma_engine *sdma,
+		const struct sdma_script_start_addrs *addr)
+{
+	s32 *addr_arr = (s32 *)addr;
+	s32 *saddr_arr = (s32 *)sdma->script_addrs;
+	int i;
+
+	/* use the default ROM firmware if no external firmware was provided */
+	if (!sdma->script_number)
+		sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1;
+
+	for (i = 0; i < sdma->script_number; i++)
+		if (addr_arr[i] > 0)
+			saddr_arr[i] = addr_arr[i];
+}
+
+static void sdma_load_firmware(const struct firmware *fw, void *context)
+{
+	struct sdma_engine *sdma = context;
+	const struct sdma_firmware_header *header;
+	const struct sdma_script_start_addrs *addr;
+	unsigned short *ram_code;
+
+	if (!fw) {
+		dev_info(sdma->dev, "external firmware not found, using ROM firmware\n");
+		/* In this case we just use the ROM firmware. */
+		return;
+	}
+
+	if (fw->size < sizeof(*header))
+		goto err_firmware;
+
+	header = (struct sdma_firmware_header *)fw->data;
+
+	if (header->magic != SDMA_FIRMWARE_MAGIC)
+		goto err_firmware;
+	if (header->ram_code_start + header->ram_code_size > fw->size)
+		goto err_firmware;
+	switch (header->version_major) {
+	case 1:
+		sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1;
+		break;
+	case 2:
+		sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V2;
+		break;
+	case 3:
+		sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V3;
+		break;
+	case 4:
+		sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V4;
+		break;
+	default:
+		dev_err(sdma->dev, "unknown firmware version\n");
+		goto err_firmware;
+	}
+
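+	/* script address table and RAM image are at header-relative offsets */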
+	addr = (void *)header + header->script_addrs_start;
+	ram_code = (void *)header + header->ram_code_start;
+
+	clk_enable(sdma->clk_ipg);
+	clk_enable(sdma->clk_ahb);
+	/* download the RAM image for SDMA */
+	sdma_load_script(sdma, ram_code,
+			header->ram_code_size,
+			addr->ram_code_start_addr);
+	clk_disable(sdma->clk_ipg);
+	clk_disable(sdma->clk_ahb);
+
+	sdma_add_scripts(sdma, addr);
+
+	dev_info(sdma->dev, "loaded firmware %d.%d\n",
+			header->version_major,
+			header->version_minor);
+
+err_firmware:
+	release_firmware(fw);
+}
+
+#define EVENT_REMAP_CELLS 3
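+/*
+ * Each "fsl,sdma-event-remap" entry is a <reg shift val> triple: a GPR
+ * register offset, a bit position within that register, and the value to
+ * program at that bit.
+ */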
+
+static int sdma_event_remap(struct sdma_engine *sdma)
+{
+	struct device_node *np = sdma->dev->of_node;
+	struct device_node *gpr_np = of_parse_phandle(np, "gpr", 0);
+	struct property *event_remap;
+	struct regmap *gpr;
+	char propname[] = "fsl,sdma-event-remap";
+	u32 reg, val, shift, num_map, i;
+	int ret = 0;
+
+	if (IS_ERR(np) || IS_ERR(gpr_np))
+		goto out;
+
+	event_remap = of_find_property(np, propname, NULL);
+	num_map = event_remap ? (event_remap->length / sizeof(u32)) : 0;
+	if (!num_map) {
+		dev_dbg(sdma->dev, "no event needs to be remapped\n");
+		goto out;
+	} else if (num_map % EVENT_REMAP_CELLS) {
+		dev_err(sdma->dev, "the length of %s must be a multiple of %d\n",
+				propname, EVENT_REMAP_CELLS);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	gpr = syscon_node_to_regmap(gpr_np);
+	if (IS_ERR(gpr)) {
+		dev_err(sdma->dev, "failed to get gpr regmap\n");
+		ret = PTR_ERR(gpr);
+		goto out;
+	}
+
+	for (i = 0; i < num_map; i += EVENT_REMAP_CELLS) {
+		ret = of_property_read_u32_index(np, propname, i, &reg);
+		if (ret) {
+			dev_err(sdma->dev, "failed to read property %s index %d\n",
+					propname, i);
+			goto out;
+		}
+
+		ret = of_property_read_u32_index(np, propname, i + 1, &shift);
+		if (ret) {
+			dev_err(sdma->dev, "failed to read property %s index %d\n",
+					propname, i + 1);
+			goto out;
+		}
+
+		ret = of_property_read_u32_index(np, propname, i + 2, &val);
+		if (ret) {
+			dev_err(sdma->dev, "failed to read property %s index %d\n",
+					propname, i + 2);
+			goto out;
+		}
+
+		regmap_update_bits(gpr, reg, BIT(shift), val << shift);
+	}
+
+out:
+	if (!IS_ERR(gpr_np))
+		of_node_put(gpr_np);
+
+	return ret;
+}
+
+static int sdma_get_firmware(struct sdma_engine *sdma,
+		const char *fw_name)
+{
+	int ret;
+
+	ret = request_firmware_nowait(THIS_MODULE,
+			FW_ACTION_HOTPLUG, fw_name, sdma->dev,
+			GFP_KERNEL, sdma, sdma_load_firmware);
+
+	return ret;
+}
+
+static int sdma_init(struct sdma_engine *sdma)
+{
+	int i, ret;
+	dma_addr_t ccb_phys;
+
+	ret = clk_enable(sdma->clk_ipg);
+	if (ret)
+		return ret;
+	ret = clk_enable(sdma->clk_ahb);
+	if (ret)
+		goto disable_clk_ipg;
+
+	/* Be sure SDMA has not started yet */
+	writel_relaxed(0, sdma->regs + SDMA_H_C0PTR);
+
+	sdma->channel_control = dma_alloc_coherent(NULL,
+			MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control) +
+			sizeof(struct sdma_context_data),
+			&ccb_phys, GFP_KERNEL);
+
+	if (!sdma->channel_control) {
+		ret = -ENOMEM;
+		goto err_dma_alloc;
+	}
+
+	sdma->context = (void *)sdma->channel_control +
+		MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control);
+	sdma->context_phys = ccb_phys +
+		MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control);
+
+	/* Zero-out the CCB structures array just allocated */
+	memset(sdma->channel_control, 0,
+			MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control));
+
+	/* disable all channels */
+	for (i = 0; i < sdma->drvdata->num_events; i++)
+		writel_relaxed(0, sdma->regs + chnenbl_ofs(sdma, i));
+
+	/* All channels have priority 0 */
+	for (i = 0; i < MAX_DMA_CHANNELS; i++)
+		writel_relaxed(0, sdma->regs + SDMA_CHNPRI_0 + i * 4);
+
+	ret = sdma_request_channel0(sdma);
+	if (ret)
+		goto err_dma_alloc;
+
+	sdma_config_ownership(&sdma->channel[0], false, true, false);
+
+	/* Set Command Channel (Channel Zero) */
+	writel_relaxed(0x4050, sdma->regs + SDMA_CHN0ADDR);
+
+	/* Set bits of CONFIG register but with static context switching */
+	/* FIXME: Check whether to set ACR bit depending on clock ratios */
+	writel_relaxed(0, sdma->regs + SDMA_H_CONFIG);
+
+	writel_relaxed(ccb_phys, sdma->regs + SDMA_H_C0PTR);
+
+	/* Set the priority of channel 0 (the command channel) */
+	sdma_set_channel_priority(&sdma->channel[0], 7);
+
+	clk_disable(sdma->clk_ipg);
+	clk_disable(sdma->clk_ahb);
+
+	return 0;
+
+err_dma_alloc:
+	clk_disable(sdma->clk_ahb);
+disable_clk_ipg:
+	clk_disable(sdma->clk_ipg);
+	dev_err(sdma->dev, "initialisation failed with %d\n", ret);
+	return ret;
+}
+
+static bool sdma_filter_fn(struct dma_chan *chan, void *fn_param)
+{
+	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct imx_dma_data *data = fn_param;
+
+	if (!imx_dma_is_general_purpose(chan))
+		return false;
+
+	sdmac->data = *data;
+	chan->private = &sdmac->data;
+
+	return true;
+}
+
+static struct dma_chan *sdma_xlate(struct of_phandle_args *dma_spec,
+				   struct of_dma *ofdma)
+{
+	struct sdma_engine *sdma = ofdma->of_dma_data;
+	dma_cap_mask_t mask = sdma->dma_device.cap_mask;
+	struct imx_dma_data data;
+
+	if (dma_spec->args_count != 3)
+		return NULL;
+
+	data.dma_request = dma_spec->args[0];
+	data.peripheral_type = dma_spec->args[1];
+	data.priority = dma_spec->args[2];
+	/*
+	 * Initialize dma_request2 to zero; it is not described by the DT.
+	 * For P2P, dma_request2 is initialized via dma_request_channel():
+	 * chan->private then points to the imx_dma_data, and in
+	 * device_alloc_chan_resources() sdmac->event_id1 is set from
+	 * imx_dma_data.dma_request2.
+	 */
+	data.dma_request2 = 0;
+
+	return dma_request_channel(mask, sdma_filter_fn, &data);
+}
+
+static int sdma_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *of_id =
+			of_match_device(sdma_dt_ids, &pdev->dev);
+	struct device_node *np = pdev->dev.of_node;
+	struct device_node *spba_bus;
+	const char *fw_name;
+	int ret;
+	int irq;
+	struct resource *iores;
+	struct resource spba_res;
+	struct sdma_platform_data *pdata = dev_get_platdata(&pdev->dev);
+	int i;
+	struct sdma_engine *sdma;
+	s32 *saddr_arr;
+	const struct sdma_driver_data *drvdata = NULL;
+
+	if (of_id)
+		drvdata = of_id->data;
+	else if (pdev->id_entry)
+		drvdata = (void *)pdev->id_entry->driver_data;
+
+	if (!drvdata) {
+		dev_err(&pdev->dev, "unable to find driver data\n");
+		return -EINVAL;
+	}
+
+	ret = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+	if (ret)
+		return ret;
+
+	sdma = devm_kzalloc(&pdev->dev, sizeof(*sdma), GFP_KERNEL);
+	if (!sdma)
+		return -ENOMEM;
+
+	spin_lock_init(&sdma->channel_0_lock);
+
+	sdma->dev = &pdev->dev;
+	sdma->drvdata = drvdata;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	sdma->regs = devm_ioremap_resource(&pdev->dev, iores);
+	if (IS_ERR(sdma->regs))
+		return PTR_ERR(sdma->regs);
+
+	sdma->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
+	if (IS_ERR(sdma->clk_ipg))
+		return PTR_ERR(sdma->clk_ipg);
+
+	sdma->clk_ahb = devm_clk_get(&pdev->dev, "ahb");
+	if (IS_ERR(sdma->clk_ahb))
+		return PTR_ERR(sdma->clk_ahb);
+
+	ret = clk_prepare(sdma->clk_ipg);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare(sdma->clk_ahb);
+	if (ret)
+		goto err_clk;
+
+	ret = devm_request_irq(&pdev->dev, irq, sdma_int_handler, 0, "sdma",
+			       sdma);
+	if (ret)
+		goto err_irq;
+
+	sdma->irq = irq;
+
+	sdma->script_addrs = kzalloc(sizeof(*sdma->script_addrs), GFP_KERNEL);
+	if (!sdma->script_addrs) {
+		ret = -ENOMEM;
+		goto err_irq;
+	}
+
+	/* initially no scripts available */
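+	/* negative entries mark scripts for which no address is known yet */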
+	saddr_arr = (s32 *)sdma->script_addrs;
+	for (i = 0; i < SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1; i++)
+		saddr_arr[i] = -EINVAL;
+
+	dma_cap_set(DMA_SLAVE, sdma->dma_device.cap_mask);
+	dma_cap_set(DMA_CYCLIC, sdma->dma_device.cap_mask);
+	dma_cap_set(DMA_MEMCPY, sdma->dma_device.cap_mask);
+
+	INIT_LIST_HEAD(&sdma->dma_device.channels);
+	/* Initialize channel parameters */
+	for (i = 0; i < MAX_DMA_CHANNELS; i++) {
+		struct sdma_channel *sdmac = &sdma->channel[i];
+
+		sdmac->sdma = sdma;
+
+		sdmac->channel = i;
+		sdmac->vc.desc_free = sdma_desc_free;
+		INIT_WORK(&sdmac->terminate_worker,
+				sdma_channel_terminate_work);
+		/*
+		 * Add the channel to the DMAC list. Do not add channel 0 though
+		 * because we need it internally in the SDMA driver. This also means
+		 * that channel 0 in dmaengine counting matches sdma channel 1.
+		 */
+		if (i)
+			vchan_init(&sdmac->vc, &sdma->dma_device);
+	}
+
+	ret = sdma_init(sdma);
+	if (ret)
+		goto err_init;
+
+	ret = sdma_event_remap(sdma);
+	if (ret)
+		goto err_init;
+
+	if (sdma->drvdata->script_addrs)
+		sdma_add_scripts(sdma, sdma->drvdata->script_addrs);
+	if (pdata && pdata->script_addrs)
+		sdma_add_scripts(sdma, pdata->script_addrs);
+
+	if (pdata) {
+		ret = sdma_get_firmware(sdma, pdata->fw_name);
+		if (ret)
+			dev_warn(&pdev->dev, "failed to get firmware from platform data\n");
+	} else {
+		/*
+		 * Because the device tree does not encode ROM script
+		 * addresses, the RAM script in external firmware is
+		 * mandatory for device tree probe; without it the probe
+		 * fails.
+		 */
+		ret = of_property_read_string(np, "fsl,sdma-ram-script-name",
+					      &fw_name);
+		if (ret)
+			dev_warn(&pdev->dev, "failed to get firmware name\n");
+		else {
+			ret = sdma_get_firmware(sdma, fw_name);
+			if (ret)
+				dev_warn(&pdev->dev, "failed to get firmware from device tree\n");
+		}
+	}
+
+	sdma->dma_device.dev = &pdev->dev;
+
+	sdma->dma_device.device_alloc_chan_resources = sdma_alloc_chan_resources;
+	sdma->dma_device.device_free_chan_resources = sdma_free_chan_resources;
+	sdma->dma_device.device_tx_status = sdma_tx_status;
+	sdma->dma_device.device_prep_slave_sg = sdma_prep_slave_sg;
+	sdma->dma_device.device_prep_dma_cyclic = sdma_prep_dma_cyclic;
+	sdma->dma_device.device_config = sdma_config;
+	sdma->dma_device.device_terminate_all = sdma_disable_channel_async;
+	sdma->dma_device.device_synchronize = sdma_channel_synchronize;
+	sdma->dma_device.src_addr_widths = SDMA_DMA_BUSWIDTHS;
+	sdma->dma_device.dst_addr_widths = SDMA_DMA_BUSWIDTHS;
+	sdma->dma_device.directions = SDMA_DMA_DIRECTIONS;
+	sdma->dma_device.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+	sdma->dma_device.device_prep_dma_memcpy = sdma_prep_memcpy;
+	sdma->dma_device.device_issue_pending = sdma_issue_pending;
+	sdma->dma_device.dev->dma_parms = &sdma->dma_parms;
+	dma_set_max_seg_size(sdma->dma_device.dev, SDMA_BD_MAX_CNT);
+
+	platform_set_drvdata(pdev, sdma);
+
+	ret = dma_async_device_register(&sdma->dma_device);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to register\n");
+		goto err_init;
+	}
+
+	if (np) {
+		ret = of_dma_controller_register(np, sdma_xlate, sdma);
+		if (ret) {
+			dev_err(&pdev->dev, "failed to register controller\n");
+			goto err_register;
+		}
+
+		spba_bus = of_find_compatible_node(NULL, NULL, "fsl,spba-bus");
+		ret = of_address_to_resource(spba_bus, 0, &spba_res);
+		if (!ret) {
+			sdma->spba_start_addr = spba_res.start;
+			sdma->spba_end_addr = spba_res.end;
+		}
+		of_node_put(spba_bus);
+	}
+
+	return 0;
+
+err_register:
+	dma_async_device_unregister(&sdma->dma_device);
+err_init:
+	kfree(sdma->script_addrs);
+err_irq:
+	clk_unprepare(sdma->clk_ahb);
+err_clk:
+	clk_unprepare(sdma->clk_ipg);
+	return ret;
+}
+
+static int sdma_remove(struct platform_device *pdev)
+{
+	struct sdma_engine *sdma = platform_get_drvdata(pdev);
+	int i;
+
+	devm_free_irq(&pdev->dev, sdma->irq, sdma);
+	dma_async_device_unregister(&sdma->dma_device);
+	kfree(sdma->script_addrs);
+	clk_unprepare(sdma->clk_ahb);
+	clk_unprepare(sdma->clk_ipg);
+	/* Kill the tasklet */
+	for (i = 0; i < MAX_DMA_CHANNELS; i++) {
+		struct sdma_channel *sdmac = &sdma->channel[i];
+
+		tasklet_kill(&sdmac->vc.task);
+		sdma_free_chan_resources(&sdmac->vc.chan);
+	}
+
+	platform_set_drvdata(pdev, NULL);
+	return 0;
+}
+
+static struct platform_driver sdma_driver = {
+	.driver		= {
+		.name	= "imx-sdma",
+		.of_match_table = sdma_dt_ids,
+	},
+	.id_table	= sdma_devtypes,
+	.remove		= sdma_remove,
+	.probe		= sdma_probe,
+};
+
+module_platform_driver(sdma_driver);
+
+MODULE_AUTHOR("Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>");
+MODULE_DESCRIPTION("i.MX SDMA driver");
+#if IS_ENABLED(CONFIG_SOC_IMX6Q)
+MODULE_FIRMWARE("imx/sdma/sdma-imx6q.bin");
+#endif
+#if IS_ENABLED(CONFIG_SOC_IMX7D)
+MODULE_FIRMWARE("imx/sdma/sdma-imx7d.bin");
+#endif
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/ioat/Makefile b/drivers/dma/ioat/Makefile
new file mode 100644
index 0000000..cf5fedb
--- /dev/null
+++ b/drivers/dma/ioat/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
+ioatdma-y := init.o dma.o prep.o dca.o sysfs.o
diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c
new file mode 100644
index 0000000..eab2fdd
--- /dev/null
+++ b/drivers/dma/ioat/dca.c
@@ -0,0 +1,354 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2007 - 2009 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/dca.h>
+
+/* either a kernel change is needed, or we need something like this in kernel */
+#ifndef CONFIG_SMP
+#include <asm/smp.h>
+#undef cpu_physical_id
+#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24)
+#endif
+
+#include "dma.h"
+#include "registers.h"
+
+/*
+ * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6
+ * contain the bit number of the APIC ID to map into the DCA tag.  If the valid
+ * bit is not set, then the value must be 0 or 1 and defines the bit in the tag.
+ */
+#define DCA_TAG_MAP_VALID 0x80
+
+#define DCA3_TAG_MAP_BIT_TO_INV 0x80
+#define DCA3_TAG_MAP_BIT_TO_SEL 0x40
+#define DCA3_TAG_MAP_LITERAL_VAL 0x1
+
+#define DCA_TAG_MAP_MASK 0xDF
+
+/* expected tag map bytes for I/OAT ver.2 */
+#define DCA2_TAG_MAP_BYTE0 0x80
+#define DCA2_TAG_MAP_BYTE1 0x0
+#define DCA2_TAG_MAP_BYTE2 0x81
+#define DCA2_TAG_MAP_BYTE3 0x82
+#define DCA2_TAG_MAP_BYTE4 0x82
+
+/* verify if tag map matches expected values */
+static inline int dca2_tag_map_valid(u8 *tag_map)
+{
+	return ((tag_map[0] == DCA2_TAG_MAP_BYTE0) &&
+		(tag_map[1] == DCA2_TAG_MAP_BYTE1) &&
+		(tag_map[2] == DCA2_TAG_MAP_BYTE2) &&
+		(tag_map[3] == DCA2_TAG_MAP_BYTE3) &&
+		(tag_map[4] == DCA2_TAG_MAP_BYTE4));
+}
+
+/*
+ * "Legacy" DCA systems do not implement the DCA register set in the
+ * I/OAT device.  Software needs direct support for their tag mappings.
+ */
+
+#define APICID_BIT(x)		(DCA_TAG_MAP_VALID | (x))
+#define IOAT_TAG_MAP_LEN	8
+
+/* pack PCI B/D/F into a u16 */
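+/* e.g. bus 0x05, devfn 0x10 yields requester id 0x0510 */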
+static inline u16 dcaid_from_pcidev(struct pci_dev *pci)
+{
+	return (pci->bus->number << 8) | pci->devfn;
+}
+
+static int dca_enabled_in_bios(struct pci_dev *pdev)
+{
+	/* CPUID level 9 returns DCA configuration */
+	/* Bit 0 indicates DCA enabled by the BIOS */
+	unsigned long cpuid_level_9;
+	int res;
+
+	cpuid_level_9 = cpuid_eax(9);
+	res = test_bit(0, &cpuid_level_9);
+	if (!res)
+		dev_dbg(&pdev->dev, "DCA is disabled in BIOS\n");
+
+	return res;
+}
+
+int system_has_dca_enabled(struct pci_dev *pdev)
+{
+	if (boot_cpu_has(X86_FEATURE_DCA))
+		return dca_enabled_in_bios(pdev);
+
+	dev_dbg(&pdev->dev, "boot cpu doesn't have X86_FEATURE_DCA\n");
+	return 0;
+}
+
+struct ioat_dca_slot {
+	struct pci_dev *pdev;	/* requester device */
+	u16 rid;		/* requester id, as used by IOAT */
+};
+
+#define IOAT_DCA_MAX_REQ 6
+#define IOAT3_DCA_MAX_REQ 2
+
+struct ioat_dca_priv {
+	void __iomem		*iobase;
+	void __iomem		*dca_base;
+	int			 max_requesters;
+	int			 requester_count;
+	u8			 tag_map[IOAT_TAG_MAP_LEN];
+	struct ioat_dca_slot 	 req_slots[0];
+};
+
+static int ioat_dca_dev_managed(struct dca_provider *dca,
+				struct device *dev)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	struct pci_dev *pdev;
+	int i;
+
+	pdev = to_pci_dev(dev);
+	for (i = 0; i < ioatdca->max_requesters; i++) {
+		if (ioatdca->req_slots[i].pdev == pdev)
+			return 1;
+	}
+	return 0;
+}
+
+static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	struct pci_dev *pdev;
+	int i;
+	u16 id;
+	u16 global_req_table;
+
+	/* This implementation only supports PCI-Express */
+	if (!dev_is_pci(dev))
+		return -ENODEV;
+	pdev = to_pci_dev(dev);
+	id = dcaid_from_pcidev(pdev);
+
+	if (ioatdca->requester_count == ioatdca->max_requesters)
+		return -ENODEV;
+
+	for (i = 0; i < ioatdca->max_requesters; i++) {
+		if (ioatdca->req_slots[i].pdev == NULL) {
+			/* found an empty slot */
+			ioatdca->requester_count++;
+			ioatdca->req_slots[i].pdev = pdev;
+			ioatdca->req_slots[i].rid = id;
+			global_req_table =
+			      readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
+			writel(id | IOAT_DCA_GREQID_VALID,
+			       ioatdca->iobase + global_req_table + (i * 4));
+			return i;
+		}
+	}
+	/* Error, ioatdca->requester_count is out of whack */
+	return -EFAULT;
+}
+
+static int ioat_dca_remove_requester(struct dca_provider *dca,
+				      struct device *dev)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	struct pci_dev *pdev;
+	int i;
+	u16 global_req_table;
+
+	/* This implementation only supports PCI-Express */
+	if (!dev_is_pci(dev))
+		return -ENODEV;
+	pdev = to_pci_dev(dev);
+
+	for (i = 0; i < ioatdca->max_requesters; i++) {
+		if (ioatdca->req_slots[i].pdev == pdev) {
+			global_req_table =
+			      readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
+			writel(0, ioatdca->iobase + global_req_table + (i * 4));
+			ioatdca->req_slots[i].pdev = NULL;
+			ioatdca->req_slots[i].rid = 0;
+			ioatdca->requester_count--;
+			return i;
+		}
+	}
+	return -ENODEV;
+}
+
+static u8 ioat_dca_get_tag(struct dca_provider *dca,
+			    struct device *dev,
+			    int cpu)
+{
+	u8 tag;
+
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	int i, apic_id, bit, value;
+	u8 entry;
+
+	tag = 0;
+	apic_id = cpu_physical_id(cpu);
+
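+	/*
+	 * Build the tag bit by bit: each tag_map entry either selects an
+	 * APIC ID bit (optionally inverted) or supplies a literal 0/1.
+	 */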
+	for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
+		entry = ioatdca->tag_map[i];
+		if (entry & DCA3_TAG_MAP_BIT_TO_SEL) {
+			bit = entry &
+				~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV);
+			value = (apic_id & (1 << bit)) ? 1 : 0;
+		} else if (entry & DCA3_TAG_MAP_BIT_TO_INV) {
+			bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV;
+			value = (apic_id & (1 << bit)) ? 0 : 1;
+		} else {
+			value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0;
+		}
+		tag |= (value << i);
+	}
+
+	return tag;
+}
+
+static const struct dca_ops ioat_dca_ops = {
+	.add_requester		= ioat_dca_add_requester,
+	.remove_requester	= ioat_dca_remove_requester,
+	.get_tag		= ioat_dca_get_tag,
+	.dev_managed		= ioat_dca_dev_managed,
+};
+
+static int ioat_dca_count_dca_slots(void *iobase, u16 dca_offset)
+{
+	int slots = 0;
+	u32 req;
+	u16 global_req_table;
+
+	global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET);
+	if (global_req_table == 0)
+		return 0;
+
+	do {
+		req = readl(iobase + global_req_table + (slots * sizeof(u32)));
+		slots++;
+	} while ((req & IOAT_DCA_GREQID_LASTID) == 0);
+
+	return slots;
+}
+
+static inline int dca3_tag_map_invalid(u8 *tag_map)
+{
+	/*
+	 * If the tag map is not programmed by the BIOS the default is:
+	 * 0x80 0x80 0x80 0x80 0x80 0x00 0x00 0x00
+	 *
+	 * This is an invalid map and will result in only 2 possible tags
+	 * 0x1F and 0x00.  0x00 is an invalid DCA tag so we know that
+	 * this entire definition is invalid.
+	 */
+	return ((tag_map[0] == DCA_TAG_MAP_VALID) &&
+		(tag_map[1] == DCA_TAG_MAP_VALID) &&
+		(tag_map[2] == DCA_TAG_MAP_VALID) &&
+		(tag_map[3] == DCA_TAG_MAP_VALID) &&
+		(tag_map[4] == DCA_TAG_MAP_VALID));
+}
+
+struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+	struct dca_provider *dca;
+	struct ioat_dca_priv *ioatdca;
+	int slots;
+	int i;
+	int err;
+	u16 dca_offset;
+	u16 csi_fsb_control;
+	u16 pcie_control;
+	u8 bit;
+
+	union {
+		u64 full;
+		struct {
+			u32 low;
+			u32 high;
+		};
+	} tag_map;
+
+	if (!system_has_dca_enabled(pdev))
+		return NULL;
+
+	dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
+	if (dca_offset == 0)
+		return NULL;
+
+	slots = ioat_dca_count_dca_slots(iobase, dca_offset);
+	if (slots == 0)
+		return NULL;
+
+	dca = alloc_dca_provider(&ioat_dca_ops,
+				 sizeof(*ioatdca)
+				      + (sizeof(struct ioat_dca_slot) * slots));
+	if (!dca)
+		return NULL;
+
+	ioatdca = dca_priv(dca);
+	ioatdca->iobase = iobase;
+	ioatdca->dca_base = iobase + dca_offset;
+	ioatdca->max_requesters = slots;
+
+	/* some bios might not know to turn these on */
+	csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
+	if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) {
+		csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH;
+		writew(csi_fsb_control,
+		       ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
+	}
+	pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
+	if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) {
+		pcie_control |= IOAT3_PCI_CONTROL_MEMWR;
+		writew(pcie_control,
+		       ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
+	}
+
+
+	/* TODO version, compatibility and configuration checks */
+
+	/* copy out the APIC to DCA tag map */
+	tag_map.low =
+		readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW);
+	tag_map.high =
+		readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH);
+	for (i = 0; i < 8; i++) {
+		bit = tag_map.full >> (8 * i);
+		ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK;
+	}
+
+	if (dca3_tag_map_invalid(ioatdca->tag_map)) {
+		add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
+		pr_warn_once("%s %s: APICID_TAG_MAP set incorrectly by BIOS, disabling DCA\n",
+			     dev_driver_string(&pdev->dev),
+			     dev_name(&pdev->dev));
+		free_dca_provider(dca);
+		return NULL;
+	}
+
+	err = register_dca_provider(dca, &pdev->dev);
+	if (err) {
+		free_dca_provider(dca);
+		return NULL;
+	}
+
+	return dca;
+}
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
new file mode 100644
index 0000000..23fb2fa
--- /dev/null
+++ b/drivers/dma/ioat/dma.c
@@ -0,0 +1,1036 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine, which does asynchronous
+ * copy operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/prefetch.h>
+#include <linux/sizes.h>
+#include "dma.h"
+#include "registers.h"
+#include "hw.h"
+
+#include "../dmaengine.h"
+
+static char *chanerr_str[] = {
+	"DMA Transfer Source Address Error",
+	"DMA Transfer Destination Address Error",
+	"Next Descriptor Address Error",
+	"Descriptor Error",
+	"Chan Address Value Error",
+	"CHANCMD Error",
+	"Chipset Uncorrectable Data Integrity Error",
+	"DMA Uncorrectable Data Integrity Error",
+	"Read Data Error",
+	"Write Data Error",
+	"Descriptor Control Error",
+	"Descriptor Transfer Size Error",
+	"Completion Address Error",
+	"Interrupt Configuration Error",
+	"Super extended descriptor Address Error",
+	"Unaffiliated Error",
+	"CRC or XOR P Error",
+	"XOR Q Error",
+	"Descriptor Count Error",
+	"DIF All F detect Error",
+	"Guard Tag verification Error",
+	"Application Tag verification Error",
+	"Reference Tag verification Error",
+	"Bundle Bit Error",
+	"Result DIF All F detect Error",
+	"Result Guard Tag verification Error",
+	"Result Application Tag verification Error",
+	"Result Reference Tag verification Error",
+};
+
+static void ioat_eh(struct ioatdma_chan *ioat_chan);
+
+static void ioat_print_chanerrs(struct ioatdma_chan *ioat_chan, u32 chanerr)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(chanerr_str); i++) {
+		if ((chanerr >> i) & 1) {
+			dev_err(to_dev(ioat_chan), "Err(%d): %s\n",
+				i, chanerr_str[i]);
+		}
+	}
+}
+
+/**
+ * ioat_dma_do_interrupt - handler used for single vector interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
+{
+	struct ioatdma_device *instance = data;
+	struct ioatdma_chan *ioat_chan;
+	unsigned long attnstatus;
+	int bit;
+	u8 intrctrl;
+
+	intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
+
+	if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
+		return IRQ_NONE;
+
+	if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
+		writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+		return IRQ_NONE;
+	}
+
+	attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
+	for_each_set_bit(bit, &attnstatus, BITS_PER_LONG) {
+		ioat_chan = ioat_chan_by_index(instance, bit);
+		if (test_bit(IOAT_RUN, &ioat_chan->state))
+			tasklet_schedule(&ioat_chan->cleanup_task);
+	}
+
+	writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+	return IRQ_HANDLED;
+}
+
+/**
+ * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
+{
+	struct ioatdma_chan *ioat_chan = data;
+
+	if (test_bit(IOAT_RUN, &ioat_chan->state))
+		tasklet_schedule(&ioat_chan->cleanup_task);
+
+	return IRQ_HANDLED;
+}
+
+void ioat_stop(struct ioatdma_chan *ioat_chan)
+{
+	struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+	struct pci_dev *pdev = ioat_dma->pdev;
+	int chan_id = chan_num(ioat_chan);
+	struct msix_entry *msix;
+
+	/* 1/ stop irq from firing tasklets
+	 * 2/ stop the tasklet from re-arming irqs
+	 */
+	clear_bit(IOAT_RUN, &ioat_chan->state);
+
+	/* flush inflight interrupts */
+	switch (ioat_dma->irq_mode) {
+	case IOAT_MSIX:
+		msix = &ioat_dma->msix_entries[chan_id];
+		synchronize_irq(msix->vector);
+		break;
+	case IOAT_MSI:
+	case IOAT_INTX:
+		synchronize_irq(pdev->irq);
+		break;
+	default:
+		break;
+	}
+
+	/* flush inflight timers */
+	del_timer_sync(&ioat_chan->timer);
+
+	/* flush inflight tasklet runs */
+	tasklet_kill(&ioat_chan->cleanup_task);
+
+	/* final cleanup now that everything is quiesced and can't re-arm */
+	ioat_cleanup_event((unsigned long)&ioat_chan->dma_chan);
+}
+
+static void __ioat_issue_pending(struct ioatdma_chan *ioat_chan)
+{
+	ioat_chan->dmacount += ioat_ring_pending(ioat_chan);
+	ioat_chan->issued = ioat_chan->head;
+	writew(ioat_chan->dmacount,
+	       ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
+	dev_dbg(to_dev(ioat_chan),
+		"%s: head: %#x tail: %#x issued: %#x count: %#x\n",
+		__func__, ioat_chan->head, ioat_chan->tail,
+		ioat_chan->issued, ioat_chan->dmacount);
+}
+
+void ioat_issue_pending(struct dma_chan *c)
+{
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+
+	if (ioat_ring_pending(ioat_chan)) {
+		spin_lock_bh(&ioat_chan->prep_lock);
+		__ioat_issue_pending(ioat_chan);
+		spin_unlock_bh(&ioat_chan->prep_lock);
+	}
+}
+
+/**
+ * ioat_update_pending - issue pending descriptors once over the watermark
+ * @ioat_chan: ioat channel
+ *
+ * Check if the number of unsubmitted descriptors has exceeded the
+ * watermark.  Called with prep_lock held
+ */
+static void ioat_update_pending(struct ioatdma_chan *ioat_chan)
+{
+	if (ioat_ring_pending(ioat_chan) > ioat_pending_level)
+		__ioat_issue_pending(ioat_chan);
+}
+
+static void __ioat_start_null_desc(struct ioatdma_chan *ioat_chan)
+{
+	struct ioat_ring_ent *desc;
+	struct ioat_dma_descriptor *hw;
+
+	if (ioat_ring_space(ioat_chan) < 1) {
+		dev_err(to_dev(ioat_chan),
+			"Unable to start null desc - ring full\n");
+		return;
+	}
+
+	dev_dbg(to_dev(ioat_chan),
+		"%s: head: %#x tail: %#x issued: %#x\n",
+		__func__, ioat_chan->head, ioat_chan->tail, ioat_chan->issued);
+	desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head);
+
+	hw = desc->hw;
+	hw->ctl = 0;
+	hw->ctl_f.null = 1;
+	hw->ctl_f.int_en = 1;
+	hw->ctl_f.compl_write = 1;
+	/* set size to non-zero value (channel returns error when size is 0) */
+	hw->size = NULL_DESC_BUFFER_SIZE;
+	hw->src_addr = 0;
+	hw->dst_addr = 0;
+	async_tx_ack(&desc->txd);
+	ioat_set_chainaddr(ioat_chan, desc->txd.phys);
+	dump_desc_dbg(ioat_chan, desc);
+	/* make sure descriptors are written before we submit */
+	wmb();
+	ioat_chan->head += 1;
+	__ioat_issue_pending(ioat_chan);
+}
+
+void ioat_start_null_desc(struct ioatdma_chan *ioat_chan)
+{
+	spin_lock_bh(&ioat_chan->prep_lock);
+	if (!test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+		__ioat_start_null_desc(ioat_chan);
+	spin_unlock_bh(&ioat_chan->prep_lock);
+}
+
+static void __ioat_restart_chan(struct ioatdma_chan *ioat_chan)
+{
+	/* set the tail to be re-issued */
+	ioat_chan->issued = ioat_chan->tail;
+	ioat_chan->dmacount = 0;
+	mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+	dev_dbg(to_dev(ioat_chan),
+		"%s: head: %#x tail: %#x issued: %#x count: %#x\n",
+		__func__, ioat_chan->head, ioat_chan->tail,
+		ioat_chan->issued, ioat_chan->dmacount);
+
+	if (ioat_ring_pending(ioat_chan)) {
+		struct ioat_ring_ent *desc;
+
+		desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail);
+		ioat_set_chainaddr(ioat_chan, desc->txd.phys);
+		__ioat_issue_pending(ioat_chan);
+	} else
+		__ioat_start_null_desc(ioat_chan);
+}
+
+static int ioat_quiesce(struct ioatdma_chan *ioat_chan, unsigned long tmo)
+{
+	unsigned long end = jiffies + tmo;
+	int err = 0;
+	u32 status;
+
+	status = ioat_chansts(ioat_chan);
+	if (is_ioat_active(status) || is_ioat_idle(status))
+		ioat_suspend(ioat_chan);
+	while (is_ioat_active(status) || is_ioat_idle(status)) {
+		if (tmo && time_after(jiffies, end)) {
+			err = -ETIMEDOUT;
+			break;
+		}
+		status = ioat_chansts(ioat_chan);
+		cpu_relax();
+	}
+
+	return err;
+}
+
+static int ioat_reset_sync(struct ioatdma_chan *ioat_chan, unsigned long tmo)
+{
+	unsigned long end = jiffies + tmo;
+	int err = 0;
+
+	ioat_reset(ioat_chan);
+	while (ioat_reset_pending(ioat_chan)) {
+		if (end && time_after(jiffies, end)) {
+			err = -ETIMEDOUT;
+			break;
+		}
+		cpu_relax();
+	}
+
+	return err;
+}
+
+static dma_cookie_t ioat_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
+	__releases(&ioat_chan->prep_lock)
+{
+	struct dma_chan *c = tx->chan;
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+	dma_cookie_t cookie;
+
+	cookie = dma_cookie_assign(tx);
+	dev_dbg(to_dev(ioat_chan), "%s: cookie: %d\n", __func__, cookie);
+
+	if (!test_and_set_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state))
+		mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+	/* make descriptor updates visible before advancing ioat->head,
+	 * this is purposefully not smp_wmb() since we are also
+	 * publishing the descriptor updates to a dma device
+	 */
+	wmb();
+
+	ioat_chan->head += ioat_chan->produce;
+
+	ioat_update_pending(ioat_chan);
+	spin_unlock_bh(&ioat_chan->prep_lock);
+
+	return cookie;
+}
+
+static struct ioat_ring_ent *
+ioat_alloc_ring_ent(struct dma_chan *chan, int idx, gfp_t flags)
+{
+	struct ioat_dma_descriptor *hw;
+	struct ioat_ring_ent *desc;
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+	int chunk;
+	dma_addr_t phys;
+	u8 *pos;
+	off_t offs;
+
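+	/*
+	 * Hardware descriptors are carved out of 2MB coherent chunks; the
+	 * ring index selects the chunk and the offset within it.
+	 */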
+	chunk = idx / IOAT_DESCS_PER_2M;
+	idx &= (IOAT_DESCS_PER_2M - 1);
+	offs = idx * IOAT_DESC_SZ;
+	pos = (u8 *)ioat_chan->descs[chunk].virt + offs;
+	phys = ioat_chan->descs[chunk].hw + offs;
+	hw = (struct ioat_dma_descriptor *)pos;
+	memset(hw, 0, sizeof(*hw));
+
+	desc = kmem_cache_zalloc(ioat_cache, flags);
+	if (!desc)
+		return NULL;
+
+	dma_async_tx_descriptor_init(&desc->txd, chan);
+	desc->txd.tx_submit = ioat_tx_submit_unlock;
+	desc->hw = hw;
+	desc->txd.phys = phys;
+	return desc;
+}
+
+void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan)
+{
+	kmem_cache_free(ioat_cache, desc);
+}
+
+struct ioat_ring_ent **
+ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
+{
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+	struct ioat_ring_ent **ring;
+	int total_descs = 1 << order;
+	int i, chunks;
+
+	/* allocate the array to hold the software ring */
+	ring = kcalloc(total_descs, sizeof(*ring), flags);
+	if (!ring)
+		return NULL;
+
+	ioat_chan->desc_chunks = chunks = (total_descs * IOAT_DESC_SZ) / SZ_2M;
+
+	for (i = 0; i < chunks; i++) {
+		struct ioat_descs *descs = &ioat_chan->descs[i];
+
+		descs->virt = dma_alloc_coherent(to_dev(ioat_chan),
+						 SZ_2M, &descs->hw, flags);
+		if (!descs->virt) {
+			int idx;
+
+			for (idx = 0; idx < i; idx++) {
+				descs = &ioat_chan->descs[idx];
+				dma_free_coherent(to_dev(ioat_chan), SZ_2M,
+						  descs->virt, descs->hw);
+				descs->virt = NULL;
+				descs->hw = 0;
+			}
+
+			ioat_chan->desc_chunks = 0;
+			kfree(ring);
+			return NULL;
+		}
+	}
+
+	for (i = 0; i < total_descs; i++) {
+		ring[i] = ioat_alloc_ring_ent(c, i, flags);
+		if (!ring[i]) {
+			int idx;
+
+			while (i--)
+				ioat_free_ring_ent(ring[i], c);
+
+			for (idx = 0; idx < ioat_chan->desc_chunks; idx++) {
+				dma_free_coherent(to_dev(ioat_chan),
+						  SZ_2M,
+						  ioat_chan->descs[idx].virt,
+						  ioat_chan->descs[idx].hw);
+				ioat_chan->descs[idx].virt = NULL;
+				ioat_chan->descs[idx].hw = 0;
+			}
+
+			ioat_chan->desc_chunks = 0;
+			kfree(ring);
+			return NULL;
+		}
+		set_desc_id(ring[i], i);
+	}
+
+	/* link descs */
+	for (i = 0; i < total_descs-1; i++) {
+		struct ioat_ring_ent *next = ring[i+1];
+		struct ioat_dma_descriptor *hw = ring[i]->hw;
+
+		hw->next = next->txd.phys;
+	}
+	ring[i]->hw->next = ring[0]->txd.phys;
+
+	return ring;
+}
+
+/**
+ * ioat_check_space_lock - verify space and grab ring producer lock
+ * @ioat_chan: ioat channel (ring) to operate on
+ * @num_descs: allocation length
+ */
+int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs)
+	__acquires(&ioat_chan->prep_lock)
+{
+	spin_lock_bh(&ioat_chan->prep_lock);
+	/* never allow the last descriptor to be consumed, we need at
+	 * least one free at all times to allow for on-the-fly ring
+	 * resizing.
+	 */
+	if (likely(ioat_ring_space(ioat_chan) > num_descs)) {
+		dev_dbg(to_dev(ioat_chan), "%s: num_descs: %d (%x:%x:%x)\n",
+			__func__, num_descs, ioat_chan->head,
+			ioat_chan->tail, ioat_chan->issued);
+		ioat_chan->produce = num_descs;
+		return 0;  /* with ioat->prep_lock held */
+	}
+	spin_unlock_bh(&ioat_chan->prep_lock);
+
+	dev_dbg_ratelimited(to_dev(ioat_chan),
+			    "%s: ring full! num_descs: %d (%x:%x:%x)\n",
+			    __func__, num_descs, ioat_chan->head,
+			    ioat_chan->tail, ioat_chan->issued);
+
+	/* make reclaim progress in the allocation failure case; we may
+	 * be called with bottom halves disabled, so trigger the timer
+	 * event directly
+	 */
+	if (time_is_before_jiffies(ioat_chan->timer.expires)
+	    && timer_pending(&ioat_chan->timer)) {
+		mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
+		ioat_timer_event(&ioat_chan->timer);
+	}
+
+	return -ENOMEM;
+}
+
+static bool desc_has_ext(struct ioat_ring_ent *desc)
+{
+	struct ioat_dma_descriptor *hw = desc->hw;
+
+	if (hw->ctl_f.op == IOAT_OP_XOR ||
+	    hw->ctl_f.op == IOAT_OP_XOR_VAL) {
+		struct ioat_xor_descriptor *xor = desc->xor;
+
+		if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5)
+			return true;
+	} else if (hw->ctl_f.op == IOAT_OP_PQ ||
+		   hw->ctl_f.op == IOAT_OP_PQ_VAL) {
+		struct ioat_pq_descriptor *pq = desc->pq;
+
+		if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3)
+			return true;
+	}
+
+	return false;
+}
+
+static void
+ioat_free_sed(struct ioatdma_device *ioat_dma, struct ioat_sed_ent *sed)
+{
+	if (!sed)
+		return;
+
+	dma_pool_free(ioat_dma->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma);
+	kmem_cache_free(ioat_sed_cache, sed);
+}
+
+static u64 ioat_get_current_completion(struct ioatdma_chan *ioat_chan)
+{
+	u64 phys_complete;
+	u64 completion;
+
+	completion = *ioat_chan->completion;
+	phys_complete = ioat_chansts_to_addr(completion);
+
+	dev_dbg(to_dev(ioat_chan), "%s: phys_complete: %#llx\n", __func__,
+		(unsigned long long) phys_complete);
+
+	return phys_complete;
+}
+
+static bool ioat_cleanup_preamble(struct ioatdma_chan *ioat_chan,
+				   u64 *phys_complete)
+{
+	*phys_complete = ioat_get_current_completion(ioat_chan);
+	if (*phys_complete == ioat_chan->last_completion)
+		return false;
+
+	clear_bit(IOAT_COMPLETION_ACK, &ioat_chan->state);
+	mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
+
+	return true;
+}
+
+static void
+desc_get_errstat(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc)
+{
+	struct ioat_dma_descriptor *hw = desc->hw;
+
+	switch (hw->ctl_f.op) {
+	case IOAT_OP_PQ_VAL:
+	case IOAT_OP_PQ_VAL_16S:
+	{
+		struct ioat_pq_descriptor *pq = desc->pq;
+
+		/* check if there's error written */
+		if (!pq->dwbes_f.wbes)
+			return;
+
+		/* need to set a chanerr var for checking to clear later */
+
+		if (pq->dwbes_f.p_val_err)
+			*desc->result |= SUM_CHECK_P_RESULT;
+
+		if (pq->dwbes_f.q_val_err)
+			*desc->result |= SUM_CHECK_Q_RESULT;
+
+		return;
+	}
+	default:
+		return;
+	}
+}
+
+/**
+ * __cleanup - reclaim used descriptors
+ * @ioat_chan: channel (ring) to clean
+ * @phys_complete: physical address of the last completed descriptor
+ */
+static void __cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete)
+{
+	struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+	struct ioat_ring_ent *desc;
+	bool seen_current = false;
+	int idx = ioat_chan->tail, i;
+	u16 active;
+
+	dev_dbg(to_dev(ioat_chan), "%s: head: %#x tail: %#x issued: %#x\n",
+		__func__, ioat_chan->head, ioat_chan->tail, ioat_chan->issued);
+
+	/*
+	 * At restart of the channel, the completion address and the
+	 * channel status will be 0 due to starting a new chain. Since
+	 * it's a new chain and the first descriptor "fails", there is
+	 * nothing to clean up. We do not want to reap the entire submitted
+	 * chain due to this 0 address value and then BUG.
+	 */
+	if (!phys_complete)
+		return;
+
+	active = ioat_ring_active(ioat_chan);
+	for (i = 0; i < active && !seen_current; i++) {
+		struct dma_async_tx_descriptor *tx;
+
+		prefetch(ioat_get_ring_ent(ioat_chan, idx + i + 1));
+		desc = ioat_get_ring_ent(ioat_chan, idx + i);
+		dump_desc_dbg(ioat_chan, desc);
+
+		/* set err stat if we are using dwbes */
+		if (ioat_dma->cap & IOAT_CAP_DWBES)
+			desc_get_errstat(ioat_chan, desc);
+
+		tx = &desc->txd;
+		if (tx->cookie) {
+			dma_cookie_complete(tx);
+			dma_descriptor_unmap(tx);
+			dmaengine_desc_get_callback_invoke(tx, NULL);
+			tx->callback = NULL;
+			tx->callback_result = NULL;
+		}
+
+		if (tx->phys == phys_complete)
+			seen_current = true;
+
+		/* skip extended descriptors */
+		if (desc_has_ext(desc)) {
+			BUG_ON(i + 1 >= active);
+			i++;
+		}
+
+		/* cleanup super extended descriptors */
+		if (desc->sed) {
+			ioat_free_sed(ioat_dma, desc->sed);
+			desc->sed = NULL;
+		}
+	}
+
+	/* finish all descriptor reads before incrementing tail */
+	smp_mb();
+	ioat_chan->tail = idx + i;
+	/* no active descs have written a completion? */
+	BUG_ON(active && !seen_current);
+	ioat_chan->last_completion = phys_complete;
+
+	if (active - i == 0) {
+		dev_dbg(to_dev(ioat_chan), "%s: cancel completion timeout\n",
+			__func__);
+		mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
+	}
+
+	/* sysfs-controlled microsecond interrupt delay, scaled by the number of still-pending descriptors */
+	if (ioat_chan->intr_coalesce != ioat_chan->prev_intr_coalesce) {
+		writew(min((ioat_chan->intr_coalesce * (active - i)),
+		       IOAT_INTRDELAY_MASK),
+		       ioat_chan->ioat_dma->reg_base + IOAT_INTRDELAY_OFFSET);
+		ioat_chan->prev_intr_coalesce = ioat_chan->intr_coalesce;
+	}
+}
+
+static void ioat_cleanup(struct ioatdma_chan *ioat_chan)
+{
+	u64 phys_complete;
+
+	spin_lock_bh(&ioat_chan->cleanup_lock);
+
+	if (ioat_cleanup_preamble(ioat_chan, &phys_complete))
+		__cleanup(ioat_chan, phys_complete);
+
+	if (is_ioat_halted(*ioat_chan->completion)) {
+		u32 chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+
+		if (chanerr &
+		    (IOAT_CHANERR_HANDLE_MASK | IOAT_CHANERR_RECOVER_MASK)) {
+			mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
+			ioat_eh(ioat_chan);
+		}
+	}
+
+	spin_unlock_bh(&ioat_chan->cleanup_lock);
+}
+
+void ioat_cleanup_event(unsigned long data)
+{
+	struct ioatdma_chan *ioat_chan = to_ioat_chan((void *)data);
+
+	ioat_cleanup(ioat_chan);
+	if (!test_bit(IOAT_RUN, &ioat_chan->state))
+		return;
+	writew(IOAT_CHANCTRL_RUN, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+static void ioat_restart_channel(struct ioatdma_chan *ioat_chan)
+{
+	u64 phys_complete;
+
+	/* set the completion address register again */
+	writel(lower_32_bits(ioat_chan->completion_dma),
+	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+	writel(upper_32_bits(ioat_chan->completion_dma),
+	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
+
+	ioat_quiesce(ioat_chan, 0);
+	if (ioat_cleanup_preamble(ioat_chan, &phys_complete))
+		__cleanup(ioat_chan, phys_complete);
+
+	__ioat_restart_chan(ioat_chan);
+}
+
+
+static void ioat_abort_descs(struct ioatdma_chan *ioat_chan)
+{
+	struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+	struct ioat_ring_ent *desc;
+	u16 active;
+	int idx = ioat_chan->tail, i;
+
+	/*
+	 * We assume the failed descriptor has already been processed.
+	 * Now complete all remaining submitted descriptors with an
+	 * aborted result.
+	 */
+	active = ioat_ring_active(ioat_chan);
+
+	/* we skip the failed descriptor that tail points to */
+	for (i = 1; i < active; i++) {
+		struct dma_async_tx_descriptor *tx;
+
+		prefetch(ioat_get_ring_ent(ioat_chan, idx + i + 1));
+		desc = ioat_get_ring_ent(ioat_chan, idx + i);
+
+		tx = &desc->txd;
+		if (tx->cookie) {
+			struct dmaengine_result res;
+
+			dma_cookie_complete(tx);
+			dma_descriptor_unmap(tx);
+			res.result = DMA_TRANS_ABORTED;
+			dmaengine_desc_get_callback_invoke(tx, &res);
+			tx->callback = NULL;
+			tx->callback_result = NULL;
+		}
+
+		/* skip extended descriptors */
+		if (desc_has_ext(desc)) {
+			WARN_ON(i + 1 >= active);
+			i++;
+		}
+
+		/* cleanup super extended descriptors */
+		if (desc->sed) {
+			ioat_free_sed(ioat_dma, desc->sed);
+			desc->sed = NULL;
+		}
+	}
+
+	smp_mb(); /* finish all descriptor reads before incrementing tail */
+	ioat_chan->tail = idx + active;
+
+	desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail);
+	ioat_chan->last_completion = *ioat_chan->completion = desc->txd.phys;
+}
+
+static void ioat_eh(struct ioatdma_chan *ioat_chan)
+{
+	struct pci_dev *pdev = to_pdev(ioat_chan);
+	struct ioat_dma_descriptor *hw;
+	struct dma_async_tx_descriptor *tx;
+	u64 phys_complete;
+	struct ioat_ring_ent *desc;
+	u32 err_handled = 0;
+	u32 chanerr_int;
+	u32 chanerr;
+	bool abort = false;
+	struct dmaengine_result res;
+
+	/* cleanup so tail points to descriptor that caused the error */
+	if (ioat_cleanup_preamble(ioat_chan, &phys_complete))
+		__cleanup(ioat_chan, phys_complete);
+
+	chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+	pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int);
+
+	dev_dbg(to_dev(ioat_chan), "%s: error = %x:%x\n",
+		__func__, chanerr, chanerr_int);
+
+	desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail);
+	hw = desc->hw;
+	dump_desc_dbg(ioat_chan, desc);
+
+	switch (hw->ctl_f.op) {
+	case IOAT_OP_XOR_VAL:
+		if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) {
+			*desc->result |= SUM_CHECK_P_RESULT;
+			err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR;
+		}
+		break;
+	case IOAT_OP_PQ_VAL:
+	case IOAT_OP_PQ_VAL_16S:
+		if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) {
+			*desc->result |= SUM_CHECK_P_RESULT;
+			err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR;
+		}
+		if (chanerr & IOAT_CHANERR_XOR_Q_ERR) {
+			*desc->result |= SUM_CHECK_Q_RESULT;
+			err_handled |= IOAT_CHANERR_XOR_Q_ERR;
+		}
+		break;
+	}
+
+	if (chanerr & IOAT_CHANERR_RECOVER_MASK) {
+		if (chanerr & IOAT_CHANERR_READ_DATA_ERR) {
+			res.result = DMA_TRANS_READ_FAILED;
+			err_handled |= IOAT_CHANERR_READ_DATA_ERR;
+		} else if (chanerr & IOAT_CHANERR_WRITE_DATA_ERR) {
+			res.result = DMA_TRANS_WRITE_FAILED;
+			err_handled |= IOAT_CHANERR_WRITE_DATA_ERR;
+		}
+
+		abort = true;
+	} else
+		res.result = DMA_TRANS_NOERROR;
+
+	/* fault on unhandled error or spurious halt */
+	if (chanerr ^ err_handled || chanerr == 0) {
+		dev_err(to_dev(ioat_chan), "%s: fatal error (%x:%x)\n",
+			__func__, chanerr, err_handled);
+		dev_err(to_dev(ioat_chan), "Errors handled:\n");
+		ioat_print_chanerrs(ioat_chan, err_handled);
+		dev_err(to_dev(ioat_chan), "Errors not handled:\n");
+		ioat_print_chanerrs(ioat_chan, (chanerr & ~err_handled));
+
+		BUG();
+	}
+
+	/* cleanup the faulty descriptor since we are continuing */
+	tx = &desc->txd;
+	if (tx->cookie) {
+		dma_cookie_complete(tx);
+		dma_descriptor_unmap(tx);
+		dmaengine_desc_get_callback_invoke(tx, &res);
+		tx->callback = NULL;
+		tx->callback_result = NULL;
+	}
+
+	/* mark faulting descriptor as complete */
+	*ioat_chan->completion = desc->txd.phys;
+
+	spin_lock_bh(&ioat_chan->prep_lock);
+	/* we need to abort all descriptors */
+	if (abort) {
+		ioat_abort_descs(ioat_chan);
+		/* clean up the channel, we could be in weird state */
+		ioat_reset_hw(ioat_chan);
+	}
+
+	writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+	pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr_int);
+
+	ioat_restart_channel(ioat_chan);
+	spin_unlock_bh(&ioat_chan->prep_lock);
+}
+
+static void check_active(struct ioatdma_chan *ioat_chan)
+{
+	if (ioat_ring_active(ioat_chan)) {
+		mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
+		return;
+	}
+
+	if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state))
+		mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT);
+}
+
+void ioat_timer_event(struct timer_list *t)
+{
+	struct ioatdma_chan *ioat_chan = from_timer(ioat_chan, t, timer);
+	dma_addr_t phys_complete;
+	u64 status;
+
+	status = ioat_chansts(ioat_chan);
+
+	/* when halted due to errors check for channel
+	 * programming errors before advancing the completion state
+	 */
+	if (is_ioat_halted(status)) {
+		u32 chanerr;
+
+		chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+		dev_err(to_dev(ioat_chan), "%s: Channel halted (%x)\n",
+			__func__, chanerr);
+		dev_err(to_dev(ioat_chan), "Errors:\n");
+		ioat_print_chanerrs(ioat_chan, chanerr);
+
+		if (test_bit(IOAT_RUN, &ioat_chan->state)) {
+			spin_lock_bh(&ioat_chan->cleanup_lock);
+			spin_lock_bh(&ioat_chan->prep_lock);
+			set_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
+			spin_unlock_bh(&ioat_chan->prep_lock);
+
+			ioat_abort_descs(ioat_chan);
+			dev_warn(to_dev(ioat_chan), "Reset channel...\n");
+			ioat_reset_hw(ioat_chan);
+			dev_warn(to_dev(ioat_chan), "Restart channel...\n");
+			ioat_restart_channel(ioat_chan);
+
+			spin_lock_bh(&ioat_chan->prep_lock);
+			clear_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
+			spin_unlock_bh(&ioat_chan->prep_lock);
+			spin_unlock_bh(&ioat_chan->cleanup_lock);
+		}
+
+		return;
+	}
+
+	spin_lock_bh(&ioat_chan->cleanup_lock);
+
+	/* handle the no-actives case */
+	if (!ioat_ring_active(ioat_chan)) {
+		spin_lock_bh(&ioat_chan->prep_lock);
+		check_active(ioat_chan);
+		spin_unlock_bh(&ioat_chan->prep_lock);
+		spin_unlock_bh(&ioat_chan->cleanup_lock);
+		return;
+	}
+
+	/* if we haven't made progress and we have already
+	 * acknowledged a pending completion once, then be more
+	 * forceful with a restart
+	 */
+	if (ioat_cleanup_preamble(ioat_chan, &phys_complete))
+		__cleanup(ioat_chan, phys_complete);
+	else if (test_bit(IOAT_COMPLETION_ACK, &ioat_chan->state)) {
+		u32 chanerr;
+
+		chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+		dev_err(to_dev(ioat_chan), "CHANSTS: %#Lx CHANERR: %#x\n",
+			status, chanerr);
+		dev_err(to_dev(ioat_chan), "Errors:\n");
+		ioat_print_chanerrs(ioat_chan, chanerr);
+
+		dev_dbg(to_dev(ioat_chan), "Active descriptors: %d\n",
+			ioat_ring_active(ioat_chan));
+
+		spin_lock_bh(&ioat_chan->prep_lock);
+		set_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
+		spin_unlock_bh(&ioat_chan->prep_lock);
+
+		ioat_abort_descs(ioat_chan);
+		dev_warn(to_dev(ioat_chan), "Resetting channel...\n");
+		ioat_reset_hw(ioat_chan);
+		dev_warn(to_dev(ioat_chan), "Restarting channel...\n");
+		ioat_restart_channel(ioat_chan);
+
+		spin_lock_bh(&ioat_chan->prep_lock);
+		clear_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
+		spin_unlock_bh(&ioat_chan->prep_lock);
+		spin_unlock_bh(&ioat_chan->cleanup_lock);
+		return;
+	} else
+		set_bit(IOAT_COMPLETION_ACK, &ioat_chan->state);
+
+	mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT);
+	spin_unlock_bh(&ioat_chan->cleanup_lock);
+}
+
+enum dma_status
+ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie,
+		struct dma_tx_state *txstate)
+{
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(c, cookie, txstate);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	ioat_cleanup(ioat_chan);
+
+	return dma_cookie_status(c, cookie, txstate);
+}
+
+int ioat_reset_hw(struct ioatdma_chan *ioat_chan)
+{
+	/* throw away whatever the channel was doing and get it
+	 * initialized, with ioat3 specific workarounds
+	 */
+	struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+	struct pci_dev *pdev = ioat_dma->pdev;
+	u32 chanerr;
+	u16 dev_id;
+	int err;
+
+	ioat_quiesce(ioat_chan, msecs_to_jiffies(100));
+
+	chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+	writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+
+	if (ioat_dma->version < IOAT_VER_3_3) {
+		/* clear any pending errors */
+		err = pci_read_config_dword(pdev,
+				IOAT_PCI_CHANERR_INT_OFFSET, &chanerr);
+		if (err) {
+			dev_err(&pdev->dev,
+				"channel error register unreachable\n");
+			return err;
+		}
+		pci_write_config_dword(pdev,
+				IOAT_PCI_CHANERR_INT_OFFSET, chanerr);
+
+		/* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
+		 * (workaround for spurious config parity error after restart)
+		 */
+		pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id);
+		if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
+			pci_write_config_dword(pdev,
+					       IOAT_PCI_DMAUNCERRSTS_OFFSET,
+					       0x10);
+		}
+	}
+
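+	/* CB3.3 (BWD/BDX-DE) chan reset errata workaround: save the MSI-X
+	 * registers here and restore them after the reset below
+	 */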
+	if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) {
+		ioat_dma->msixtba0 = readq(ioat_dma->reg_base + 0x1000);
+		ioat_dma->msixdata0 = readq(ioat_dma->reg_base + 0x1008);
+		ioat_dma->msixpba = readq(ioat_dma->reg_base + 0x1800);
+	}
+
+	err = ioat_reset_sync(ioat_chan, msecs_to_jiffies(200));
+	if (!err) {
+		if (is_bwd_ioat(pdev) && (ioat_dma->irq_mode == IOAT_MSIX)) {
+			writeq(ioat_dma->msixtba0, ioat_dma->reg_base + 0x1000);
+			writeq(ioat_dma->msixdata0, ioat_dma->reg_base + 0x1008);
+			writeq(ioat_dma->msixpba, ioat_dma->reg_base + 0x1800);
+		}
+	}
+
+	if (err)
+		dev_err(&pdev->dev, "Failed to reset: %d\n", err);
+
+	return err;
+}
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
new file mode 100644
index 0000000..1ab42ec
--- /dev/null
+++ b/drivers/dma/ioat/dma.h
@@ -0,0 +1,420 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef IOATDMA_H
+#define IOATDMA_H
+
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/dmapool.h>
+#include <linux/cache.h>
+#include <linux/pci_ids.h>
+#include <linux/circ_buf.h>
+#include <linux/interrupt.h>
+#include "registers.h"
+#include "hw.h"
+
+#define IOAT_DMA_VERSION  "4.00"
+
+#define IOAT_DMA_DCA_ANY_CPU		~0
+
+#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, dma_dev)
+#define to_dev(ioat_chan) (&(ioat_chan)->ioat_dma->pdev->dev)
+#define to_pdev(ioat_chan) ((ioat_chan)->ioat_dma->pdev)
+
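+/* each channel has a 0x80-byte register window after the common registers */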
+#define chan_num(ch) ((int)((ch)->reg_base - (ch)->ioat_dma->reg_base) / 0x80)
+
+/* ioat hardware assumes at least two sources for raid operations */
+#define src_cnt_to_sw(x) ((x) + 2)
+#define src_cnt_to_hw(x) ((x) - 2)
+#define ndest_to_sw(x) ((x) + 1)
+#define ndest_to_hw(x) ((x) - 1)
+#define src16_cnt_to_sw(x) ((x) + 9)
+#define src16_cnt_to_hw(x) ((x) - 9)
+
+/*
+ * workaround for IOAT ver.3.0 null descriptor issue
+ * (channel returns error when size is 0)
+ */
+#define NULL_DESC_BUFFER_SIZE 1
+
+enum ioat_irq_mode {
+	IOAT_NOIRQ = 0,
+	IOAT_MSIX,
+	IOAT_MSI,
+	IOAT_INTX
+};
+
+/**
+ * struct ioatdma_device - internal representation of an IOAT device
+ * @pdev: PCI-Express device
+ * @reg_base: MMIO register space base address
+ * @completion_pool: DMA buffers for completion ops
+ * @sed_hw_pool: DMA super descriptor pools
+ * @dma_dev: embedded struct dma_device
+ * @version: version of ioatdma device
+ * @msix_entries: irq handlers
+ * @idx: per channel data
+ * @dca: direct cache access context
+ * @irq_mode: interrupt mode (INTX, MSI, MSIX)
+ * @cap: read DMA capabilities register
+ */
+struct ioatdma_device {
+	struct pci_dev *pdev;
+	void __iomem *reg_base;
+	struct dma_pool *completion_pool;
+#define MAX_SED_POOLS	5
+	struct dma_pool *sed_hw_pool[MAX_SED_POOLS];
+	struct dma_device dma_dev;
+	u8 version;
+#define IOAT_MAX_CHANS 4
+	struct msix_entry msix_entries[IOAT_MAX_CHANS];
+	struct ioatdma_chan *idx[IOAT_MAX_CHANS];
+	struct dca_provider *dca;
+	enum ioat_irq_mode irq_mode;
+	u32 cap;
+
+	/* shadow version for CB3.3 chan reset errata workaround */
+	u64 msixtba0;
+	u64 msixdata0;
+	u32 msixpba;
+};
+
+struct ioat_descs {
+	void *virt;
+	dma_addr_t hw;
+};
+
+struct ioatdma_chan {
+	struct dma_chan dma_chan;
+	void __iomem *reg_base;
+	dma_addr_t last_completion;
+	spinlock_t cleanup_lock;
+	unsigned long state;
+	#define IOAT_CHAN_DOWN 0
+	#define IOAT_COMPLETION_ACK 1
+	#define IOAT_RESET_PENDING 2
+	#define IOAT_KOBJ_INIT_FAIL 3
+	#define IOAT_RUN 5
+	#define IOAT_CHAN_ACTIVE 6
+	struct timer_list timer;
+	#define COMPLETION_TIMEOUT msecs_to_jiffies(100)
+	#define IDLE_TIMEOUT msecs_to_jiffies(2000)
+	#define RESET_DELAY msecs_to_jiffies(100)
+	struct ioatdma_device *ioat_dma;
+	dma_addr_t completion_dma;
+	u64 *completion;
+	struct tasklet_struct cleanup_task;
+	struct kobject kobj;
+
+/* ioat v2 / v3 channel attributes
+ * @xfercap_log: log2 of channel max transfer length (for fast division)
+ * @head: allocated index
+ * @issued: hardware notification point
+ * @tail: cleanup index
+ * @dmacount: identical to 'head' except for occasionally resetting to zero
+ * @alloc_order: log2 of the number of allocated descriptors
+ * @produce: number of descriptors to produce at submit time
+ * @ring: software ring buffer implementation of hardware ring
+ * @prep_lock: serializes descriptor preparation (producers)
+ */
+	size_t xfercap_log;
+	u16 head;
+	u16 issued;
+	u16 tail;
+	u16 dmacount;
+	u16 alloc_order;
+	u16 produce;
+	struct ioat_ring_ent **ring;
+	spinlock_t prep_lock;
+	struct ioat_descs descs[2];
+	int desc_chunks;
+	int intr_coalesce;
+	int prev_intr_coalesce;
+};
+
+struct ioat_sysfs_entry {
+	struct attribute attr;
+	ssize_t (*show)(struct dma_chan *, char *);
+	ssize_t (*store)(struct dma_chan *, const char *, size_t);
+};
+
+/**
+ * struct ioat_sed_ent - wrapper around super extended hardware descriptor
+ * @hw: hardware SED
+ * @dma: dma address for the SED
+ * @parent: pointer to the parent dma descriptor
+ * @hw_pool: descriptor pool index
+ */
+struct ioat_sed_ent {
+	struct ioat_sed_raw_descriptor *hw;
+	dma_addr_t dma;
+	struct ioat_ring_ent *parent;
+	unsigned int hw_pool;
+};
+
+/**
+ * struct ioat_ring_ent - wrapper around hardware descriptor
+ * @hw: hardware DMA descriptor (for memcpy)
+ * @xor: hardware xor descriptor
+ * @xor_ex: hardware xor extension descriptor
+ * @pq: hardware pq descriptor
+ * @pq_ex: hardware pq extension descriptor
+ * @pqu: hardware pq update descriptor
+ * @raw: hardware raw (un-typed) descriptor
+ * @txd: the generic software descriptor for all engines
+ * @len: total transaction length for unmap
+ * @result: asynchronous result of validate operations
+ * @id: identifier for debug
+ * @sed: pointer to super extended descriptor sw desc
+ */
+struct ioat_ring_ent {
+	union {
+		struct ioat_dma_descriptor *hw;
+		struct ioat_xor_descriptor *xor;
+		struct ioat_xor_ext_descriptor *xor_ex;
+		struct ioat_pq_descriptor *pq;
+		struct ioat_pq_ext_descriptor *pq_ex;
+		struct ioat_pq_update_descriptor *pqu;
+		struct ioat_raw_descriptor *raw;
+	};
+	size_t len;
+	struct dma_async_tx_descriptor txd;
+	enum sum_check_flags *result;
+	#ifdef DEBUG
+	int id;
+	#endif
+	struct ioat_sed_ent *sed;
+};
+
+extern const struct sysfs_ops ioat_sysfs_ops;
+extern struct ioat_sysfs_entry ioat_version_attr;
+extern struct ioat_sysfs_entry ioat_cap_attr;
+extern int ioat_pending_level;
+extern int ioat_ring_alloc_order;
+extern struct kobj_type ioat_ktype;
+extern struct kmem_cache *ioat_cache;
+extern int ioat_ring_max_alloc_order;
+extern struct kmem_cache *ioat_sed_cache;
+
+static inline struct ioatdma_chan *to_ioat_chan(struct dma_chan *c)
+{
+	return container_of(c, struct ioatdma_chan, dma_chan);
+}
+
+/* wrapper around hardware descriptor format + additional software fields */
+#ifdef DEBUG
+#define set_desc_id(desc, i) ((desc)->id = (i))
+#define desc_id(desc) ((desc)->id)
+#else
+#define set_desc_id(desc, i)
+#define desc_id(desc) (0)
+#endif
+
+static inline void
+__dump_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_dma_descriptor *hw,
+		struct dma_async_tx_descriptor *tx, int id)
+{
+	struct device *dev = to_dev(ioat_chan);
+
+	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x"
+		" ctl: %#10.8x (op: %#x int_en: %d compl: %d)\n", id,
+		(unsigned long long) tx->phys,
+		(unsigned long long) hw->next, tx->cookie, tx->flags,
+		hw->ctl, hw->ctl_f.op, hw->ctl_f.int_en, hw->ctl_f.compl_write);
+}
+
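+/* the statement expression evaluates to 0 so it can be used in expressions */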
+#define dump_desc_dbg(c, d) \
+	({ if (d) __dump_desc_dbg(c, d->hw, &d->txd, desc_id(d)); 0; })
+
+static inline struct ioatdma_chan *
+ioat_chan_by_index(struct ioatdma_device *ioat_dma, int index)
+{
+	return ioat_dma->idx[index];
+}
+
+static inline u64 ioat_chansts(struct ioatdma_chan *ioat_chan)
+{
+	return readq(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET);
+}
+
+static inline u64 ioat_chansts_to_addr(u64 status)
+{
+	return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
+}
+
+static inline u32 ioat_chanerr(struct ioatdma_chan *ioat_chan)
+{
+	return readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+}
+
+static inline void ioat_suspend(struct ioatdma_chan *ioat_chan)
+{
+	u8 ver = ioat_chan->ioat_dma->version;
+
+	writeb(IOAT_CHANCMD_SUSPEND,
+	       ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+}
+
+static inline void ioat_reset(struct ioatdma_chan *ioat_chan)
+{
+	u8 ver = ioat_chan->ioat_dma->version;
+
+	writeb(IOAT_CHANCMD_RESET,
+	       ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+}
+
+static inline bool ioat_reset_pending(struct ioatdma_chan *ioat_chan)
+{
+	u8 ver = ioat_chan->ioat_dma->version;
+	u8 cmd;
+
+	cmd = readb(ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver));
+	return (cmd & IOAT_CHANCMD_RESET) == IOAT_CHANCMD_RESET;
+}
+
+static inline bool is_ioat_active(unsigned long status)
+{
+	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE);
+}
+
+static inline bool is_ioat_idle(unsigned long status)
+{
+	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_DONE);
+}
+
+static inline bool is_ioat_halted(unsigned long status)
+{
+	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_HALTED);
+}
+
+static inline bool is_ioat_suspended(unsigned long status)
+{
+	return ((status & IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_SUSPENDED);
+}
+
+/* channel was fatally programmed */
+static inline bool is_ioat_bug(unsigned long err)
+{
+	return !!err;
+}
+
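+/* with 64-byte descriptors, a 2MB chunk holds 32768 of them */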
+#define IOAT_MAX_ORDER 16
+#define IOAT_MAX_DESCS 65536
+#define IOAT_DESCS_PER_2M 32768
+
+static inline u32 ioat_ring_size(struct ioatdma_chan *ioat_chan)
+{
+	return 1 << ioat_chan->alloc_order;
+}
+
+/* count of descriptors in flight with the engine */
+static inline u16 ioat_ring_active(struct ioatdma_chan *ioat_chan)
+{
+	return CIRC_CNT(ioat_chan->head, ioat_chan->tail,
+			ioat_ring_size(ioat_chan));
+}
+
+/* count of descriptors pending submission to hardware */
+static inline u16 ioat_ring_pending(struct ioatdma_chan *ioat_chan)
+{
+	return CIRC_CNT(ioat_chan->head, ioat_chan->issued,
+			ioat_ring_size(ioat_chan));
+}
+
+static inline u32 ioat_ring_space(struct ioatdma_chan *ioat_chan)
+{
+	return ioat_ring_size(ioat_chan) - ioat_ring_active(ioat_chan);
+}
+
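+/* descriptors needed for @len bytes, rounded up to the per-descriptor cap */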
+static inline u16
+ioat_xferlen_to_descs(struct ioatdma_chan *ioat_chan, size_t len)
+{
+	u16 num_descs = len >> ioat_chan->xfercap_log;
+
+	num_descs += !!(len & ((1 << ioat_chan->xfercap_log) - 1));
+	return num_descs;
+}
+
+static inline struct ioat_ring_ent *
+ioat_get_ring_ent(struct ioatdma_chan *ioat_chan, u16 idx)
+{
+	return ioat_chan->ring[idx & (ioat_ring_size(ioat_chan) - 1)];
+}
+
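+/* program the 64-bit descriptor chain address as two 32-bit MMIO writes */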
+static inline void
+ioat_set_chainaddr(struct ioatdma_chan *ioat_chan, u64 addr)
+{
+	writel(addr & 0x00000000FFFFFFFF,
+	       ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
+	writel(addr >> 32,
+	       ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
+}
+
+/* IOAT Prep functions */
+struct dma_async_tx_descriptor *
+ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
+			   dma_addr_t dma_src, size_t len, unsigned long flags);
+struct dma_async_tx_descriptor *
+ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags);
+struct dma_async_tx_descriptor *
+ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+	       unsigned int src_cnt, size_t len, unsigned long flags);
+struct dma_async_tx_descriptor *
+ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
+		    unsigned int src_cnt, size_t len,
+		    enum sum_check_flags *result, unsigned long flags);
+struct dma_async_tx_descriptor *
+ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+	      unsigned int src_cnt, const unsigned char *scf, size_t len,
+	      unsigned long flags);
+struct dma_async_tx_descriptor *
+ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+		  unsigned int src_cnt, const unsigned char *scf, size_t len,
+		  enum sum_check_flags *pqres, unsigned long flags);
+struct dma_async_tx_descriptor *
+ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
+		 unsigned int src_cnt, size_t len, unsigned long flags);
+struct dma_async_tx_descriptor *
+ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
+		     unsigned int src_cnt, size_t len,
+		     enum sum_check_flags *result, unsigned long flags);
+
+/* IOAT Operation functions */
+irqreturn_t ioat_dma_do_interrupt(int irq, void *data);
+irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data);
+struct ioat_ring_ent **
+ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags);
+void ioat_start_null_desc(struct ioatdma_chan *ioat_chan);
+void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan);
+int ioat_reset_hw(struct ioatdma_chan *ioat_chan);
+enum dma_status
+ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie,
+		struct dma_tx_state *txstate);
+void ioat_cleanup_event(unsigned long data);
+void ioat_timer_event(struct timer_list *t);
+int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs);
+void ioat_issue_pending(struct dma_chan *chan);
+
+/* IOAT Init functions */
+bool is_bwd_ioat(struct pci_dev *pdev);
+struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+void ioat_kobject_add(struct ioatdma_device *ioat_dma, struct kobj_type *type);
+void ioat_kobject_del(struct ioatdma_device *ioat_dma);
+int ioat_dma_setup_interrupts(struct ioatdma_device *ioat_dma);
+void ioat_stop(struct ioatdma_chan *ioat_chan);
+#endif /* IOATDMA_H */
diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h
new file mode 100644
index 0000000..abcc51b
--- /dev/null
+++ b/drivers/dma/ioat/hw.h
@@ -0,0 +1,280 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef _IOAT_HW_H_
+#define _IOAT_HW_H_
+
+/* PCI Configuration Space Values */
+#define IOAT_MMIO_BAR		0
+
+/* CB device ID's */
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB0	0x0e20
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB1	0x0e21
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB2	0x0e22
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB3	0x0e23
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB4	0x0e24
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB5	0x0e25
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB6	0x0e26
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB7	0x0e27
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB8	0x0e2e
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB9	0x0e2f
+
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW0	0x2f20
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW1	0x2f21
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW2	0x2f22
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW3	0x2f23
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW4	0x2f24
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW5	0x2f25
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW6	0x2f26
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW7	0x2f27
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW8	0x2f2e
+#define PCI_DEVICE_ID_INTEL_IOAT_HSW9	0x2f2f
+
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD0	0x0C50
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD1	0x0C51
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD2	0x0C52
+#define PCI_DEVICE_ID_INTEL_IOAT_BWD3	0x0C53
+
+#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE0	0x6f50
+#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE1	0x6f51
+#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE2	0x6f52
+#define PCI_DEVICE_ID_INTEL_IOAT_BDXDE3	0x6f53
+
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX0	0x6f20
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX1	0x6f21
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX2	0x6f22
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX3	0x6f23
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX4	0x6f24
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX5	0x6f25
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX6	0x6f26
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX7	0x6f27
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX8	0x6f2e
+#define PCI_DEVICE_ID_INTEL_IOAT_BDX9	0x6f2f
+
+#define PCI_DEVICE_ID_INTEL_IOAT_SKX	0x2021
+
+#define IOAT_VER_1_2            0x12    /* Version 1.2 */
+#define IOAT_VER_2_0            0x20    /* Version 2.0 */
+#define IOAT_VER_3_0            0x30    /* Version 3.0 */
+#define IOAT_VER_3_2            0x32    /* Version 3.2 */
+#define IOAT_VER_3_3            0x33    /* Version 3.3 */
+
+
+int system_has_dca_enabled(struct pci_dev *pdev);
+
+#define IOAT_DESC_SZ	64
+
+struct ioat_dma_descriptor {
+	uint32_t	size;
+	union {
+		uint32_t ctl;
+		struct {
+			unsigned int int_en:1;
+			unsigned int src_snoop_dis:1;
+			unsigned int dest_snoop_dis:1;
+			unsigned int compl_write:1;
+			unsigned int fence:1;
+			unsigned int null:1;
+			unsigned int src_brk:1;
+			unsigned int dest_brk:1;
+			unsigned int bundle:1;
+			unsigned int dest_dca:1;
+			unsigned int hint:1;
+			unsigned int rsvd2:13;
+			#define IOAT_OP_COPY 0x00
+			unsigned int op:8;
+		} ctl_f;
+	};
+	uint64_t	src_addr;
+	uint64_t	dst_addr;
+	uint64_t	next;
+	uint64_t	rsv1;
+	uint64_t	rsv2;
+	/* store some driver data in an unused portion of the descriptor */
+	union {
+		uint64_t	user1;
+		uint64_t	tx_cnt;
+	};
+	uint64_t	user2;
+};
+
+struct ioat_xor_descriptor {
+	uint32_t	size;
+	union {
+		uint32_t ctl;
+		struct {
+			unsigned int int_en:1;
+			unsigned int src_snoop_dis:1;
+			unsigned int dest_snoop_dis:1;
+			unsigned int compl_write:1;
+			unsigned int fence:1;
+			unsigned int src_cnt:3;
+			unsigned int bundle:1;
+			unsigned int dest_dca:1;
+			unsigned int hint:1;
+			unsigned int rsvd:13;
+			#define IOAT_OP_XOR 0x87
+			#define IOAT_OP_XOR_VAL 0x88
+			unsigned int op:8;
+		} ctl_f;
+	};
+	uint64_t	src_addr;
+	uint64_t	dst_addr;
+	uint64_t	next;
+	uint64_t	src_addr2;
+	uint64_t	src_addr3;
+	uint64_t	src_addr4;
+	uint64_t	src_addr5;
+};
+
+struct ioat_xor_ext_descriptor {
+	uint64_t	src_addr6;
+	uint64_t	src_addr7;
+	uint64_t	src_addr8;
+	uint64_t	next;
+	uint64_t	rsvd[4];
+};
+
+struct ioat_pq_descriptor {
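+	/* size is unioned with dwbes: descriptor write back error status */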
+	union {
+		uint32_t	size;
+		uint32_t	dwbes;
+		struct {
+			unsigned int rsvd:25;
+			unsigned int p_val_err:1;
+			unsigned int q_val_err:1;
+			unsigned int rsvd1:4;
+			unsigned int wbes:1;
+		} dwbes_f;
+	};
+	union {
+		uint32_t ctl;
+		struct {
+			unsigned int int_en:1;
+			unsigned int src_snoop_dis:1;
+			unsigned int dest_snoop_dis:1;
+			unsigned int compl_write:1;
+			unsigned int fence:1;
+			unsigned int src_cnt:3;
+			unsigned int bundle:1;
+			unsigned int dest_dca:1;
+			unsigned int hint:1;
+			unsigned int p_disable:1;
+			unsigned int q_disable:1;
+			unsigned int rsvd2:2;
+			unsigned int wb_en:1;
+			unsigned int prl_en:1;
+			unsigned int rsvd3:7;
+			#define IOAT_OP_PQ 0x89
+			#define IOAT_OP_PQ_VAL 0x8a
+			#define IOAT_OP_PQ_16S 0xa0
+			#define IOAT_OP_PQ_VAL_16S 0xa1
+			unsigned int op:8;
+		} ctl_f;
+	};
+	uint64_t	src_addr;
+	uint64_t	p_addr;
+	uint64_t	next;
+	uint64_t	src_addr2;
+	union {
+		uint64_t	src_addr3;
+		uint64_t	sed_addr;
+	};
+	uint8_t		coef[8];
+	uint64_t	q_addr;
+};
+
+struct ioat_pq_ext_descriptor {
+	uint64_t	src_addr4;
+	uint64_t	src_addr5;
+	uint64_t	src_addr6;
+	uint64_t	next;
+	uint64_t	src_addr7;
+	uint64_t	src_addr8;
+	uint64_t	rsvd[2];
+};
+
+struct ioat_pq_update_descriptor {
+	uint32_t	size;
+	union {
+		uint32_t ctl;
+		struct {
+			unsigned int int_en:1;
+			unsigned int src_snoop_dis:1;
+			unsigned int dest_snoop_dis:1;
+			unsigned int compl_write:1;
+			unsigned int fence:1;
+			unsigned int src_cnt:3;
+			unsigned int bundle:1;
+			unsigned int dest_dca:1;
+			unsigned int hint:1;
+			unsigned int p_disable:1;
+			unsigned int q_disable:1;
+			unsigned int rsvd:3;
+			unsigned int coef:8;
+			#define IOAT_OP_PQ_UP 0x8b
+			unsigned int op:8;
+		} ctl_f;
+	};
+	uint64_t	src_addr;
+	uint64_t	p_addr;
+	uint64_t	next;
+	uint64_t	src_addr2;
+	uint64_t	p_src;
+	uint64_t	q_src;
+	uint64_t	q_addr;
+};
+
+struct ioat_raw_descriptor {
+	uint64_t	field[8];
+};
+
+struct ioat_pq16a_descriptor {
+	uint8_t coef[8];
+	uint64_t src_addr3;
+	uint64_t src_addr4;
+	uint64_t src_addr5;
+	uint64_t src_addr6;
+	uint64_t src_addr7;
+	uint64_t src_addr8;
+	uint64_t src_addr9;
+};
+
+struct ioat_pq16b_descriptor {
+	uint64_t src_addr10;
+	uint64_t src_addr11;
+	uint64_t src_addr12;
+	uint64_t src_addr13;
+	uint64_t src_addr14;
+	uint64_t src_addr15;
+	uint64_t src_addr16;
+	uint64_t rsvd;
+};
+
+union ioat_sed_pq_descriptor {
+	struct ioat_pq16a_descriptor a;
+	struct ioat_pq16b_descriptor b;
+};
+
+#define SED_SIZE	64
+
+struct ioat_sed_raw_descriptor {
+	uint64_t	a[8];
+	uint64_t	b[8];
+	uint64_t	c[8];
+};
+
+#endif
diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
new file mode 100644
index 0000000..21a5708
--- /dev/null
+++ b/drivers/dma/ioat/init.c
@@ -0,0 +1,1435 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/prefetch.h>
+#include <linux/dca.h>
+#include <linux/aer.h>
+#include <linux/sizes.h>
+#include "dma.h"
+#include "registers.h"
+#include "hw.h"
+
+#include "../dmaengine.h"
+
+MODULE_VERSION(IOAT_DMA_VERSION);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Intel Corporation");
+
+static const struct pci_device_id ioat_pci_tbl[] = {
+	/* I/OAT v3 platforms */
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
+
+	/* I/OAT v3.2 platforms */
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) },
+
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB0) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB1) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB2) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB3) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB4) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB5) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB6) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB7) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB8) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB9) },
+
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB0) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB1) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB2) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB3) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB4) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB5) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB6) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB7) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB8) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB9) },
+
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW0) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW1) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW2) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW3) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW4) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW5) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW6) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW7) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW8) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW9) },
+
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX0) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX1) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX2) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX3) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX4) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX5) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX6) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX7) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX8) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX9) },
+
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SKX) },
+
+	/* I/OAT v3.3 platforms */
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD0) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD1) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD2) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD3) },
+
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE0) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE1) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE2) },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE3) },
+
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);
+
+static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id);
+static void ioat_remove(struct pci_dev *pdev);
+static void
+ioat_init_channel(struct ioatdma_device *ioat_dma,
+		  struct ioatdma_chan *ioat_chan, int idx);
+static void ioat_intr_quirk(struct ioatdma_device *ioat_dma);
+static int ioat_enumerate_channels(struct ioatdma_device *ioat_dma);
+static int ioat3_dma_self_test(struct ioatdma_device *ioat_dma);
+
+static int ioat_dca_enabled = 1;
+module_param(ioat_dca_enabled, int, 0644);
+MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
+int ioat_pending_level = 4;
+module_param(ioat_pending_level, int, 0644);
+MODULE_PARM_DESC(ioat_pending_level,
+		 "high-water mark for pushing ioat descriptors (default: 4)");
+static char ioat_interrupt_style[32] = "msix";
+module_param_string(ioat_interrupt_style, ioat_interrupt_style,
+		    sizeof(ioat_interrupt_style), 0644);
+MODULE_PARM_DESC(ioat_interrupt_style,
+		 "set ioat interrupt style: msix (default), msi, intx");
+
+struct kmem_cache *ioat_cache;
+struct kmem_cache *ioat_sed_cache;
+
+static bool is_jf_ioat(struct pci_dev *pdev)
+{
+	switch (pdev->device) {
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF0:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF1:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF2:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF3:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF4:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF5:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF6:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF7:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF8:
+	case PCI_DEVICE_ID_INTEL_IOAT_JSF9:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool is_snb_ioat(struct pci_dev *pdev)
+{
+	switch (pdev->device) {
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB0:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB1:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB2:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB3:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB4:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB5:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB6:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB7:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB8:
+	case PCI_DEVICE_ID_INTEL_IOAT_SNB9:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool is_ivb_ioat(struct pci_dev *pdev)
+{
+	switch (pdev->device) {
+	case PCI_DEVICE_ID_INTEL_IOAT_IVB0:
+	case PCI_DEVICE_ID_INTEL_IOAT_IVB1:
+	case PCI_DEVICE_ID_INTEL_IOAT_IVB2:
+	case PCI_DEVICE_ID_INTEL_IOAT_IVB3:
+	case PCI_DEVICE_ID_INTEL_IOAT_IVB4:
+	case PCI_DEVICE_ID_INTEL_IOAT_IVB5:
+	case PCI_DEVICE_ID_INTEL_IOAT_IVB6:
+	case PCI_DEVICE_ID_INTEL_IOAT_IVB7:
+	case PCI_DEVICE_ID_INTEL_IOAT_IVB8:
+	case PCI_DEVICE_ID_INTEL_IOAT_IVB9:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool is_hsw_ioat(struct pci_dev *pdev)
+{
+	switch (pdev->device) {
+	case PCI_DEVICE_ID_INTEL_IOAT_HSW0:
+	case PCI_DEVICE_ID_INTEL_IOAT_HSW1:
+	case PCI_DEVICE_ID_INTEL_IOAT_HSW2:
+	case PCI_DEVICE_ID_INTEL_IOAT_HSW3:
+	case PCI_DEVICE_ID_INTEL_IOAT_HSW4:
+	case PCI_DEVICE_ID_INTEL_IOAT_HSW5:
+	case PCI_DEVICE_ID_INTEL_IOAT_HSW6:
+	case PCI_DEVICE_ID_INTEL_IOAT_HSW7:
+	case PCI_DEVICE_ID_INTEL_IOAT_HSW8:
+	case PCI_DEVICE_ID_INTEL_IOAT_HSW9:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool is_bdx_ioat(struct pci_dev *pdev)
+{
+	switch (pdev->device) {
+	case PCI_DEVICE_ID_INTEL_IOAT_BDX0:
+	case PCI_DEVICE_ID_INTEL_IOAT_BDX1:
+	case PCI_DEVICE_ID_INTEL_IOAT_BDX2:
+	case PCI_DEVICE_ID_INTEL_IOAT_BDX3:
+	case PCI_DEVICE_ID_INTEL_IOAT_BDX4:
+	case PCI_DEVICE_ID_INTEL_IOAT_BDX5:
+	case PCI_DEVICE_ID_INTEL_IOAT_BDX6:
+	case PCI_DEVICE_ID_INTEL_IOAT_BDX7:
+	case PCI_DEVICE_ID_INTEL_IOAT_BDX8:
+	case PCI_DEVICE_ID_INTEL_IOAT_BDX9:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static inline bool is_skx_ioat(struct pci_dev *pdev)
+{
+	return pdev->device == PCI_DEVICE_ID_INTEL_IOAT_SKX;
+}
+
+static bool is_xeon_cb32(struct pci_dev *pdev)
+{
+	return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) ||
+		is_hsw_ioat(pdev) || is_bdx_ioat(pdev) || is_skx_ioat(pdev);
+}
+
+bool is_bwd_ioat(struct pci_dev *pdev)
+{
+	switch (pdev->device) {
+	case PCI_DEVICE_ID_INTEL_IOAT_BWD0:
+	case PCI_DEVICE_ID_INTEL_IOAT_BWD1:
+	case PCI_DEVICE_ID_INTEL_IOAT_BWD2:
+	case PCI_DEVICE_ID_INTEL_IOAT_BWD3:
+	/* even though not Atom, BDX-DE has the same DMA silicon */
+	case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0:
+	case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1:
+	case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2:
+	case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool is_bwd_noraid(struct pci_dev *pdev)
+{
+	switch (pdev->device) {
+	case PCI_DEVICE_ID_INTEL_IOAT_BWD2:
+	case PCI_DEVICE_ID_INTEL_IOAT_BWD3:
+	case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0:
+	case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1:
+	case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2:
+	case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Perform an IOAT transaction to verify the HW works.
+ */
+#define IOAT_TEST_SIZE 2000
+
+static void ioat_dma_test_callback(void *dma_async_param)
+{
+	struct completion *cmp = dma_async_param;
+
+	complete(cmp);
+}
+
+/**
+ * ioat_dma_self_test - Perform an IOAT transaction to verify the HW works.
+ * @ioat_dma: dma device to be tested
+ */
+static int ioat_dma_self_test(struct ioatdma_device *ioat_dma)
+{
+	int i;
+	u8 *src;
+	u8 *dest;
+	struct dma_device *dma = &ioat_dma->dma_dev;
+	struct device *dev = &ioat_dma->pdev->dev;
+	struct dma_chan *dma_chan;
+	struct dma_async_tx_descriptor *tx;
+	dma_addr_t dma_dest, dma_src;
+	dma_cookie_t cookie;
+	int err = 0;
+	struct completion cmp;
+	unsigned long tmo;
+	unsigned long flags;
+
+	src = kzalloc(IOAT_TEST_SIZE, GFP_KERNEL);
+	if (!src)
+		return -ENOMEM;
+	dest = kzalloc(IOAT_TEST_SIZE, GFP_KERNEL);
+	if (!dest) {
+		kfree(src);
+		return -ENOMEM;
+	}
+
+	/* Fill in src buffer */
+	for (i = 0; i < IOAT_TEST_SIZE; i++)
+		src[i] = (u8)i;
+
+	/* Start copy, using first DMA channel */
+	dma_chan = container_of(dma->channels.next, struct dma_chan,
+				device_node);
+	if (dma->device_alloc_chan_resources(dma_chan) < 1) {
+		dev_err(dev, "selftest cannot allocate chan resource\n");
+		err = -ENODEV;
+		goto out;
+	}
+
+	dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, dma_src)) {
+		dev_err(dev, "mapping src buffer failed\n");
+		err = -ENOMEM;
+		goto free_resources;
+	}
+	dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, dma_dest)) {
+		dev_err(dev, "mapping dest buffer failed\n");
+		err = -ENOMEM;
+		goto unmap_src;
+	}
+	flags = DMA_PREP_INTERRUPT;
+	tx = ioat_dma->dma_dev.device_prep_dma_memcpy(dma_chan, dma_dest,
+						      dma_src, IOAT_TEST_SIZE,
+						      flags);
+	if (!tx) {
+		dev_err(dev, "Self-test prep failed, disabling\n");
+		err = -ENODEV;
+		goto unmap_dma;
+	}
+
+	async_tx_ack(tx);
+	init_completion(&cmp);
+	tx->callback = ioat_dma_test_callback;
+	tx->callback_param = &cmp;
+	cookie = tx->tx_submit(tx);
+	if (cookie < 0) {
+		dev_err(dev, "Self-test setup failed, disabling\n");
+		err = -ENODEV;
+		goto unmap_dma;
+	}
+	dma->device_issue_pending(dma_chan);
+
+	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+	if (tmo == 0 ||
+	    dma->device_tx_status(dma_chan, cookie, NULL)
+					!= DMA_COMPLETE) {
+		dev_err(dev, "Self-test copy timed out, disabling\n");
+		err = -ENODEV;
+		goto unmap_dma;
+	}
+	if (memcmp(src, dest, IOAT_TEST_SIZE)) {
+		dev_err(dev, "Self-test copy failed compare, disabling\n");
+		err = -ENODEV;
+		goto unmap_dma;
+	}
+
+unmap_dma:
+	dma_unmap_single(dev, dma_dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
+unmap_src:
+	dma_unmap_single(dev, dma_src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
+free_resources:
+	dma->device_free_chan_resources(dma_chan);
+out:
+	kfree(src);
+	kfree(dest);
+	return err;
+}
+
+/**
+ * ioat_dma_setup_interrupts - setup interrupt handler
+ * @ioat_dma: ioat dma device
+ */
+int ioat_dma_setup_interrupts(struct ioatdma_device *ioat_dma)
+{
+	struct ioatdma_chan *ioat_chan;
+	struct pci_dev *pdev = ioat_dma->pdev;
+	struct device *dev = &pdev->dev;
+	struct msix_entry *msix;
+	int i, j, msixcnt;
+	int err = -EINVAL;
+	u8 intrctrl = 0;
+
+	if (!strcmp(ioat_interrupt_style, "msix"))
+		goto msix;
+	if (!strcmp(ioat_interrupt_style, "msi"))
+		goto msi;
+	if (!strcmp(ioat_interrupt_style, "intx"))
+		goto intx;
+	dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style);
+	goto err_no_irq;
+
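+	/* MSI-X is the default; on failure fall back to MSI, then legacy INTx */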
+msix:
+	/* The number of MSI-X vectors should equal the number of channels */
+	msixcnt = ioat_dma->dma_dev.chancnt;
+	for (i = 0; i < msixcnt; i++)
+		ioat_dma->msix_entries[i].entry = i;
+
+	err = pci_enable_msix_exact(pdev, ioat_dma->msix_entries, msixcnt);
+	if (err)
+		goto msi;
+
+	for (i = 0; i < msixcnt; i++) {
+		msix = &ioat_dma->msix_entries[i];
+		ioat_chan = ioat_chan_by_index(ioat_dma, i);
+		err = devm_request_irq(dev, msix->vector,
+				       ioat_dma_do_interrupt_msix, 0,
+				       "ioat-msix", ioat_chan);
+		if (err) {
+			for (j = 0; j < i; j++) {
+				msix = &ioat_dma->msix_entries[j];
+				ioat_chan = ioat_chan_by_index(ioat_dma, j);
+				devm_free_irq(dev, msix->vector, ioat_chan);
+			}
+			goto msi;
+		}
+	}
+	intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
+	ioat_dma->irq_mode = IOAT_MSIX;
+	goto done;
+
+msi:
+	err = pci_enable_msi(pdev);
+	if (err)
+		goto intx;
+
+	err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0,
+			       "ioat-msi", ioat_dma);
+	if (err) {
+		pci_disable_msi(pdev);
+		goto intx;
+	}
+	ioat_dma->irq_mode = IOAT_MSI;
+	goto done;
+
+intx:
+	err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt,
+			       IRQF_SHARED, "ioat-intx", ioat_dma);
+	if (err)
+		goto err_no_irq;
+
+	ioat_dma->irq_mode = IOAT_INTX;
+done:
+	if (is_bwd_ioat(pdev))
+		ioat_intr_quirk(ioat_dma);
+	intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
+	writeb(intrctrl, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET);
+	return 0;
+
+err_no_irq:
+	/* Disable all interrupt generation */
+	writeb(0, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET);
+	ioat_dma->irq_mode = IOAT_NOIRQ;
+	dev_err(dev, "no usable interrupts\n");
+	return err;
+}
+
+static void ioat_disable_interrupts(struct ioatdma_device *ioat_dma)
+{
+	/* Disable all interrupt generation */
+	writeb(0, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET);
+}
+
+static int ioat_probe(struct ioatdma_device *ioat_dma)
+{
+	int err = -ENODEV;
+	struct dma_device *dma = &ioat_dma->dma_dev;
+	struct pci_dev *pdev = ioat_dma->pdev;
+	struct device *dev = &pdev->dev;
+
+	ioat_dma->completion_pool = dma_pool_create("completion_pool", dev,
+						    sizeof(u64),
+						    SMP_CACHE_BYTES,
+						    SMP_CACHE_BYTES);
+
+	if (!ioat_dma->completion_pool) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	ioat_enumerate_channels(ioat_dma);
+
+	dma_cap_set(DMA_MEMCPY, dma->cap_mask);
+	dma->dev = &pdev->dev;
+
+	if (!dma->chancnt) {
+		dev_err(dev, "channel enumeration error\n");
+		goto err_setup_interrupts;
+	}
+
+	err = ioat_dma_setup_interrupts(ioat_dma);
+	if (err)
+		goto err_setup_interrupts;
+
+	err = ioat3_dma_self_test(ioat_dma);
+	if (err)
+		goto err_self_test;
+
+	return 0;
+
+err_self_test:
+	ioat_disable_interrupts(ioat_dma);
+err_setup_interrupts:
+	dma_pool_destroy(ioat_dma->completion_pool);
+err_out:
+	return err;
+}
+
+static int ioat_register(struct ioatdma_device *ioat_dma)
+{
+	int err = dma_async_device_register(&ioat_dma->dma_dev);
+
+	if (err) {
+		ioat_disable_interrupts(ioat_dma);
+		dma_pool_destroy(ioat_dma->completion_pool);
+	}
+
+	return err;
+}
+
+static void ioat_dma_remove(struct ioatdma_device *ioat_dma)
+{
+	struct dma_device *dma = &ioat_dma->dma_dev;
+
+	ioat_disable_interrupts(ioat_dma);
+
+	ioat_kobject_del(ioat_dma);
+
+	dma_async_device_unregister(dma);
+
+	dma_pool_destroy(ioat_dma->completion_pool);
+
+	INIT_LIST_HEAD(&dma->channels);
+}
+
+/**
+ * ioat_enumerate_channels - find and initialize the device's channels
+ * @ioat_dma: the ioat dma device to be enumerated
+ */
+static int ioat_enumerate_channels(struct ioatdma_device *ioat_dma)
+{
+	struct ioatdma_chan *ioat_chan;
+	struct device *dev = &ioat_dma->pdev->dev;
+	struct dma_device *dma = &ioat_dma->dma_dev;
+	u8 xfercap_log;
+	int i;
+
+	INIT_LIST_HEAD(&dma->channels);
+	dma->chancnt = readb(ioat_dma->reg_base + IOAT_CHANCNT_OFFSET);
+	dma->chancnt &= 0x1f; /* bits [4:0] valid */
+	if (dma->chancnt > ARRAY_SIZE(ioat_dma->idx)) {
+		dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n",
+			 dma->chancnt, ARRAY_SIZE(ioat_dma->idx));
+		dma->chancnt = ARRAY_SIZE(ioat_dma->idx);
+	}
+	xfercap_log = readb(ioat_dma->reg_base + IOAT_XFERCAP_OFFSET);
+	xfercap_log &= 0x1f; /* bits [4:0] valid */
+	if (xfercap_log == 0)
+		return 0;
+	dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log);
+
+	for (i = 0; i < dma->chancnt; i++) {
+		ioat_chan = devm_kzalloc(dev, sizeof(*ioat_chan), GFP_KERNEL);
+		if (!ioat_chan)
+			break;
+
+		ioat_init_channel(ioat_dma, ioat_chan, i);
+		ioat_chan->xfercap_log = xfercap_log;
+		spin_lock_init(&ioat_chan->prep_lock);
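+		/* a failed reset leaves no usable channels */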
+		if (ioat_reset_hw(ioat_chan)) {
+			i = 0;
+			break;
+		}
+	}
+	dma->chancnt = i;
+	return i;
+}
+
+/**
+ * ioat_free_chan_resources - release all the descriptors
+ * @chan: the channel to be cleaned
+ */
+static void ioat_free_chan_resources(struct dma_chan *c)
+{
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+	struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+	struct ioat_ring_ent *desc;
+	const int total_descs = 1 << ioat_chan->alloc_order;
+	int descs;
+	int i;
+
+	/* Before freeing channel resources, first check
+	 * whether they have been previously allocated for this channel.
+	 */
+	if (!ioat_chan->ring)
+		return;
+
+	ioat_stop(ioat_chan);
+	ioat_reset_hw(ioat_chan);
+
+	spin_lock_bh(&ioat_chan->cleanup_lock);
+	spin_lock_bh(&ioat_chan->prep_lock);
+	descs = ioat_ring_space(ioat_chan);
+	dev_dbg(to_dev(ioat_chan), "freeing %d idle descriptors\n", descs);
+	for (i = 0; i < descs; i++) {
+		desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head + i);
+		ioat_free_ring_ent(desc, c);
+	}
+
+	if (descs < total_descs)
+		dev_err(to_dev(ioat_chan), "Freeing %d in use descriptors!\n",
+			total_descs - descs);
+
+	for (i = 0; i < total_descs - descs; i++) {
+		desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail + i);
+		dump_desc_dbg(ioat_chan, desc);
+		ioat_free_ring_ent(desc, c);
+	}
+
+	for (i = 0; i < ioat_chan->desc_chunks; i++) {
+		dma_free_coherent(to_dev(ioat_chan), SZ_2M,
+				  ioat_chan->descs[i].virt,
+				  ioat_chan->descs[i].hw);
+		ioat_chan->descs[i].virt = NULL;
+		ioat_chan->descs[i].hw = 0;
+	}
+	ioat_chan->desc_chunks = 0;
+
+	kfree(ioat_chan->ring);
+	ioat_chan->ring = NULL;
+	ioat_chan->alloc_order = 0;
+	dma_pool_free(ioat_dma->completion_pool, ioat_chan->completion,
+		      ioat_chan->completion_dma);
+	spin_unlock_bh(&ioat_chan->prep_lock);
+	spin_unlock_bh(&ioat_chan->cleanup_lock);
+
+	ioat_chan->last_completion = 0;
+	ioat_chan->completion_dma = 0;
+	ioat_chan->dmacount = 0;
+}
+
+/* ioat_alloc_chan_resources - allocate/initialize ioat descriptor ring
+ * @chan: channel to be initialized
+ */
+static int ioat_alloc_chan_resources(struct dma_chan *c)
+{
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+	struct ioat_ring_ent **ring;
+	u64 status;
+	int order;
+	int i = 0;
+	u32 chanerr;
+
+	/* have we already been set up? */
+	if (ioat_chan->ring)
+		return 1 << ioat_chan->alloc_order;
+
+	/* Set up register to interrupt and write completion status on error */
+	writew(IOAT_CHANCTRL_RUN, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
+
+	/* allocate a completion writeback area */
+	/* do two 32-bit MMIO writes since a single 64-bit write doesn't work */
+	ioat_chan->completion =
+		dma_pool_zalloc(ioat_chan->ioat_dma->completion_pool,
+				GFP_NOWAIT, &ioat_chan->completion_dma);
+	if (!ioat_chan->completion)
+		return -ENOMEM;
+
+	writel(((u64)ioat_chan->completion_dma) & 0x00000000FFFFFFFF,
+	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+	writel(((u64)ioat_chan->completion_dma) >> 32,
+	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
+
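+	/* allocate the maximum-size ring (1 << IOAT_MAX_ORDER entries) up front */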
+	order = IOAT_MAX_ORDER;
+	ring = ioat_alloc_ring(c, order, GFP_NOWAIT);
+	if (!ring)
+		return -ENOMEM;
+
+	spin_lock_bh(&ioat_chan->cleanup_lock);
+	spin_lock_bh(&ioat_chan->prep_lock);
+	ioat_chan->ring = ring;
+	ioat_chan->head = 0;
+	ioat_chan->issued = 0;
+	ioat_chan->tail = 0;
+	ioat_chan->alloc_order = order;
+	set_bit(IOAT_RUN, &ioat_chan->state);
+	spin_unlock_bh(&ioat_chan->prep_lock);
+	spin_unlock_bh(&ioat_chan->cleanup_lock);
+
+	ioat_start_null_desc(ioat_chan);
+
+	/* check that we got off the ground */
+	do {
+		udelay(1);
+		status = ioat_chansts(ioat_chan);
+	} while (i++ < 20 && !is_ioat_active(status) && !is_ioat_idle(status));
+
+	if (is_ioat_active(status) || is_ioat_idle(status))
+		return 1 << ioat_chan->alloc_order;
+
+	chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+
+	dev_WARN(to_dev(ioat_chan),
+		 "failed to start channel chanerr: %#x\n", chanerr);
+	ioat_free_chan_resources(c);
+	return -EFAULT;
+}
+
+/* common channel initialization */
+static void
+ioat_init_channel(struct ioatdma_device *ioat_dma,
+		  struct ioatdma_chan *ioat_chan, int idx)
+{
+	struct dma_device *dma = &ioat_dma->dma_dev;
+	struct dma_chan *c = &ioat_chan->dma_chan;
+	unsigned long data = (unsigned long) c;
+
+	ioat_chan->ioat_dma = ioat_dma;
+	ioat_chan->reg_base = ioat_dma->reg_base + (0x80 * (idx + 1));
+	spin_lock_init(&ioat_chan->cleanup_lock);
+	ioat_chan->dma_chan.device = dma;
+	dma_cookie_init(&ioat_chan->dma_chan);
+	list_add_tail(&ioat_chan->dma_chan.device_node, &dma->channels);
+	ioat_dma->idx[idx] = ioat_chan;
+	timer_setup(&ioat_chan->timer, ioat_timer_event, 0);
+	tasklet_init(&ioat_chan->cleanup_task, ioat_cleanup_event, data);
+}
+
+#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */
+static int ioat_xor_val_self_test(struct ioatdma_device *ioat_dma)
+{
+	int i, src_idx;
+	struct page *dest;
+	struct page *xor_srcs[IOAT_NUM_SRC_TEST];
+	struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1];
+	dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1];
+	dma_addr_t dest_dma;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_chan *dma_chan;
+	dma_cookie_t cookie;
+	u8 cmp_byte = 0;
+	u32 cmp_word;
+	u32 xor_val_result;
+	int err = 0;
+	struct completion cmp;
+	unsigned long tmo;
+	struct device *dev = &ioat_dma->pdev->dev;
+	struct dma_device *dma = &ioat_dma->dma_dev;
+	u8 op = 0;
+
+	dev_dbg(dev, "%s\n", __func__);
+
+	if (!dma_has_cap(DMA_XOR, dma->cap_mask))
+		return 0;
+
+	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
+		xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+		if (!xor_srcs[src_idx]) {
+			while (src_idx--)
+				__free_page(xor_srcs[src_idx]);
+			return -ENOMEM;
+		}
+	}
+
+	dest = alloc_page(GFP_KERNEL);
+	if (!dest) {
+		while (src_idx--)
+			__free_page(xor_srcs[src_idx]);
+		return -ENOMEM;
+	}
+
+	/* Fill in src buffers */
+	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
+		u8 *ptr = page_address(xor_srcs[src_idx]);
+
+		for (i = 0; i < PAGE_SIZE; i++)
+			ptr[i] = (1 << src_idx);
+	}
+
+	for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
+		cmp_byte ^= (u8) (1 << src_idx);
+
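+	/* replicate the expected XOR byte across a 32-bit word for comparison */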
+	cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+			(cmp_byte << 8) | cmp_byte;
+
+	memset(page_address(dest), 0, PAGE_SIZE);
+
+	dma_chan = container_of(dma->channels.next, struct dma_chan,
+				device_node);
+	if (dma->device_alloc_chan_resources(dma_chan) < 1) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	/* test xor */
+	op = IOAT_OP_XOR;
+
+	dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, dest_dma)) {
+		err = -ENOMEM;
+		goto free_resources;
+	}
+
+	for (i = 0; i < IOAT_NUM_SRC_TEST; i++) {
+		dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
+					   DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, dma_srcs[i])) {
+			err = -ENOMEM;
+			goto dma_unmap;
+		}
+	}
+	tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+				      IOAT_NUM_SRC_TEST, PAGE_SIZE,
+				      DMA_PREP_INTERRUPT);
+
+	if (!tx) {
+		dev_err(dev, "Self-test xor prep failed\n");
+		err = -ENODEV;
+		goto dma_unmap;
+	}
+
+	async_tx_ack(tx);
+	init_completion(&cmp);
+	tx->callback = ioat_dma_test_callback;
+	tx->callback_param = &cmp;
+	cookie = tx->tx_submit(tx);
+	if (cookie < 0) {
+		dev_err(dev, "Self-test xor setup failed\n");
+		err = -ENODEV;
+		goto dma_unmap;
+	}
+	dma->device_issue_pending(dma_chan);
+
+	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+	if (tmo == 0 ||
+	    dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
+		dev_err(dev, "Self-test xor timed out\n");
+		err = -ENODEV;
+		goto dma_unmap;
+	}
+
+	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+		dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
+
+	dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+	for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+		u32 *ptr = page_address(dest);
+
+		if (ptr[i] != cmp_word) {
+			dev_err(dev, "Self-test xor failed compare\n");
+			err = -ENODEV;
+			goto free_resources;
+		}
+	}
+	dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+
+	dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+
+	/* skip validate if the capability is not present */
+	if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
+		goto free_resources;
+
+	op = IOAT_OP_XOR_VAL;
+
+	/* validate the sources with the destination page */
+	for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+		xor_val_srcs[i] = xor_srcs[i];
+	xor_val_srcs[i] = dest;
+
+	xor_val_result = 1;
+
+	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) {
+		dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
+					   DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, dma_srcs[i])) {
+			err = -ENOMEM;
+			goto dma_unmap;
+		}
+	}
+	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
+					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
+					  &xor_val_result, DMA_PREP_INTERRUPT);
+	if (!tx) {
+		dev_err(dev, "Self-test zero prep failed\n");
+		err = -ENODEV;
+		goto dma_unmap;
+	}
+
+	async_tx_ack(tx);
+	init_completion(&cmp);
+	tx->callback = ioat_dma_test_callback;
+	tx->callback_param = &cmp;
+	cookie = tx->tx_submit(tx);
+	if (cookie < 0) {
+		dev_err(dev, "Self-test zero setup failed\n");
+		err = -ENODEV;
+		goto dma_unmap;
+	}
+	dma->device_issue_pending(dma_chan);
+
+	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+	if (tmo == 0 ||
+	    dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
+		dev_err(dev, "Self-test validate timed out\n");
+		err = -ENODEV;
+		goto dma_unmap;
+	}
+
+	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+		dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
+
+	if (xor_val_result != 0) {
+		dev_err(dev, "Self-test validate failed compare\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	memset(page_address(dest), 0, PAGE_SIZE);
+
+	/* test for non-zero parity sum */
+	op = IOAT_OP_XOR_VAL;
+
+	xor_val_result = 0;
+	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) {
+		dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
+					   DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, dma_srcs[i])) {
+			err = -ENOMEM;
+			goto dma_unmap;
+		}
+	}
+	tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
+					  IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
+					  &xor_val_result, DMA_PREP_INTERRUPT);
+	if (!tx) {
+		dev_err(dev, "Self-test 2nd zero prep failed\n");
+		err = -ENODEV;
+		goto dma_unmap;
+	}
+
+	async_tx_ack(tx);
+	init_completion(&cmp);
+	tx->callback = ioat_dma_test_callback;
+	tx->callback_param = &cmp;
+	cookie = tx->tx_submit(tx);
+	if (cookie < 0) {
+		dev_err(dev, "Self-test 2nd zero setup failed\n");
+		err = -ENODEV;
+		goto dma_unmap;
+	}
+	dma->device_issue_pending(dma_chan);
+
+	tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+	if (tmo == 0 ||
+	    dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
+		dev_err(dev, "Self-test 2nd validate timed out\n");
+		err = -ENODEV;
+		goto dma_unmap;
+	}
+
+	if (xor_val_result != SUM_CHECK_P_RESULT) {
+		dev_err(dev, "Self-test validate failed compare\n");
+		err = -ENODEV;
+		goto dma_unmap;
+	}
+
+	for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+		dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
+
+	goto free_resources;
+dma_unmap:
+	if (op == IOAT_OP_XOR) {
+		while (--i >= 0)
+			dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
+				       DMA_TO_DEVICE);
+		dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+	} else if (op == IOAT_OP_XOR_VAL) {
+		while (--i >= 0)
+			dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE,
+				       DMA_TO_DEVICE);
+	}
+free_resources:
+	dma->device_free_chan_resources(dma_chan);
+out:
+	src_idx = IOAT_NUM_SRC_TEST;
+	while (src_idx--)
+		__free_page(xor_srcs[src_idx]);
+	__free_page(dest);
+	return err;
+}
+
+static int ioat3_dma_self_test(struct ioatdma_device *ioat_dma)
+{
+	int rc;
+
+	rc = ioat_dma_self_test(ioat_dma);
+	if (rc)
+		return rc;
+
+	rc = ioat_xor_val_self_test(ioat_dma);
+
+	return rc;
+}
+
+static void ioat_intr_quirk(struct ioatdma_device *ioat_dma)
+{
+	struct dma_device *dma;
+	struct dma_chan *c;
+	struct ioatdma_chan *ioat_chan;
+	u32 errmask;
+
+	dma = &ioat_dma->dma_dev;
+
+	/*
+	 * if we have descriptor write back error status, we mask the
+	 * error interrupts
+	 */
+	if (ioat_dma->cap & IOAT_CAP_DWBES) {
+		list_for_each_entry(c, &dma->channels, device_node) {
+			ioat_chan = to_ioat_chan(c);
+			errmask = readl(ioat_chan->reg_base +
+					IOAT_CHANERR_MASK_OFFSET);
+			errmask |= IOAT_CHANERR_XOR_P_OR_CRC_ERR |
+				   IOAT_CHANERR_XOR_Q_ERR;
+			writel(errmask, ioat_chan->reg_base +
+					IOAT_CHANERR_MASK_OFFSET);
+		}
+	}
+}
+
+static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca)
+{
+	struct pci_dev *pdev = ioat_dma->pdev;
+	int dca_en = system_has_dca_enabled(pdev);
+	struct dma_device *dma;
+	struct dma_chan *c;
+	struct ioatdma_chan *ioat_chan;
+	int err;
+	u16 val16;
+
+	dma = &ioat_dma->dma_dev;
+	dma->device_prep_dma_memcpy = ioat_dma_prep_memcpy_lock;
+	dma->device_issue_pending = ioat_issue_pending;
+	dma->device_alloc_chan_resources = ioat_alloc_chan_resources;
+	dma->device_free_chan_resources = ioat_free_chan_resources;
+
+	dma_cap_set(DMA_INTERRUPT, dma->cap_mask);
+	dma->device_prep_dma_interrupt = ioat_prep_interrupt_lock;
+
+	ioat_dma->cap = readl(ioat_dma->reg_base + IOAT_DMA_CAP_OFFSET);
+
+	if (is_xeon_cb32(pdev) || is_bwd_noraid(pdev))
+		ioat_dma->cap &=
+			~(IOAT_CAP_XOR | IOAT_CAP_PQ | IOAT_CAP_RAID16SS);
+
+	/* dca is incompatible with raid operations */
+	if (dca_en && (ioat_dma->cap & (IOAT_CAP_XOR|IOAT_CAP_PQ)))
+		ioat_dma->cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ);
+
+	if (ioat_dma->cap & IOAT_CAP_XOR) {
+		dma->max_xor = 8;
+
+		dma_cap_set(DMA_XOR, dma->cap_mask);
+		dma->device_prep_dma_xor = ioat_prep_xor;
+
+		dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
+		dma->device_prep_dma_xor_val = ioat_prep_xor_val;
+	}
+
+	if (ioat_dma->cap & IOAT_CAP_PQ) {
+
+		dma->device_prep_dma_pq = ioat_prep_pq;
+		dma->device_prep_dma_pq_val = ioat_prep_pq_val;
+		dma_cap_set(DMA_PQ, dma->cap_mask);
+		dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
+
+		if (ioat_dma->cap & IOAT_CAP_RAID16SS)
+			dma_set_maxpq(dma, 16, 0);
+		else
+			dma_set_maxpq(dma, 8, 0);
+
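+		/* no dedicated XOR capability: back XOR operations with the
+		 * PQ engine via ioat_prep_pqxor/ioat_prep_pqxor_val
+		 */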
+		if (!(ioat_dma->cap & IOAT_CAP_XOR)) {
+			dma->device_prep_dma_xor = ioat_prep_pqxor;
+			dma->device_prep_dma_xor_val = ioat_prep_pqxor_val;
+			dma_cap_set(DMA_XOR, dma->cap_mask);
+			dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
+
+			if (ioat_dma->cap & IOAT_CAP_RAID16SS)
+				dma->max_xor = 16;
+			else
+				dma->max_xor = 8;
+		}
+	}
+
+	dma->device_tx_status = ioat_tx_status;
+
+	/* starting with CB3.3 super extended descriptors are supported */
+	if (ioat_dma->cap & IOAT_CAP_RAID16SS) {
+		char pool_name[14];
+		int i;
+
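+		/*
+		 * One pool per extended-descriptor size: pool i hands out
+		 * blocks of (i + 1) * SED_SIZE bytes, and the prep code
+		 * picks a pool based on the 16-source PQ source count.
+		 */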
+		for (i = 0; i < MAX_SED_POOLS; i++) {
+			snprintf(pool_name, 14, "ioat_hw%d_sed", i);
+
+			/* allocate SED DMA pool */
+			ioat_dma->sed_hw_pool[i] = dmam_pool_create(pool_name,
+					&pdev->dev,
+					SED_SIZE * (i + 1), 64, 0);
+			if (!ioat_dma->sed_hw_pool[i])
+				return -ENOMEM;
+
+		}
+	}
+
+	if (!(ioat_dma->cap & (IOAT_CAP_XOR | IOAT_CAP_PQ)))
+		dma_cap_set(DMA_PRIVATE, dma->cap_mask);
+
+	err = ioat_probe(ioat_dma);
+	if (err)
+		return err;
+
+	list_for_each_entry(c, &dma->channels, device_node) {
+		ioat_chan = to_ioat_chan(c);
+		writel(IOAT_DMA_DCA_ANY_CPU,
+		       ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
+	}
+
+	err = ioat_register(ioat_dma);
+	if (err)
+		return err;
+
+	ioat_kobject_add(ioat_dma, &ioat_ktype);
+
+	if (dca)
+		ioat_dma->dca = ioat_dca_init(pdev, ioat_dma->reg_base);
+
+	/* disable relaxed ordering */
+	err = pcie_capability_read_word(pdev, IOAT_DEVCTRL_OFFSET, &val16);
+	if (err)
+		return err;
+
+	/* clear relaxed ordering enable */
+	val16 &= ~IOAT_DEVCTRL_ROE;
+	err = pcie_capability_write_word(pdev, IOAT_DEVCTRL_OFFSET, val16);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static void ioat_shutdown(struct pci_dev *pdev)
+{
+	struct ioatdma_device *ioat_dma = pci_get_drvdata(pdev);
+	struct ioatdma_chan *ioat_chan;
+	int i;
+
+	if (!ioat_dma)
+		return;
+
+	for (i = 0; i < IOAT_MAX_CHANS; i++) {
+		ioat_chan = ioat_dma->idx[i];
+		if (!ioat_chan)
+			continue;
+
+		spin_lock_bh(&ioat_chan->prep_lock);
+		set_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
+		spin_unlock_bh(&ioat_chan->prep_lock);
+		/*
+		 * Synchronization rule for del_timer_sync():
+		 *  - The caller must not hold locks which would prevent
+		 *    completion of the timer's handler.
+		 * So prep_lock cannot be held before calling it.
+		 */
+		del_timer_sync(&ioat_chan->timer);
+
+		/* this should quiesce then reset */
+		ioat_reset_hw(ioat_chan);
+	}
+
+	ioat_disable_interrupts(ioat_dma);
+}
+
+static void ioat_resume(struct ioatdma_device *ioat_dma)
+{
+	struct ioatdma_chan *ioat_chan;
+	u32 chanerr;
+	int i;
+
+	for (i = 0; i < IOAT_MAX_CHANS; i++) {
+		ioat_chan = ioat_dma->idx[i];
+		if (!ioat_chan)
+			continue;
+
+		spin_lock_bh(&ioat_chan->prep_lock);
+		clear_bit(IOAT_CHAN_DOWN, &ioat_chan->state);
+		spin_unlock_bh(&ioat_chan->prep_lock);
+
+		chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+		writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+
+		/* no need to reset as shutdown already did that */
+	}
+}
+
+#define DRV_NAME "ioatdma"
+
+static pci_ers_result_t ioat_pcie_error_detected(struct pci_dev *pdev,
+						 enum pci_channel_state error)
+{
+	dev_dbg(&pdev->dev, "%s: PCIe AER error %d\n", DRV_NAME, error);
+
+	/* quiesce and block I/O */
+	ioat_shutdown(pdev);
+
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t ioat_pcie_error_slot_reset(struct pci_dev *pdev)
+{
+	pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED;
+	int err;
+
+	dev_dbg(&pdev->dev, "%s post reset handling\n", DRV_NAME);
+
+	if (pci_enable_device_mem(pdev) < 0) {
+		dev_err(&pdev->dev,
+			"Failed to enable PCIe device after reset.\n");
+		result = PCI_ERS_RESULT_DISCONNECT;
+	} else {
+		pci_set_master(pdev);
+		pci_restore_state(pdev);
+		pci_save_state(pdev);
+		pci_wake_from_d3(pdev, false);
+	}
+
+	err = pci_cleanup_aer_uncorrect_error_status(pdev);
+	if (err) {
+		dev_err(&pdev->dev,
+			"AER uncorrectable error status clear failed: %#x\n", err);
+	}
+
+	return result;
+}
+
+static void ioat_pcie_error_resume(struct pci_dev *pdev)
+{
+	struct ioatdma_device *ioat_dma = pci_get_drvdata(pdev);
+
+	dev_dbg(&pdev->dev, "%s: AER handling resuming\n", DRV_NAME);
+
+	/* initialize and bring everything back */
+	ioat_resume(ioat_dma);
+}
+
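+/*
+ * AER recovery flow: .error_detected() quiesces the channels via
+ * ioat_shutdown(), .slot_reset() re-enables the PCI device after the link
+ * reset, and .resume() clears channel error state and marks the channels
+ * usable again.
+ */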
+static const struct pci_error_handlers ioat_err_handler = {
+	.error_detected = ioat_pcie_error_detected,
+	.slot_reset = ioat_pcie_error_slot_reset,
+	.resume = ioat_pcie_error_resume,
+};
+
+static struct pci_driver ioat_pci_driver = {
+	.name		= DRV_NAME,
+	.id_table	= ioat_pci_tbl,
+	.probe		= ioat_pci_probe,
+	.remove		= ioat_remove,
+	.shutdown	= ioat_shutdown,
+	.err_handler	= &ioat_err_handler,
+};
+
+static struct ioatdma_device *
+alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase)
+{
+	struct device *dev = &pdev->dev;
+	struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
+
+	if (!d)
+		return NULL;
+	d->pdev = pdev;
+	d->reg_base = iobase;
+	return d;
+}
+
+static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	void __iomem * const *iomap;
+	struct device *dev = &pdev->dev;
+	struct ioatdma_device *device;
+	int err;
+
+	err = pcim_enable_device(pdev);
+	if (err)
+		return err;
+
+	err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME);
+	if (err)
+		return err;
+	iomap = pcim_iomap_table(pdev);
+	if (!iomap)
+		return -ENOMEM;
+
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (err)
+		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (err)
+		return err;
+
+	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	if (err)
+		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (err)
+		return err;
+
+	device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]);
+	if (!device)
+		return -ENOMEM;
+	pci_set_master(pdev);
+	pci_set_drvdata(pdev, device);
+
+	device->version = readb(device->reg_base + IOAT_VER_OFFSET);
+	if (device->version >= IOAT_VER_3_0) {
+		if (is_skx_ioat(pdev))
+			device->version = IOAT_VER_3_2;
+		err = ioat3_dma_probe(device, ioat_dca_enabled);
+
+		if (device->version >= IOAT_VER_3_3)
+			pci_enable_pcie_error_reporting(pdev);
+	} else
+		return -ENODEV;
+
+	if (err) {
+		dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n");
+		pci_disable_pcie_error_reporting(pdev);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static void ioat_remove(struct pci_dev *pdev)
+{
+	struct ioatdma_device *device = pci_get_drvdata(pdev);
+
+	if (!device)
+		return;
+
+	dev_err(&pdev->dev, "Removing dma and dca services\n");
+	if (device->dca) {
+		unregister_dca_provider(device->dca, &pdev->dev);
+		free_dca_provider(device->dca);
+		device->dca = NULL;
+	}
+
+	pci_disable_pcie_error_reporting(pdev);
+	ioat_dma_remove(device);
+}
+
+static int __init ioat_init_module(void)
+{
+	int err = -ENOMEM;
+
+	pr_info("%s: Intel(R) QuickData Technology Driver %s\n",
+		DRV_NAME, IOAT_DMA_VERSION);
+
+	ioat_cache = kmem_cache_create("ioat", sizeof(struct ioat_ring_ent),
+					0, SLAB_HWCACHE_ALIGN, NULL);
+	if (!ioat_cache)
+		return -ENOMEM;
+
+	ioat_sed_cache = KMEM_CACHE(ioat_sed_ent, 0);
+	if (!ioat_sed_cache)
+		goto err_ioat_cache;
+
+	err = pci_register_driver(&ioat_pci_driver);
+	if (err)
+		goto err_ioat3_cache;
+
+	return 0;
+
+ err_ioat3_cache:
+	kmem_cache_destroy(ioat_sed_cache);
+
+ err_ioat_cache:
+	kmem_cache_destroy(ioat_cache);
+
+	return err;
+}
+module_init(ioat_init_module);
+
+static void __exit ioat_exit_module(void)
+{
+	pci_unregister_driver(&ioat_pci_driver);
+	kmem_cache_destroy(ioat_cache);
+}
+module_exit(ioat_exit_module);
diff --git a/drivers/dma/ioat/prep.c b/drivers/dma/ioat/prep.c
new file mode 100644
index 0000000..243421a
--- /dev/null
+++ b/drivers/dma/ioat/prep.c
@@ -0,0 +1,749 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/gfp.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/prefetch.h>
+#include "../dmaengine.h"
+#include "registers.h"
+#include "hw.h"
+#include "dma.h"
+
+#define MAX_SCF	256
+
+/* provide a lookup table for setting the source address in the base or
+ * extended descriptor of an xor or pq descriptor
+ */
+static const u8 xor_idx_to_desc = 0xe0;
+static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };
+static const u8 pq_idx_to_desc = 0xf8;
+static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1,
+				       2, 2, 2, 2, 2, 2, 2 };
+static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
+static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7,
+					0, 1, 2, 3, 4, 5, 6 };
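+/*
+ * For example, xor_idx_to_desc = 0xe0 maps sources 5-7 to the extended
+ * descriptor (descs[1]) and sources 0-4 to the base descriptor, with
+ * xor_idx_to_field giving the hardware field used for each source.  The pq
+ * tables use the same scheme with three sources in the base descriptor.
+ */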
+
+static void xor_set_src(struct ioat_raw_descriptor *descs[2],
+			dma_addr_t addr, u32 offset, int idx)
+{
+	struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
+
+	raw->field[xor_idx_to_field[idx]] = addr + offset;
+}
+
+static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
+{
+	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
+
+	return raw->field[pq_idx_to_field[idx]];
+}
+
+static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx)
+{
+	struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];
+
+	return raw->field[pq16_idx_to_field[idx]];
+}
+
+static void pq_set_src(struct ioat_raw_descriptor *descs[2],
+		       dma_addr_t addr, u32 offset, u8 coef, int idx)
+{
+	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
+	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
+
+	raw->field[pq_idx_to_field[idx]] = addr + offset;
+	pq->coef[idx] = coef;
+}
+
+static void pq16_set_src(struct ioat_raw_descriptor *desc[3],
+			dma_addr_t addr, u32 offset, u8 coef, unsigned idx)
+{
+	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0];
+	struct ioat_pq16a_descriptor *pq16 =
+		(struct ioat_pq16a_descriptor *)desc[1];
+	struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];
+
+	raw->field[pq16_idx_to_field[idx]] = addr + offset;
+
+	if (idx < 8)
+		pq->coef[idx] = coef;
+	else
+		pq16->coef[idx - 8] = coef;
+}
+
+static struct ioat_sed_ent *
+ioat3_alloc_sed(struct ioatdma_device *ioat_dma, unsigned int hw_pool)
+{
+	struct ioat_sed_ent *sed;
+	gfp_t flags = __GFP_ZERO | GFP_ATOMIC;
+
+	sed = kmem_cache_alloc(ioat_sed_cache, flags);
+	if (!sed)
+		return NULL;
+
+	sed->hw_pool = hw_pool;
+	sed->hw = dma_pool_alloc(ioat_dma->sed_hw_pool[hw_pool],
+				 flags, &sed->dma);
+	if (!sed->hw) {
+		kmem_cache_free(ioat_sed_cache, sed);
+		return NULL;
+	}
+
+	return sed;
+}
+
+struct dma_async_tx_descriptor *
+ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
+			   dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+	struct ioat_dma_descriptor *hw;
+	struct ioat_ring_ent *desc;
+	dma_addr_t dst = dma_dest;
+	dma_addr_t src = dma_src;
+	size_t total_len = len;
+	int num_descs, idx, i;
+
+	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+		return NULL;
+
+	num_descs = ioat_xferlen_to_descs(ioat_chan, len);
+	if (likely(num_descs) &&
+	    ioat_check_space_lock(ioat_chan, num_descs) == 0)
+		idx = ioat_chan->head;
+	else
+		return NULL;
+	i = 0;
+	do {
+		size_t copy = min_t(size_t, len, 1 << ioat_chan->xfercap_log);
+
+		desc = ioat_get_ring_ent(ioat_chan, idx + i);
+		hw = desc->hw;
+
+		hw->size = copy;
+		hw->ctl = 0;
+		hw->src_addr = src;
+		hw->dst_addr = dst;
+
+		len -= copy;
+		dst += copy;
+		src += copy;
+		dump_desc_dbg(ioat_chan, desc);
+	} while (++i < num_descs);
+
+	desc->txd.flags = flags;
+	desc->len = total_len;
+	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+	hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+	hw->ctl_f.compl_write = 1;
+	dump_desc_dbg(ioat_chan, desc);
+	/* we leave the channel locked to ensure in order submission */
+
+	return &desc->txd;
+}
+
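+/*
+ * Usage sketch (illustrative only): a dmaengine client typically reaches the
+ * prep routine above through the generic API rather than calling it directly:
+ *
+ *	tx = dmaengine_prep_dma_memcpy(chan, dst, src, len, DMA_PREP_INTERRUPT);
+ *	if (tx) {
+ *		cookie = dmaengine_submit(tx);
+ *		dma_async_issue_pending(chan);
+ *	}
+ *
+ * where dst and src are dma_addr_t values already mapped for this device.
+ */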
+
+static struct dma_async_tx_descriptor *
+__ioat_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
+		      dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
+		      size_t len, unsigned long flags)
+{
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+	struct ioat_ring_ent *compl_desc;
+	struct ioat_ring_ent *desc;
+	struct ioat_ring_ent *ext;
+	size_t total_len = len;
+	struct ioat_xor_descriptor *xor;
+	struct ioat_xor_ext_descriptor *xor_ex = NULL;
+	struct ioat_dma_descriptor *hw;
+	int num_descs, with_ext, idx, i;
+	u32 offset = 0;
+	u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;
+
+	BUG_ON(src_cnt < 2);
+
+	num_descs = ioat_xferlen_to_descs(ioat_chan, len);
+	/* we need 2x the number of descriptors to cover greater than 5
+	 * sources
+	 */
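+	/*
+	 * e.g. an 8-source xor over a length needing three transfers uses
+	 * 3 * 2 = 6 ring slots here, plus the trailing null completion
+	 * descriptor reserved below.
+	 */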
+	if (src_cnt > 5) {
+		with_ext = 1;
+		num_descs *= 2;
+	} else
+		with_ext = 0;
+
+	/* completion writes from the raid engine may pass completion
+	 * writes from the legacy engine, so we need one extra null
+	 * (legacy) descriptor to ensure all completion writes arrive in
+	 * order.
+	 */
+	if (likely(num_descs) &&
+	    ioat_check_space_lock(ioat_chan, num_descs+1) == 0)
+		idx = ioat_chan->head;
+	else
+		return NULL;
+	i = 0;
+	do {
+		struct ioat_raw_descriptor *descs[2];
+		size_t xfer_size = min_t(size_t,
+					 len, 1 << ioat_chan->xfercap_log);
+		int s;
+
+		desc = ioat_get_ring_ent(ioat_chan, idx + i);
+		xor = desc->xor;
+
+		/* save a branch by unconditionally retrieving the
+		 * extended descriptor; xor_set_src() knows not to write
+		 * to it in the single descriptor case.
+		 */
+		ext = ioat_get_ring_ent(ioat_chan, idx + i + 1);
+		xor_ex = ext->xor_ex;
+
+		descs[0] = (struct ioat_raw_descriptor *) xor;
+		descs[1] = (struct ioat_raw_descriptor *) xor_ex;
+		for (s = 0; s < src_cnt; s++)
+			xor_set_src(descs, src[s], offset, s);
+		xor->size = xfer_size;
+		xor->dst_addr = dest + offset;
+		xor->ctl = 0;
+		xor->ctl_f.op = op;
+		xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);
+
+		len -= xfer_size;
+		offset += xfer_size;
+		dump_desc_dbg(ioat_chan, desc);
+	} while ((i += 1 + with_ext) < num_descs);
+
+	/* last xor descriptor carries the unmap parameters and fence bit */
+	desc->txd.flags = flags;
+	desc->len = total_len;
+	if (result)
+		desc->result = result;
+	xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+
+	/* completion descriptor carries interrupt bit */
+	compl_desc = ioat_get_ring_ent(ioat_chan, idx + i);
+	compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
+	hw = compl_desc->hw;
+	hw->ctl = 0;
+	hw->ctl_f.null = 1;
+	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+	hw->ctl_f.compl_write = 1;
+	hw->size = NULL_DESC_BUFFER_SIZE;
+	dump_desc_dbg(ioat_chan, compl_desc);
+
+	/* we leave the channel locked to ensure in order submission */
+	return &compl_desc->txd;
+}
+
+struct dma_async_tx_descriptor *
+ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+	       unsigned int src_cnt, size_t len, unsigned long flags)
+{
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+
+	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+		return NULL;
+
+	return __ioat_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
+		    unsigned int src_cnt, size_t len,
+		    enum sum_check_flags *result, unsigned long flags)
+{
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+
+	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+		return NULL;
+
+	/* the cleanup routine only sets bits on validate failure; it
+	 * does not clear bits on validate success... so clear it here
+	 */
+	*result = 0;
+
+	return __ioat_prep_xor_lock(chan, result, src[0], &src[1],
+				     src_cnt - 1, len, flags);
+}
+
+static void
+dump_pq_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc,
+		 struct ioat_ring_ent *ext)
+{
+	struct device *dev = to_dev(ioat_chan);
+	struct ioat_pq_descriptor *pq = desc->pq;
+	struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
+	struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
+	int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
+	int i;
+
+	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
+		" sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
+		" src_cnt: %d)\n",
+		desc_id(desc), (unsigned long long) desc->txd.phys,
+		(unsigned long long) (pq_ex ? pq_ex->next : pq->next),
+		desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op,
+		pq->ctl_f.int_en, pq->ctl_f.compl_write,
+		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
+		pq->ctl_f.src_cnt);
+	for (i = 0; i < src_cnt; i++)
+		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
+			(unsigned long long) pq_get_src(descs, i), pq->coef[i]);
+	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
+	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
+	dev_dbg(dev, "\tNEXT: %#llx\n", pq->next);
+}
+
+static void dump_pq16_desc_dbg(struct ioatdma_chan *ioat_chan,
+			       struct ioat_ring_ent *desc)
+{
+	struct device *dev = to_dev(ioat_chan);
+	struct ioat_pq_descriptor *pq = desc->pq;
+	struct ioat_raw_descriptor *descs[] = { (void *)pq,
+						(void *)pq,
+						(void *)pq };
+	int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt);
+	int i;
+
+	if (desc->sed) {
+		descs[1] = (void *)desc->sed->hw;
+		descs[2] = (void *)desc->sed->hw + 64;
+	}
+
+	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
+		" sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
+		" src_cnt: %d)\n",
+		desc_id(desc), (unsigned long long) desc->txd.phys,
+		(unsigned long long) pq->next,
+		desc->txd.flags, pq->size, pq->ctl,
+		pq->ctl_f.op, pq->ctl_f.int_en,
+		pq->ctl_f.compl_write,
+		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
+		pq->ctl_f.src_cnt);
+	for (i = 0; i < src_cnt; i++) {
+		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
+			(unsigned long long) pq16_get_src(descs, i),
+			pq->coef[i]);
+	}
+	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
+	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
+}
+
+static struct dma_async_tx_descriptor *
+__ioat_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
+		     const dma_addr_t *dst, const dma_addr_t *src,
+		     unsigned int src_cnt, const unsigned char *scf,
+		     size_t len, unsigned long flags)
+{
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+	struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+	struct ioat_ring_ent *compl_desc;
+	struct ioat_ring_ent *desc;
+	struct ioat_ring_ent *ext;
+	size_t total_len = len;
+	struct ioat_pq_descriptor *pq;
+	struct ioat_pq_ext_descriptor *pq_ex = NULL;
+	struct ioat_dma_descriptor *hw;
+	u32 offset = 0;
+	u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
+	int i, s, idx, with_ext, num_descs;
+	int cb32 = (ioat_dma->version < IOAT_VER_3_3) ? 1 : 0;
+
+	dev_dbg(to_dev(ioat_chan), "%s\n", __func__);
+	/* the engine requires at least two sources (we provide
+	 * at least 1 implied source in the DMA_PREP_CONTINUE case)
+	 */
+	BUG_ON(src_cnt + dmaf_continue(flags) < 2);
+
+	num_descs = ioat_xferlen_to_descs(ioat_chan, len);
+	/* we need 2x the number of descriptors to cover greater than 3
+	 * sources (we need 1 extra source in the q-only continuation
+	 * case and 3 extra sources in the p+q continuation case).
+	 */
+	if (src_cnt + dmaf_p_disabled_continue(flags) > 3 ||
+	    (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) {
+		with_ext = 1;
+		num_descs *= 2;
+	} else
+		with_ext = 0;
+
+	/* completion writes from the raid engine may pass completion
+	 * writes from the legacy engine, so we need one extra null
+	 * (legacy) descriptor to ensure all completion writes arrive in
+	 * order.
+	 */
+	if (likely(num_descs) &&
+	    ioat_check_space_lock(ioat_chan, num_descs + cb32) == 0)
+		idx = ioat_chan->head;
+	else
+		return NULL;
+	i = 0;
+	do {
+		struct ioat_raw_descriptor *descs[2];
+		size_t xfer_size = min_t(size_t, len,
+					 1 << ioat_chan->xfercap_log);
+
+		desc = ioat_get_ring_ent(ioat_chan, idx + i);
+		pq = desc->pq;
+
+		/* save a branch by unconditionally retrieving the
+		 * extended descriptor; pq_set_src() knows not to write
+		 * to it in the single descriptor case.
+		 */
+		ext = ioat_get_ring_ent(ioat_chan, idx + i + with_ext);
+		pq_ex = ext->pq_ex;
+
+		descs[0] = (struct ioat_raw_descriptor *) pq;
+		descs[1] = (struct ioat_raw_descriptor *) pq_ex;
+
+		for (s = 0; s < src_cnt; s++)
+			pq_set_src(descs, src[s], offset, scf[s], s);
+
+		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
+		if (dmaf_p_disabled_continue(flags))
+			pq_set_src(descs, dst[1], offset, 1, s++);
+		else if (dmaf_continue(flags)) {
+			pq_set_src(descs, dst[0], offset, 0, s++);
+			pq_set_src(descs, dst[1], offset, 1, s++);
+			pq_set_src(descs, dst[1], offset, 0, s++);
+		}
+		pq->size = xfer_size;
+		pq->p_addr = dst[0] + offset;
+		pq->q_addr = dst[1] + offset;
+		pq->ctl = 0;
+		pq->ctl_f.op = op;
+		/* we turn on descriptor write back error status */
+		if (ioat_dma->cap & IOAT_CAP_DWBES)
+			pq->ctl_f.wb_en = result ? 1 : 0;
+		pq->ctl_f.src_cnt = src_cnt_to_hw(s);
+		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
+		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
+
+		len -= xfer_size;
+		offset += xfer_size;
+	} while ((i += 1 + with_ext) < num_descs);
+
+	/* last pq descriptor carries the unmap parameters and fence bit */
+	desc->txd.flags = flags;
+	desc->len = total_len;
+	if (result)
+		desc->result = result;
+	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+	dump_pq_desc_dbg(ioat_chan, desc, ext);
+
+	if (!cb32) {
+		pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+		pq->ctl_f.compl_write = 1;
+		compl_desc = desc;
+	} else {
+		/* completion descriptor carries interrupt bit */
+		compl_desc = ioat_get_ring_ent(ioat_chan, idx + i);
+		compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
+		hw = compl_desc->hw;
+		hw->ctl = 0;
+		hw->ctl_f.null = 1;
+		hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+		hw->ctl_f.compl_write = 1;
+		hw->size = NULL_DESC_BUFFER_SIZE;
+		dump_desc_dbg(ioat_chan, compl_desc);
+	}
+
+	/* we leave the channel locked to ensure in order submission */
+	return &compl_desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+__ioat_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
+		       const dma_addr_t *dst, const dma_addr_t *src,
+		       unsigned int src_cnt, const unsigned char *scf,
+		       size_t len, unsigned long flags)
+{
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+	struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
+	struct ioat_ring_ent *desc;
+	size_t total_len = len;
+	struct ioat_pq_descriptor *pq;
+	u32 offset = 0;
+	u8 op;
+	int i, s, idx, num_descs;
+
+	/* this function is only called with 9-16 sources */
+	op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S;
+
+	dev_dbg(to_dev(ioat_chan), "%s\n", __func__);
+
+	num_descs = ioat_xferlen_to_descs(ioat_chan, len);
+
+	/*
+	 * 16-source pq is only available on CB3.3, which does not have the
+	 * completion write hardware bug, so no extra null descriptor is needed.
+	 */
+	if (num_descs && ioat_check_space_lock(ioat_chan, num_descs) == 0)
+		idx = ioat_chan->head;
+	else
+		return NULL;
+
+	i = 0;
+
+	do {
+		struct ioat_raw_descriptor *descs[4];
+		size_t xfer_size = min_t(size_t, len,
+					 1 << ioat_chan->xfercap_log);
+
+		desc = ioat_get_ring_ent(ioat_chan, idx + i);
+		pq = desc->pq;
+
+		descs[0] = (struct ioat_raw_descriptor *) pq;
+
+		desc->sed = ioat3_alloc_sed(ioat_dma, (src_cnt-2) >> 3);
+		if (!desc->sed) {
+			dev_err(to_dev(ioat_chan),
+				"%s: no free sed entries\n", __func__);
+			return NULL;
+		}
+
+		pq->sed_addr = desc->sed->dma;
+		desc->sed->parent = desc;
+
+		descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw;
+		descs[2] = (void *)descs[1] + 64;
+
+		for (s = 0; s < src_cnt; s++)
+			pq16_set_src(descs, src[s], offset, scf[s], s);
+
+		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
+		if (dmaf_p_disabled_continue(flags))
+			pq16_set_src(descs, dst[1], offset, 1, s++);
+		else if (dmaf_continue(flags)) {
+			pq16_set_src(descs, dst[0], offset, 0, s++);
+			pq16_set_src(descs, dst[1], offset, 1, s++);
+			pq16_set_src(descs, dst[1], offset, 0, s++);
+		}
+
+		pq->size = xfer_size;
+		pq->p_addr = dst[0] + offset;
+		pq->q_addr = dst[1] + offset;
+		pq->ctl = 0;
+		pq->ctl_f.op = op;
+		pq->ctl_f.src_cnt = src16_cnt_to_hw(s);
+		/* we turn on descriptor write back error status */
+		if (ioat_dma->cap & IOAT_CAP_DWBES)
+			pq->ctl_f.wb_en = result ? 1 : 0;
+		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
+		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
+
+		len -= xfer_size;
+		offset += xfer_size;
+	} while (++i < num_descs);
+
+	/* last pq descriptor carries the unmap parameters and fence bit */
+	desc->txd.flags = flags;
+	desc->len = total_len;
+	if (result)
+		desc->result = result;
+	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+
+	/* with cb3.3 we should be able to do completion w/o a null desc */
+	pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+	pq->ctl_f.compl_write = 1;
+
+	dump_pq16_desc_dbg(ioat_chan, desc);
+
+	/* we leave the channel locked to ensure in order submission */
+	return &desc->txd;
+}
+
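+/*
+ * Account for the implied sources added for DMA_PREP_CONTINUE operations: a
+ * q-only continuation feeds the previous Q back in as one extra source, while
+ * a full p+q continuation adds three extra sources (the previous P and the
+ * previous Q twice), matching the extra pq_set_src()/pq16_set_src() calls in
+ * the prep routines above.
+ */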
+static int src_cnt_flags(unsigned int src_cnt, unsigned long flags)
+{
+	if (dmaf_p_disabled_continue(flags))
+		return src_cnt + 1;
+	else if (dmaf_continue(flags))
+		return src_cnt + 3;
+	else
+		return src_cnt;
+}
+
+struct dma_async_tx_descriptor *
+ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+	      unsigned int src_cnt, const unsigned char *scf, size_t len,
+	      unsigned long flags)
+{
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+
+	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+		return NULL;
+
+	/* specify valid address for disabled result */
+	if (flags & DMA_PREP_PQ_DISABLE_P)
+		dst[0] = dst[1];
+	if (flags & DMA_PREP_PQ_DISABLE_Q)
+		dst[1] = dst[0];
+
+	/* handle the single source multiply case from the raid6
+	 * recovery path
+	 */
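+	/*
+	 * The engine needs at least two sources, so the single source is
+	 * duplicated with a zero coefficient; the zero coefficient keeps the
+	 * duplicate from contributing to the Q result.
+	 */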
+	if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) {
+		dma_addr_t single_source[2];
+		unsigned char single_source_coef[2];
+
+		BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
+		single_source[0] = src[0];
+		single_source[1] = src[0];
+		single_source_coef[0] = scf[0];
+		single_source_coef[1] = 0;
+
+		return src_cnt_flags(src_cnt, flags) > 8 ?
+			__ioat_prep_pq16_lock(chan, NULL, dst, single_source,
+					       2, single_source_coef, len,
+					       flags) :
+			__ioat_prep_pq_lock(chan, NULL, dst, single_source, 2,
+					     single_source_coef, len, flags);
+
+	} else {
+		return src_cnt_flags(src_cnt, flags) > 8 ?
+			__ioat_prep_pq16_lock(chan, NULL, dst, src, src_cnt,
+					       scf, len, flags) :
+			__ioat_prep_pq_lock(chan, NULL, dst, src, src_cnt,
+					     scf, len, flags);
+	}
+}
+
+struct dma_async_tx_descriptor *
+ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+		  unsigned int src_cnt, const unsigned char *scf, size_t len,
+		  enum sum_check_flags *pqres, unsigned long flags)
+{
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+
+	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+		return NULL;
+
+	/* specify valid address for disabled result */
+	if (flags & DMA_PREP_PQ_DISABLE_P)
+		pq[0] = pq[1];
+	if (flags & DMA_PREP_PQ_DISABLE_Q)
+		pq[1] = pq[0];
+
+	/* the cleanup routine only sets bits on validate failure; it
+	 * does not clear bits on validate success... so clear it here
+	 */
+	*pqres = 0;
+
+	return src_cnt_flags(src_cnt, flags) > 8 ?
+		__ioat_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len,
+				       flags) :
+		__ioat_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
+				     flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
+		 unsigned int src_cnt, size_t len, unsigned long flags)
+{
+	unsigned char scf[MAX_SCF];
+	dma_addr_t pq[2];
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+
+	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+		return NULL;
+
+	if (src_cnt > MAX_SCF)
+		return NULL;
+
+	memset(scf, 0, src_cnt);
+	pq[0] = dst;
+	flags |= DMA_PREP_PQ_DISABLE_Q;
+	pq[1] = dst; /* specify valid address for disabled result */
+
+	return src_cnt_flags(src_cnt, flags) > 8 ?
+		__ioat_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len,
+				       flags) :
+		__ioat_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
+				     flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
+		     unsigned int src_cnt, size_t len,
+		     enum sum_check_flags *result, unsigned long flags)
+{
+	unsigned char scf[MAX_SCF];
+	dma_addr_t pq[2];
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);
+
+	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+		return NULL;
+
+	if (src_cnt > MAX_SCF)
+		return NULL;
+
+	/* the cleanup routine only sets bits on validate failure; it
+	 * does not clear bits on validate success... so clear it here
+	 */
+	*result = 0;
+
+	memset(scf, 0, src_cnt);
+	pq[0] = src[0];
+	flags |= DMA_PREP_PQ_DISABLE_Q;
+	pq[1] = pq[0]; /* specify valid address for disabled result */
+
+	return src_cnt_flags(src_cnt, flags) > 8 ?
+		__ioat_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1,
+				       scf, len, flags) :
+		__ioat_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1,
+				     scf, len, flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
+{
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+	struct ioat_ring_ent *desc;
+	struct ioat_dma_descriptor *hw;
+
+	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
+		return NULL;
+
+	if (ioat_check_space_lock(ioat_chan, 1) == 0)
+		desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head);
+	else
+		return NULL;
+
+	hw = desc->hw;
+	hw->ctl = 0;
+	hw->ctl_f.null = 1;
+	hw->ctl_f.int_en = 1;
+	hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+	hw->ctl_f.compl_write = 1;
+	hw->size = NULL_DESC_BUFFER_SIZE;
+	hw->src_addr = 0;
+	hw->dst_addr = 0;
+
+	desc->txd.flags = flags;
+	desc->len = 1;
+
+	dump_desc_dbg(ioat_chan, desc);
+
+	/* we leave the channel locked to ensure in order submission */
+	return &desc->txd;
+}
+
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h
new file mode 100644
index 0000000..2f3bbc8
--- /dev/null
+++ b/drivers/dma/ioat/registers.h
@@ -0,0 +1,246 @@
+/*
+ * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef _IOAT_REGISTERS_H_
+#define _IOAT_REGISTERS_H_
+
+#define IOAT_PCI_DMACTRL_OFFSET			0x48
+#define IOAT_PCI_DMACTRL_DMA_EN			0x00000001
+#define IOAT_PCI_DMACTRL_MSI_EN			0x00000002
+
+#define IOAT_PCI_DEVICE_ID_OFFSET		0x02
+#define IOAT_PCI_DMAUNCERRSTS_OFFSET		0x148
+#define IOAT_PCI_CHANERR_INT_OFFSET		0x180
+#define IOAT_PCI_CHANERRMASK_INT_OFFSET		0x184
+
+/* PCIe config registers */
+
+/* EXPCAPID + N */
+#define IOAT_DEVCTRL_OFFSET			0x8
+/* relaxed ordering enable */
+#define IOAT_DEVCTRL_ROE			0x10
+
+/* MMIO Device Registers */
+#define IOAT_CHANCNT_OFFSET			0x00	/*  8-bit */
+
+#define IOAT_XFERCAP_OFFSET			0x01	/*  8-bit */
+#define IOAT_XFERCAP_4KB			12
+#define IOAT_XFERCAP_8KB			13
+#define IOAT_XFERCAP_16KB			14
+#define IOAT_XFERCAP_32KB			15
+#define IOAT_XFERCAP_32GB			0
+
+#define IOAT_GENCTRL_OFFSET			0x02	/*  8-bit */
+#define IOAT_GENCTRL_DEBUG_EN			0x01
+
+#define IOAT_INTRCTRL_OFFSET			0x03	/*  8-bit */
+#define IOAT_INTRCTRL_MASTER_INT_EN		0x01	/* Master Interrupt Enable */
+#define IOAT_INTRCTRL_INT_STATUS		0x02	/* ATTNSTATUS -or- Channel Int */
+#define IOAT_INTRCTRL_INT			0x04	/* INT_STATUS -and- MASTER_INT_EN */
+#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL	0x08	/* Enable all MSI-X vectors */
+
+#define IOAT_ATTNSTATUS_OFFSET			0x04	/* Each bit is a channel */
+
+#define IOAT_VER_OFFSET				0x08	/*  8-bit */
+#define IOAT_VER_MAJOR_MASK			0xF0
+#define IOAT_VER_MINOR_MASK			0x0F
+#define GET_IOAT_VER_MAJOR(x)			(((x) & IOAT_VER_MAJOR_MASK) >> 4)
+#define GET_IOAT_VER_MINOR(x)			((x) & IOAT_VER_MINOR_MASK)
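+/* e.g. a version register value of 0x33 decodes as major 3, minor 3 (CB3.3) */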
+
+#define IOAT_PERPORTOFFSET_OFFSET		0x0A	/* 16-bit */
+
+#define IOAT_INTRDELAY_OFFSET			0x0C	/* 16-bit */
+#define IOAT_INTRDELAY_MASK			0x3FFF	/* Interrupt Delay Time */
+#define IOAT_INTRDELAY_COALESE_SUPPORT		0x8000	/* Interrupt Coalescing Supported */
+
+#define IOAT_DEVICE_STATUS_OFFSET		0x0E	/* 16-bit */
+#define IOAT_DEVICE_STATUS_DEGRADED_MODE	0x0001
+#define IOAT_DEVICE_MMIO_RESTRICTED		0x0002
+#define IOAT_DEVICE_MEMORY_BYPASS		0x0004
+#define IOAT_DEVICE_ADDRESS_REMAPPING		0x0008
+
+#define IOAT_DMA_CAP_OFFSET			0x10	/* 32-bit */
+#define IOAT_CAP_PAGE_BREAK			0x00000001
+#define IOAT_CAP_CRC				0x00000002
+#define IOAT_CAP_SKIP_MARKER			0x00000004
+#define IOAT_CAP_DCA				0x00000010
+#define IOAT_CAP_CRC_MOVE			0x00000020
+#define IOAT_CAP_FILL_BLOCK			0x00000040
+#define IOAT_CAP_APIC				0x00000080
+#define IOAT_CAP_XOR				0x00000100
+#define IOAT_CAP_PQ				0x00000200
+#define IOAT_CAP_DWBES				0x00002000
+#define IOAT_CAP_RAID16SS			0x00020000
+
+#define IOAT_CHANNEL_MMIO_SIZE			0x80	/* Each Channel MMIO space is this size */
+
+/* DMA Channel Registers */
+#define IOAT_CHANCTRL_OFFSET			0x00	/* 16-bit Channel Control Register */
+#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK	0xF000
+#define IOAT3_CHANCTRL_COMPL_DCA_EN		0x0200
+#define IOAT_CHANCTRL_CHANNEL_IN_USE		0x0100
+#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL	0x0020
+#define IOAT_CHANCTRL_ERR_INT_EN		0x0010
+#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN		0x0008
+#define IOAT_CHANCTRL_ERR_COMPLETION_EN		0x0004
+#define IOAT_CHANCTRL_INT_REARM			0x0001
+#define IOAT_CHANCTRL_RUN			(IOAT_CHANCTRL_INT_REARM |\
+						 IOAT_CHANCTRL_ERR_INT_EN |\
+						 IOAT_CHANCTRL_ERR_COMPLETION_EN |\
+						 IOAT_CHANCTRL_ANY_ERR_ABORT_EN)
+
+#define IOAT_DMA_COMP_OFFSET			0x02	/* 16-bit DMA channel compatibility */
+#define IOAT_DMA_COMP_V1			0x0001	/* Compatibility with DMA version 1 */
+#define IOAT_DMA_COMP_V2			0x0002	/* Compatibility with DMA version 2 */
+
+#define IOAT_CHANSTS_OFFSET		0x08	/* 64-bit Channel Status Register */
+#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR	(~0x3fULL)
+#define IOAT_CHANSTS_SOFT_ERR			0x10ULL
+#define IOAT_CHANSTS_UNAFFILIATED_ERR		0x8ULL
+#define IOAT_CHANSTS_STATUS	0x7ULL
+#define IOAT_CHANSTS_ACTIVE	0x0
+#define IOAT_CHANSTS_DONE	0x1
+#define IOAT_CHANSTS_SUSPENDED	0x2
+#define IOAT_CHANSTS_HALTED	0x3
+
+#define IOAT_CHAN_DMACOUNT_OFFSET	0x06    /* 16-bit DMA Count register */
+
+#define IOAT_DCACTRL_OFFSET         0x30   /* 32 bit Direct Cache Access Control Register */
+#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000
+#define IOAT_DCACTRL_TARGET_CPU_MASK   0xFFFF /* APIC ID */
+
+/* CB DCA Memory Space Registers */
+#define IOAT_DCAOFFSET_OFFSET       0x14
+/* CB_BAR + IOAT_DCAOFFSET value */
+#define IOAT_DCA_VER_OFFSET         0x00
+#define IOAT_DCA_VER_MAJOR_MASK     0xF0
+#define IOAT_DCA_VER_MINOR_MASK     0x0F
+
+#define IOAT_DCA_COMP_OFFSET        0x02
+#define IOAT_DCA_COMP_V1            0x1
+
+#define IOAT_FSB_CAPABILITY_OFFSET  0x04
+#define IOAT_FSB_CAPABILITY_PREFETCH    0x1
+
+#define IOAT_PCI_CAPABILITY_OFFSET  0x06
+#define IOAT_PCI_CAPABILITY_MEMWR   0x1
+
+#define IOAT_FSB_CAP_ENABLE_OFFSET  0x08
+#define IOAT_FSB_CAP_ENABLE_PREFETCH    0x1
+
+#define IOAT_PCI_CAP_ENABLE_OFFSET  0x0A
+#define IOAT_PCI_CAP_ENABLE_MEMWR   0x1
+
+#define IOAT_APICID_TAG_MAP_OFFSET  0x0C
+#define IOAT_APICID_TAG_MAP_TAG0    0x0000000F
+#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0
+#define IOAT_APICID_TAG_MAP_TAG1    0x000000F0
+#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4
+#define IOAT_APICID_TAG_MAP_TAG2    0x00000F00
+#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8
+#define IOAT_APICID_TAG_MAP_TAG3    0x0000F000
+#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12
+#define IOAT_APICID_TAG_MAP_TAG4    0x000F0000
+#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16
+#define IOAT_APICID_TAG_CB2_VALID   0x8080808080
+
+#define IOAT_DCA_GREQID_OFFSET      0x10
+#define IOAT_DCA_GREQID_SIZE        0x04
+#define IOAT_DCA_GREQID_MASK        0xFFFF
+#define IOAT_DCA_GREQID_IGNOREFUN   0x10000000
+#define IOAT_DCA_GREQID_VALID       0x20000000
+#define IOAT_DCA_GREQID_LASTID      0x80000000
+
+#define IOAT3_CSI_CAPABILITY_OFFSET 0x08
+#define IOAT3_CSI_CAPABILITY_PREFETCH    0x1
+
+#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A
+#define IOAT3_PCI_CAPABILITY_MEMWR  0x1
+
+#define IOAT3_CSI_CONTROL_OFFSET    0x0C
+#define IOAT3_CSI_CONTROL_PREFETCH  0x1
+
+#define IOAT3_PCI_CONTROL_OFFSET    0x0E
+#define IOAT3_PCI_CONTROL_MEMWR     0x1
+
+#define IOAT3_APICID_TAG_MAP_OFFSET 0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_LOW  0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14
+
+#define IOAT3_DCA_GREQID_OFFSET     0x02
+
+#define IOAT1_CHAINADDR_OFFSET		0x0C	/* 64-bit Descriptor Chain Address Register */
+#define IOAT2_CHAINADDR_OFFSET		0x10	/* 64-bit Descriptor Chain Address Register */
+#define IOAT_CHAINADDR_OFFSET(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET)
+#define IOAT1_CHAINADDR_OFFSET_LOW	0x0C
+#define IOAT2_CHAINADDR_OFFSET_LOW	0x10
+#define IOAT_CHAINADDR_OFFSET_LOW(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW)
+#define IOAT1_CHAINADDR_OFFSET_HIGH	0x10
+#define IOAT2_CHAINADDR_OFFSET_HIGH	0x14
+#define IOAT_CHAINADDR_OFFSET_HIGH(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH)
+
+#define IOAT1_CHANCMD_OFFSET		0x14	/*  8-bit DMA Channel Command Register */
+#define IOAT2_CHANCMD_OFFSET		0x04	/*  8-bit DMA Channel Command Register */
+#define IOAT_CHANCMD_OFFSET(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET)
+#define IOAT_CHANCMD_RESET			0x20
+#define IOAT_CHANCMD_RESUME			0x10
+#define IOAT_CHANCMD_ABORT			0x08
+#define IOAT_CHANCMD_SUSPEND			0x04
+#define IOAT_CHANCMD_APPEND			0x02
+#define IOAT_CHANCMD_START			0x01
+
+#define IOAT_CHANCMP_OFFSET			0x18	/* 64-bit Channel Completion Address Register */
+#define IOAT_CHANCMP_OFFSET_LOW			0x18
+#define IOAT_CHANCMP_OFFSET_HIGH		0x1C
+
+#define IOAT_CDAR_OFFSET			0x20	/* 64-bit Current Descriptor Address Register */
+#define IOAT_CDAR_OFFSET_LOW			0x20
+#define IOAT_CDAR_OFFSET_HIGH			0x24
+
+#define IOAT_CHANERR_OFFSET			0x28	/* 32-bit Channel Error Register */
+#define IOAT_CHANERR_SRC_ADDR_ERR	0x0001
+#define IOAT_CHANERR_DEST_ADDR_ERR	0x0002
+#define IOAT_CHANERR_NEXT_ADDR_ERR	0x0004
+#define IOAT_CHANERR_NEXT_DESC_ALIGN_ERR	0x0008
+#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR	0x0010
+#define IOAT_CHANERR_CHANCMD_ERR		0x0020
+#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR	0x0040
+#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR	0x0080
+#define IOAT_CHANERR_READ_DATA_ERR		0x0100
+#define IOAT_CHANERR_WRITE_DATA_ERR		0x0200
+#define IOAT_CHANERR_CONTROL_ERR	0x0400
+#define IOAT_CHANERR_LENGTH_ERR	0x0800
+#define IOAT_CHANERR_COMPLETION_ADDR_ERR	0x1000
+#define IOAT_CHANERR_INT_CONFIGURATION_ERR	0x2000
+#define IOAT_CHANERR_SOFT_ERR			0x4000
+#define IOAT_CHANERR_UNAFFILIATED_ERR		0x8000
+#define IOAT_CHANERR_XOR_P_OR_CRC_ERR		0x10000
+#define IOAT_CHANERR_XOR_Q_ERR			0x20000
+#define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR	0x40000
+
+#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR)
+#define IOAT_CHANERR_RECOVER_MASK (IOAT_CHANERR_READ_DATA_ERR | \
+				   IOAT_CHANERR_WRITE_DATA_ERR)
+
+#define IOAT_CHANERR_MASK_OFFSET		0x2C	/* 32-bit Channel Error Register */
+
+#endif /* _IOAT_REGISTERS_H_ */
diff --git a/drivers/dma/ioat/sysfs.c b/drivers/dma/ioat/sysfs.c
new file mode 100644
index 0000000..3ac677f
--- /dev/null
+++ b/drivers/dma/ioat/sysfs.c
@@ -0,0 +1,177 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/dmaengine.h>
+#include <linux/pci.h>
+#include "dma.h"
+#include "registers.h"
+#include "hw.h"
+
+#include "../dmaengine.h"
+
+static ssize_t cap_show(struct dma_chan *c, char *page)
+{
+	struct dma_device *dma = c->device;
+
+	return sprintf(page, "copy%s%s%s%s%s\n",
+		       dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "",
+		       dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "",
+		       dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "",
+		       dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "",
+		       dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : "");
+
+}
+struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap);
+
+static ssize_t version_show(struct dma_chan *c, char *page)
+{
+	struct dma_device *dma = c->device;
+	struct ioatdma_device *ioat_dma = to_ioatdma_device(dma);
+
+	return sprintf(page, "%d.%d\n",
+		       ioat_dma->version >> 4, ioat_dma->version & 0xf);
+}
+struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version);
+
+static ssize_t
+ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+	struct ioat_sysfs_entry *entry;
+	struct ioatdma_chan *ioat_chan;
+
+	entry = container_of(attr, struct ioat_sysfs_entry, attr);
+	ioat_chan = container_of(kobj, struct ioatdma_chan, kobj);
+
+	if (!entry->show)
+		return -EIO;
+	return entry->show(&ioat_chan->dma_chan, page);
+}
+
+static ssize_t
+ioat_attr_store(struct kobject *kobj, struct attribute *attr,
+		const char *page, size_t count)
+{
+	struct ioat_sysfs_entry *entry;
+	struct ioatdma_chan *ioat_chan;
+
+	entry = container_of(attr, struct ioat_sysfs_entry, attr);
+	ioat_chan = container_of(kobj, struct ioatdma_chan, kobj);
+
+	if (!entry->store)
+		return -EIO;
+	return entry->store(&ioat_chan->dma_chan, page, count);
+}
+
+const struct sysfs_ops ioat_sysfs_ops = {
+	.show	= ioat_attr_show,
+	.store  = ioat_attr_store,
+};
+
+void ioat_kobject_add(struct ioatdma_device *ioat_dma, struct kobj_type *type)
+{
+	struct dma_device *dma = &ioat_dma->dma_dev;
+	struct dma_chan *c;
+
+	list_for_each_entry(c, &dma->channels, device_node) {
+		struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+		struct kobject *parent = &c->dev->device.kobj;
+		int err;
+
+		err = kobject_init_and_add(&ioat_chan->kobj, type,
+					   parent, "quickdata");
+		if (err) {
+			dev_warn(to_dev(ioat_chan),
+				 "sysfs init error (%d), continuing...\n", err);
+			kobject_put(&ioat_chan->kobj);
+			set_bit(IOAT_KOBJ_INIT_FAIL, &ioat_chan->state);
+		}
+	}
+}
+
+void ioat_kobject_del(struct ioatdma_device *ioat_dma)
+{
+	struct dma_device *dma = &ioat_dma->dma_dev;
+	struct dma_chan *c;
+
+	list_for_each_entry(c, &dma->channels, device_node) {
+		struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+
+		if (!test_bit(IOAT_KOBJ_INIT_FAIL, &ioat_chan->state)) {
+			kobject_del(&ioat_chan->kobj);
+			kobject_put(&ioat_chan->kobj);
+		}
+	}
+}
+
+static ssize_t ring_size_show(struct dma_chan *c, char *page)
+{
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+
+	return sprintf(page, "%d\n", (1 << ioat_chan->alloc_order) & ~1);
+}
+static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size);
+
+static ssize_t ring_active_show(struct dma_chan *c, char *page)
+{
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+
+	/* ...taken outside the lock, no need to be precise */
+	return sprintf(page, "%d\n", ioat_ring_active(ioat_chan));
+}
+static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active);
+
+static ssize_t intr_coalesce_show(struct dma_chan *c, char *page)
+{
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+
+	return sprintf(page, "%d\n", ioat_chan->intr_coalesce);
+}
+
+static ssize_t intr_coalesce_store(struct dma_chan *c, const char *page,
+		size_t count)
+{
+	int intr_coalesce = 0;
+	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+
+	if (sscanf(page, "%d", &intr_coalesce) == 1) {
+		if ((intr_coalesce < 0) ||
+		    (intr_coalesce > IOAT_INTRDELAY_MASK))
+			return -EINVAL;
+		ioat_chan->intr_coalesce = intr_coalesce;
+	}
+
+	return count;
+}
+
+static struct ioat_sysfs_entry intr_coalesce_attr = __ATTR_RW(intr_coalesce);
+
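+/*
+ * These entries are exposed under each channel's "quickdata" kobject, e.g.
+ * /sys/class/dma/dma0chan0/quickdata/intr_coalesce (the exact channel name
+ * varies per system).
+ */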
+static struct attribute *ioat_attrs[] = {
+	&ring_size_attr.attr,
+	&ring_active_attr.attr,
+	&ioat_cap_attr.attr,
+	&ioat_version_attr.attr,
+	&intr_coalesce_attr.attr,
+	NULL,
+};
+
+struct kobj_type ioat_ktype = {
+	.sysfs_ops = &ioat_sysfs_ops,
+	.default_attrs = ioat_attrs,
+};
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
new file mode 100644
index 0000000..a410657
--- /dev/null
+++ b/drivers/dma/iop-adma.c
@@ -0,0 +1,1565 @@
+/*
+ * offload engine driver for the Intel Xscale series of i/o processors
+ * Copyright © 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+/*
+ * This driver supports the asynchronous DMA copy and RAID engines available
+ * on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/memory.h>
+#include <linux/ioport.h>
+#include <linux/raid/pq.h>
+#include <linux/slab.h>
+
+#include <mach/adma.h>
+
+#include "dmaengine.h"
+
+#define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common)
+#define to_iop_adma_device(dev) \
+	container_of(dev, struct iop_adma_device, common)
+#define tx_to_iop_adma_slot(tx) \
+	container_of(tx, struct iop_adma_desc_slot, async_tx)
+
+/**
+ * iop_adma_free_slots - flags descriptor slots for reuse
+ * @slot: Slot to free
+ * Caller must hold &iop_chan->lock while calling this function
+ */
+static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
+{
+	int stride = slot->slots_per_op;
+
+	while (stride--) {
+		slot->slots_per_op = 0;
+		slot = list_entry(slot->slot_node.next,
+				struct iop_adma_desc_slot,
+				slot_node);
+	}
+}
+
+static dma_cookie_t
+iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
+	struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
+{
+	struct dma_async_tx_descriptor *tx = &desc->async_tx;
+
+	BUG_ON(tx->cookie < 0);
+	if (tx->cookie > 0) {
+		cookie = tx->cookie;
+		tx->cookie = 0;
+
+		/* call the callback (must not sleep or submit new
+		 * operations to this channel)
+		 */
+		dmaengine_desc_get_callback_invoke(tx, NULL);
+
+		dma_descriptor_unmap(tx);
+		if (desc->group_head)
+			desc->group_head = NULL;
+	}
+
+	/* run dependent operations */
+	dma_run_dependencies(tx);
+
+	return cookie;
+}
+
+static int
+iop_adma_clean_slot(struct iop_adma_desc_slot *desc,
+	struct iop_adma_chan *iop_chan)
+{
+	/* the client is allowed to attach dependent operations
+	 * until 'ack' is set
+	 */
+	if (!async_tx_test_ack(&desc->async_tx))
+		return 0;
+
+	/* leave the last descriptor in the chain
+	 * so we can append to it
+	 */
+	if (desc->chain_node.next == &iop_chan->chain)
+		return 1;
+
+	dev_dbg(iop_chan->device->common.dev,
+		"\tfree slot: %d slots_per_op: %d\n",
+		desc->idx, desc->slots_per_op);
+
+	list_del(&desc->chain_node);
+	iop_adma_free_slots(desc);
+
+	return 0;
+}
+
+static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
+{
+	struct iop_adma_desc_slot *iter, *_iter, *grp_start = NULL;
+	dma_cookie_t cookie = 0;
+	u32 current_desc = iop_chan_get_current_descriptor(iop_chan);
+	int busy = iop_chan_is_busy(iop_chan);
+	int seen_current = 0, slot_cnt = 0, slots_per_op = 0;
+
+	dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+	/* free completed slots from the chain starting with
+	 * the oldest descriptor
+	 */
+	list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
+					chain_node) {
+		pr_debug("\tcookie: %d slot: %d busy: %d "
+			"this_desc: %#x next_desc: %#x ack: %d\n",
+			iter->async_tx.cookie, iter->idx, busy,
+			iter->async_tx.phys, iop_desc_get_next_desc(iter),
+			async_tx_test_ack(&iter->async_tx));
+		prefetch(_iter);
+		prefetch(&_iter->async_tx);
+
+		/* do not advance past the current descriptor loaded into the
+		 * hardware channel; subsequent descriptors are either in
+		 * progress or have not been submitted
+		 */
+		if (seen_current)
+			break;
+
+		/* stop the search if we reach the current descriptor and the
+		 * channel is busy, or if it appears that the current descriptor
+		 * needs to be re-read (i.e. has been appended to)
+		 */
+		if (iter->async_tx.phys == current_desc) {
+			BUG_ON(seen_current++);
+			if (busy || iop_desc_get_next_desc(iter))
+				break;
+		}
+
+		/* detect the start of a group transaction */
+		if (!slot_cnt && !slots_per_op) {
+			slot_cnt = iter->slot_cnt;
+			slots_per_op = iter->slots_per_op;
+			if (slot_cnt <= slots_per_op) {
+				slot_cnt = 0;
+				slots_per_op = 0;
+			}
+		}
+
+		if (slot_cnt) {
+			pr_debug("\tgroup++\n");
+			if (!grp_start)
+				grp_start = iter;
+			slot_cnt -= slots_per_op;
+		}
+
+		/* all the members of a group are complete */
+		if (slots_per_op != 0 && slot_cnt == 0) {
+			struct iop_adma_desc_slot *grp_iter, *_grp_iter;
+			int end_of_chain = 0;
+			pr_debug("\tgroup end\n");
+
+			/* collect the total results */
+			if (grp_start->xor_check_result) {
+				u32 zero_sum_result = 0;
+				slot_cnt = grp_start->slot_cnt;
+				grp_iter = grp_start;
+
+				list_for_each_entry_from(grp_iter,
+					&iop_chan->chain, chain_node) {
+					zero_sum_result |=
+					    iop_desc_get_zero_result(grp_iter);
+					    pr_debug("\titer%d result: %d\n",
+					    grp_iter->idx, zero_sum_result);
+					slot_cnt -= slots_per_op;
+					if (slot_cnt == 0)
+						break;
+				}
+				pr_debug("\tgrp_start->xor_check_result: %p\n",
+					grp_start->xor_check_result);
+				*grp_start->xor_check_result = zero_sum_result;
+			}
+
+			/* clean up the group */
+			slot_cnt = grp_start->slot_cnt;
+			grp_iter = grp_start;
+			list_for_each_entry_safe_from(grp_iter, _grp_iter,
+				&iop_chan->chain, chain_node) {
+				cookie = iop_adma_run_tx_complete_actions(
+					grp_iter, iop_chan, cookie);
+
+				slot_cnt -= slots_per_op;
+				end_of_chain = iop_adma_clean_slot(grp_iter,
+					iop_chan);
+
+				if (slot_cnt == 0 || end_of_chain)
+					break;
+			}
+
+			/* the group should be complete at this point */
+			BUG_ON(slot_cnt);
+
+			slots_per_op = 0;
+			grp_start = NULL;
+			if (end_of_chain)
+				break;
+			else
+				continue;
+		} else if (slots_per_op) /* wait for group completion */
+			continue;
+
+		/* write back zero sum results (single descriptor case) */
+		if (iter->xor_check_result && iter->async_tx.cookie)
+			*iter->xor_check_result =
+				iop_desc_get_zero_result(iter);
+
+		cookie = iop_adma_run_tx_complete_actions(
+					iter, iop_chan, cookie);
+
+		if (iop_adma_clean_slot(iter, iop_chan))
+			break;
+	}
+
+	if (cookie > 0) {
+		iop_chan->common.completed_cookie = cookie;
+		pr_debug("\tcompleted cookie %d\n", cookie);
+	}
+}
+
+static void
+iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
+{
+	spin_lock_bh(&iop_chan->lock);
+	__iop_adma_slot_cleanup(iop_chan);
+	spin_unlock_bh(&iop_chan->lock);
+}
+
+static void iop_adma_tasklet(unsigned long data)
+{
+	struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data;
+
+	/* lockdep will flag dependency submissions as potentially
+	 * recursive locking; this is not the case, as a dependency
+	 * submission will never recurse into a channel's submit routine.
+	 * There are checks in async_tx.c to prevent this.
+	 */
+	spin_lock_nested(&iop_chan->lock, SINGLE_DEPTH_NESTING);
+	__iop_adma_slot_cleanup(iop_chan);
+	spin_unlock(&iop_chan->lock);
+}
+
+static struct iop_adma_desc_slot *
+iop_adma_alloc_slots(struct iop_adma_chan *iop_chan, int num_slots,
+			int slots_per_op)
+{
+	struct iop_adma_desc_slot *iter, *_iter, *alloc_start = NULL;
+	LIST_HEAD(chain);
+	int slots_found, retry = 0;
+
+	/* start the search from the last allocated descriptor;
+	 * if a contiguous allocation cannot be found, start searching
+	 * from the beginning of the list
+	 */
+retry:
+	slots_found = 0;
+	if (retry == 0)
+		iter = iop_chan->last_used;
+	else
+		iter = list_entry(&iop_chan->all_slots,
+			struct iop_adma_desc_slot,
+			slot_node);
+
+	list_for_each_entry_safe_continue(
+		iter, _iter, &iop_chan->all_slots, slot_node) {
+		prefetch(_iter);
+		prefetch(&_iter->async_tx);
+		if (iter->slots_per_op) {
+			/* give up after finding the first busy slot
+			 * on the second pass through the list
+			 */
+			if (retry)
+				break;
+
+			slots_found = 0;
+			continue;
+		}
+
+		/* start the allocation if the slot is correctly aligned */
+		if (!slots_found++) {
+			if (iop_desc_is_aligned(iter, slots_per_op))
+				alloc_start = iter;
+			else {
+				slots_found = 0;
+				continue;
+			}
+		}
+
+		if (slots_found == num_slots) {
+			struct iop_adma_desc_slot *alloc_tail = NULL;
+			struct iop_adma_desc_slot *last_used = NULL;
+			iter = alloc_start;
+			while (num_slots) {
+				int i;
+				dev_dbg(iop_chan->device->common.dev,
+					"allocated slot: %d "
+					"(desc %p phys: %#x) slots_per_op %d\n",
+					iter->idx, iter->hw_desc,
+					iter->async_tx.phys, slots_per_op);
+
+				/* pre-ack all but the last descriptor */
+				if (num_slots != slots_per_op)
+					async_tx_ack(&iter->async_tx);
+
+				list_add_tail(&iter->chain_node, &chain);
+				alloc_tail = iter;
+				iter->async_tx.cookie = 0;
+				iter->slot_cnt = num_slots;
+				iter->xor_check_result = NULL;
+				for (i = 0; i < slots_per_op; i++) {
+					iter->slots_per_op = slots_per_op - i;
+					last_used = iter;
+					iter = list_entry(iter->slot_node.next,
+						struct iop_adma_desc_slot,
+						slot_node);
+				}
+				num_slots -= slots_per_op;
+			}
+			alloc_tail->group_head = alloc_start;
+			alloc_tail->async_tx.cookie = -EBUSY;
+			list_splice(&chain, &alloc_tail->tx_list);
+			iop_chan->last_used = last_used;
+			iop_desc_clear_next_desc(alloc_start);
+			iop_desc_clear_next_desc(alloc_tail);
+			return alloc_tail;
+		}
+	}
+	if (!retry++)
+		goto retry;
+
+	/* perform direct reclaim if the allocation fails */
+	__iop_adma_slot_cleanup(iop_chan);
+
+	return NULL;
+}
+
+static void iop_adma_check_threshold(struct iop_adma_chan *iop_chan)
+{
+	dev_dbg(iop_chan->device->common.dev, "pending: %d\n",
+		iop_chan->pending);
+
+	if (iop_chan->pending >= IOP_ADMA_THRESHOLD) {
+		iop_chan->pending = 0;
+		iop_chan_append(iop_chan);
+	}
+}
+
+static dma_cookie_t
+iop_adma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan);
+	struct iop_adma_desc_slot *grp_start, *old_chain_tail;
+	int slot_cnt;
+	int slots_per_op;
+	dma_cookie_t cookie;
+	dma_addr_t next_dma;
+
+	grp_start = sw_desc->group_head;
+	slot_cnt = grp_start->slot_cnt;
+	slots_per_op = grp_start->slots_per_op;
+
+	spin_lock_bh(&iop_chan->lock);
+	cookie = dma_cookie_assign(tx);
+
+	old_chain_tail = list_entry(iop_chan->chain.prev,
+		struct iop_adma_desc_slot, chain_node);
+	list_splice_init(&sw_desc->tx_list,
+			 &old_chain_tail->chain_node);
+
+	/* fix up the hardware chain */
+	next_dma = grp_start->async_tx.phys;
+	iop_desc_set_next_desc(old_chain_tail, next_dma);
+	BUG_ON(iop_desc_get_next_desc(old_chain_tail) != next_dma); /* flush */
+
+	/* check for pre-chained descriptors */
+	iop_paranoia(iop_desc_get_next_desc(sw_desc));
+
+	/* increment the pending count by the number of slots:
+	 * memcpy operations have a 1:1 (slot:operation) relation,
+	 * other operations are heavier and will pop the threshold
+	 * more often.
+	 */
+	iop_chan->pending += slot_cnt;
+	iop_adma_check_threshold(iop_chan);
+	spin_unlock_bh(&iop_chan->lock);
+
+	dev_dbg(iop_chan->device->common.dev, "%s cookie: %d slot: %d\n",
+		__func__, sw_desc->async_tx.cookie, sw_desc->idx);
+
+	return cookie;
+}
+
+static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan);
+static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan);
+
+/**
+ * iop_adma_alloc_chan_resources - returns the number of allocated descriptors
+ * @chan: allocate descriptor resources for this channel
+ *
+ * Note: We keep the slots for one operation on iop_chan->chain at all times.
+ * To avoid deadlock via async_xor, num_descs_in_pool must at a minimum be
+ * greater than 2x the number of slots needed to satisfy a device->max_xor
+ * request.
+ */
+static int iop_adma_alloc_chan_resources(struct dma_chan *chan)
+{
+	char *hw_desc;
+	int idx;
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *slot = NULL;
+	int init = iop_chan->slots_allocated ? 0 : 1;
+	struct iop_adma_platform_data *plat_data =
+		dev_get_platdata(&iop_chan->device->pdev->dev);
+	int num_descs_in_pool = plat_data->pool_size/IOP_ADMA_SLOT_SIZE;
+
+	/* Allocate descriptor slots */
+	do {
+		idx = iop_chan->slots_allocated;
+		if (idx == num_descs_in_pool)
+			break;
+
+		slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+		if (!slot) {
+			printk(KERN_INFO "IOP ADMA Channel only initialized"
+				" %d descriptor slots\n", idx);
+			break;
+		}
+		hw_desc = (char *) iop_chan->device->dma_desc_pool_virt;
+		slot->hw_desc = (void *) &hw_desc[idx * IOP_ADMA_SLOT_SIZE];
+
+		dma_async_tx_descriptor_init(&slot->async_tx, chan);
+		slot->async_tx.tx_submit = iop_adma_tx_submit;
+		INIT_LIST_HEAD(&slot->tx_list);
+		INIT_LIST_HEAD(&slot->chain_node);
+		INIT_LIST_HEAD(&slot->slot_node);
+		hw_desc = (char *) iop_chan->device->dma_desc_pool;
+		slot->async_tx.phys =
+			(dma_addr_t) &hw_desc[idx * IOP_ADMA_SLOT_SIZE];
+		slot->idx = idx;
+
+		spin_lock_bh(&iop_chan->lock);
+		iop_chan->slots_allocated++;
+		list_add_tail(&slot->slot_node, &iop_chan->all_slots);
+		spin_unlock_bh(&iop_chan->lock);
+	} while (iop_chan->slots_allocated < num_descs_in_pool);
+
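+	/* seed the slot allocator's search position on the first
+	 * successful allocation
+	 */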
+	if (idx && !iop_chan->last_used)
+		iop_chan->last_used = list_entry(iop_chan->all_slots.next,
+					struct iop_adma_desc_slot,
+					slot_node);
+
+	dev_dbg(iop_chan->device->common.dev,
+		"allocated %d descriptor slots last_used: %p\n",
+		iop_chan->slots_allocated, iop_chan->last_used);
+
+	/* initialize the channel and the chain with a null operation */
+	if (init) {
+		if (dma_has_cap(DMA_MEMCPY,
+			iop_chan->device->common.cap_mask))
+			iop_chan_start_null_memcpy(iop_chan);
+		else if (dma_has_cap(DMA_XOR,
+			iop_chan->device->common.cap_mask))
+			iop_chan_start_null_xor(iop_chan);
+		else
+			BUG();
+	}
+
+	return (idx > 0) ? idx : -ENOMEM;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *sw_desc, *grp_start;
+	int slot_cnt, slots_per_op;
+
+	dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_interrupt_slot_count(&slots_per_op, iop_chan);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		grp_start = sw_desc->group_head;
+		iop_desc_init_interrupt(grp_start, iop_chan);
+		sw_desc->async_tx.flags = flags;
+	}
+	spin_unlock_bh(&iop_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest,
+			 dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *sw_desc, *grp_start;
+	int slot_cnt, slots_per_op;
+
+	if (unlikely(!len))
+		return NULL;
+	BUG_ON(len > IOP_ADMA_MAX_BYTE_COUNT);
+
+	dev_dbg(iop_chan->device->common.dev, "%s len: %u\n",
+		__func__, len);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_memcpy_slot_count(len, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		grp_start = sw_desc->group_head;
+		iop_desc_init_memcpy(grp_start, flags);
+		iop_desc_set_byte_count(grp_start, iop_chan, len);
+		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
+		iop_desc_set_memcpy_src_addr(grp_start, dma_src);
+		sw_desc->async_tx.flags = flags;
+	}
+	spin_unlock_bh(&iop_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
+		      dma_addr_t *dma_src, unsigned int src_cnt, size_t len,
+		      unsigned long flags)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *sw_desc, *grp_start;
+	int slot_cnt, slots_per_op;
+
+	if (unlikely(!len))
+		return NULL;
+	BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
+
+	dev_dbg(iop_chan->device->common.dev,
+		"%s src_cnt: %d len: %u flags: %lx\n",
+		__func__, src_cnt, len, flags);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_xor_slot_count(len, src_cnt, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		grp_start = sw_desc->group_head;
+		iop_desc_init_xor(grp_start, src_cnt, flags);
+		iop_desc_set_byte_count(grp_start, iop_chan, len);
+		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
+		sw_desc->async_tx.flags = flags;
+		while (src_cnt--)
+			iop_desc_set_xor_src_addr(grp_start, src_cnt,
+						  dma_src[src_cnt]);
+	}
+	spin_unlock_bh(&iop_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src,
+			  unsigned int src_cnt, size_t len, u32 *result,
+			  unsigned long flags)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *sw_desc, *grp_start;
+	int slot_cnt, slots_per_op;
+
+	if (unlikely(!len))
+		return NULL;
+
+	dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n",
+		__func__, src_cnt, len);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_zero_sum_slot_count(len, src_cnt, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		grp_start = sw_desc->group_head;
+		iop_desc_init_zero_sum(grp_start, src_cnt, flags);
+		iop_desc_set_zero_sum_byte_count(grp_start, len);
+		grp_start->xor_check_result = result;
+		pr_debug("\t%s: grp_start->xor_check_result: %p\n",
+			__func__, grp_start->xor_check_result);
+		sw_desc->async_tx.flags = flags;
+		while (src_cnt--)
+			iop_desc_set_zero_sum_src_addr(grp_start, src_cnt,
+						       dma_src[src_cnt]);
+	}
+	spin_unlock_bh(&iop_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+		     unsigned int src_cnt, const unsigned char *scf, size_t len,
+		     unsigned long flags)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *sw_desc, *g;
+	int slot_cnt, slots_per_op;
+	int continue_srcs;
+
+	if (unlikely(!len))
+		return NULL;
+	BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
+
+	dev_dbg(iop_chan->device->common.dev,
+		"%s src_cnt: %d len: %u flags: %lx\n",
+		__func__, src_cnt, len, flags);
+
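+	/* account for the implicit sources needed to continue a previous
+	 * P+Q operation (see the dma_maxpq comment in
+	 * include/linux/dmaengine.h)
+	 */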
+	if (dmaf_p_disabled_continue(flags))
+		continue_srcs = 1+src_cnt;
+	else if (dmaf_continue(flags))
+		continue_srcs = 3+src_cnt;
+	else
+		continue_srcs = 0+src_cnt;
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_pq_slot_count(len, continue_srcs, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		int i;
+
+		g = sw_desc->group_head;
+		iop_desc_set_byte_count(g, iop_chan, len);
+
+		/* even if P is disabled, its destination address (bits
+		 * [3:0]) must match Q's.  It is OK if P points to an
+		 * invalid address; it won't be written.
+		 */
+		if (flags & DMA_PREP_PQ_DISABLE_P)
+			dst[0] = dst[1] & 0x7;
+
+		iop_desc_set_pq_addr(g, dst);
+		sw_desc->async_tx.flags = flags;
+		for (i = 0; i < src_cnt; i++)
+			iop_desc_set_pq_src_addr(g, i, src[i], scf[i]);
+
+		/* if we are continuing a previous operation, factor in
+		 * the old p and q values; see the comment for dma_maxpq
+		 * in include/linux/dmaengine.h
+		 */
+		if (dmaf_p_disabled_continue(flags))
+			iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
+		else if (dmaf_continue(flags)) {
+			iop_desc_set_pq_src_addr(g, i++, dst[0], 0);
+			iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
+			iop_desc_set_pq_src_addr(g, i++, dst[1], 0);
+		}
+		iop_desc_init_pq(g, i, flags);
+	}
+	spin_unlock_bh(&iop_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+			 unsigned int src_cnt, const unsigned char *scf,
+			 size_t len, enum sum_check_flags *pqres,
+			 unsigned long flags)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *sw_desc, *g;
+	int slot_cnt, slots_per_op;
+
+	if (unlikely(!len))
+		return NULL;
+	BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
+
+	dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n",
+		__func__, src_cnt, len);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_pq_zero_sum_slot_count(len, src_cnt + 2, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		/* for validate operations p and q are tagged onto the
+		 * end of the source list
+		 */
+		int pq_idx = src_cnt;
+
+		g = sw_desc->group_head;
+		iop_desc_init_pq_zero_sum(g, src_cnt+2, flags);
+		iop_desc_set_pq_zero_sum_byte_count(g, len);
+		g->pq_check_result = pqres;
+		pr_debug("\t%s: g->pq_check_result: %p\n",
+			__func__, g->pq_check_result);
+		sw_desc->async_tx.flags = flags;
+		while (src_cnt--)
+			iop_desc_set_pq_zero_sum_src_addr(g, src_cnt,
+							  src[src_cnt],
+							  scf[src_cnt]);
+		iop_desc_set_pq_zero_sum_addr(g, pq_idx, src);
+	}
+	spin_unlock_bh(&iop_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static void iop_adma_free_chan_resources(struct dma_chan *chan)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *iter, *_iter;
+	int in_use_descs = 0;
+
+	iop_adma_slot_cleanup(iop_chan);
+
+	spin_lock_bh(&iop_chan->lock);
+	list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
+					chain_node) {
+		in_use_descs++;
+		list_del(&iter->chain_node);
+	}
+	list_for_each_entry_safe_reverse(
+		iter, _iter, &iop_chan->all_slots, slot_node) {
+		list_del(&iter->slot_node);
+		kfree(iter);
+		iop_chan->slots_allocated--;
+	}
+	iop_chan->last_used = NULL;
+
+	dev_dbg(iop_chan->device->common.dev, "%s slots_allocated %d\n",
+		__func__, iop_chan->slots_allocated);
+	spin_unlock_bh(&iop_chan->lock);
+
+	/* one is OK since we left it there on purpose */
+	if (in_use_descs > 1)
+		printk(KERN_ERR "IOP: Freeing %d in use descriptors!\n",
+			in_use_descs - 1);
+}
+
+/**
+ * iop_adma_status - poll the status of an ADMA transaction
+ * @chan: ADMA channel handle
+ * @cookie: ADMA transaction identifier
+ * @txstate: a holder for the current state of the channel or NULL
+ */
+static enum dma_status iop_adma_status(struct dma_chan *chan,
+					dma_cookie_t cookie,
+					struct dma_tx_state *txstate)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	int ret;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
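+	/* not complete yet: reap any finished descriptors and re-check */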
+	iop_adma_slot_cleanup(iop_chan);
+
+	return dma_cookie_status(chan, cookie, txstate);
+}
+
+static irqreturn_t iop_adma_eot_handler(int irq, void *data)
+{
+	struct iop_adma_chan *chan = data;
+
+	dev_dbg(chan->device->common.dev, "%s\n", __func__);
+
+	tasklet_schedule(&chan->irq_tasklet);
+
+	iop_adma_device_clear_eot_status(chan);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t iop_adma_eoc_handler(int irq, void *data)
+{
+	struct iop_adma_chan *chan = data;
+
+	dev_dbg(chan->device->common.dev, "%s\n", __func__);
+
+	tasklet_schedule(&chan->irq_tasklet);
+
+	iop_adma_device_clear_eoc_status(chan);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t iop_adma_err_handler(int irq, void *data)
+{
+	struct iop_adma_chan *chan = data;
+	unsigned long status = iop_chan_get_status(chan);
+
+	dev_err(chan->device->common.dev,
+		"error ( %s%s%s%s%s%s%s)\n",
+		iop_is_err_int_parity(status, chan) ? "int_parity " : "",
+		iop_is_err_mcu_abort(status, chan) ? "mcu_abort " : "",
+		iop_is_err_int_tabort(status, chan) ? "int_tabort " : "",
+		iop_is_err_int_mabort(status, chan) ? "int_mabort " : "",
+		iop_is_err_pci_tabort(status, chan) ? "pci_tabort " : "",
+		iop_is_err_pci_mabort(status, chan) ? "pci_mabort " : "",
+		iop_is_err_split_tx(status, chan) ? "split_tx " : "");
+
+	iop_adma_device_clear_err_status(chan);
+
+	BUG();
+
+	return IRQ_HANDLED;
+}
+
+static void iop_adma_issue_pending(struct dma_chan *chan)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+
+	if (iop_chan->pending) {
+		iop_chan->pending = 0;
+		iop_chan_append(iop_chan);
+	}
+}
+
+/*
+ * Perform a transaction to verify the HW works.
+ */
+#define IOP_ADMA_TEST_SIZE 2000
+
+static int iop_adma_memcpy_self_test(struct iop_adma_device *device)
+{
+	int i;
+	void *src, *dest;
+	dma_addr_t src_dma, dest_dma;
+	struct dma_chan *dma_chan;
+	dma_cookie_t cookie;
+	struct dma_async_tx_descriptor *tx;
+	int err = 0;
+	struct iop_adma_chan *iop_chan;
+
+	dev_dbg(device->common.dev, "%s\n", __func__);
+
+	src = kmalloc(IOP_ADMA_TEST_SIZE, GFP_KERNEL);
+	if (!src)
+		return -ENOMEM;
+	dest = kzalloc(IOP_ADMA_TEST_SIZE, GFP_KERNEL);
+	if (!dest) {
+		kfree(src);
+		return -ENOMEM;
+	}
+
+	/* Fill in src buffer */
+	for (i = 0; i < IOP_ADMA_TEST_SIZE; i++)
+		((u8 *) src)[i] = (u8)i;
+
+	/* Start copy, using first DMA channel */
+	dma_chan = container_of(device->common.channels.next,
+				struct dma_chan,
+				device_node);
+	if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	dest_dma = dma_map_single(dma_chan->device->dev, dest,
+				IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
+	src_dma = dma_map_single(dma_chan->device->dev, src,
+				IOP_ADMA_TEST_SIZE, DMA_TO_DEVICE);
+	tx = iop_adma_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
+				      IOP_ADMA_TEST_SIZE,
+				      DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+	cookie = iop_adma_tx_submit(tx);
+	iop_adma_issue_pending(dma_chan);
+	msleep(1);
+
+	if (iop_adma_status(dma_chan, cookie, NULL) !=
+			DMA_COMPLETE) {
+		dev_err(dma_chan->device->dev,
+			"Self-test copy timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	iop_chan = to_iop_adma_chan(dma_chan);
+	dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma,
+		IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
+	if (memcmp(src, dest, IOP_ADMA_TEST_SIZE)) {
+		dev_err(dma_chan->device->dev,
+			"Self-test copy failed compare, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+free_resources:
+	iop_adma_free_chan_resources(dma_chan);
+out:
+	kfree(src);
+	kfree(dest);
+	return err;
+}
+
+#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
+static int
+iop_adma_xor_val_self_test(struct iop_adma_device *device)
+{
+	int i, src_idx;
+	struct page *dest;
+	struct page *xor_srcs[IOP_ADMA_NUM_SRC_TEST];
+	struct page *zero_sum_srcs[IOP_ADMA_NUM_SRC_TEST + 1];
+	dma_addr_t dma_srcs[IOP_ADMA_NUM_SRC_TEST + 1];
+	dma_addr_t dest_dma;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_chan *dma_chan;
+	dma_cookie_t cookie;
+	u8 cmp_byte = 0;
+	u32 cmp_word;
+	u32 zero_sum_result;
+	int err = 0;
+	struct iop_adma_chan *iop_chan;
+
+	dev_dbg(device->common.dev, "%s\n", __func__);
+
+	for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) {
+		xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+		if (!xor_srcs[src_idx]) {
+			while (src_idx--)
+				__free_page(xor_srcs[src_idx]);
+			return -ENOMEM;
+		}
+	}
+
+	dest = alloc_page(GFP_KERNEL);
+	if (!dest) {
+		while (src_idx--)
+			__free_page(xor_srcs[src_idx]);
+		return -ENOMEM;
+	}
+
+	/* Fill in src buffers */
+	for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) {
+		u8 *ptr = page_address(xor_srcs[src_idx]);
+		for (i = 0; i < PAGE_SIZE; i++)
+			ptr[i] = (1 << src_idx);
+	}
+
+	for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++)
+		cmp_byte ^= (u8) (1 << src_idx);
+
+	cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+			(cmp_byte << 8) | cmp_byte;
+
+	memset(page_address(dest), 0, PAGE_SIZE);
+
+	dma_chan = container_of(device->common.channels.next,
+				struct dma_chan,
+				device_node);
+	if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	/* test xor */
+	dest_dma = dma_map_page(dma_chan->device->dev, dest, 0,
+				PAGE_SIZE, DMA_FROM_DEVICE);
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
+		dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
+					   0, PAGE_SIZE, DMA_TO_DEVICE);
+	tx = iop_adma_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+				   IOP_ADMA_NUM_SRC_TEST, PAGE_SIZE,
+				   DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+	cookie = iop_adma_tx_submit(tx);
+	iop_adma_issue_pending(dma_chan);
+	msleep(8);
+
+	if (iop_adma_status(dma_chan, cookie, NULL) !=
+		DMA_COMPLETE) {
+		dev_err(dma_chan->device->dev,
+			"Self-test xor timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	iop_chan = to_iop_adma_chan(dma_chan);
+	dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma,
+		PAGE_SIZE, DMA_FROM_DEVICE);
+	for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+		u32 *ptr = page_address(dest);
+		if (ptr[i] != cmp_word) {
+			dev_err(dma_chan->device->dev,
+				"Self-test xor failed compare, disabling\n");
+			err = -ENODEV;
+			goto free_resources;
+		}
+	}
+	dma_sync_single_for_device(&iop_chan->device->pdev->dev, dest_dma,
+		PAGE_SIZE, DMA_TO_DEVICE);
+
+	/* skip zero sum if the capability is not present */
+	if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
+		goto free_resources;
+
+	/* zero sum the sources with the destination page */
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
+		zero_sum_srcs[i] = xor_srcs[i];
+	zero_sum_srcs[i] = dest;
+
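+	/* seed with a non-zero value; a passing parity check must clear it */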
+	zero_sum_result = 1;
+
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++)
+		dma_srcs[i] = dma_map_page(dma_chan->device->dev,
+					   zero_sum_srcs[i], 0, PAGE_SIZE,
+					   DMA_TO_DEVICE);
+	tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
+				       IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+				       &zero_sum_result,
+				       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+	cookie = iop_adma_tx_submit(tx);
+	iop_adma_issue_pending(dma_chan);
+	msleep(8);
+
+	if (iop_adma_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
+		dev_err(dma_chan->device->dev,
+			"Self-test zero sum timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	if (zero_sum_result != 0) {
+		dev_err(dma_chan->device->dev,
+			"Self-test zero sum failed compare, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	/* test for non-zero parity sum */
+	zero_sum_result = 0;
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++)
+		dma_srcs[i] = dma_map_page(dma_chan->device->dev,
+					   zero_sum_srcs[i], 0, PAGE_SIZE,
+					   DMA_TO_DEVICE);
+	tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
+				       IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+				       &zero_sum_result,
+				       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+	cookie = iop_adma_tx_submit(tx);
+	iop_adma_issue_pending(dma_chan);
+	msleep(8);
+
+	if (iop_adma_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
+		dev_err(dma_chan->device->dev,
+			"Self-test non-zero sum timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	if (zero_sum_result != 1) {
+		dev_err(dma_chan->device->dev,
+			"Self-test non-zero sum failed compare, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+free_resources:
+	iop_adma_free_chan_resources(dma_chan);
+out:
+	src_idx = IOP_ADMA_NUM_SRC_TEST;
+	while (src_idx--)
+		__free_page(xor_srcs[src_idx]);
+	__free_page(dest);
+	return err;
+}
+
+#ifdef CONFIG_RAID6_PQ
+static int
+iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
+{
+	/* combined sources, software pq results, and extra hw pq results */
+	struct page *pq[IOP_ADMA_NUM_SRC_TEST+2+2];
+	/* ptr to the extra hw pq buffers defined above */
+	struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2];
+	/* address conversion buffers (dma_map / page_address) */
+	void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2];
+	dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST+2];
+	dma_addr_t *pq_dest = &pq_src[IOP_ADMA_NUM_SRC_TEST];
+
+	int i;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_chan *dma_chan;
+	dma_cookie_t cookie;
+	u32 zero_sum_result;
+	int err = 0;
+	struct device *dev;
+
+	dev_dbg(device->common.dev, "%s\n", __func__);
+
+	for (i = 0; i < ARRAY_SIZE(pq); i++) {
+		pq[i] = alloc_page(GFP_KERNEL);
+		if (!pq[i]) {
+			while (i--)
+				__free_page(pq[i]);
+			return -ENOMEM;
+		}
+	}
+
+	/* Fill in src buffers */
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) {
+		pq_sw[i] = page_address(pq[i]);
+		memset(pq_sw[i], 0x11111111 * (1<<i), PAGE_SIZE);
+	}
+	pq_sw[i] = page_address(pq[i]);
+	pq_sw[i+1] = page_address(pq[i+1]);
+
+	dma_chan = container_of(device->common.channels.next,
+				struct dma_chan,
+				device_node);
+	if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	dev = dma_chan->device->dev;
+
+	/* initialize the dests */
+	memset(page_address(pq_hw[0]), 0, PAGE_SIZE);
+	memset(page_address(pq_hw[1]), 0, PAGE_SIZE);
+
+	/* test pq */
+	pq_dest[0] = dma_map_page(dev, pq_hw[0], 0, PAGE_SIZE, DMA_FROM_DEVICE);
+	pq_dest[1] = dma_map_page(dev, pq_hw[1], 0, PAGE_SIZE, DMA_FROM_DEVICE);
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
+		pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+					 DMA_TO_DEVICE);
+
+	tx = iop_adma_prep_dma_pq(dma_chan, pq_dest, pq_src,
+				  IOP_ADMA_NUM_SRC_TEST, (u8 *)raid6_gfexp,
+				  PAGE_SIZE,
+				  DMA_PREP_INTERRUPT |
+				  DMA_CTRL_ACK);
+
+	cookie = iop_adma_tx_submit(tx);
+	iop_adma_issue_pending(dma_chan);
+	msleep(8);
+
+	if (iop_adma_status(dma_chan, cookie, NULL) !=
+		DMA_COMPLETE) {
+		dev_err(dev, "Self-test pq timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	raid6_call.gen_syndrome(IOP_ADMA_NUM_SRC_TEST+2, PAGE_SIZE, pq_sw);
+
+	if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST],
+		   page_address(pq_hw[0]), PAGE_SIZE) != 0) {
+		dev_err(dev, "Self-test p failed compare, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+	if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST+1],
+		   page_address(pq_hw[1]), PAGE_SIZE) != 0) {
+		dev_err(dev, "Self-test q failed compare, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	/* test correct zero sum using the software generated pq values */
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
+		pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+					 DMA_TO_DEVICE);
+
+	zero_sum_result = ~0;
+	tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
+				      pq_src, IOP_ADMA_NUM_SRC_TEST,
+				      raid6_gfexp, PAGE_SIZE, &zero_sum_result,
+				      DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
+
+	cookie = iop_adma_tx_submit(tx);
+	iop_adma_issue_pending(dma_chan);
+	msleep(8);
+
+	if (iop_adma_status(dma_chan, cookie, NULL) !=
+		DMA_COMPLETE) {
+		dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	if (zero_sum_result != 0) {
+		dev_err(dev, "Self-test pq-zero-sum failed to validate: %x\n",
+			zero_sum_result);
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	/* test incorrect zero sum */
+	i = IOP_ADMA_NUM_SRC_TEST;
+	memset(pq_sw[i] + 100, 0, 100);
+	memset(pq_sw[i+1] + 200, 0, 200);
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
+		pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
+					 DMA_TO_DEVICE);
+
+	zero_sum_result = 0;
+	tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
+				      pq_src, IOP_ADMA_NUM_SRC_TEST,
+				      raid6_gfexp, PAGE_SIZE, &zero_sum_result,
+				      DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
+
+	cookie = iop_adma_tx_submit(tx);
+	iop_adma_issue_pending(dma_chan);
+	msleep(8);
+
+	if (iop_adma_status(dma_chan, cookie, NULL) !=
+		DMA_COMPLETE) {
+		dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	if (zero_sum_result != (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT)) {
+		dev_err(dev, "Self-test !pq-zero-sum failed to validate: %x\n",
+			zero_sum_result);
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+free_resources:
+	iop_adma_free_chan_resources(dma_chan);
+out:
+	i = ARRAY_SIZE(pq);
+	while (i--)
+		__free_page(pq[i]);
+	return err;
+}
+#endif
+
+static int iop_adma_remove(struct platform_device *dev)
+{
+	struct iop_adma_device *device = platform_get_drvdata(dev);
+	struct dma_chan *chan, *_chan;
+	struct iop_adma_chan *iop_chan;
+	struct iop_adma_platform_data *plat_data = dev_get_platdata(&dev->dev);
+
+	dma_async_device_unregister(&device->common);
+
+	dma_free_coherent(&dev->dev, plat_data->pool_size,
+			device->dma_desc_pool_virt, device->dma_desc_pool);
+
+	list_for_each_entry_safe(chan, _chan, &device->common.channels,
+				device_node) {
+		iop_chan = to_iop_adma_chan(chan);
+		list_del(&chan->device_node);
+		kfree(iop_chan);
+	}
+	kfree(device);
+
+	return 0;
+}
+
+static int iop_adma_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	int ret = 0, i;
+	struct iop_adma_device *adev;
+	struct iop_adma_chan *iop_chan;
+	struct dma_device *dma_dev;
+	struct iop_adma_platform_data *plat_data = dev_get_platdata(&pdev->dev);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	if (!devm_request_mem_region(&pdev->dev, res->start,
+				resource_size(res), pdev->name))
+		return -EBUSY;
+
+	adev = kzalloc(sizeof(*adev), GFP_KERNEL);
+	if (!adev)
+		return -ENOMEM;
+	dma_dev = &adev->common;
+
+	/* allocate coherent memory for hardware descriptors
+	 * note: writecombine gives slightly better performance, but
+	 * requires that we explicitly flush the writes
+	 */
+	adev->dma_desc_pool_virt = dma_alloc_wc(&pdev->dev,
+						plat_data->pool_size,
+						&adev->dma_desc_pool,
+						GFP_KERNEL);
+	if (!adev->dma_desc_pool_virt) {
+		ret = -ENOMEM;
+		goto err_free_adev;
+	}
+
+	dev_dbg(&pdev->dev, "%s: allocated descriptor pool virt %p phys %p\n",
+		__func__, adev->dma_desc_pool_virt,
+		(void *) adev->dma_desc_pool);
+
+	adev->id = plat_data->hw_id;
+
+	/* discover transaction capabilities from the platform data */
+	dma_dev->cap_mask = plat_data->cap_mask;
+
+	adev->pdev = pdev;
+	platform_set_drvdata(pdev, adev);
+
+	INIT_LIST_HEAD(&dma_dev->channels);
+
+	/* set base routines */
+	dma_dev->device_alloc_chan_resources = iop_adma_alloc_chan_resources;
+	dma_dev->device_free_chan_resources = iop_adma_free_chan_resources;
+	dma_dev->device_tx_status = iop_adma_status;
+	dma_dev->device_issue_pending = iop_adma_issue_pending;
+	dma_dev->dev = &pdev->dev;
+
+	/* set prep routines based on capability */
+	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_memcpy = iop_adma_prep_dma_memcpy;
+	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+		dma_dev->max_xor = iop_adma_get_max_xor();
+		dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
+	}
+	if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_xor_val =
+			iop_adma_prep_dma_xor_val;
+	if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+		dma_set_maxpq(dma_dev, iop_adma_get_max_pq(), 0);
+		dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq;
+	}
+	if (dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_pq_val =
+			iop_adma_prep_dma_pq_val;
+	if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_interrupt =
+			iop_adma_prep_dma_interrupt;
+
+	iop_chan = kzalloc(sizeof(*iop_chan), GFP_KERNEL);
+	if (!iop_chan) {
+		ret = -ENOMEM;
+		goto err_free_dma;
+	}
+	iop_chan->device = adev;
+
+	iop_chan->mmr_base = devm_ioremap(&pdev->dev, res->start,
+					resource_size(res));
+	if (!iop_chan->mmr_base) {
+		ret = -ENOMEM;
+		goto err_free_iop_chan;
+	}
+	tasklet_init(&iop_chan->irq_tasklet, iop_adma_tasklet, (unsigned long)
+		iop_chan);
+
+	/* clear errors before enabling interrupts */
+	iop_adma_device_clear_err_status(iop_chan);
+
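+	/* hook up the three channel interrupts: end-of-transfer (eot),
+	 * end-of-chain (eoc) and error
+	 */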
+	for (i = 0; i < 3; i++) {
+		irq_handler_t handler[] = { iop_adma_eot_handler,
+					iop_adma_eoc_handler,
+					iop_adma_err_handler };
+		int irq = platform_get_irq(pdev, i);
+		if (irq < 0) {
+			ret = -ENXIO;
+			goto err_free_iop_chan;
+		} else {
+			ret = devm_request_irq(&pdev->dev, irq,
+					handler[i], 0, pdev->name, iop_chan);
+			if (ret)
+				goto err_free_iop_chan;
+		}
+	}
+
+	spin_lock_init(&iop_chan->lock);
+	INIT_LIST_HEAD(&iop_chan->chain);
+	INIT_LIST_HEAD(&iop_chan->all_slots);
+	iop_chan->common.device = dma_dev;
+	dma_cookie_init(&iop_chan->common);
+	list_add_tail(&iop_chan->common.device_node, &dma_dev->channels);
+
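+	/* run a self-test for each advertised capability before registering
+	 * the dma device
+	 */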
+	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+		ret = iop_adma_memcpy_self_test(adev);
+		dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
+		if (ret)
+			goto err_free_iop_chan;
+	}
+
+	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+		ret = iop_adma_xor_val_self_test(adev);
+		dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
+		if (ret)
+			goto err_free_iop_chan;
+	}
+
+	if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) &&
+	    dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) {
+		#ifdef CONFIG_RAID6_PQ
+		ret = iop_adma_pq_zero_sum_self_test(adev);
+		dev_dbg(&pdev->dev, "pq self test returned %d\n", ret);
+		#else
+		/* cannot test raid6, so do not publish the capability */
+		dma_cap_clear(DMA_PQ, dma_dev->cap_mask);
+		dma_cap_clear(DMA_PQ_VAL, dma_dev->cap_mask);
+		ret = 0;
+		#endif
+		if (ret)
+			goto err_free_iop_chan;
+	}
+
+	dev_info(&pdev->dev, "Intel(R) IOP: ( %s%s%s%s%s%s)\n",
+		 dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "",
+		 dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "",
+		 dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
+		 dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "",
+		 dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
+		 dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
+
+	dma_async_device_register(dma_dev);
+	goto out;
+
+ err_free_iop_chan:
+	kfree(iop_chan);
+ err_free_dma:
+	dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
+			adev->dma_desc_pool_virt, adev->dma_desc_pool);
+ err_free_adev:
+	kfree(adev);
+ out:
+	return ret;
+}
+
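+/* seed the hardware chain with a benign NULL memcpy descriptor so that
+ * tx_submit always has a valid chain tail to append to
+ */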
+static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan)
+{
+	struct iop_adma_desc_slot *sw_desc, *grp_start;
+	dma_cookie_t cookie;
+	int slot_cnt, slots_per_op;
+
+	dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_memcpy_slot_count(0, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		grp_start = sw_desc->group_head;
+
+		list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
+		async_tx_ack(&sw_desc->async_tx);
+		iop_desc_init_memcpy(grp_start, 0);
+		iop_desc_set_byte_count(grp_start, iop_chan, 0);
+		iop_desc_set_dest_addr(grp_start, iop_chan, 0);
+		iop_desc_set_memcpy_src_addr(grp_start, 0);
+
+		cookie = dma_cookie_assign(&sw_desc->async_tx);
+
+		/* initialize the completed cookie to be less than
+		 * the most recently used cookie
+		 */
+		iop_chan->common.completed_cookie = cookie - 1;
+
+		/* channel should not be busy */
+		BUG_ON(iop_chan_is_busy(iop_chan));
+
+		/* clear any prior error-status bits */
+		iop_adma_device_clear_err_status(iop_chan);
+
+		/* disable operation */
+		iop_chan_disable(iop_chan);
+
+		/* set the descriptor address */
+		iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys);
+
+		/* 1/ don't add pre-chained descriptors
+		 * 2/ dummy read to flush next_desc write
+		 */
+		BUG_ON(iop_desc_get_next_desc(sw_desc));
+
+		/* run the descriptor */
+		iop_chan_enable(iop_chan);
+	} else
+		dev_err(iop_chan->device->common.dev,
+			"failed to allocate null descriptor\n");
+	spin_unlock_bh(&iop_chan->lock);
+}
+
+static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
+{
+	struct iop_adma_desc_slot *sw_desc, *grp_start;
+	dma_cookie_t cookie;
+	int slot_cnt, slots_per_op;
+
+	dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_xor_slot_count(0, 2, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		grp_start = sw_desc->group_head;
+		list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
+		async_tx_ack(&sw_desc->async_tx);
+		iop_desc_init_null_xor(grp_start, 2, 0);
+		iop_desc_set_byte_count(grp_start, iop_chan, 0);
+		iop_desc_set_dest_addr(grp_start, iop_chan, 0);
+		iop_desc_set_xor_src_addr(grp_start, 0, 0);
+		iop_desc_set_xor_src_addr(grp_start, 1, 0);
+
+		cookie = dma_cookie_assign(&sw_desc->async_tx);
+
+		/* initialize the completed cookie to be less than
+		 * the most recently used cookie
+		 */
+		iop_chan->common.completed_cookie = cookie - 1;
+
+		/* channel should not be busy */
+		BUG_ON(iop_chan_is_busy(iop_chan));
+
+		/* clear any prior error-status bits */
+		iop_adma_device_clear_err_status(iop_chan);
+
+		/* disable operation */
+		iop_chan_disable(iop_chan);
+
+		/* set the descriptor address */
+		iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys);
+
+		/* 1/ don't add pre-chained descriptors
+		 * 2/ dummy read to flush next_desc write
+		 */
+		BUG_ON(iop_desc_get_next_desc(sw_desc));
+
+		/* run the descriptor */
+		iop_chan_enable(iop_chan);
+	} else
+		dev_err(iop_chan->device->common.dev,
+			"failed to allocate null descriptor\n");
+	spin_unlock_bh(&iop_chan->lock);
+}
+
+static struct platform_driver iop_adma_driver = {
+	.probe		= iop_adma_probe,
+	.remove		= iop_adma_remove,
+	.driver		= {
+		.name	= "iop-adma",
+	},
+};
+
+module_platform_driver(iop_adma_driver);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("IOP ADMA Engine Driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:iop-adma");
diff --git a/drivers/dma/ipu/Makefile b/drivers/dma/ipu/Makefile
new file mode 100644
index 0000000..6704cf4
--- /dev/null
+++ b/drivers/dma/ipu/Makefile
@@ -0,0 +1 @@
+obj-y	+= ipu_irq.o ipu_idmac.o
diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c
new file mode 100644
index 0000000..bbff52b
--- /dev/null
+++ b/drivers/dma/ipu/ipu_idmac.c
@@ -0,0 +1,1804 @@
+/*
+ * Copyright (C) 2008
+ * Guennadi Liakhovetski, DENX Software Engineering, <lg@denx.de>
+ *
+ * Copyright (C) 2005-2007 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/list.h>
+#include <linux/clk.h>
+#include <linux/vmalloc.h>
+#include <linux/string.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/dma/ipu-dma.h>
+
+#include "../dmaengine.h"
+#include "ipu_intern.h"
+
+#define FS_VF_IN_VALID	0x00000002
+#define FS_ENC_IN_VALID	0x00000001
+
+static int ipu_disable_channel(struct idmac *idmac, struct idmac_channel *ichan,
+			       bool wait_for_stop);
+
+/*
+ * There can be only one.  We could allocate it dynamically, but then we'd have
+ * to add an extra parameter to some functions, and use something as ugly as
+ *	struct ipu *ipu = to_ipu(to_idmac(ichan->dma_chan.device));
+ * in the ISR
+ */
+static struct ipu ipu_data;
+
+#define to_ipu(id) container_of(id, struct ipu, idmac)
+
+static u32 __idmac_read_icreg(struct ipu *ipu, unsigned long reg)
+{
+	return __raw_readl(ipu->reg_ic + reg);
+}
+
+#define idmac_read_icreg(ipu, reg) __idmac_read_icreg(ipu, reg - IC_CONF)
+
+static void __idmac_write_icreg(struct ipu *ipu, u32 value, unsigned long reg)
+{
+	__raw_writel(value, ipu->reg_ic + reg);
+}
+
+#define idmac_write_icreg(ipu, v, reg) __idmac_write_icreg(ipu, v, reg - IC_CONF)
+
+static u32 idmac_read_ipureg(struct ipu *ipu, unsigned long reg)
+{
+	return __raw_readl(ipu->reg_ipu + reg);
+}
+
+static void idmac_write_ipureg(struct ipu *ipu, u32 value, unsigned long reg)
+{
+	__raw_writel(value, ipu->reg_ipu + reg);
+}
+
+/*****************************************************************************
+ * IPU / IC common functions
+ */
+static void dump_idmac_reg(struct ipu *ipu)
+{
+	dev_dbg(ipu->dev, "IDMAC_CONF 0x%x, IC_CONF 0x%x, IDMAC_CHA_EN 0x%x, "
+		"IDMAC_CHA_PRI 0x%x, IDMAC_CHA_BUSY 0x%x\n",
+		idmac_read_icreg(ipu, IDMAC_CONF),
+		idmac_read_icreg(ipu, IC_CONF),
+		idmac_read_icreg(ipu, IDMAC_CHA_EN),
+		idmac_read_icreg(ipu, IDMAC_CHA_PRI),
+		idmac_read_icreg(ipu, IDMAC_CHA_BUSY));
+	dev_dbg(ipu->dev, "BUF0_RDY 0x%x, BUF1_RDY 0x%x, CUR_BUF 0x%x, "
+		"DB_MODE 0x%x, TASKS_STAT 0x%x\n",
+		idmac_read_ipureg(ipu, IPU_CHA_BUF0_RDY),
+		idmac_read_ipureg(ipu, IPU_CHA_BUF1_RDY),
+		idmac_read_ipureg(ipu, IPU_CHA_CUR_BUF),
+		idmac_read_ipureg(ipu, IPU_CHA_DB_MODE_SEL),
+		idmac_read_ipureg(ipu, IPU_TASKS_STAT));
+}
+
+static uint32_t bytes_per_pixel(enum pixel_fmt fmt)
+{
+	switch (fmt) {
+	case IPU_PIX_FMT_GENERIC:	/* generic data */
+	case IPU_PIX_FMT_RGB332:
+	case IPU_PIX_FMT_YUV420P:
+	case IPU_PIX_FMT_YUV422P:
+	default:
+		return 1;
+	case IPU_PIX_FMT_RGB565:
+	case IPU_PIX_FMT_YUYV:
+	case IPU_PIX_FMT_UYVY:
+		return 2;
+	case IPU_PIX_FMT_BGR24:
+	case IPU_PIX_FMT_RGB24:
+		return 3;
+	case IPU_PIX_FMT_GENERIC_32:	/* generic data */
+	case IPU_PIX_FMT_BGR32:
+	case IPU_PIX_FMT_RGB32:
+	case IPU_PIX_FMT_ABGR32:
+		return 4;
+	}
+}
+
+/* Enable direct write to memory by the Camera Sensor Interface */
+static void ipu_ic_enable_task(struct ipu *ipu, enum ipu_channel channel)
+{
+	uint32_t ic_conf, mask;
+
+	switch (channel) {
+	case IDMAC_IC_0:
+		mask = IC_CONF_PRPENC_EN;
+		break;
+	case IDMAC_IC_7:
+		mask = IC_CONF_RWS_EN | IC_CONF_PRPENC_EN;
+		break;
+	default:
+		return;
+	}
+	ic_conf = idmac_read_icreg(ipu, IC_CONF) | mask;
+	idmac_write_icreg(ipu, ic_conf, IC_CONF);
+}
+
+/* Called under spin_lock_irqsave(&ipu_data.lock) */
+static void ipu_ic_disable_task(struct ipu *ipu, enum ipu_channel channel)
+{
+	uint32_t ic_conf, mask;
+
+	switch (channel) {
+	case IDMAC_IC_0:
+		mask = IC_CONF_PRPENC_EN;
+		break;
+	case IDMAC_IC_7:
+		mask = IC_CONF_RWS_EN | IC_CONF_PRPENC_EN;
+		break;
+	default:
+		return;
+	}
+	ic_conf = idmac_read_icreg(ipu, IC_CONF) & ~mask;
+	idmac_write_icreg(ipu, ic_conf, IC_CONF);
+}
+
+static uint32_t ipu_channel_status(struct ipu *ipu, enum ipu_channel channel)
+{
+	uint32_t stat = TASK_STAT_IDLE;
+	uint32_t task_stat_reg = idmac_read_ipureg(ipu, IPU_TASKS_STAT);
+
+	switch (channel) {
+	case IDMAC_IC_7:
+		stat = (task_stat_reg & TSTAT_CSI2MEM_MASK) >>
+			TSTAT_CSI2MEM_OFFSET;
+		break;
+	case IDMAC_IC_0:
+	case IDMAC_SDC_0:
+	case IDMAC_SDC_1:
+	default:
+		break;
+	}
+	return stat;
+}
+
+struct chan_param_mem_planar {
+	/* Word 0 */
+	u32	xv:10;
+	u32	yv:10;
+	u32	xb:12;
+
+	u32	yb:12;
+	u32	res1:2;
+	u32	nsb:1;
+	u32	lnpb:6;
+	u32	ubo_l:11;
+
+	u32	ubo_h:15;
+	u32	vbo_l:17;
+
+	u32	vbo_h:9;
+	u32	res2:3;
+	u32	fw:12;
+	u32	fh_l:8;
+
+	u32	fh_h:4;
+	u32	res3:28;
+
+	/* Word 1 */
+	u32	eba0;
+
+	u32	eba1;
+
+	u32	bpp:3;
+	u32	sl:14;
+	u32	pfs:3;
+	u32	bam:3;
+	u32	res4:2;
+	u32	npb:6;
+	u32	res5:1;
+
+	u32	sat:2;
+	u32	res6:30;
+} __attribute__ ((packed));
+
+struct chan_param_mem_interleaved {
+	/* Word 0 */
+	u32	xv:10;
+	u32	yv:10;
+	u32	xb:12;
+
+	u32	yb:12;
+	u32	sce:1;
+	u32	res1:1;
+	u32	nsb:1;
+	u32	lnpb:6;
+	u32	sx:10;
+	u32	sy_l:1;
+
+	u32	sy_h:9;
+	u32	ns:10;
+	u32	sm:10;
+	u32	sdx_l:3;
+
+	u32	sdx_h:2;
+	u32	sdy:5;
+	u32	sdrx:1;
+	u32	sdry:1;
+	u32	sdr1:1;
+	u32	res2:2;
+	u32	fw:12;
+	u32	fh_l:8;
+
+	u32	fh_h:4;
+	u32	res3:28;
+
+	/* Word 1 */
+	u32	eba0;
+
+	u32	eba1;
+
+	u32	bpp:3;
+	u32	sl:14;
+	u32	pfs:3;
+	u32	bam:3;
+	u32	res4:2;
+	u32	npb:6;
+	u32	res5:1;
+
+	u32	sat:2;
+	u32	scc:1;
+	u32	ofs0:5;
+	u32	ofs1:5;
+	u32	ofs2:5;
+	u32	ofs3:5;
+	u32	wid0:3;
+	u32	wid1:3;
+	u32	wid2:3;
+
+	u32	wid3:3;
+	u32	dec_sel:1;
+	u32	res6:28;
+} __attribute__ ((packed));
+
+union chan_param_mem {
+	struct chan_param_mem_planar		pp;
+	struct chan_param_mem_interleaved	ip;
+};
+
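+/* the U and V plane offsets straddle 32-bit word boundaries in the channel
+ * parameter memory, so each is split into low/high bitfields
+ */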
+static void ipu_ch_param_set_plane_offset(union chan_param_mem *params,
+					  u32 u_offset, u32 v_offset)
+{
+	params->pp.ubo_l = u_offset & 0x7ff;
+	params->pp.ubo_h = u_offset >> 11;
+	params->pp.vbo_l = v_offset & 0x1ffff;
+	params->pp.vbo_h = v_offset >> 17;
+}
+
+static void ipu_ch_param_set_size(union chan_param_mem *params,
+				  uint32_t pixel_fmt, uint16_t width,
+				  uint16_t height, uint16_t stride)
+{
+	u32 u_offset;
+	u32 v_offset;
+
+	params->pp.fw		= width - 1;
+	params->pp.fh_l		= height - 1;
+	params->pp.fh_h		= (height - 1) >> 8;
+	params->pp.sl		= stride - 1;
+
+	switch (pixel_fmt) {
+	case IPU_PIX_FMT_GENERIC:
+		/* Represents 8-bit Generic data */
+		params->pp.bpp	= 3;
+		params->pp.pfs	= 7;
+		params->pp.npb	= 31;
+		params->pp.sat	= 2;		/* SAT = use 32-bit access */
+		break;
+	case IPU_PIX_FMT_GENERIC_32:
+		/* Represents 32-bit Generic data */
+		params->pp.bpp	= 0;
+		params->pp.pfs	= 7;
+		params->pp.npb	= 7;
+		params->pp.sat	= 2;		/* SAT = use 32-bit access */
+		break;
+	case IPU_PIX_FMT_RGB565:
+		params->ip.bpp	= 2;
+		params->ip.pfs	= 4;
+		params->ip.npb	= 15;
+		params->ip.sat	= 2;		/* SAT = 32-bit access */
+		params->ip.ofs0	= 0;		/* Red bit offset */
+		params->ip.ofs1	= 5;		/* Green bit offset */
+		params->ip.ofs2	= 11;		/* Blue bit offset */
+		params->ip.ofs3	= 16;		/* Alpha bit offset */
+		params->ip.wid0	= 4;		/* Red bit width - 1 */
+		params->ip.wid1	= 5;		/* Green bit width - 1 */
+		params->ip.wid2	= 4;		/* Blue bit width - 1 */
+		break;
+	case IPU_PIX_FMT_BGR24:
+		params->ip.bpp	= 1;		/* 24 BPP & RGB PFS */
+		params->ip.pfs	= 4;
+		params->ip.npb	= 7;
+		params->ip.sat	= 2;		/* SAT = 32-bit access */
+		params->ip.ofs0	= 0;		/* Red bit offset */
+		params->ip.ofs1	= 8;		/* Green bit offset */
+		params->ip.ofs2	= 16;		/* Blue bit offset */
+		params->ip.ofs3	= 24;		/* Alpha bit offset */
+		params->ip.wid0	= 7;		/* Red bit width - 1 */
+		params->ip.wid1	= 7;		/* Green bit width - 1 */
+		params->ip.wid2	= 7;		/* Blue bit width - 1 */
+		break;
+	case IPU_PIX_FMT_RGB24:
+		params->ip.bpp	= 1;		/* 24 BPP & RGB PFS */
+		params->ip.pfs	= 4;
+		params->ip.npb	= 7;
+		params->ip.sat	= 2;		/* SAT = 32-bit access */
+		params->ip.ofs0	= 16;		/* Red bit offset */
+		params->ip.ofs1	= 8;		/* Green bit offset */
+		params->ip.ofs2	= 0;		/* Blue bit offset */
+		params->ip.ofs3	= 24;		/* Alpha bit offset */
+		params->ip.wid0	= 7;		/* Red bit width - 1 */
+		params->ip.wid1	= 7;		/* Green bit width - 1 */
+		params->ip.wid2	= 7;		/* Blue bit width - 1 */
+		break;
+	case IPU_PIX_FMT_BGRA32:
+	case IPU_PIX_FMT_BGR32:
+	case IPU_PIX_FMT_ABGR32:
+		params->ip.bpp	= 0;
+		params->ip.pfs	= 4;
+		params->ip.npb	= 7;
+		params->ip.sat	= 2;		/* SAT = 32-bit access */
+		params->ip.ofs0	= 8;		/* Red bit offset */
+		params->ip.ofs1	= 16;		/* Green bit offset */
+		params->ip.ofs2	= 24;		/* Blue bit offset */
+		params->ip.ofs3	= 0;		/* Alpha bit offset */
+		params->ip.wid0	= 7;		/* Red bit width - 1 */
+		params->ip.wid1	= 7;		/* Green bit width - 1 */
+		params->ip.wid2	= 7;		/* Blue bit width - 1 */
+		params->ip.wid3	= 7;		/* Alpha bit width - 1 */
+		break;
+	case IPU_PIX_FMT_RGBA32:
+	case IPU_PIX_FMT_RGB32:
+		params->ip.bpp	= 0;
+		params->ip.pfs	= 4;
+		params->ip.npb	= 7;
+		params->ip.sat	= 2;		/* SAT = 32-bit access */
+		params->ip.ofs0	= 24;		/* Red bit offset */
+		params->ip.ofs1	= 16;		/* Green bit offset */
+		params->ip.ofs2	= 8;		/* Blue bit offset */
+		params->ip.ofs3	= 0;		/* Alpha bit offset */
+		params->ip.wid0	= 7;		/* Red bit width - 1 */
+		params->ip.wid1	= 7;		/* Green bit width - 1 */
+		params->ip.wid2	= 7;		/* Blue bit width - 1 */
+		params->ip.wid3	= 7;		/* Alpha bit width - 1 */
+		break;
+	case IPU_PIX_FMT_UYVY:
+		params->ip.bpp	= 2;
+		params->ip.pfs	= 6;
+		params->ip.npb	= 7;
+		params->ip.sat	= 2;		/* SAT = 32-bit access */
+		break;
+	case IPU_PIX_FMT_YUV420P2:
+	case IPU_PIX_FMT_YUV420P:
+		params->ip.bpp	= 3;
+		params->ip.pfs	= 3;
+		params->ip.npb	= 7;
+		params->ip.sat	= 2;		/* SAT = 32-bit access */
+		u_offset = stride * height;
+		v_offset = u_offset + u_offset / 4;
+		ipu_ch_param_set_plane_offset(params, u_offset, v_offset);
+		break;
+	case IPU_PIX_FMT_YVU422P:
+		params->ip.bpp	= 3;
+		params->ip.pfs	= 2;
+		params->ip.npb	= 7;
+		params->ip.sat	= 2;		/* SAT = 32-bit access */
+		v_offset = stride * height;
+		u_offset = v_offset + v_offset / 2;
+		ipu_ch_param_set_plane_offset(params, u_offset, v_offset);
+		break;
+	case IPU_PIX_FMT_YUV422P:
+		params->ip.bpp	= 3;
+		params->ip.pfs	= 2;
+		params->ip.npb	= 7;
+		params->ip.sat	= 2;		/* SAT = 32-bit access */
+		u_offset = stride * height;
+		v_offset = u_offset + u_offset / 2;
+		ipu_ch_param_set_plane_offset(params, u_offset, v_offset);
+		break;
+	default:
+		dev_err(ipu_data.dev,
+			"mx3 ipu: unimplemented pixel format %d\n", pixel_fmt);
+		break;
+	}
+
+	params->pp.nsb = 1;
+}
+
+static void ipu_ch_param_set_buffer(union chan_param_mem *params,
+				    dma_addr_t buf0, dma_addr_t buf1)
+{
+	params->pp.eba0 = buf0;
+	params->pp.eba1 = buf1;
+}
+
+static void ipu_ch_param_set_rotation(union chan_param_mem *params,
+				      enum ipu_rotate_mode rotate)
+{
+	params->pp.bam = rotate;
+}
+
+static void ipu_write_param_mem(uint32_t addr, uint32_t *data,
+				uint32_t num_words)
+{
+	for (; num_words > 0; num_words--) {
+		dev_dbg(ipu_data.dev,
+			"write param mem - addr = 0x%08X, data = 0x%08X\n",
+			addr, *data);
+		idmac_write_ipureg(&ipu_data, addr, IPU_IMA_ADDR);
+		idmac_write_ipureg(&ipu_data, *data++, IPU_IMA_DATA);
+		addr++;
+		if ((addr & 0x7) == 5) {
+			addr &= ~0x7;	/* set to word 0 */
+			addr += 8;	/* increment to next row */
+		}
+	}
+}
+
+static int calc_resize_coeffs(uint32_t in_size, uint32_t out_size,
+			      uint32_t *resize_coeff,
+			      uint32_t *downsize_coeff)
+{
+	uint32_t temp_size;
+	uint32_t temp_downsize;
+
+	*resize_coeff	= 1 << 13;
+	*downsize_coeff	= 1 << 13;
+
+	/* Cannot downsize more than 8:1 */
+	if (out_size << 3 < in_size)
+		return -EINVAL;
+
+	/* compute downsizing coefficient */
+	temp_downsize = 0;
+	temp_size = in_size;
+	while (temp_size >= out_size * 2 && temp_downsize < 2) {
+		temp_size >>= 1;
+		temp_downsize++;
+	}
+	*downsize_coeff = temp_downsize;
+
+	/*
+	 * compute the resizing coefficient using the following formula:
+	 *	resize_coeff = M * (SI - 1) / (SO - 1)
+	 * where M = 2^13, SI = input size, SO = output size
+	 */
+	*resize_coeff = (8192L * (temp_size - 1)) / (out_size - 1);
+	if (*resize_coeff >= 16384L) {
+		dev_err(ipu_data.dev, "Warning! Overflow on resize coeff.\n");
+		*resize_coeff = 0x3FFF;
+	}
+
+	dev_dbg(ipu_data.dev, "resizing from %u -> %u pixels, "
+		"downsize=%u, resize=%u.%lu (reg=%u)\n", in_size, out_size,
+		*downsize_coeff, *resize_coeff >= 8192L ? 1 : 0,
+		((*resize_coeff & 0x1FFF) * 10000L) / 8192L, *resize_coeff);
+
+	return 0;
+}
+
+static enum ipu_color_space format_to_colorspace(enum pixel_fmt fmt)
+{
+	switch (fmt) {
+	case IPU_PIX_FMT_RGB565:
+	case IPU_PIX_FMT_BGR24:
+	case IPU_PIX_FMT_RGB24:
+	case IPU_PIX_FMT_BGR32:
+	case IPU_PIX_FMT_RGB32:
+		return IPU_COLORSPACE_RGB;
+	default:
+		return IPU_COLORSPACE_YCBCR;
+	}
+}
+
+static int ipu_ic_init_prpenc(struct ipu *ipu,
+			      union ipu_channel_param *params, bool src_is_csi)
+{
+	uint32_t reg, ic_conf;
+	uint32_t downsize_coeff, resize_coeff;
+	enum ipu_color_space in_fmt, out_fmt;
+
+	/* Setup vertical resizing */
+	calc_resize_coeffs(params->video.in_height,
+			    params->video.out_height,
+			    &resize_coeff, &downsize_coeff);
+	reg = (downsize_coeff << 30) | (resize_coeff << 16);
+
+	/* Setup horizontal resizing */
+	calc_resize_coeffs(params->video.in_width,
+			    params->video.out_width,
+			    &resize_coeff, &downsize_coeff);
+	reg |= (downsize_coeff << 14) | resize_coeff;
+
+	/* Setup color space conversion */
+	in_fmt = format_to_colorspace(params->video.in_pixel_fmt);
+	out_fmt = format_to_colorspace(params->video.out_pixel_fmt);
+
+	/*
+	 * Colourspace conversion unsupported yet - see _init_csc() in
+	 * Freescale sources
+	 */
+	if (in_fmt != out_fmt) {
+		dev_err(ipu->dev, "Colourspace conversion unsupported!\n");
+		return -EOPNOTSUPP;
+	}
+
+	idmac_write_icreg(ipu, reg, IC_PRP_ENC_RSC);
+
+	ic_conf = idmac_read_icreg(ipu, IC_CONF);
+
+	if (src_is_csi)
+		ic_conf &= ~IC_CONF_RWS_EN;
+	else
+		ic_conf |= IC_CONF_RWS_EN;
+
+	idmac_write_icreg(ipu, ic_conf, IC_CONF);
+
+	return 0;
+}
+
+static uint32_t dma_param_addr(uint32_t dma_ch)
+{
+	/* Channel Parameter Memory */
+	return 0x10000 | (dma_ch << 4);
+}
+
+static void ipu_channel_set_priority(struct ipu *ipu, enum ipu_channel channel,
+				     bool prio)
+{
+	u32 reg = idmac_read_icreg(ipu, IDMAC_CHA_PRI);
+
+	if (prio)
+		reg |= 1UL << channel;
+	else
+		reg &= ~(1UL << channel);
+
+	idmac_write_icreg(ipu, reg, IDMAC_CHA_PRI);
+
+	dump_idmac_reg(ipu);
+}
+
+static uint32_t ipu_channel_conf_mask(enum ipu_channel channel)
+{
+	uint32_t mask;
+
+	switch (channel) {
+	case IDMAC_IC_0:
+	case IDMAC_IC_7:
+		mask = IPU_CONF_CSI_EN | IPU_CONF_IC_EN;
+		break;
+	case IDMAC_SDC_0:
+	case IDMAC_SDC_1:
+		mask = IPU_CONF_SDC_EN | IPU_CONF_DI_EN;
+		break;
+	default:
+		mask = 0;
+		break;
+	}
+
+	return mask;
+}
+
+/**
+ * ipu_enable_channel() - enable an IPU channel.
+ * @idmac:	IPU DMAC context.
+ * @ichan:	IDMAC channel.
+ * @return:	0 on success or negative error code on failure.
+ */
+static int ipu_enable_channel(struct idmac *idmac, struct idmac_channel *ichan)
+{
+	struct ipu *ipu = to_ipu(idmac);
+	enum ipu_channel channel = ichan->dma_chan.chan_id;
+	uint32_t reg;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ipu->lock, flags);
+
+	/* Reset to buffer 0 */
+	idmac_write_ipureg(ipu, 1UL << channel, IPU_CHA_CUR_BUF);
+	ichan->active_buffer = 0;
+	ichan->status = IPU_CHANNEL_ENABLED;
+
+	switch (channel) {
+	case IDMAC_SDC_0:
+	case IDMAC_SDC_1:
+	case IDMAC_IC_7:
+		ipu_channel_set_priority(ipu, channel, true);
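+		/* fall through */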
+	default:
+		break;
+	}
+
+	reg = idmac_read_icreg(ipu, IDMAC_CHA_EN);
+
+	idmac_write_icreg(ipu, reg | (1UL << channel), IDMAC_CHA_EN);
+
+	ipu_ic_enable_task(ipu, channel);
+
+	spin_unlock_irqrestore(&ipu->lock, flags);
+	return 0;
+}
+
+/**
+ * ipu_init_channel_buffer() - initialize a buffer for logical IPU channel.
+ * @ichan:	IDMAC channel.
+ * @pixel_fmt:	pixel format of buffer. Pixel format is a FOURCC ASCII code.
+ * @width:	width of buffer in pixels.
+ * @height:	height of buffer in pixels.
+ * @stride:	stride length of buffer in pixels.
+ * @rot_mode:	rotation mode of buffer. A rotation setting other than
+ *		IPU_ROTATE_VERT_FLIP should only be used for input buffers of
+ *		rotation channels.
+ * @phyaddr_0:	buffer 0 physical address.
+ * @phyaddr_1:	buffer 1 physical address. Setting this to a value other than
+ *		NULL enables double buffering mode.
+ * @return:	0 on success or negative error code on failure.
+ */
+static int ipu_init_channel_buffer(struct idmac_channel *ichan,
+				   enum pixel_fmt pixel_fmt,
+				   uint16_t width, uint16_t height,
+				   uint32_t stride,
+				   enum ipu_rotate_mode rot_mode,
+				   dma_addr_t phyaddr_0, dma_addr_t phyaddr_1)
+{
+	enum ipu_channel channel = ichan->dma_chan.chan_id;
+	struct idmac *idmac = to_idmac(ichan->dma_chan.device);
+	struct ipu *ipu = to_ipu(idmac);
+	union chan_param_mem params = {};
+	unsigned long flags;
+	uint32_t reg;
+	uint32_t stride_bytes;
+
+	stride_bytes = stride * bytes_per_pixel(pixel_fmt);
+
+	if (stride_bytes % 4) {
+		dev_err(ipu->dev,
+			"Stride length must be 32-bit aligned, stride = %d, bytes = %d\n",
+			stride, stride_bytes);
+		return -EINVAL;
+	}
+
+	/* IC channel's stride must be a multiple of 8 pixels */
+	if ((channel <= IDMAC_IC_13) && (stride % 8)) {
+		dev_err(ipu->dev, "Stride must be 8 pixel multiple\n");
+		return -EINVAL;
+	}
+
+	/* Build parameter memory data for DMA channel */
+	ipu_ch_param_set_size(&params, pixel_fmt, width, height, stride_bytes);
+	ipu_ch_param_set_buffer(&params, phyaddr_0, phyaddr_1);
+	ipu_ch_param_set_rotation(&params, rot_mode);
+
+	spin_lock_irqsave(&ipu->lock, flags);
+
+	ipu_write_param_mem(dma_param_addr(channel), (uint32_t *)&params, 10);
+
+	reg = idmac_read_ipureg(ipu, IPU_CHA_DB_MODE_SEL);
+
+	if (phyaddr_1)
+		reg |= 1UL << channel;
+	else
+		reg &= ~(1UL << channel);
+
+	idmac_write_ipureg(ipu, reg, IPU_CHA_DB_MODE_SEL);
+
+	ichan->status = IPU_CHANNEL_READY;
+
+	spin_unlock_irqrestore(&ipu->lock, flags);
+
+	return 0;
+}
+
+/**
+ * ipu_select_buffer() - mark a channel's buffer as ready.
+ * @channel:	channel ID.
+ * @buffer_n:	buffer number to mark ready.
+ */
+static void ipu_select_buffer(enum ipu_channel channel, int buffer_n)
+{
+	/* No locking - this is a write-one-to-set register, cleared by IPU */
+	if (buffer_n == 0)
+		/* Mark buffer 0 as ready. */
+		idmac_write_ipureg(&ipu_data, 1UL << channel, IPU_CHA_BUF0_RDY);
+	else
+		/* Mark buffer 1 as ready. */
+		idmac_write_ipureg(&ipu_data, 1UL << channel, IPU_CHA_BUF1_RDY);
+}
+
+/**
+ * ipu_update_channel_buffer() - update physical address of a channel buffer.
+ * @ichan:	IDMAC channel.
+ * @buffer_n:	buffer number to update.
+ *		0 or 1 are the only valid values.
+ * @phyaddr:	buffer physical address.
+ */
+/* Called under spin_lock(_irqsave)(&ichan->lock) */
+static void ipu_update_channel_buffer(struct idmac_channel *ichan,
+				      int buffer_n, dma_addr_t phyaddr)
+{
+	enum ipu_channel channel = ichan->dma_chan.chan_id;
+	uint32_t reg;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ipu_data.lock, flags);
+
+	if (buffer_n == 0) {
+		reg = idmac_read_ipureg(&ipu_data, IPU_CHA_BUF0_RDY);
+		if (reg & (1UL << channel)) {
+			ipu_ic_disable_task(&ipu_data, channel);
+			ichan->status = IPU_CHANNEL_READY;
+		}
+
+		/* 44.3.3.1.9 - Row Number 1 (WORD1, offset 0) */
+		idmac_write_ipureg(&ipu_data, dma_param_addr(channel) +
+				   0x0008UL, IPU_IMA_ADDR);
+		idmac_write_ipureg(&ipu_data, phyaddr, IPU_IMA_DATA);
+	} else {
+		reg = idmac_read_ipureg(&ipu_data, IPU_CHA_BUF1_RDY);
+		if (reg & (1UL << channel)) {
+			ipu_ic_disable_task(&ipu_data, channel);
+			ichan->status = IPU_CHANNEL_READY;
+		}
+
+		/* Check if double-buffering is already enabled */
+		reg = idmac_read_ipureg(&ipu_data, IPU_CHA_DB_MODE_SEL);
+
+		if (!(reg & (1UL << channel)))
+			idmac_write_ipureg(&ipu_data, reg | (1UL << channel),
+					   IPU_CHA_DB_MODE_SEL);
+
+		/* 44.3.3.1.9 - Row Number 1 (WORD1, offset 1) */
+		idmac_write_ipureg(&ipu_data, dma_param_addr(channel) +
+				   0x0009UL, IPU_IMA_ADDR);
+		idmac_write_ipureg(&ipu_data, phyaddr, IPU_IMA_DATA);
+	}
+
+	spin_unlock_irqrestore(&ipu_data.lock, flags);
+}
+
+/* Called under spin_lock_irqsave(&ichan->lock) */
+static int ipu_submit_buffer(struct idmac_channel *ichan,
+	struct idmac_tx_desc *desc, struct scatterlist *sg, int buf_idx)
+{
+	unsigned int chan_id = ichan->dma_chan.chan_id;
+	struct device *dev = &ichan->dma_chan.dev->device;
+
+	if (async_tx_test_ack(&desc->txd))
+		return -EINTR;
+
+	/*
+	 * On first invocation this shouldn't be necessary, the call to
+	 * ipu_init_channel_buffer() above will set addresses for us, so we
+	 * could make it conditional on status >= IPU_CHANNEL_ENABLED, but
+	 * doing it again shouldn't hurt either.
+	 */
+	ipu_update_channel_buffer(ichan, buf_idx, sg_dma_address(sg));
+
+	ipu_select_buffer(chan_id, buf_idx);
+	dev_dbg(dev, "Updated sg %p on channel 0x%x buffer %d\n",
+		sg, chan_id, buf_idx);
+
+	return 0;
+}
+
+/* Called under spin_lock_irqsave(&ichan->lock) */
+static int ipu_submit_channel_buffers(struct idmac_channel *ichan,
+				      struct idmac_tx_desc *desc)
+{
+	struct scatterlist *sg;
+	int i, ret = 0;
+
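+	/* fill at most the two hardware buffer slots (sg[0] and sg[1]) from
+	 * the descriptor's scatterlist
+	 */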
+	for (i = 0, sg = desc->sg; i < 2 && sg; i++) {
+		if (!ichan->sg[i]) {
+			ichan->sg[i] = sg;
+
+			ret = ipu_submit_buffer(ichan, desc, sg, i);
+			if (ret < 0)
+				return ret;
+
+			sg = sg_next(sg);
+		}
+	}
+
+	return ret;
+}
+
+static dma_cookie_t idmac_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct idmac_tx_desc *desc = to_tx_desc(tx);
+	struct idmac_channel *ichan = to_idmac_chan(tx->chan);
+	struct idmac *idmac = to_idmac(tx->chan->device);
+	struct ipu *ipu = to_ipu(idmac);
+	struct device *dev = &ichan->dma_chan.dev->device;
+	dma_cookie_t cookie;
+	unsigned long flags;
+	int ret;
+
+	/* Sanity check */
+	if (!list_empty(&desc->list)) {
+		/* The descriptor doesn't belong to the client */
+		dev_err(dev, "Descriptor %p not prepared!\n", tx);
+		return -EBUSY;
+	}
+
+	mutex_lock(&ichan->chan_mutex);
+
+	async_tx_clear_ack(tx);
+
+	if (ichan->status < IPU_CHANNEL_READY) {
+		struct idmac_video_param *video = &ichan->params.video;
+		/*
+		 * Initial buffer assignment - the first two sg-entries from
+		 * the descriptor will end up in the IDMAC buffers
+		 */
+		dma_addr_t dma_1 = sg_is_last(desc->sg) ? 0 :
+			sg_dma_address(&desc->sg[1]);
+
+		WARN_ON(ichan->sg[0] || ichan->sg[1]);
+
+		cookie = ipu_init_channel_buffer(ichan,
+						 video->out_pixel_fmt,
+						 video->out_width,
+						 video->out_height,
+						 video->out_stride,
+						 IPU_ROTATE_NONE,
+						 sg_dma_address(&desc->sg[0]),
+						 dma_1);
+		if (cookie < 0)
+			goto out;
+	}
+
+	dev_dbg(dev, "Submitting sg %p\n", &desc->sg[0]);
+
+	cookie = dma_cookie_assign(tx);
+
+	/* ipu->lock can be taken under ichan->lock, but not v.v. */
+	spin_lock_irqsave(&ichan->lock, flags);
+
+	list_add_tail(&desc->list, &ichan->queue);
+	/* submit_buffers() atomically verifies and fills empty sg slots */
+	ret = ipu_submit_channel_buffers(ichan, desc);
+
+	spin_unlock_irqrestore(&ichan->lock, flags);
+
+	if (ret < 0) {
+		cookie = ret;
+		goto dequeue;
+	}
+
+	if (ichan->status < IPU_CHANNEL_ENABLED) {
+		ret = ipu_enable_channel(idmac, ichan);
+		if (ret < 0) {
+			cookie = ret;
+			goto dequeue;
+		}
+	}
+
+	dump_idmac_reg(ipu);
+
+dequeue:
+	if (cookie < 0) {
+		spin_lock_irqsave(&ichan->lock, flags);
+		list_del_init(&desc->list);
+		spin_unlock_irqrestore(&ichan->lock, flags);
+		tx->cookie = cookie;
+		ichan->dma_chan.cookie = cookie;
+	}
+
+out:
+	mutex_unlock(&ichan->chan_mutex);
+
+	return cookie;
+}
+
+/* Called with ichan->chan_mutex held */
+static int idmac_desc_alloc(struct idmac_channel *ichan, int n)
+{
+	struct idmac_tx_desc *desc =
+		vmalloc(array_size(n, sizeof(struct idmac_tx_desc)));
+	struct idmac *idmac = to_idmac(ichan->dma_chan.device);
+
+	if (!desc)
+		return -ENOMEM;
+
+	/* No interrupts, just disable the tasklet for a moment */
+	tasklet_disable(&to_ipu(idmac)->tasklet);
+
+	ichan->n_tx_desc = n;
+	ichan->desc = desc;
+	INIT_LIST_HEAD(&ichan->queue);
+	INIT_LIST_HEAD(&ichan->free_list);
+
+	while (n--) {
+		struct dma_async_tx_descriptor *txd = &desc->txd;
+
+		memset(txd, 0, sizeof(*txd));
+		dma_async_tx_descriptor_init(txd, &ichan->dma_chan);
+		txd->tx_submit		= idmac_tx_submit;
+
+		list_add(&desc->list, &ichan->free_list);
+
+		desc++;
+	}
+
+	tasklet_enable(&to_ipu(idmac)->tasklet);
+
+	return 0;
+}
+
+/**
+ * ipu_init_channel() - initialize an IPU channel.
+ * @idmac:	IPU DMAC context.
+ * @ichan:	pointer to the channel object.
+ * @return      0 on success or negative error code on failure.
+ */
+static int ipu_init_channel(struct idmac *idmac, struct idmac_channel *ichan)
+{
+	union ipu_channel_param *params = &ichan->params;
+	uint32_t ipu_conf;
+	enum ipu_channel channel = ichan->dma_chan.chan_id;
+	unsigned long flags;
+	uint32_t reg;
+	struct ipu *ipu = to_ipu(idmac);
+	int ret = 0, n_desc = 0;
+
+	dev_dbg(ipu->dev, "init channel = %d\n", channel);
+
+	if (channel != IDMAC_SDC_0 && channel != IDMAC_SDC_1 &&
+	    channel != IDMAC_IC_7)
+		return -EINVAL;
+
+	spin_lock_irqsave(&ipu->lock, flags);
+
+	switch (channel) {
+	case IDMAC_IC_7:
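+		/*
+		 * IC_7 is the CSI frame capture channel: clear the CSI ->
+		 * memory write enable bit before configuring it
+		 */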
+		n_desc = 16;
+		reg = idmac_read_icreg(ipu, IC_CONF);
+		idmac_write_icreg(ipu, reg & ~IC_CONF_CSI_MEM_WR_EN, IC_CONF);
+		break;
+	case IDMAC_IC_0:
+		n_desc = 16;
+		reg = idmac_read_ipureg(ipu, IPU_FS_PROC_FLOW);
+		idmac_write_ipureg(ipu, reg & ~FS_ENC_IN_VALID, IPU_FS_PROC_FLOW);
+		ret = ipu_ic_init_prpenc(ipu, params, true);
+		break;
+	case IDMAC_SDC_0:
+	case IDMAC_SDC_1:
+		n_desc = 4;
+	default:
+		break;
+	}
+
+	ipu->channel_init_mask |= 1L << channel;
+
+	/* Enable IPU sub module */
+	ipu_conf = idmac_read_ipureg(ipu, IPU_CONF) |
+		ipu_channel_conf_mask(channel);
+	idmac_write_ipureg(ipu, ipu_conf, IPU_CONF);
+
+	spin_unlock_irqrestore(&ipu->lock, flags);
+
+	if (n_desc && !ichan->desc)
+		ret = idmac_desc_alloc(ichan, n_desc);
+
+	dump_idmac_reg(ipu);
+
+	return ret;
+}
+
+/**
+ * ipu_uninit_channel() - uninitialize an IPU channel.
+ * @idmac:	IPU DMAC context.
+ * @ichan:	pointer to the channel object.
+ */
+static void ipu_uninit_channel(struct idmac *idmac, struct idmac_channel *ichan)
+{
+	enum ipu_channel channel = ichan->dma_chan.chan_id;
+	unsigned long flags;
+	uint32_t reg;
+	unsigned long chan_mask = 1UL << channel;
+	uint32_t ipu_conf;
+	struct ipu *ipu = to_ipu(idmac);
+
+	spin_lock_irqsave(&ipu->lock, flags);
+
+	if (!(ipu->channel_init_mask & chan_mask)) {
+		dev_err(ipu->dev, "Channel already uninitialized %d\n",
+			channel);
+		spin_unlock_irqrestore(&ipu->lock, flags);
+		return;
+	}
+
+	/* Reset the double buffer */
+	reg = idmac_read_ipureg(ipu, IPU_CHA_DB_MODE_SEL);
+	idmac_write_ipureg(ipu, reg & ~chan_mask, IPU_CHA_DB_MODE_SEL);
+
+	ichan->sec_chan_en = false;
+
+	switch (channel) {
+	case IDMAC_IC_7:
+		reg = idmac_read_icreg(ipu, IC_CONF);
+		idmac_write_icreg(ipu, reg & ~(IC_CONF_RWS_EN | IC_CONF_PRPENC_EN),
+			     IC_CONF);
+		break;
+	case IDMAC_IC_0:
+		reg = idmac_read_icreg(ipu, IC_CONF);
+		idmac_write_icreg(ipu, reg & ~(IC_CONF_PRPENC_EN | IC_CONF_PRPENC_CSC1),
+				  IC_CONF);
+		break;
+	case IDMAC_SDC_0:
+	case IDMAC_SDC_1:
+	default:
+		break;
+	}
+
+	ipu->channel_init_mask &= ~(1L << channel);
+
+	ipu_conf = idmac_read_ipureg(ipu, IPU_CONF) &
+		~ipu_channel_conf_mask(channel);
+	idmac_write_ipureg(ipu, ipu_conf, IPU_CONF);
+
+	spin_unlock_irqrestore(&ipu->lock, flags);
+
+	ichan->n_tx_desc = 0;
+	vfree(ichan->desc);
+	ichan->desc = NULL;
+}
+
+/**
+ * ipu_disable_channel() - disable an IPU channel.
+ * @idmac:		IPU DMAC context.
+ * @ichan:		channel object pointer.
+ * @wait_for_stop:	flag to set whether to wait for channel end of frame or
+ *			return immediately.
+ * @return:		0 on success or negative error code on failure.
+ */
+static int ipu_disable_channel(struct idmac *idmac, struct idmac_channel *ichan,
+			       bool wait_for_stop)
+{
+	enum ipu_channel channel = ichan->dma_chan.chan_id;
+	struct ipu *ipu = to_ipu(idmac);
+	uint32_t reg;
+	unsigned long flags;
+	unsigned long chan_mask = 1UL << channel;
+	unsigned int timeout;
+
+	if (wait_for_stop && channel != IDMAC_SDC_1 && channel != IDMAC_SDC_0) {
+		timeout = 40;
+		/* This waiting always fails - related to the spurious IRQ problem */
+		while ((idmac_read_icreg(ipu, IDMAC_CHA_BUSY) & chan_mask) ||
+		       (ipu_channel_status(ipu, channel) == TASK_STAT_ACTIVE)) {
+			timeout--;
+			msleep(10);
+
+			if (!timeout) {
+				dev_dbg(ipu->dev,
+					"Warning: timeout waiting for channel %u to "
+					"stop: buf0_rdy = 0x%08X, buf1_rdy = 0x%08X, "
+					"busy = 0x%08X, tstat = 0x%08X\n", channel,
+					idmac_read_ipureg(ipu, IPU_CHA_BUF0_RDY),
+					idmac_read_ipureg(ipu, IPU_CHA_BUF1_RDY),
+					idmac_read_icreg(ipu, IDMAC_CHA_BUSY),
+					idmac_read_ipureg(ipu, IPU_TASKS_STAT));
+				break;
+			}
+		}
+		dev_dbg(ipu->dev, "timeout = %d * 10ms\n", 40 - timeout);
+	}
+	/* SDC BG and FG must be disabled before DMA is disabled */
+	if (wait_for_stop && (channel == IDMAC_SDC_0 ||
+			      channel == IDMAC_SDC_1)) {
+		for (timeout = 5;
+		     timeout && !ipu_irq_status(ichan->eof_irq); timeout--)
+			msleep(5);
+	}
+
+	spin_lock_irqsave(&ipu->lock, flags);
+
+	/* Disable IC task */
+	ipu_ic_disable_task(ipu, channel);
+
+	/* Disable DMA channel(s) */
+	reg = idmac_read_icreg(ipu, IDMAC_CHA_EN);
+	idmac_write_icreg(ipu, reg & ~chan_mask, IDMAC_CHA_EN);
+
+	spin_unlock_irqrestore(&ipu->lock, flags);
+
+	return 0;
+}
+
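+/*
+ * Advance to the next sg element of the current descriptor, or to the first
+ * sg element of the next queued descriptor when the current one is exhausted.
+ * Returns NULL if no further descriptors are queued.
+ */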
+static struct scatterlist *idmac_sg_next(struct idmac_channel *ichan,
+	struct idmac_tx_desc **desc, struct scatterlist *sg)
+{
+	struct scatterlist *sgnew = sg ? sg_next(sg) : NULL;
+
+	if (sgnew)
+		/* next sg-element in this list */
+		return sgnew;
+
+	if ((*desc)->list.next == &ichan->queue)
+		/* No more descriptors on the queue */
+		return NULL;
+
+	/* Fetch next descriptor */
+	*desc = list_entry((*desc)->list.next, struct idmac_tx_desc, list);
+	return (*desc)->sg;
+}
+
+/*
+ * We have several possibilities here:
+ * current BUF		next BUF
+ *
+ * not last sg		next not last sg
+ * not last sg		next last sg
+ * last sg		first sg from next descriptor
+ * last sg		NULL
+ *
+ * Besides, the descriptor queue might be empty or not. We process all these
+ * cases carefully.
+ */
+static irqreturn_t idmac_interrupt(int irq, void *dev_id)
+{
+	struct idmac_channel *ichan = dev_id;
+	struct device *dev = &ichan->dma_chan.dev->device;
+	unsigned int chan_id = ichan->dma_chan.chan_id;
+	struct scatterlist **sg, *sgnext, *sgnew = NULL;
+	/* Next transfer descriptor */
+	struct idmac_tx_desc *desc, *descnew;
+	bool done = false;
+	u32 ready0, ready1, curbuf, err;
+	unsigned long flags;
+	struct dmaengine_desc_callback cb;
+
+	/* IDMAC has cleared the respective BUFx_RDY bit, we manage the buffer */
+
+	dev_dbg(dev, "IDMAC irq %d, buf %d\n", irq, ichan->active_buffer);
+
+	spin_lock_irqsave(&ipu_data.lock, flags);
+
+	ready0	= idmac_read_ipureg(&ipu_data, IPU_CHA_BUF0_RDY);
+	ready1	= idmac_read_ipureg(&ipu_data, IPU_CHA_BUF1_RDY);
+	curbuf	= idmac_read_ipureg(&ipu_data, IPU_CHA_CUR_BUF);
+	err	= idmac_read_ipureg(&ipu_data, IPU_INT_STAT_4);
+
+	if (err & (1 << chan_id)) {
+		idmac_write_ipureg(&ipu_data, 1 << chan_id, IPU_INT_STAT_4);
+		spin_unlock_irqrestore(&ipu_data.lock, flags);
+		/*
+		 * Doing
+		 * ichan->sg[0] = ichan->sg[1] = NULL;
+		 * here would force a channel re-enable on the next tx_submit(),
+		 * but that is dirty - think about descriptors with multiple
+		 * sg elements.
+		 */
+		dev_warn(dev, "NFB4EOF on channel %d, ready %x, %x, cur %x\n",
+			 chan_id, ready0, ready1, curbuf);
+		return IRQ_HANDLED;
+	}
+	spin_unlock_irqrestore(&ipu_data.lock, flags);
+
+	/* Other interrupts do not interfere with this channel */
+	spin_lock(&ichan->lock);
+	if (unlikely((ichan->active_buffer && (ready1 >> chan_id) & 1) ||
+		     (!ichan->active_buffer && (ready0 >> chan_id) & 1)
+		     )) {
+		spin_unlock(&ichan->lock);
+		dev_dbg(dev,
+			"IRQ with active buffer still ready on channel %x, "
+			"active %d, ready %x, %x!\n", chan_id,
+			ichan->active_buffer, ready0, ready1);
+		return IRQ_NONE;
+	}
+
+	if (unlikely(list_empty(&ichan->queue))) {
+		ichan->sg[ichan->active_buffer] = NULL;
+		spin_unlock(&ichan->lock);
+		dev_err(dev,
+			"IRQ without queued buffers on channel %x, active %d, "
+			"ready %x, %x!\n", chan_id,
+			ichan->active_buffer, ready0, ready1);
+		return IRQ_NONE;
+	}
+
+	/*
+	 * active_buffer is a software flag: it shows which buffer we are
+	 * currently expecting back from the hardware; the IDMAC should
+	 * already be processing the other buffer
+	 */
+	sg = &ichan->sg[ichan->active_buffer];
+	sgnext = ichan->sg[!ichan->active_buffer];
+
+	if (!*sg) {
+		spin_unlock(&ichan->lock);
+		return IRQ_HANDLED;
+	}
+
+	desc = list_entry(ichan->queue.next, struct idmac_tx_desc, list);
+	descnew = desc;
+
+	dev_dbg(dev, "IDMAC irq %d, dma %#llx, next dma %#llx, current %d, curbuf %#x\n",
+		irq, (u64)sg_dma_address(*sg),
+		sgnext ? (u64)sg_dma_address(sgnext) : 0,
+		ichan->active_buffer, curbuf);
+
+	/* Find the descriptor of sgnext */
+	sgnew = idmac_sg_next(ichan, &descnew, *sg);
+	if (sgnext != sgnew)
+		dev_err(dev, "Submitted buffer %p, next buffer %p\n", sgnext, sgnew);
+
+	/*
+	 * if sgnext == NULL sg must be the last element in a scatterlist and
+	 * queue must be empty
+	 */
+	if (unlikely(!sgnext)) {
+		if (!WARN_ON(sg_next(*sg)))
+			dev_dbg(dev, "Underrun on channel %x\n", chan_id);
+		ichan->sg[!ichan->active_buffer] = sgnew;
+
+		if (unlikely(sgnew)) {
+			ipu_submit_buffer(ichan, descnew, sgnew, !ichan->active_buffer);
+		} else {
+			spin_lock_irqsave(&ipu_data.lock, flags);
+			ipu_ic_disable_task(&ipu_data, chan_id);
+			spin_unlock_irqrestore(&ipu_data.lock, flags);
+			ichan->status = IPU_CHANNEL_READY;
+			/* Continue to check for complete descriptor */
+		}
+	}
+
+	/* Calculate and submit the next sg element */
+	sgnew = idmac_sg_next(ichan, &descnew, sgnew);
+
+	if (unlikely(!sg_next(*sg)) || !sgnext) {
+		/*
+		 * Last element in scatterlist done, remove from the queue,
+		 * _init for debugging
+		 */
+		list_del_init(&desc->list);
+		done = true;
+	}
+
+	*sg = sgnew;
+
+	if (likely(sgnew) &&
+	    ipu_submit_buffer(ichan, descnew, sgnew, ichan->active_buffer) < 0) {
+		dmaengine_desc_get_callback(&descnew->txd, &cb);
+
+		list_del_init(&descnew->list);
+		spin_unlock(&ichan->lock);
+
+		dmaengine_desc_callback_invoke(&cb, NULL);
+		spin_lock(&ichan->lock);
+	}
+
+	/* Flip the active buffer - even if update above failed */
+	ichan->active_buffer = !ichan->active_buffer;
+	if (done)
+		dma_cookie_complete(&desc->txd);
+
+	dmaengine_desc_get_callback(&desc->txd, &cb);
+
+	spin_unlock(&ichan->lock);
+
+	if (done && (desc->txd.flags & DMA_PREP_INTERRUPT))
+		dmaengine_desc_callback_invoke(&cb, NULL);
+
+	return IRQ_HANDLED;
+}
+
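+/*
+ * Garbage-collector tasklet: move acked descriptors back to the free list
+ * and clear any ichan->sg[] pointers still referencing their sg entries.
+ */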
+static void ipu_gc_tasklet(unsigned long arg)
+{
+	struct ipu *ipu = (struct ipu *)arg;
+	int i;
+
+	for (i = 0; i < IPU_CHANNELS_NUM; i++) {
+		struct idmac_channel *ichan = ipu->channel + i;
+		struct idmac_tx_desc *desc;
+		unsigned long flags;
+		struct scatterlist *sg;
+		int j, k;
+
+		for (j = 0; j < ichan->n_tx_desc; j++) {
+			desc = ichan->desc + j;
+			spin_lock_irqsave(&ichan->lock, flags);
+			if (async_tx_test_ack(&desc->txd)) {
+				list_move(&desc->list, &ichan->free_list);
+				for_each_sg(desc->sg, sg, desc->sg_len, k) {
+					if (ichan->sg[0] == sg)
+						ichan->sg[0] = NULL;
+					else if (ichan->sg[1] == sg)
+						ichan->sg[1] = NULL;
+				}
+				async_tx_clear_ack(&desc->txd);
+			}
+			spin_unlock_irqrestore(&ichan->lock, flags);
+		}
+	}
+}
+
+/* Allocate and initialise a transfer descriptor. */
+static struct dma_async_tx_descriptor *idmac_prep_slave_sg(struct dma_chan *chan,
+		struct scatterlist *sgl, unsigned int sg_len,
+		enum dma_transfer_direction direction, unsigned long tx_flags,
+		void *context)
+{
+	struct idmac_channel *ichan = to_idmac_chan(chan);
+	struct idmac_tx_desc *desc = NULL;
+	struct dma_async_tx_descriptor *txd = NULL;
+	unsigned long flags;
+
+	/* We can only handle these three channels so far */
+	if (chan->chan_id != IDMAC_SDC_0 && chan->chan_id != IDMAC_SDC_1 &&
+	    chan->chan_id != IDMAC_IC_7)
+		return NULL;
+
+	if (!is_slave_direction(direction)) {
+		dev_err(chan->device->dev, "Invalid DMA direction %d!\n", direction);
+		return NULL;
+	}
+
+	mutex_lock(&ichan->chan_mutex);
+
+	spin_lock_irqsave(&ichan->lock, flags);
+	if (!list_empty(&ichan->free_list)) {
+		desc = list_entry(ichan->free_list.next,
+				  struct idmac_tx_desc, list);
+
+		list_del_init(&desc->list);
+
+		desc->sg_len	= sg_len;
+		desc->sg	= sgl;
+		txd		= &desc->txd;
+		txd->flags	= tx_flags;
+	}
+	spin_unlock_irqrestore(&ichan->lock, flags);
+
+	mutex_unlock(&ichan->chan_mutex);
+
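+	/* Let the GC tasklet reclaim already acked descriptors */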
+	tasklet_schedule(&to_ipu(to_idmac(chan->device))->tasklet);
+
+	return txd;
+}
+
+/* Re-select the current buffer and re-activate the channel */
+static void idmac_issue_pending(struct dma_chan *chan)
+{
+	struct idmac_channel *ichan = to_idmac_chan(chan);
+	struct idmac *idmac = to_idmac(chan->device);
+	struct ipu *ipu = to_ipu(idmac);
+	unsigned long flags;
+
+	/* This is not always needed, but doesn't hurt either */
+	spin_lock_irqsave(&ipu->lock, flags);
+	ipu_select_buffer(chan->chan_id, ichan->active_buffer);
+	spin_unlock_irqrestore(&ipu->lock, flags);
+
+	/*
+	 * We might need to perform some parts of the initialisation from
+	 * ipu_enable_channel(), but not all: we do not want to reset to
+	 * buffer 0, and we don't need to set the priority again either.
+	 * Re-enabling the task and the channel might be a good idea, though.
+	 */
+}
+
+static int idmac_pause(struct dma_chan *chan)
+{
+	struct idmac_channel *ichan = to_idmac_chan(chan);
+	struct idmac *idmac = to_idmac(chan->device);
+	struct ipu *ipu = to_ipu(idmac);
+	struct list_head *list, *tmp;
+	unsigned long flags;
+
+	mutex_lock(&ichan->chan_mutex);
+
+	spin_lock_irqsave(&ipu->lock, flags);
+	ipu_ic_disable_task(ipu, chan->chan_id);
+
+	/* Return all descriptors into "prepared" state */
+	list_for_each_safe(list, tmp, &ichan->queue)
+		list_del_init(list);
+
+	ichan->sg[0] = NULL;
+	ichan->sg[1] = NULL;
+
+	spin_unlock_irqrestore(&ipu->lock, flags);
+
+	ichan->status = IPU_CHANNEL_INITIALIZED;
+
+	mutex_unlock(&ichan->chan_mutex);
+
+	return 0;
+}
+
+static int __idmac_terminate_all(struct dma_chan *chan)
+{
+	struct idmac_channel *ichan = to_idmac_chan(chan);
+	struct idmac *idmac = to_idmac(chan->device);
+	struct ipu *ipu = to_ipu(idmac);
+	unsigned long flags;
+	int i;
+
+	ipu_disable_channel(idmac, ichan,
+			    ichan->status >= IPU_CHANNEL_ENABLED);
+
+	tasklet_disable(&ipu->tasklet);
+
+	/* ichan->queue is modified in ISR, have to spinlock */
+	spin_lock_irqsave(&ichan->lock, flags);
+	list_splice_init(&ichan->queue, &ichan->free_list);
+
+	if (ichan->desc)
+		for (i = 0; i < ichan->n_tx_desc; i++) {
+			struct idmac_tx_desc *desc = ichan->desc + i;
+			if (list_empty(&desc->list))
+				/* Descriptor was prepared, but not submitted */
+				list_add(&desc->list, &ichan->free_list);
+
+			async_tx_clear_ack(&desc->txd);
+		}
+
+	ichan->sg[0] = NULL;
+	ichan->sg[1] = NULL;
+	spin_unlock_irqrestore(&ichan->lock, flags);
+
+	tasklet_enable(&ipu->tasklet);
+
+	ichan->status = IPU_CHANNEL_INITIALIZED;
+
+	return 0;
+}
+
+static int idmac_terminate_all(struct dma_chan *chan)
+{
+	struct idmac_channel *ichan = to_idmac_chan(chan);
+	int ret;
+
+	mutex_lock(&ichan->chan_mutex);
+
+	ret = __idmac_terminate_all(chan);
+
+	mutex_unlock(&ichan->chan_mutex);
+
+	return ret;
+}
+
+#ifdef DEBUG
+static irqreturn_t ic_sof_irq(int irq, void *dev_id)
+{
+	struct idmac_channel *ichan = dev_id;
+	printk(KERN_DEBUG "Got SOF IRQ %d on Channel %d\n",
+	       irq, ichan->dma_chan.chan_id);
+	disable_irq_nosync(irq);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t ic_eof_irq(int irq, void *dev_id)
+{
+	struct idmac_channel *ichan = dev_id;
+	printk(KERN_DEBUG "Got EOF IRQ %d on Channel %d\n",
+	       irq, ichan->dma_chan.chan_id);
+	disable_irq_nosync(irq);
+	return IRQ_HANDLED;
+}
+
+static int ic_sof = -EINVAL, ic_eof = -EINVAL;
+#endif
+
+static int idmac_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct idmac_channel *ichan = to_idmac_chan(chan);
+	struct idmac *idmac = to_idmac(chan->device);
+	int ret;
+
+	/* dmaengine.c now guarantees to only offer free channels */
+	BUG_ON(chan->client_count > 1);
+	WARN_ON(ichan->status != IPU_CHANNEL_FREE);
+
+	dma_cookie_init(chan);
+
+	ret = ipu_irq_map(chan->chan_id);
+	if (ret < 0)
+		goto eimap;
+
+	ichan->eof_irq = ret;
+
+	/*
+	 * It is important to disable the channel first, because it may have
+	 * been used before us, e.g., by the bootloader
+	 */
+	ipu_disable_channel(idmac, ichan, true);
+
+	ret = ipu_init_channel(idmac, ichan);
+	if (ret < 0)
+		goto eichan;
+
+	ret = request_irq(ichan->eof_irq, idmac_interrupt, 0,
+			  ichan->eof_name, ichan);
+	if (ret < 0)
+		goto erirq;
+
+#ifdef DEBUG
+	if (chan->chan_id == IDMAC_IC_7) {
+		ic_sof = ipu_irq_map(69);
+		if (ic_sof > 0) {
+			ret = request_irq(ic_sof, ic_sof_irq, 0, "IC SOF", ichan);
+			if (ret)
+				dev_err(&chan->dev->device, "request irq failed for IC SOF");
+		}
+		ic_eof = ipu_irq_map(70);
+		if (ic_eof > 0) {
+			ret = request_irq(ic_eof, ic_eof_irq, 0, "IC EOF", ichan);
+			if (ret)
+				dev_err(&chan->dev->device, "request irq failed for IC EOF");
+		}
+	}
+#endif
+
+	ichan->status = IPU_CHANNEL_INITIALIZED;
+
+	dev_dbg(&chan->dev->device, "Found channel 0x%x, irq %d\n",
+		chan->chan_id, ichan->eof_irq);
+
+	return ret;
+
+erirq:
+	ipu_uninit_channel(idmac, ichan);
+eichan:
+	ipu_irq_unmap(chan->chan_id);
+eimap:
+	return ret;
+}
+
+static void idmac_free_chan_resources(struct dma_chan *chan)
+{
+	struct idmac_channel *ichan = to_idmac_chan(chan);
+	struct idmac *idmac = to_idmac(chan->device);
+
+	mutex_lock(&ichan->chan_mutex);
+
+	__idmac_terminate_all(chan);
+
+	if (ichan->status > IPU_CHANNEL_FREE) {
+#ifdef DEBUG
+		if (chan->chan_id == IDMAC_IC_7) {
+			if (ic_sof > 0) {
+				free_irq(ic_sof, ichan);
+				ipu_irq_unmap(69);
+				ic_sof = -EINVAL;
+			}
+			if (ic_eof > 0) {
+				free_irq(ic_eof, ichan);
+				ipu_irq_unmap(70);
+				ic_eof = -EINVAL;
+			}
+		}
+#endif
+		free_irq(ichan->eof_irq, ichan);
+		ipu_irq_unmap(chan->chan_id);
+	}
+
+	ichan->status = IPU_CHANNEL_FREE;
+
+	ipu_uninit_channel(idmac, ichan);
+
+	mutex_unlock(&ichan->chan_mutex);
+
+	tasklet_schedule(&to_ipu(idmac)->tasklet);
+}
+
+static enum dma_status idmac_tx_status(struct dma_chan *chan,
+		       dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	return dma_cookie_status(chan, cookie, txstate);
+}
+
+static int __init ipu_idmac_init(struct ipu *ipu)
+{
+	struct idmac *idmac = &ipu->idmac;
+	struct dma_device *dma = &idmac->dma;
+	int i;
+
+	dma_cap_set(DMA_SLAVE, dma->cap_mask);
+	dma_cap_set(DMA_PRIVATE, dma->cap_mask);
+
+	/* Compulsory common fields */
+	dma->dev				= ipu->dev;
+	dma->device_alloc_chan_resources	= idmac_alloc_chan_resources;
+	dma->device_free_chan_resources		= idmac_free_chan_resources;
+	dma->device_tx_status			= idmac_tx_status;
+	dma->device_issue_pending		= idmac_issue_pending;
+
+	/* Compulsory for DMA_SLAVE fields */
+	dma->device_prep_slave_sg		= idmac_prep_slave_sg;
+	dma->device_pause			= idmac_pause;
+	dma->device_terminate_all		= idmac_terminate_all;
+
+	INIT_LIST_HEAD(&dma->channels);
+	for (i = 0; i < IPU_CHANNELS_NUM; i++) {
+		struct idmac_channel *ichan = ipu->channel + i;
+		struct dma_chan *dma_chan = &ichan->dma_chan;
+
+		spin_lock_init(&ichan->lock);
+		mutex_init(&ichan->chan_mutex);
+
+		ichan->status		= IPU_CHANNEL_FREE;
+		ichan->sec_chan_en	= false;
+		snprintf(ichan->eof_name, sizeof(ichan->eof_name), "IDMAC EOF %d", i);
+
+		dma_chan->device	= &idmac->dma;
+		dma_cookie_init(dma_chan);
+		dma_chan->chan_id	= i;
+		list_add_tail(&dma_chan->device_node, &dma->channels);
+	}
+
+	idmac_write_icreg(ipu, 0x00000070, IDMAC_CONF);
+
+	return dma_async_device_register(&idmac->dma);
+}
+
+static void ipu_idmac_exit(struct ipu *ipu)
+{
+	int i;
+	struct idmac *idmac = &ipu->idmac;
+
+	for (i = 0; i < IPU_CHANNELS_NUM; i++) {
+		struct idmac_channel *ichan = ipu->channel + i;
+
+		idmac_terminate_all(&ichan->dma_chan);
+	}
+
+	dma_async_device_unregister(&idmac->dma);
+}
+
+/*****************************************************************************
+ * IPU common probe / remove
+ */
+
+static int __init ipu_probe(struct platform_device *pdev)
+{
+	struct resource *mem_ipu, *mem_ic;
+	int ret;
+
+	spin_lock_init(&ipu_data.lock);
+
+	mem_ipu	= platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	mem_ic	= platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!mem_ipu || !mem_ic)
+		return -EINVAL;
+
+	ipu_data.dev = &pdev->dev;
+
+	platform_set_drvdata(pdev, &ipu_data);
+
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0)
+		goto err_noirq;
+
+	ipu_data.irq_fn = ret;
+	ret = platform_get_irq(pdev, 1);
+	if (ret < 0)
+		goto err_noirq;
+
+	ipu_data.irq_err = ret;
+
+	dev_dbg(&pdev->dev, "fn irq %u, err irq %u\n",
+		ipu_data.irq_fn, ipu_data.irq_err);
+
+	/* Remap IPU common registers */
+	ipu_data.reg_ipu = ioremap(mem_ipu->start, resource_size(mem_ipu));
+	if (!ipu_data.reg_ipu) {
+		ret = -ENOMEM;
+		goto err_ioremap_ipu;
+	}
+
+	/* Remap Image Converter and Image DMA Controller registers */
+	ipu_data.reg_ic = ioremap(mem_ic->start, resource_size(mem_ic));
+	if (!ipu_data.reg_ic) {
+		ret = -ENOMEM;
+		goto err_ioremap_ic;
+	}
+
+	/* Get IPU clock */
+	ipu_data.ipu_clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(ipu_data.ipu_clk)) {
+		ret = PTR_ERR(ipu_data.ipu_clk);
+		goto err_clk_get;
+	}
+
+	/* Make sure IPU HSP clock is running */
+	clk_prepare_enable(ipu_data.ipu_clk);
+
+	/* Disable all interrupts */
+	idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_1);
+	idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_2);
+	idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_3);
+	idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_4);
+	idmac_write_ipureg(&ipu_data, 0, IPU_INT_CTRL_5);
+
+	dev_dbg(&pdev->dev, "%s @ 0x%08lx, fn irq %u, err irq %u\n", pdev->name,
+		(unsigned long)mem_ipu->start, ipu_data.irq_fn, ipu_data.irq_err);
+
+	ret = ipu_irq_attach_irq(&ipu_data, pdev);
+	if (ret < 0)
+		goto err_attach_irq;
+
+	/* Initialize DMA engine */
+	ret = ipu_idmac_init(&ipu_data);
+	if (ret < 0)
+		goto err_idmac_init;
+
+	tasklet_init(&ipu_data.tasklet, ipu_gc_tasklet, (unsigned long)&ipu_data);
+
+	ipu_data.dev = &pdev->dev;
+
+	dev_dbg(ipu_data.dev, "IPU initialized\n");
+
+	return 0;
+
+err_idmac_init:
+err_attach_irq:
+	ipu_irq_detach_irq(&ipu_data, pdev);
+	clk_disable_unprepare(ipu_data.ipu_clk);
+	clk_put(ipu_data.ipu_clk);
+err_clk_get:
+	iounmap(ipu_data.reg_ic);
+err_ioremap_ic:
+	iounmap(ipu_data.reg_ipu);
+err_ioremap_ipu:
+err_noirq:
+	dev_err(&pdev->dev, "Failed to probe IPU: %d\n", ret);
+	return ret;
+}
+
+static int ipu_remove(struct platform_device *pdev)
+{
+	struct ipu *ipu = platform_get_drvdata(pdev);
+
+	ipu_idmac_exit(ipu);
+	ipu_irq_detach_irq(ipu, pdev);
+	clk_disable_unprepare(ipu->ipu_clk);
+	clk_put(ipu->ipu_clk);
+	iounmap(ipu->reg_ic);
+	iounmap(ipu->reg_ipu);
+	tasklet_kill(&ipu->tasklet);
+
+	return 0;
+}
+
+/*
+ * We need two MEM resources - with IPU-common and Image Converter registers,
+ * including PF_CONF and IDMAC_* registers, and two IRQs - function and error
+ */
+static struct platform_driver ipu_platform_driver = {
+	.driver = {
+		.name	= "ipu-core",
+	},
+	.remove		= ipu_remove,
+};
+
+static int __init ipu_init(void)
+{
+	return platform_driver_probe(&ipu_platform_driver, ipu_probe);
+}
+subsys_initcall(ipu_init);
+
+MODULE_DESCRIPTION("IPU core driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Guennadi Liakhovetski <lg@denx.de>");
+MODULE_ALIAS("platform:ipu-core");
diff --git a/drivers/dma/ipu/ipu_intern.h b/drivers/dma/ipu/ipu_intern.h
new file mode 100644
index 0000000..545cf11
--- /dev/null
+++ b/drivers/dma/ipu/ipu_intern.h
@@ -0,0 +1,176 @@
+/*
+ * Copyright (C) 2008
+ * Guennadi Liakhovetski, DENX Software Engineering, <lg@denx.de>
+ *
+ * Copyright (C) 2005-2007 Freescale Semiconductor, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _IPU_INTERN_H_
+#define _IPU_INTERN_H_
+
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+
+/* IPU Common registers */
+#define IPU_CONF		0x00
+#define IPU_CHA_BUF0_RDY	0x04
+#define IPU_CHA_BUF1_RDY	0x08
+#define IPU_CHA_DB_MODE_SEL	0x0C
+#define IPU_CHA_CUR_BUF		0x10
+#define IPU_FS_PROC_FLOW	0x14
+#define IPU_FS_DISP_FLOW	0x18
+#define IPU_TASKS_STAT		0x1C
+#define IPU_IMA_ADDR		0x20
+#define IPU_IMA_DATA		0x24
+#define IPU_INT_CTRL_1		0x28
+#define IPU_INT_CTRL_2		0x2C
+#define IPU_INT_CTRL_3		0x30
+#define IPU_INT_CTRL_4		0x34
+#define IPU_INT_CTRL_5		0x38
+#define IPU_INT_STAT_1		0x3C
+#define IPU_INT_STAT_2		0x40
+#define IPU_INT_STAT_3		0x44
+#define IPU_INT_STAT_4		0x48
+#define IPU_INT_STAT_5		0x4C
+#define IPU_BRK_CTRL_1		0x50
+#define IPU_BRK_CTRL_2		0x54
+#define IPU_BRK_STAT		0x58
+#define IPU_DIAGB_CTRL		0x5C
+
+/* IPU_CONF Register bits */
+#define IPU_CONF_CSI_EN		0x00000001
+#define IPU_CONF_IC_EN		0x00000002
+#define IPU_CONF_ROT_EN		0x00000004
+#define IPU_CONF_PF_EN		0x00000008
+#define IPU_CONF_SDC_EN		0x00000010
+#define IPU_CONF_ADC_EN		0x00000020
+#define IPU_CONF_DI_EN		0x00000040
+#define IPU_CONF_DU_EN		0x00000080
+#define IPU_CONF_PXL_ENDIAN	0x00000100
+
+/* Image Converter Registers */
+#define IC_CONF			0x88
+#define IC_PRP_ENC_RSC		0x8C
+#define IC_PRP_VF_RSC		0x90
+#define IC_PP_RSC		0x94
+#define IC_CMBP_1		0x98
+#define IC_CMBP_2		0x9C
+#define PF_CONF			0xA0
+#define IDMAC_CONF		0xA4
+#define IDMAC_CHA_EN		0xA8
+#define IDMAC_CHA_PRI		0xAC
+#define IDMAC_CHA_BUSY		0xB0
+
+/* Image Converter Register bits */
+#define IC_CONF_PRPENC_EN	0x00000001
+#define IC_CONF_PRPENC_CSC1	0x00000002
+#define IC_CONF_PRPENC_ROT_EN	0x00000004
+#define IC_CONF_PRPVF_EN	0x00000100
+#define IC_CONF_PRPVF_CSC1	0x00000200
+#define IC_CONF_PRPVF_CSC2	0x00000400
+#define IC_CONF_PRPVF_CMB	0x00000800
+#define IC_CONF_PRPVF_ROT_EN	0x00001000
+#define IC_CONF_PP_EN		0x00010000
+#define IC_CONF_PP_CSC1		0x00020000
+#define IC_CONF_PP_CSC2		0x00040000
+#define IC_CONF_PP_CMB		0x00080000
+#define IC_CONF_PP_ROT_EN	0x00100000
+#define IC_CONF_IC_GLB_LOC_A	0x10000000
+#define IC_CONF_KEY_COLOR_EN	0x20000000
+#define IC_CONF_RWS_EN		0x40000000
+#define IC_CONF_CSI_MEM_WR_EN	0x80000000
+
+#define IDMA_CHAN_INVALID	0x000000FF
+#define IDMA_IC_0		0x00000001
+#define IDMA_IC_1		0x00000002
+#define IDMA_IC_2		0x00000004
+#define IDMA_IC_3		0x00000008
+#define IDMA_IC_4		0x00000010
+#define IDMA_IC_5		0x00000020
+#define IDMA_IC_6		0x00000040
+#define IDMA_IC_7		0x00000080
+#define IDMA_IC_8		0x00000100
+#define IDMA_IC_9		0x00000200
+#define IDMA_IC_10		0x00000400
+#define IDMA_IC_11		0x00000800
+#define IDMA_IC_12		0x00001000
+#define IDMA_IC_13		0x00002000
+#define IDMA_SDC_BG		0x00004000
+#define IDMA_SDC_FG		0x00008000
+#define IDMA_SDC_MASK		0x00010000
+#define IDMA_SDC_PARTIAL	0x00020000
+#define IDMA_ADC_SYS1_WR	0x00040000
+#define IDMA_ADC_SYS2_WR	0x00080000
+#define IDMA_ADC_SYS1_CMD	0x00100000
+#define IDMA_ADC_SYS2_CMD	0x00200000
+#define IDMA_ADC_SYS1_RD	0x00400000
+#define IDMA_ADC_SYS2_RD	0x00800000
+#define IDMA_PF_QP		0x01000000
+#define IDMA_PF_BSP		0x02000000
+#define IDMA_PF_Y_IN		0x04000000
+#define IDMA_PF_U_IN		0x08000000
+#define IDMA_PF_V_IN		0x10000000
+#define IDMA_PF_Y_OUT		0x20000000
+#define IDMA_PF_U_OUT		0x40000000
+#define IDMA_PF_V_OUT		0x80000000
+
+#define TSTAT_PF_H264_PAUSE	0x00000001
+#define TSTAT_CSI2MEM_MASK	0x0000000C
+#define TSTAT_CSI2MEM_OFFSET	2
+#define TSTAT_VF_MASK		0x00000600
+#define TSTAT_VF_OFFSET		9
+#define TSTAT_VF_ROT_MASK	0x000C0000
+#define TSTAT_VF_ROT_OFFSET	18
+#define TSTAT_ENC_MASK		0x00000180
+#define TSTAT_ENC_OFFSET	7
+#define TSTAT_ENC_ROT_MASK	0x00030000
+#define TSTAT_ENC_ROT_OFFSET	16
+#define TSTAT_PP_MASK		0x00001800
+#define TSTAT_PP_OFFSET		11
+#define TSTAT_PP_ROT_MASK	0x00300000
+#define TSTAT_PP_ROT_OFFSET	20
+#define TSTAT_PF_MASK		0x00C00000
+#define TSTAT_PF_OFFSET		22
+#define TSTAT_ADCSYS1_MASK	0x03000000
+#define TSTAT_ADCSYS1_OFFSET	24
+#define TSTAT_ADCSYS2_MASK	0x0C000000
+#define TSTAT_ADCSYS2_OFFSET	26
+
+#define TASK_STAT_IDLE		0
+#define TASK_STAT_ACTIVE	1
+#define TASK_STAT_WAIT4READY	2
+
+struct idmac {
+	struct dma_device	dma;
+};
+
+struct ipu {
+	void __iomem		*reg_ipu;
+	void __iomem		*reg_ic;
+	unsigned int		irq_fn;		/* IPU Function IRQ to the CPU */
+	unsigned int		irq_err;	/* IPU Error IRQ to the CPU */
+	unsigned int		irq_base;	/* Beginning of the IPU IRQ range */
+	unsigned long		channel_init_mask;
+	spinlock_t		lock;
+	struct clk		*ipu_clk;
+	struct device		*dev;
+	struct idmac		idmac;
+	struct idmac_channel	channel[IPU_CHANNELS_NUM];
+	struct tasklet_struct	tasklet;
+};
+
+#define to_idmac(d) container_of(d, struct idmac, dma)
+
+extern int ipu_irq_attach_irq(struct ipu *ipu, struct platform_device *dev);
+extern void ipu_irq_detach_irq(struct ipu *ipu, struct platform_device *dev);
+
+extern bool ipu_irq_status(unsigned int irq);
+extern int ipu_irq_map(unsigned int source);
+extern int ipu_irq_unmap(unsigned int source);
+
+#endif
diff --git a/drivers/dma/ipu/ipu_irq.c b/drivers/dma/ipu/ipu_irq.c
new file mode 100644
index 0000000..2846278
--- /dev/null
+++ b/drivers/dma/ipu/ipu_irq.c
@@ -0,0 +1,370 @@
+/*
+ * Copyright (C) 2008
+ * Guennadi Liakhovetski, DENX Software Engineering, <lg@denx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/clk.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/dma/ipu-dma.h>
+
+#include "ipu_intern.h"
+
+/*
+ * Register read / write - shall be inlined by the compiler
+ */
+static u32 ipu_read_reg(struct ipu *ipu, unsigned long reg)
+{
+	return __raw_readl(ipu->reg_ipu + reg);
+}
+
+static void ipu_write_reg(struct ipu *ipu, u32 value, unsigned long reg)
+{
+	__raw_writel(value, ipu->reg_ipu + reg);
+}
+
+
+/*
+ * IPU IRQ chip driver
+ */
+
+#define IPU_IRQ_NR_FN_BANKS 3
+#define IPU_IRQ_NR_ERR_BANKS 2
+#define IPU_IRQ_NR_BANKS (IPU_IRQ_NR_FN_BANKS + IPU_IRQ_NR_ERR_BANKS)
+
+struct ipu_irq_bank {
+	unsigned int	control;
+	unsigned int	status;
+	struct ipu	*ipu;
+};
+
+static struct ipu_irq_bank irq_bank[IPU_IRQ_NR_BANKS] = {
+	/* 3 groups of functional interrupts */
+	{
+		.control	= IPU_INT_CTRL_1,
+		.status		= IPU_INT_STAT_1,
+	}, {
+		.control	= IPU_INT_CTRL_2,
+		.status		= IPU_INT_STAT_2,
+	}, {
+		.control	= IPU_INT_CTRL_3,
+		.status		= IPU_INT_STAT_3,
+	},
+	/* 2 groups of error interrupts */
+	{
+		.control	= IPU_INT_CTRL_4,
+		.status		= IPU_INT_STAT_4,
+	}, {
+		.control	= IPU_INT_CTRL_5,
+		.status		= IPU_INT_STAT_5,
+	},
+};
+
+struct ipu_irq_map {
+	unsigned int		irq;
+	int			source;
+	struct ipu_irq_bank	*bank;
+	struct ipu		*ipu;
+};
+
+static struct ipu_irq_map irq_map[CONFIG_MX3_IPU_IRQS];
+/* Protects allocations from the above array of maps */
+static DEFINE_MUTEX(map_lock);
+/* Protects register accesses and individual mappings */
+static DEFINE_RAW_SPINLOCK(bank_lock);
+
+static struct ipu_irq_map *src2map(unsigned int src)
+{
+	int i;
+
+	for (i = 0; i < CONFIG_MX3_IPU_IRQS; i++)
+		if (irq_map[i].source == src)
+			return irq_map + i;
+
+	return NULL;
+}
+
+static void ipu_irq_unmask(struct irq_data *d)
+{
+	struct ipu_irq_map *map = irq_data_get_irq_chip_data(d);
+	struct ipu_irq_bank *bank;
+	uint32_t reg;
+	unsigned long lock_flags;
+
+	raw_spin_lock_irqsave(&bank_lock, lock_flags);
+
+	bank = map->bank;
+	if (!bank) {
+		raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+		pr_err("IPU: %s(%u) - unmapped!\n", __func__, d->irq);
+		return;
+	}
+
+	reg = ipu_read_reg(bank->ipu, bank->control);
+	reg |= (1UL << (map->source & 31));
+	ipu_write_reg(bank->ipu, reg, bank->control);
+
+	raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+}
+
+static void ipu_irq_mask(struct irq_data *d)
+{
+	struct ipu_irq_map *map = irq_data_get_irq_chip_data(d);
+	struct ipu_irq_bank *bank;
+	uint32_t reg;
+	unsigned long lock_flags;
+
+	raw_spin_lock_irqsave(&bank_lock, lock_flags);
+
+	bank = map->bank;
+	if (!bank) {
+		raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+		pr_err("IPU: %s(%u) - unmapped!\n", __func__, d->irq);
+		return;
+	}
+
+	reg = ipu_read_reg(bank->ipu, bank->control);
+	reg &= ~(1UL << (map->source & 31));
+	ipu_write_reg(bank->ipu, reg, bank->control);
+
+	raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+}
+
+static void ipu_irq_ack(struct irq_data *d)
+{
+	struct ipu_irq_map *map = irq_data_get_irq_chip_data(d);
+	struct ipu_irq_bank *bank;
+	unsigned long lock_flags;
+
+	raw_spin_lock_irqsave(&bank_lock, lock_flags);
+
+	bank = map->bank;
+	if (!bank) {
+		raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+		pr_err("IPU: %s(%u) - unmapped!\n", __func__, d->irq);
+		return;
+	}
+
+	ipu_write_reg(bank->ipu, 1UL << (map->source & 31), bank->status);
+	raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+}
+
+/**
+ * ipu_irq_status() - returns the current interrupt status of the specified IRQ.
+ * @irq:	interrupt line to get status for.
+ * @return:	true if the interrupt is pending/asserted or false if the
+ *		interrupt is not pending.
+ */
+bool ipu_irq_status(unsigned int irq)
+{
+	struct ipu_irq_map *map = irq_get_chip_data(irq);
+	struct ipu_irq_bank *bank;
+	unsigned long lock_flags;
+	bool ret;
+
+	raw_spin_lock_irqsave(&bank_lock, lock_flags);
+	bank = map->bank;
+	ret = bank && ipu_read_reg(bank->ipu, bank->status) &
+		(1UL << (map->source & 31));
+	raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+
+	return ret;
+}
+
+/**
+ * ipu_irq_map() - map an IPU interrupt source to an IRQ number
+ * @source:	interrupt source bit position (see below)
+ * @return:	mapped IRQ number or negative error code
+ *
+ * The source parameter needs further explanation. On i.MX31 the IPU has 137
+ * IRQ sources, spread over 5 32-bit registers as 32, 32, 24, 32 and 17 bits.
+ * However, the source argument of this function is not the sequential number
+ * of the IRQ, but rather its bit position. So, the first interrupt in the
+ * fourth register has source number 96, not 88. This makes calculations
+ * easier, and also provides forward compatibility with any future IPU
+ * implementations with any interrupt bit assignments.
+ */
+int ipu_irq_map(unsigned int source)
+{
+	int i, ret = -ENOMEM;
+	struct ipu_irq_map *map;
+
+	might_sleep();
+
+	mutex_lock(&map_lock);
+	map = src2map(source);
+	if (map) {
+		pr_err("IPU: Source %u already mapped to IRQ %u\n", source, map->irq);
+		ret = -EBUSY;
+		goto out;
+	}
+
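+	/* Find a free slot: unused map entries have source == -EINVAL */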
+	for (i = 0; i < CONFIG_MX3_IPU_IRQS; i++) {
+		if (irq_map[i].source < 0) {
+			unsigned long lock_flags;
+
+			raw_spin_lock_irqsave(&bank_lock, lock_flags);
+			irq_map[i].source = source;
+			irq_map[i].bank = irq_bank + source / 32;
+			raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+
+			ret = irq_map[i].irq;
+			pr_debug("IPU: mapped source %u to IRQ %u\n",
+				 source, ret);
+			break;
+		}
+	}
+out:
+	mutex_unlock(&map_lock);
+
+	if (ret < 0)
+		pr_err("IPU: couldn't map source %u: %d\n", source, ret);
+
+	return ret;
+}
+
+/**
+ * ipu_irq_unmap() - unmap an IPU interrupt source from its IRQ number
+ * @source:	interrupt source bit position (see ipu_irq_map())
+ * @return:	0 or negative error code
+ */
+int ipu_irq_unmap(unsigned int source)
+{
+	int i, ret = -EINVAL;
+
+	might_sleep();
+
+	mutex_lock(&map_lock);
+	for (i = 0; i < CONFIG_MX3_IPU_IRQS; i++) {
+		if (irq_map[i].source == source) {
+			unsigned long lock_flags;
+
+			pr_debug("IPU: unmapped source %u from IRQ %u\n",
+				 source, irq_map[i].irq);
+
+			raw_spin_lock_irqsave(&bank_lock, lock_flags);
+			irq_map[i].source = -EINVAL;
+			irq_map[i].bank = NULL;
+			raw_spin_unlock_irqrestore(&bank_lock, lock_flags);
+
+			ret = 0;
+			break;
+		}
+	}
+	mutex_unlock(&map_lock);
+
+	return ret;
+}
+
+/* Chained IRQ handler for IPU function and error interrupt */
+static void ipu_irq_handler(struct irq_desc *desc)
+{
+	struct ipu *ipu = irq_desc_get_handler_data(desc);
+	u32 status;
+	int i, line;
+
+	for (i = 0; i < IPU_IRQ_NR_BANKS; i++) {
+		struct ipu_irq_bank *bank = irq_bank + i;
+
+		raw_spin_lock(&bank_lock);
+		status = ipu_read_reg(ipu, bank->status);
+		/*
+		 * We don't think we have to clear all interrupts here; they
+		 * will be acked by ->handle_irq() (handle_level_irq). However,
+		 * we might want to clear unhandled interrupts after the loop...
+		 */
+		status &= ipu_read_reg(ipu, bank->control);
+		raw_spin_unlock(&bank_lock);
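+		/* Dispatch each pending source that is enabled in this bank */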
+		while ((line = ffs(status))) {
+			struct ipu_irq_map *map;
+			unsigned int irq;
+
+			line--;
+			status &= ~(1UL << line);
+
+			raw_spin_lock(&bank_lock);
+			map = src2map(32 * i + line);
+			if (!map) {
+				raw_spin_unlock(&bank_lock);
+				pr_err("IPU: Interrupt on unmapped source %u bank %d\n",
+				       line, i);
+				continue;
+			}
+			irq = map->irq;
+			raw_spin_unlock(&bank_lock);
+			generic_handle_irq(irq);
+		}
+	}
+}
+
+static struct irq_chip ipu_irq_chip = {
+	.name		= "ipu_irq",
+	.irq_ack	= ipu_irq_ack,
+	.irq_mask	= ipu_irq_mask,
+	.irq_unmask	= ipu_irq_unmask,
+};
+
+/* Install the IRQ handler */
+int __init ipu_irq_attach_irq(struct ipu *ipu, struct platform_device *dev)
+{
+	unsigned int irq, i;
+	int irq_base = irq_alloc_descs(-1, 0, CONFIG_MX3_IPU_IRQS,
+				       numa_node_id());
+
+	if (irq_base < 0)
+		return irq_base;
+
+	for (i = 0; i < IPU_IRQ_NR_BANKS; i++)
+		irq_bank[i].ipu = ipu;
+
+	for (i = 0; i < CONFIG_MX3_IPU_IRQS; i++) {
+		int ret;
+
+		irq = irq_base + i;
+		ret = irq_set_chip(irq, &ipu_irq_chip);
+		if (ret < 0)
+			return ret;
+		ret = irq_set_chip_data(irq, irq_map + i);
+		if (ret < 0)
+			return ret;
+		irq_map[i].ipu = ipu;
+		irq_map[i].irq = irq;
+		irq_map[i].source = -EINVAL;
+		irq_set_handler(irq, handle_level_irq);
+		irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE);
+	}
+
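+	/* Function and error parent IRQs share the same chained handler */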
+	irq_set_chained_handler_and_data(ipu->irq_fn, ipu_irq_handler, ipu);
+
+	irq_set_chained_handler_and_data(ipu->irq_err, ipu_irq_handler, ipu);
+
+	ipu->irq_base = irq_base;
+
+	return 0;
+}
+
+void ipu_irq_detach_irq(struct ipu *ipu, struct platform_device *dev)
+{
+	unsigned int irq, irq_base;
+
+	irq_base = ipu->irq_base;
+
+	irq_set_chained_handler_and_data(ipu->irq_fn, NULL, NULL);
+
+	irq_set_chained_handler_and_data(ipu->irq_err, NULL, NULL);
+
+	for (irq = irq_base; irq < irq_base + CONFIG_MX3_IPU_IRQS; irq++) {
+		irq_set_status_flags(irq, IRQ_NOREQUEST);
+		irq_set_chip(irq, NULL);
+		irq_set_chip_data(irq, NULL);
+	}
+}
diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c
new file mode 100644
index 0000000..6bfa217
--- /dev/null
+++ b/drivers/dma/k3dma.c
@@ -0,0 +1,996 @@
+/*
+ * Copyright (c) 2013 - 2015 Linaro Ltd.
+ * Copyright (c) 2013 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/sched.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/clk.h>
+#include <linux/of_dma.h>
+
+#include "virt-dma.h"
+
+#define DRIVER_NAME		"k3-dma"
+#define DMA_MAX_SIZE		0x1ffc
+#define DMA_CYCLIC_MAX_PERIOD	0x1000
+#define LLI_BLOCK_SIZE		(4 * PAGE_SIZE)
+
+#define INT_STAT		0x00
+#define INT_TC1			0x04
+#define INT_TC2			0x08
+#define INT_ERR1		0x0c
+#define INT_ERR2		0x10
+#define INT_TC1_MASK		0x18
+#define INT_TC2_MASK		0x1c
+#define INT_ERR1_MASK		0x20
+#define INT_ERR2_MASK		0x24
+#define INT_TC1_RAW		0x600
+#define INT_TC2_RAW		0x608
+#define INT_ERR1_RAW		0x610
+#define INT_ERR2_RAW		0x618
+#define CH_PRI			0x688
+#define CH_STAT			0x690
+#define CX_CUR_CNT		0x704
+#define CX_LLI			0x800
+#define CX_CNT1			0x80c
+#define CX_CNT0			0x810
+#define CX_SRC			0x814
+#define CX_DST			0x818
+#define CX_CFG			0x81c
+#define AXI_CFG			0x820
+#define AXI_CFG_DEFAULT		0x201201
+
+#define CX_LLI_CHAIN_EN		0x2
+#define CX_CFG_EN		0x1
+#define CX_CFG_NODEIRQ		BIT(1)
+#define CX_CFG_MEM2PER		(0x1 << 2)
+#define CX_CFG_PER2MEM		(0x2 << 2)
+#define CX_CFG_SRCINCR		(0x1 << 31)
+#define CX_CFG_DSTINCR		(0x1 << 30)
+
+struct k3_desc_hw {
+	u32 lli;
+	u32 reserved[3];
+	u32 count;
+	u32 saddr;
+	u32 daddr;
+	u32 config;
+} __aligned(32);
+
+struct k3_dma_desc_sw {
+	struct virt_dma_desc	vd;
+	dma_addr_t		desc_hw_lli;
+	size_t			desc_num;
+	size_t			size;
+	struct k3_desc_hw	*desc_hw;
+};
+
+struct k3_dma_phy;
+
+struct k3_dma_chan {
+	u32			ccfg;
+	struct virt_dma_chan	vc;
+	struct k3_dma_phy	*phy;
+	struct list_head	node;
+	enum dma_transfer_direction dir;
+	dma_addr_t		dev_addr;
+	enum dma_status		status;
+	bool			cyclic;
+};
+
+struct k3_dma_phy {
+	u32			idx;
+	void __iomem		*base;
+	struct k3_dma_chan	*vchan;
+	struct k3_dma_desc_sw	*ds_run;
+	struct k3_dma_desc_sw	*ds_done;
+};
+
+struct k3_dma_dev {
+	struct dma_device	slave;
+	void __iomem		*base;
+	struct tasklet_struct	task;
+	spinlock_t		lock;
+	struct list_head	chan_pending;
+	struct k3_dma_phy	*phy;
+	struct k3_dma_chan	*chans;
+	struct clk		*clk;
+	struct dma_pool		*pool;
+	u32			dma_channels;
+	u32			dma_requests;
+	unsigned int		irq;
+};
+
+#define to_k3_dma(dmadev) container_of(dmadev, struct k3_dma_dev, slave)
+
+static struct k3_dma_chan *to_k3_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct k3_dma_chan, vc.chan);
+}
+
+static void k3_dma_pause_dma(struct k3_dma_phy *phy, bool on)
+{
+	u32 val = 0;
+
+	if (on) {
+		val = readl_relaxed(phy->base + CX_CFG);
+		val |= CX_CFG_EN;
+		writel_relaxed(val, phy->base + CX_CFG);
+	} else {
+		val = readl_relaxed(phy->base + CX_CFG);
+		val &= ~CX_CFG_EN;
+		writel_relaxed(val, phy->base + CX_CFG);
+	}
+}
+
+static void k3_dma_terminate_chan(struct k3_dma_phy *phy, struct k3_dma_dev *d)
+{
+	u32 val = 0;
+
+	k3_dma_pause_dma(phy, false);
+
+	val = 0x1 << phy->idx;
+	writel_relaxed(val, d->base + INT_TC1_RAW);
+	writel_relaxed(val, d->base + INT_TC2_RAW);
+	writel_relaxed(val, d->base + INT_ERR1_RAW);
+	writel_relaxed(val, d->base + INT_ERR2_RAW);
+}
+
+static void k3_dma_set_desc(struct k3_dma_phy *phy, struct k3_desc_hw *hw)
+{
+	writel_relaxed(hw->lli, phy->base + CX_LLI);
+	writel_relaxed(hw->count, phy->base + CX_CNT0);
+	writel_relaxed(hw->saddr, phy->base + CX_SRC);
+	writel_relaxed(hw->daddr, phy->base + CX_DST);
+	writel_relaxed(AXI_CFG_DEFAULT, phy->base + AXI_CFG);
+	writel_relaxed(hw->config, phy->base + CX_CFG);
+}
+
+static u32 k3_dma_get_curr_cnt(struct k3_dma_dev *d, struct k3_dma_phy *phy)
+{
+	u32 cnt = 0;
+
+	cnt = readl_relaxed(d->base + CX_CUR_CNT + phy->idx * 0x10);
+	cnt &= 0xffff;
+	return cnt;
+}
+
+static u32 k3_dma_get_curr_lli(struct k3_dma_phy *phy)
+{
+	return readl_relaxed(phy->base + CX_LLI);
+}
+
+static u32 k3_dma_get_chan_stat(struct k3_dma_dev *d)
+{
+	return readl_relaxed(d->base + CH_STAT);
+}
+
+static void k3_dma_enable_dma(struct k3_dma_dev *d, bool on)
+{
+	if (on) {
+		/* set same priority */
+		writel_relaxed(0x0, d->base + CH_PRI);
+
+		/* unmask irq */
+		writel_relaxed(0xffff, d->base + INT_TC1_MASK);
+		writel_relaxed(0xffff, d->base + INT_TC2_MASK);
+		writel_relaxed(0xffff, d->base + INT_ERR1_MASK);
+		writel_relaxed(0xffff, d->base + INT_ERR2_MASK);
+	} else {
+		/* mask irq */
+		writel_relaxed(0x0, d->base + INT_TC1_MASK);
+		writel_relaxed(0x0, d->base + INT_TC2_MASK);
+		writel_relaxed(0x0, d->base + INT_ERR1_MASK);
+		writel_relaxed(0x0, d->base + INT_ERR2_MASK);
+	}
+}
+
+static irqreturn_t k3_dma_int_handler(int irq, void *dev_id)
+{
+	struct k3_dma_dev *d = (struct k3_dma_dev *)dev_id;
+	struct k3_dma_phy *p;
+	struct k3_dma_chan *c;
+	u32 stat = readl_relaxed(d->base + INT_STAT);
+	u32 tc1  = readl_relaxed(d->base + INT_TC1);
+	u32 tc2  = readl_relaxed(d->base + INT_TC2);
+	u32 err1 = readl_relaxed(d->base + INT_ERR1);
+	u32 err2 = readl_relaxed(d->base + INT_ERR2);
+	u32 i, irq_chan = 0;
+
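+	/* Each bit set in INT_STAT is a phy channel with a pending IRQ */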
+	while (stat) {
+		i = __ffs(stat);
+		stat &= ~BIT(i);
+		if (likely(tc1 & BIT(i)) || (tc2 & BIT(i))) {
+			unsigned long flags;
+
+			p = &d->phy[i];
+			c = p->vchan;
+			if (c && (tc1 & BIT(i))) {
+				spin_lock_irqsave(&c->vc.lock, flags);
+				vchan_cookie_complete(&p->ds_run->vd);
+				p->ds_done = p->ds_run;
+				p->ds_run = NULL;
+				spin_unlock_irqrestore(&c->vc.lock, flags);
+			}
+			if (c && (tc2 & BIT(i))) {
+				spin_lock_irqsave(&c->vc.lock, flags);
+				if (p->ds_run != NULL)
+					vchan_cyclic_callback(&p->ds_run->vd);
+				spin_unlock_irqrestore(&c->vc.lock, flags);
+			}
+			irq_chan |= BIT(i);
+		}
+		if (unlikely((err1 & BIT(i)) || (err2 & BIT(i))))
+			dev_warn(d->slave.dev, "DMA ERR\n");
+	}
+
+	writel_relaxed(irq_chan, d->base + INT_TC1_RAW);
+	writel_relaxed(irq_chan, d->base + INT_TC2_RAW);
+	writel_relaxed(err1, d->base + INT_ERR1_RAW);
+	writel_relaxed(err2, d->base + INT_ERR2_RAW);
+
+	if (irq_chan)
+		tasklet_schedule(&d->task);
+
+	if (irq_chan || err1 || err2)
+		return IRQ_HANDLED;
+
+	return IRQ_NONE;
+}
+
+static int k3_dma_start_txd(struct k3_dma_chan *c)
+{
+	struct k3_dma_dev *d = to_k3_dma(c->vc.chan.device);
+	struct virt_dma_desc *vd = vchan_next_desc(&c->vc);
+
+	if (!c->phy)
+		return -EAGAIN;
+
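+	/* The physical channel is still busy with a previous transfer */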
+	if (BIT(c->phy->idx) & k3_dma_get_chan_stat(d))
+		return -EAGAIN;
+
+	if (vd) {
+		struct k3_dma_desc_sw *ds =
+			container_of(vd, struct k3_dma_desc_sw, vd);
+		/*
+		 * fetch and remove request from vc->desc_issued
+		 * so vc->desc_issued only contains desc pending
+		 */
+		list_del(&ds->vd.node);
+
+		c->phy->ds_run = ds;
+		c->phy->ds_done = NULL;
+		/* start dma */
+		k3_dma_set_desc(c->phy, &ds->desc_hw[0]);
+		return 0;
+	}
+	c->phy->ds_run = NULL;
+	c->phy->ds_done = NULL;
+	return -EAGAIN;
+}
+
+static void k3_dma_tasklet(unsigned long arg)
+{
+	struct k3_dma_dev *d = (struct k3_dma_dev *)arg;
+	struct k3_dma_phy *p;
+	struct k3_dma_chan *c, *cn;
+	unsigned pch, pch_alloc = 0;
+
+	/* check new dma request of running channel in vc->desc_issued */
+	list_for_each_entry_safe(c, cn, &d->slave.channels, vc.chan.device_node) {
+		spin_lock_irq(&c->vc.lock);
+		p = c->phy;
+		if (p && p->ds_done) {
+			if (k3_dma_start_txd(c)) {
+				/* No current txd associated with this channel */
+				dev_dbg(d->slave.dev, "pchan %u: free\n", p->idx);
+				/* Mark this channel free */
+				c->phy = NULL;
+				p->vchan = NULL;
+			}
+		}
+		spin_unlock_irq(&c->vc.lock);
+	}
+
+	/* check new channel request in d->chan_pending */
+	spin_lock_irq(&d->lock);
+	for (pch = 0; pch < d->dma_channels; pch++) {
+		p = &d->phy[pch];
+
+		if (p->vchan == NULL && !list_empty(&d->chan_pending)) {
+			c = list_first_entry(&d->chan_pending,
+				struct k3_dma_chan, node);
+			/* remove from d->chan_pending */
+			list_del_init(&c->node);
+			pch_alloc |= 1 << pch;
+			/* Mark this channel allocated */
+			p->vchan = c;
+			c->phy = p;
+			dev_dbg(d->slave.dev, "pchan %u: alloc vchan %p\n", pch, &c->vc);
+		}
+	}
+	spin_unlock_irq(&d->lock);
+
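+	/* Kick off the transfers on the freshly allocated physical channels */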
+	for (pch = 0; pch < d->dma_channels; pch++) {
+		if (pch_alloc & (1 << pch)) {
+			p = &d->phy[pch];
+			c = p->vchan;
+			if (c) {
+				spin_lock_irq(&c->vc.lock);
+				k3_dma_start_txd(c);
+				spin_unlock_irq(&c->vc.lock);
+			}
+		}
+	}
+}
+
+static void k3_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_dev *d = to_k3_dma(chan->device);
+	unsigned long flags;
+
+	spin_lock_irqsave(&d->lock, flags);
+	list_del_init(&c->node);
+	spin_unlock_irqrestore(&d->lock, flags);
+
+	vchan_free_chan_resources(&c->vc);
+	c->ccfg = 0;
+}
+
+static enum dma_status k3_dma_tx_status(struct dma_chan *chan,
+	dma_cookie_t cookie, struct dma_tx_state *state)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_dev *d = to_k3_dma(chan->device);
+	struct k3_dma_phy *p;
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+	enum dma_status ret;
+	size_t bytes = 0;
+
+	ret = dma_cookie_status(&c->vc.chan, cookie, state);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+	p = c->phy;
+	ret = c->status;
+
+	/*
+	 * If the cookie is on our issue queue, then the residue is
+	 * its total size.
+	 */
+	vd = vchan_find_desc(&c->vc, cookie);
+	if (vd && !c->cyclic) {
+		bytes = container_of(vd, struct k3_dma_desc_sw, vd)->size;
+	} else if ((!p) || (!p->ds_run)) {
+		bytes = 0;
+	} else {
+		struct k3_dma_desc_sw *ds = p->ds_run;
+		u32 clli = 0, index = 0;
+
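+		/*
+		 * Residue = remaining count of the LLI in flight plus the
+		 * counts of all LLIs still linked after it
+		 */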
+		bytes = k3_dma_get_curr_cnt(d, p);
+		clli = k3_dma_get_curr_lli(p);
+		index = ((clli - ds->desc_hw_lli) /
+				sizeof(struct k3_desc_hw)) + 1;
+		for (; index < ds->desc_num; index++) {
+			bytes += ds->desc_hw[index].count;
+			/* end of lli */
+			if (!ds->desc_hw[index].lli)
+				break;
+		}
+	}
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+	dma_set_residue(state, bytes);
+	return ret;
+}
+
+static void k3_dma_issue_pending(struct dma_chan *chan)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_dev *d = to_k3_dma(chan->device);
+	unsigned long flags;
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+	/* add request to vc->desc_issued */
+	if (vchan_issue_pending(&c->vc)) {
+		spin_lock(&d->lock);
+		if (!c->phy) {
+			if (list_empty(&c->node)) {
+				/* if new channel, add chan_pending */
+				list_add_tail(&c->node, &d->chan_pending);
+				/* check in tasklet */
+				tasklet_schedule(&d->task);
+				dev_dbg(d->slave.dev, "vchan %p: issued\n", &c->vc);
+			}
+		}
+		spin_unlock(&d->lock);
+	} else
+		dev_dbg(d->slave.dev, "vchan %p: nothing to issue\n", &c->vc);
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+}
+
+static void k3_dma_fill_desc(struct k3_dma_desc_sw *ds, dma_addr_t dst,
+			dma_addr_t src, size_t len, u32 num, u32 ccfg)
+{
+	if (num != ds->desc_num - 1)
+		ds->desc_hw[num].lli = ds->desc_hw_lli + (num + 1) *
+			sizeof(struct k3_desc_hw);
+
+	ds->desc_hw[num].lli |= CX_LLI_CHAIN_EN;
+	ds->desc_hw[num].count = len;
+	ds->desc_hw[num].saddr = src;
+	ds->desc_hw[num].daddr = dst;
+	ds->desc_hw[num].config = ccfg;
+}
+
+static struct k3_dma_desc_sw *k3_dma_alloc_desc_resource(int num,
+							struct dma_chan *chan)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_desc_sw *ds;
+	struct k3_dma_dev *d = to_k3_dma(chan->device);
+	int lli_limit = LLI_BLOCK_SIZE / sizeof(struct k3_desc_hw);
+
+	if (num > lli_limit) {
+		dev_dbg(chan->device->dev, "vch %p: sg num %d exceed max %d\n",
+			&c->vc, num, lli_limit);
+		return NULL;
+	}
+
+	ds = kzalloc(sizeof(*ds), GFP_NOWAIT);
+	if (!ds)
+		return NULL;
+
+	ds->desc_hw = dma_pool_zalloc(d->pool, GFP_NOWAIT, &ds->desc_hw_lli);
+	if (!ds->desc_hw) {
+		dev_dbg(chan->device->dev, "vch %p: dma alloc fail\n", &c->vc);
+		kfree(ds);
+		return NULL;
+	}
+	ds->desc_num = num;
+	return ds;
+}
+
+static struct dma_async_tx_descriptor *k3_dma_prep_memcpy(
+	struct dma_chan *chan,	dma_addr_t dst, dma_addr_t src,
+	size_t len, unsigned long flags)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_desc_sw *ds;
+	size_t copy = 0;
+	int num = 0;
+
+	if (!len)
+		return NULL;
+
+	num = DIV_ROUND_UP(len, DMA_MAX_SIZE);
+
+	ds = k3_dma_alloc_desc_resource(num, chan);
+	if (!ds)
+		return NULL;
+
+	c->cyclic = 0;
+	ds->size = len;
+	num = 0;
+
+	if (!c->ccfg) {
+		/* default is memtomem, without calling device_config */
+		c->ccfg = CX_CFG_SRCINCR | CX_CFG_DSTINCR | CX_CFG_EN;
+		c->ccfg |= (0xf << 20) | (0xf << 24);	/* burst = 16 */
+		c->ccfg |= (0x3 << 12) | (0x3 << 16);	/* width = 64 bit */
+	}
+
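+	/* Split the copy into LLI entries of at most DMA_MAX_SIZE bytes */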
+	do {
+		copy = min_t(size_t, len, DMA_MAX_SIZE);
+		k3_dma_fill_desc(ds, dst, src, copy, num++, c->ccfg);
+
+		if (c->dir == DMA_MEM_TO_DEV) {
+			src += copy;
+		} else if (c->dir == DMA_DEV_TO_MEM) {
+			dst += copy;
+		} else {
+			src += copy;
+			dst += copy;
+		}
+		len -= copy;
+	} while (len);
+
+	ds->desc_hw[num-1].lli = 0;	/* end of link */
+	return vchan_tx_prep(&c->vc, &ds->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *k3_dma_prep_slave_sg(
+	struct dma_chan *chan, struct scatterlist *sgl, unsigned int sglen,
+	enum dma_transfer_direction dir, unsigned long flags, void *context)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_desc_sw *ds;
+	size_t len, avail, total = 0;
+	struct scatterlist *sg;
+	dma_addr_t addr, src = 0, dst = 0;
+	int num = sglen, i;
+
+	if (sgl == NULL)
+		return NULL;
+
+	c->cyclic = 0;
+
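+	/* Count the LLIs needed: sg entries over DMA_MAX_SIZE are split up */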
+	for_each_sg(sgl, sg, sglen, i) {
+		avail = sg_dma_len(sg);
+		if (avail > DMA_MAX_SIZE)
+			num += DIV_ROUND_UP(avail, DMA_MAX_SIZE) - 1;
+	}
+
+	ds = k3_dma_alloc_desc_resource(num, chan);
+	if (!ds)
+		return NULL;
+	num = 0;
+
+	for_each_sg(sgl, sg, sglen, i) {
+		addr = sg_dma_address(sg);
+		avail = sg_dma_len(sg);
+		total += avail;
+
+		do {
+			len = min_t(size_t, avail, DMA_MAX_SIZE);
+
+			if (dir == DMA_MEM_TO_DEV) {
+				src = addr;
+				dst = c->dev_addr;
+			} else if (dir == DMA_DEV_TO_MEM) {
+				src = c->dev_addr;
+				dst = addr;
+			}
+
+			k3_dma_fill_desc(ds, dst, src, len, num++, c->ccfg);
+
+			addr += len;
+			avail -= len;
+		} while (avail);
+	}
+
+	ds->desc_hw[num-1].lli = 0;	/* end of link */
+	ds->size = total;
+	return vchan_tx_prep(&c->vc, &ds->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+k3_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
+		       size_t buf_len, size_t period_len,
+		       enum dma_transfer_direction dir,
+		       unsigned long flags)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_desc_sw *ds;
+	size_t len, avail, total = 0;
+	dma_addr_t addr, src = 0, dst = 0;
+	int num = 1, since = 0;
+	size_t modulo = DMA_CYCLIC_MAX_PERIOD;
+	u32 en_tc2 = 0;
+
+	dev_dbg(chan->device->dev, "%s: buf %pad, dst %pad, buf len %zu, period_len = %zu, dir %d\n",
+	       __func__, &buf_addr, &to_k3_chan(chan)->dev_addr,
+	       buf_len, period_len, (int)dir);
+
+	avail = buf_len;
+	if (avail > modulo)
+		num += DIV_ROUND_UP(avail, modulo) - 1;
+
+	ds = k3_dma_alloc_desc_resource(num, chan);
+	if (!ds)
+		return NULL;
+
+	c->cyclic = 1;
+	addr = buf_addr;
+	avail = buf_len;
+	total = avail;
+	num = 0;
+
+	if (period_len < modulo)
+		modulo = period_len;
+
+	do {
+		len = min_t(size_t, avail, modulo);
+
+		if (dir == DMA_MEM_TO_DEV) {
+			src = addr;
+			dst = c->dev_addr;
+		} else if (dir == DMA_DEV_TO_MEM) {
+			src = c->dev_addr;
+			dst = addr;
+		}
+		since += len;
+		if (since >= period_len) {
+			/* descriptor asks for TC2 interrupt on completion */
+			en_tc2 = CX_CFG_NODEIRQ;
+			since -= period_len;
+		} else
+			en_tc2 = 0;
+
+		k3_dma_fill_desc(ds, dst, src, len, num++, c->ccfg | en_tc2);
+
+		addr += len;
+		avail -= len;
+	} while (avail);
+
+	/* "Cyclic" == end of link points back to start of link */
+	ds->desc_hw[num - 1].lli |= ds->desc_hw_lli;
+
+	ds->size = total;
+
+	return vchan_tx_prep(&c->vc, &ds->vd, flags);
+}
+
+static int k3_dma_config(struct dma_chan *chan,
+			 struct dma_slave_config *cfg)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	u32 maxburst = 0, val = 0;
+	enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED;
+
+	if (cfg == NULL)
+		return -EINVAL;
+	c->dir = cfg->direction;
+	if (c->dir == DMA_DEV_TO_MEM) {
+		c->ccfg = CX_CFG_DSTINCR;
+		c->dev_addr = cfg->src_addr;
+		maxburst = cfg->src_maxburst;
+		width = cfg->src_addr_width;
+	} else if (c->dir == DMA_MEM_TO_DEV) {
+		c->ccfg = CX_CFG_SRCINCR;
+		c->dev_addr = cfg->dst_addr;
+		maxburst = cfg->dst_maxburst;
+		width = cfg->dst_addr_width;
+	}
+	switch (width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+	case DMA_SLAVE_BUSWIDTH_8_BYTES:
+		val =  __ffs(width);
+		break;
+	default:
+		val = 3;
+		break;
+	}
+	c->ccfg |= (val << 12) | (val << 16);
+
+	if ((maxburst == 0) || (maxburst > 16))
+		val = 15;
+	else
+		val = maxburst - 1;
+	c->ccfg |= (val << 20) | (val << 24);
+	c->ccfg |= CX_CFG_MEM2PER | CX_CFG_EN;
+
+	/* specific request line */
+	c->ccfg |= c->vc.chan.chan_id << 4;
+
+	return 0;
+}
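+
+/*
+ * Illustrative sketch of how the channel configuration word above is
+ * assembled, based only on the bit positions used in this driver (the
+ * numbers are example values, not a hardware requirement):
+ *
+ *   4-byte bus width -> val = __ffs(4) = 2 -> bits [15:12] and [19:16]
+ *   maxburst of 8    -> val = 8 - 1    = 7 -> bits [23:20] and [27:24]
+ *   request line 5   -> 5 << 4
+ *
+ *   ccfg = CX_CFG_SRCINCR              (for DMA_MEM_TO_DEV)
+ *        | (2 << 12) | (2 << 16)       (source/destination width)
+ *        | (7 << 20) | (7 << 24)       (source/destination burst)
+ *        | CX_CFG_MEM2PER | CX_CFG_EN
+ *        | (5 << 4);                   (request line)
+ *
+ * k3_dma_prep_memcpy() builds the same word with burst 16 (0xf) and
+ * 64-bit width (0x3) as its defaults.
+ */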
+
+static void k3_dma_free_desc(struct virt_dma_desc *vd)
+{
+	struct k3_dma_desc_sw *ds =
+		container_of(vd, struct k3_dma_desc_sw, vd);
+	struct k3_dma_dev *d = to_k3_dma(vd->tx.chan->device);
+
+	dma_pool_free(d->pool, ds->desc_hw, ds->desc_hw_lli);
+	kfree(ds);
+}
+
+static int k3_dma_terminate_all(struct dma_chan *chan)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_dev *d = to_k3_dma(chan->device);
+	struct k3_dma_phy *p = c->phy;
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	dev_dbg(d->slave.dev, "vchan %p: terminate all\n", &c->vc);
+
+	/* Prevent this channel being scheduled */
+	spin_lock(&d->lock);
+	list_del_init(&c->node);
+	spin_unlock(&d->lock);
+
+	/* Clear the tx descriptor lists */
+	spin_lock_irqsave(&c->vc.lock, flags);
+	vchan_get_all_descriptors(&c->vc, &head);
+	if (p) {
+		/* vchan is assigned to a pchan - stop the channel */
+		k3_dma_terminate_chan(p, d);
+		c->phy = NULL;
+		p->vchan = NULL;
+		if (p->ds_run) {
+			vchan_terminate_vdesc(&p->ds_run->vd);
+			p->ds_run = NULL;
+		}
+		p->ds_done = NULL;
+	}
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+	vchan_dma_desc_free_list(&c->vc, &head);
+
+	return 0;
+}
+
+static void k3_dma_synchronize(struct dma_chan *chan)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+
+	vchan_synchronize(&c->vc);
+}
+
+static int k3_dma_transfer_pause(struct dma_chan *chan)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_dev *d = to_k3_dma(chan->device);
+	struct k3_dma_phy *p = c->phy;
+
+	dev_dbg(d->slave.dev, "vchan %p: pause\n", &c->vc);
+	if (c->status == DMA_IN_PROGRESS) {
+		c->status = DMA_PAUSED;
+		if (p) {
+			k3_dma_pause_dma(p, false);
+		} else {
+			spin_lock(&d->lock);
+			list_del_init(&c->node);
+			spin_unlock(&d->lock);
+		}
+	}
+
+	return 0;
+}
+
+static int k3_dma_transfer_resume(struct dma_chan *chan)
+{
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_dev *d = to_k3_dma(chan->device);
+	struct k3_dma_phy *p = c->phy;
+	unsigned long flags;
+
+	dev_dbg(d->slave.dev, "vchan %p: resume\n", &c->vc);
+	spin_lock_irqsave(&c->vc.lock, flags);
+	if (c->status == DMA_PAUSED) {
+		c->status = DMA_IN_PROGRESS;
+		if (p) {
+			k3_dma_pause_dma(p, true);
+		} else if (!list_empty(&c->vc.desc_issued)) {
+			spin_lock(&d->lock);
+			list_add_tail(&c->node, &d->chan_pending);
+			spin_unlock(&d->lock);
+		}
+	}
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+
+	return 0;
+}
+
+static const struct of_device_id k3_pdma_dt_ids[] = {
+	{ .compatible = "hisilicon,k3-dma-1.0", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, k3_pdma_dt_ids);
+
+static struct dma_chan *k3_of_dma_simple_xlate(struct of_phandle_args *dma_spec,
+						struct of_dma *ofdma)
+{
+	struct k3_dma_dev *d = ofdma->of_dma_data;
+	unsigned int request = dma_spec->args[0];
+
+	if (request >= d->dma_requests)
+		return NULL;
+
+	return dma_get_slave_channel(&(d->chans[request].vc.chan));
+}
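+
+/*
+ * For reference, a hypothetical consumer would pass a single cell that
+ * k3_of_dma_simple_xlate() treats as the request line / channel index
+ * (the numbers below are illustrative only):
+ *
+ *   dmas = <&dma0 18>, <&dma0 19>;
+ *   dma-names = "rx", "tx";
+ *
+ * Both 18 and 19 must be below the controller's "dma-requests" value,
+ * otherwise the translation above returns NULL.
+ */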
+
+static int k3_dma_probe(struct platform_device *op)
+{
+	struct k3_dma_dev *d;
+	const struct of_device_id *of_id;
+	struct resource *iores;
+	int i, ret, irq = 0;
+
+	iores = platform_get_resource(op, IORESOURCE_MEM, 0);
+	if (!iores)
+		return -EINVAL;
+
+	d = devm_kzalloc(&op->dev, sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	d->base = devm_ioremap_resource(&op->dev, iores);
+	if (IS_ERR(d->base))
+		return PTR_ERR(d->base);
+
+	of_id = of_match_device(k3_pdma_dt_ids, &op->dev);
+	if (of_id) {
+		of_property_read_u32((&op->dev)->of_node,
+				"dma-channels", &d->dma_channels);
+		of_property_read_u32((&op->dev)->of_node,
+				"dma-requests", &d->dma_requests);
+	}
+
+	d->clk = devm_clk_get(&op->dev, NULL);
+	if (IS_ERR(d->clk)) {
+		dev_err(&op->dev, "no dma clk\n");
+		return PTR_ERR(d->clk);
+	}
+
+	irq = platform_get_irq(op, 0);
+	ret = devm_request_irq(&op->dev, irq,
+			k3_dma_int_handler, 0, DRIVER_NAME, d);
+	if (ret)
+		return ret;
+
+	d->irq = irq;
+
+	/* A DMA memory pool for LLIs, align on 32-byte boundary */
+	d->pool = dmam_pool_create(DRIVER_NAME, &op->dev,
+					LLI_BLOCK_SIZE, 32, 0);
+	if (!d->pool)
+		return -ENOMEM;
+
+	/* init phy channel */
+	d->phy = devm_kcalloc(&op->dev,
+		d->dma_channels, sizeof(struct k3_dma_phy), GFP_KERNEL);
+	if (d->phy == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < d->dma_channels; i++) {
+		struct k3_dma_phy *p = &d->phy[i];
+
+		p->idx = i;
+		p->base = d->base + i * 0x40;
+	}
+
+	INIT_LIST_HEAD(&d->slave.channels);
+	dma_cap_set(DMA_SLAVE, d->slave.cap_mask);
+	dma_cap_set(DMA_MEMCPY, d->slave.cap_mask);
+	dma_cap_set(DMA_CYCLIC, d->slave.cap_mask);
+	d->slave.dev = &op->dev;
+	d->slave.device_free_chan_resources = k3_dma_free_chan_resources;
+	d->slave.device_tx_status = k3_dma_tx_status;
+	d->slave.device_prep_dma_memcpy = k3_dma_prep_memcpy;
+	d->slave.device_prep_slave_sg = k3_dma_prep_slave_sg;
+	d->slave.device_prep_dma_cyclic = k3_dma_prep_dma_cyclic;
+	d->slave.device_issue_pending = k3_dma_issue_pending;
+	d->slave.device_config = k3_dma_config;
+	d->slave.device_pause = k3_dma_transfer_pause;
+	d->slave.device_resume = k3_dma_transfer_resume;
+	d->slave.device_terminate_all = k3_dma_terminate_all;
+	d->slave.device_synchronize = k3_dma_synchronize;
+	d->slave.copy_align = DMAENGINE_ALIGN_8_BYTES;
+
+	/* init virtual channel */
+	d->chans = devm_kcalloc(&op->dev,
+		d->dma_requests, sizeof(struct k3_dma_chan), GFP_KERNEL);
+	if (d->chans == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < d->dma_requests; i++) {
+		struct k3_dma_chan *c = &d->chans[i];
+
+		c->status = DMA_IN_PROGRESS;
+		INIT_LIST_HEAD(&c->node);
+		c->vc.desc_free = k3_dma_free_desc;
+		vchan_init(&c->vc, &d->slave);
+	}
+
+	/* Enable clock before accessing registers */
+	ret = clk_prepare_enable(d->clk);
+	if (ret < 0) {
+		dev_err(&op->dev, "clk_prepare_enable failed: %d\n", ret);
+		return ret;
+	}
+
+	k3_dma_enable_dma(d, true);
+
+	ret = dma_async_device_register(&d->slave);
+	if (ret)
+		goto dma_async_register_fail;
+
+	ret = of_dma_controller_register((&op->dev)->of_node,
+					k3_of_dma_simple_xlate, d);
+	if (ret)
+		goto of_dma_register_fail;
+
+	spin_lock_init(&d->lock);
+	INIT_LIST_HEAD(&d->chan_pending);
+	tasklet_init(&d->task, k3_dma_tasklet, (unsigned long)d);
+	platform_set_drvdata(op, d);
+	dev_info(&op->dev, "initialized\n");
+
+	return 0;
+
+of_dma_register_fail:
+	dma_async_device_unregister(&d->slave);
+dma_async_register_fail:
+	clk_disable_unprepare(d->clk);
+	return ret;
+}
+
+static int k3_dma_remove(struct platform_device *op)
+{
+	struct k3_dma_chan *c, *cn;
+	struct k3_dma_dev *d = platform_get_drvdata(op);
+
+	dma_async_device_unregister(&d->slave);
+	of_dma_controller_free((&op->dev)->of_node);
+
+	devm_free_irq(&op->dev, d->irq, d);
+
+	list_for_each_entry_safe(c, cn, &d->slave.channels, vc.chan.device_node) {
+		list_del(&c->vc.chan.device_node);
+		tasklet_kill(&c->vc.task);
+	}
+	tasklet_kill(&d->task);
+	clk_disable_unprepare(d->clk);
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int k3_dma_suspend_dev(struct device *dev)
+{
+	struct k3_dma_dev *d = dev_get_drvdata(dev);
+	u32 stat = 0;
+
+	stat = k3_dma_get_chan_stat(d);
+	if (stat) {
+		dev_warn(d->slave.dev,
+			"chan %d is running, fail to suspend\n", stat);
+		return -1;
+	}
+	k3_dma_enable_dma(d, false);
+	clk_disable_unprepare(d->clk);
+	return 0;
+}
+
+static int k3_dma_resume_dev(struct device *dev)
+{
+	struct k3_dma_dev *d = dev_get_drvdata(dev);
+	int ret = 0;
+
+	ret = clk_prepare_enable(d->clk);
+	if (ret < 0) {
+		dev_err(d->slave.dev, "clk_prepare_enable failed: %d\n", ret);
+		return ret;
+	}
+	k3_dma_enable_dma(d, true);
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(k3_dma_pmops, k3_dma_suspend_dev, k3_dma_resume_dev);
+
+static struct platform_driver k3_pdma_driver = {
+	.driver		= {
+		.name	= DRIVER_NAME,
+		.pm	= &k3_dma_pmops,
+		.of_match_table = k3_pdma_dt_ids,
+	},
+	.probe		= k3_dma_probe,
+	.remove		= k3_dma_remove,
+};
+
+module_platform_driver(k3_pdma_driver);
+
+MODULE_DESCRIPTION("Hisilicon k3 DMA Driver");
+MODULE_ALIAS("platform:k3dma");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/lpc18xx-dmamux.c b/drivers/dma/lpc18xx-dmamux.c
new file mode 100644
index 0000000..761f326
--- /dev/null
+++ b/drivers/dma/lpc18xx-dmamux.c
@@ -0,0 +1,183 @@
+/*
+ * DMA Router driver for LPC18xx/43xx DMA MUX
+ *
+ * Copyright (C) 2015 Joachim Eastwood <manabian@gmail.com>
+ *
+ * Based on TI DMA Crossbar driver by:
+ *   Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com
+ *   Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/regmap.h>
+#include <linux/spinlock.h>
+
+/* CREG register offset and macros for mux manipulation */
+#define LPC18XX_CREG_DMAMUX		0x11c
+#define LPC18XX_DMAMUX_VAL(v, n)	((v) << (n * 2))
+#define LPC18XX_DMAMUX_MASK(n)		(0x3 << (n * 2))
+#define LPC18XX_DMAMUX_MAX_VAL		0x3
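+
+/*
+ * Each DMA request line owns a 2-bit field in the CREG DMAMUX register.
+ * As an illustration (the numbers are made up), selecting mux value 2
+ * for request line 5 works out to:
+ *
+ *   LPC18XX_DMAMUX_MASK(5)   = 0x3 << 10
+ *   LPC18XX_DMAMUX_VAL(2, 5) = 0x2 << 10
+ *
+ * which is exactly the pair lpc18xx_dmamux_reserve() passes to
+ * regmap_update_bits() below.
+ */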
+
+struct lpc18xx_dmamux {
+	u32 value;
+	bool busy;
+};
+
+struct lpc18xx_dmamux_data {
+	struct dma_router dmarouter;
+	struct lpc18xx_dmamux *muxes;
+	u32 dma_master_requests;
+	u32 dma_mux_requests;
+	struct regmap *reg;
+	spinlock_t lock;
+};
+
+static void lpc18xx_dmamux_free(struct device *dev, void *route_data)
+{
+	struct lpc18xx_dmamux_data *dmamux = dev_get_drvdata(dev);
+	struct lpc18xx_dmamux *mux = route_data;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dmamux->lock, flags);
+	mux->busy = false;
+	spin_unlock_irqrestore(&dmamux->lock, flags);
+}
+
+static void *lpc18xx_dmamux_reserve(struct of_phandle_args *dma_spec,
+				    struct of_dma *ofdma)
+{
+	struct platform_device *pdev = of_find_device_by_node(ofdma->of_node);
+	struct lpc18xx_dmamux_data *dmamux = platform_get_drvdata(pdev);
+	unsigned long flags;
+	unsigned mux;
+
+	if (dma_spec->args_count != 3) {
+		dev_err(&pdev->dev, "invalid number of dma mux args\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	mux = dma_spec->args[0];
+	if (mux >= dmamux->dma_master_requests) {
+		dev_err(&pdev->dev, "invalid mux number: %d\n",
+			dma_spec->args[0]);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (dma_spec->args[1] > LPC18XX_DMAMUX_MAX_VAL) {
+		dev_err(&pdev->dev, "invalid dma mux value: %d\n",
+			dma_spec->args[1]);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* The of_node_put() will be done in the core for the node */
+	dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0);
+	if (!dma_spec->np) {
+		dev_err(&pdev->dev, "can't get dma master\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	spin_lock_irqsave(&dmamux->lock, flags);
+	if (dmamux->muxes[mux].busy) {
+		spin_unlock_irqrestore(&dmamux->lock, flags);
+		dev_err(&pdev->dev, "dma request %u busy with %u.%u\n",
+			mux, mux, dmamux->muxes[mux].value);
+		of_node_put(dma_spec->np);
+		return ERR_PTR(-EBUSY);
+	}
+
+	dmamux->muxes[mux].busy = true;
+	dmamux->muxes[mux].value = dma_spec->args[1];
+
+	regmap_update_bits(dmamux->reg, LPC18XX_CREG_DMAMUX,
+			   LPC18XX_DMAMUX_MASK(mux),
+			   LPC18XX_DMAMUX_VAL(dmamux->muxes[mux].value, mux));
+	spin_unlock_irqrestore(&dmamux->lock, flags);
+
+	dma_spec->args[1] = dma_spec->args[2];
+	dma_spec->args_count = 2;
+
+	dev_dbg(&pdev->dev, "mapping dmamux %u.%u to dma request %u\n", mux,
+		dmamux->muxes[mux].value, mux);
+
+	return &dmamux->muxes[mux];
+}
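+
+/*
+ * The router above expects three cells from a client: the master request
+ * number, the mux value (0-3) and the cell that is forwarded unchanged to
+ * the DMA master. A hypothetical client property could therefore read:
+ *
+ *   dmas = <&dmamux 5 2 0x1>;
+ *
+ * after which the spec handed to the master referenced by "dma-masters"
+ * is effectively <5 0x1>, with request 5 routed through mux value 2.
+ */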
+
+static int lpc18xx_dmamux_probe(struct platform_device *pdev)
+{
+	struct device_node *dma_np, *np = pdev->dev.of_node;
+	struct lpc18xx_dmamux_data *dmamux;
+	int ret;
+
+	dmamux = devm_kzalloc(&pdev->dev, sizeof(*dmamux), GFP_KERNEL);
+	if (!dmamux)
+		return -ENOMEM;
+
+	dmamux->reg = syscon_regmap_lookup_by_compatible("nxp,lpc1850-creg");
+	if (IS_ERR(dmamux->reg)) {
+		dev_err(&pdev->dev, "syscon lookup failed\n");
+		return PTR_ERR(dmamux->reg);
+	}
+
+	ret = of_property_read_u32(np, "dma-requests",
+				   &dmamux->dma_mux_requests);
+	if (ret) {
+		dev_err(&pdev->dev, "missing dma-requests property\n");
+		return ret;
+	}
+
+	dma_np = of_parse_phandle(np, "dma-masters", 0);
+	if (!dma_np) {
+		dev_err(&pdev->dev, "can't get dma master\n");
+		return -ENODEV;
+	}
+
+	ret = of_property_read_u32(dma_np, "dma-requests",
+				   &dmamux->dma_master_requests);
+	of_node_put(dma_np);
+	if (ret) {
+		dev_err(&pdev->dev, "missing master dma-requests property\n");
+		return ret;
+	}
+
+	dmamux->muxes = devm_kcalloc(&pdev->dev, dmamux->dma_master_requests,
+				     sizeof(struct lpc18xx_dmamux),
+				     GFP_KERNEL);
+	if (!dmamux->muxes)
+		return -ENOMEM;
+
+	spin_lock_init(&dmamux->lock);
+	platform_set_drvdata(pdev, dmamux);
+	dmamux->dmarouter.dev = &pdev->dev;
+	dmamux->dmarouter.route_free = lpc18xx_dmamux_free;
+
+	return of_dma_router_register(np, lpc18xx_dmamux_reserve,
+				      &dmamux->dmarouter);
+}
+
+static const struct of_device_id lpc18xx_dmamux_match[] = {
+	{ .compatible = "nxp,lpc1850-dmamux" },
+	{},
+};
+
+static struct platform_driver lpc18xx_dmamux_driver = {
+	.probe	= lpc18xx_dmamux_probe,
+	.driver = {
+		.name = "lpc18xx-dmamux",
+		.of_match_table = lpc18xx_dmamux_match,
+	},
+};
+
+static int __init lpc18xx_dmamux_init(void)
+{
+	return platform_driver_register(&lpc18xx_dmamux_driver);
+}
+arch_initcall(lpc18xx_dmamux_init);
diff --git a/drivers/dma/mediatek/Kconfig b/drivers/dma/mediatek/Kconfig
new file mode 100644
index 0000000..27bac0b
--- /dev/null
+++ b/drivers/dma/mediatek/Kconfig
@@ -0,0 +1,13 @@
+
+config MTK_HSDMA
+	tristate "MediaTek High-Speed DMA controller support"
+	depends on ARCH_MEDIATEK || COMPILE_TEST
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	---help---
+	  Enable support for the High-Speed DMA controller on MediaTek
+	  SoCs.
+
+	  This controller provides channels dedicated to memory-to-memory
+	  transfers, offloading work from the CPU through ring-based
+	  descriptor management.
diff --git a/drivers/dma/mediatek/Makefile b/drivers/dma/mediatek/Makefile
new file mode 100644
index 0000000..6e778f8
--- /dev/null
+++ b/drivers/dma/mediatek/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_MTK_HSDMA) += mtk-hsdma.o
diff --git a/drivers/dma/mediatek/mtk-hsdma.c b/drivers/dma/mediatek/mtk-hsdma.c
new file mode 100644
index 0000000..b7ec56a
--- /dev/null
+++ b/drivers/dma/mediatek/mtk-hsdma.c
@@ -0,0 +1,1056 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017-2018 MediaTek Inc.
+
+/*
+ * Driver for MediaTek High-Speed DMA Controller
+ *
+ * Author: Sean Wang <sean.wang@mediatek.com>
+ *
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/iopoll.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/refcount.h>
+#include <linux/slab.h>
+
+#include "../virt-dma.h"
+
+#define MTK_HSDMA_USEC_POLL		20
+#define MTK_HSDMA_TIMEOUT_POLL		200000
+#define MTK_HSDMA_DMA_BUSWIDTHS		BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)
+
+/* The default number of virtual channels */
+#define MTK_HSDMA_NR_VCHANS		3
+
+/* Only one physical channel supported */
+#define MTK_HSDMA_NR_MAX_PCHANS		1
+
+/* Macro for physical descriptor (PD) manipulation */
+/* The number of PDs, which must be a power of 2 */
+#define MTK_DMA_SIZE			64
+#define MTK_HSDMA_NEXT_DESP_IDX(x, y)	(((x) + 1) & ((y) - 1))
+#define MTK_HSDMA_LAST_DESP_IDX(x, y)	(((x) - 1) & ((y) - 1))
+#define MTK_HSDMA_MAX_LEN		0x3f80
+#define MTK_HSDMA_ALIGN_SIZE		4
+#define MTK_HSDMA_PLEN_MASK		0x3fff
+#define MTK_HSDMA_DESC_PLEN(x)		(((x) & MTK_HSDMA_PLEN_MASK) << 16)
+#define MTK_HSDMA_DESC_PLEN_GET(x)	(((x) >> 16) & MTK_HSDMA_PLEN_MASK)
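+
+/*
+ * A quick sketch of how the ring-index and length macros above behave
+ * with the default MTK_DMA_SIZE of 64 (so the index math masks with 63):
+ *
+ *   MTK_HSDMA_NEXT_DESP_IDX(63, MTK_DMA_SIZE) -> 0   (wraps around)
+ *   MTK_HSDMA_LAST_DESP_IDX(0, MTK_DMA_SIZE)  -> 63
+ *
+ * The payload length is packed into bits [29:16] of desc2, e.g.
+ * MTK_HSDMA_DESC_PLEN(0x100) == 0x100 << 16, which
+ * MTK_HSDMA_DESC_PLEN_GET() later recovers.
+ */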
+
+/* Registers for underlying ring manipulation */
+#define MTK_HSDMA_TX_BASE		0x0
+#define MTK_HSDMA_TX_CNT		0x4
+#define MTK_HSDMA_TX_CPU		0x8
+#define MTK_HSDMA_TX_DMA		0xc
+#define MTK_HSDMA_RX_BASE		0x100
+#define MTK_HSDMA_RX_CNT		0x104
+#define MTK_HSDMA_RX_CPU		0x108
+#define MTK_HSDMA_RX_DMA		0x10c
+
+/* Registers for global setup */
+#define MTK_HSDMA_GLO			0x204
+#define MTK_HSDMA_GLO_MULTI_DMA		BIT(10)
+#define MTK_HSDMA_TX_WB_DDONE		BIT(6)
+#define MTK_HSDMA_BURST_64BYTES		(0x2 << 4)
+#define MTK_HSDMA_GLO_RX_BUSY		BIT(3)
+#define MTK_HSDMA_GLO_RX_DMA		BIT(2)
+#define MTK_HSDMA_GLO_TX_BUSY		BIT(1)
+#define MTK_HSDMA_GLO_TX_DMA		BIT(0)
+#define MTK_HSDMA_GLO_DMA		(MTK_HSDMA_GLO_TX_DMA |	\
+					 MTK_HSDMA_GLO_RX_DMA)
+#define MTK_HSDMA_GLO_BUSY		(MTK_HSDMA_GLO_RX_BUSY | \
+					 MTK_HSDMA_GLO_TX_BUSY)
+#define MTK_HSDMA_GLO_DEFAULT		(MTK_HSDMA_GLO_TX_DMA | \
+					 MTK_HSDMA_GLO_RX_DMA | \
+					 MTK_HSDMA_TX_WB_DDONE | \
+					 MTK_HSDMA_BURST_64BYTES | \
+					 MTK_HSDMA_GLO_MULTI_DMA)
+
+/* Registers for reset */
+#define MTK_HSDMA_RESET			0x208
+#define MTK_HSDMA_RST_TX		BIT(0)
+#define MTK_HSDMA_RST_RX		BIT(16)
+
+/* Registers for interrupt control */
+#define MTK_HSDMA_DLYINT		0x20c
+#define MTK_HSDMA_RXDLY_INT_EN		BIT(15)
+
+/* Interrupt fires when the number of pending PDs exceeds the specified threshold */
+#define MTK_HSDMA_RXMAX_PINT(x)		(((x) & 0x7f) << 8)
+
+/* Interrupt fires when the pending time exceeds the specified threshold, in units of 20 us */
+#define MTK_HSDMA_RXMAX_PTIME(x)	((x) & 0x7f)
+#define MTK_HSDMA_DLYINT_DEFAULT	(MTK_HSDMA_RXDLY_INT_EN | \
+					 MTK_HSDMA_RXMAX_PINT(20) | \
+					 MTK_HSDMA_RXMAX_PTIME(20))
+#define MTK_HSDMA_INT_STATUS		0x220
+#define MTK_HSDMA_INT_ENABLE		0x228
+#define MTK_HSDMA_INT_RXDONE		BIT(16)
+
+enum mtk_hsdma_vdesc_flag {
+	MTK_HSDMA_VDESC_FINISHED	= 0x01,
+};
+
+#define IS_MTK_HSDMA_VDESC_FINISHED(x) ((x) == MTK_HSDMA_VDESC_FINISHED)
+
+/**
+ * struct mtk_hsdma_pdesc - This is the struct holding info describing a
+ *			    physical descriptor (PD); it must be kept
+ *			    4-byte aligned and is laid out in little-endian
+ *			    order.
+ * @desc[1-4]:		    The control words telling the hardware how to
+ *			    handle the descriptor, such as the source and
+ *			    destination addresses and the data length. The
+ *			    maximum data length each pdesc can handle is
+ *			    0x3f80 bytes.
+ */
+struct mtk_hsdma_pdesc {
+	__le32 desc1;
+	__le32 desc2;
+	__le32 desc3;
+	__le32 desc4;
+} __packed __aligned(4);
+
+/**
+ * struct mtk_hsdma_vdesc - This is the struct holding info describing virtual
+ *			    descriptor (VD)
+ * @vd:			    An instance for struct virt_dma_desc
+ * @len:		    The total data size the device wants to move
+ * @residue:		    The remaining data size the device will move
+ * @dest:		    The destination address the device moves data to
+ * @src:		    The source address the device moves data from
+ */
+struct mtk_hsdma_vdesc {
+	struct virt_dma_desc vd;
+	size_t len;
+	size_t residue;
+	dma_addr_t dest;
+	dma_addr_t src;
+};
+
+/**
+ * struct mtk_hsdma_cb - This is the struct holding extra info required for RX
+ *			 ring to know which VD the PD is mapped to.
+ * @vd:			 Pointer to the relevant VD.
+ * @flag:		 Flag indicating what action should be taken when VD
+ *			 is completed.
+ */
+struct mtk_hsdma_cb {
+	struct virt_dma_desc *vd;
+	enum mtk_hsdma_vdesc_flag flag;
+};
+
+/**
+ * struct mtk_hsdma_ring - This struct holds info describing underlying ring
+ *			   space
+ * @txd:		   The descriptor TX ring which describes DMA source
+ *			   information
+ * @rxd:		   The descriptor RX ring which describes DMA
+ *			   destination information
+ * @cb:			   The extra information pointed at by RX ring
+ * @tphys:		   The physical addr of TX ring
+ * @rphys:		   The physical addr of RX ring
+ * @cur_tptr:		   Index of the next free TX descriptor to be used
+ *			   by the host
+ * @cur_rptr:		   Index of the last RX descriptor completed by the
+ *			   device
+ */
+struct mtk_hsdma_ring {
+	struct mtk_hsdma_pdesc *txd;
+	struct mtk_hsdma_pdesc *rxd;
+	struct mtk_hsdma_cb *cb;
+	dma_addr_t tphys;
+	dma_addr_t rphys;
+	u16 cur_tptr;
+	u16 cur_rptr;
+};
+
+/**
+ * struct mtk_hsdma_pchan - This is the struct holding info describing physical
+ *			   channel (PC)
+ * @ring:		   An instance for the underlying ring
+ * @sz_ring:		   Total size allocated for the ring
+ * @nr_free:		   Total number of free rooms in the ring. It is
+ *			   accessed and updated frequently from both IRQ
+ *			   context and user context to reflect whether the
+ *			   ring can accept requests from a VD.
+ */
+struct mtk_hsdma_pchan {
+	struct mtk_hsdma_ring ring;
+	size_t sz_ring;
+	atomic_t nr_free;
+};
+
+/**
+ * struct mtk_hsdma_vchan - This is the struct holding info describing virtual
+ *			   channel (VC)
+ * @vc:			   An instance for struct virt_dma_chan
+ * @issue_completion:	   Completion used to wait for all issued
+ *			   descriptors to complete
+ * @issue_synchronize:	   Bool indicating that channel synchronization has
+ *			   started
+ * @desc_hw_processing:	   List of descriptors the hardware is processing,
+ *			   protected by vc.lock
+ */
+struct mtk_hsdma_vchan {
+	struct virt_dma_chan vc;
+	struct completion issue_completion;
+	bool issue_synchronize;
+	struct list_head desc_hw_processing;
+};
+
+/**
+ * struct mtk_hsdma_soc - This is the struct holding differences among SoCs
+ * @ddone:		  Bit mask for DDONE
+ * @ls0:		  Bit mask for LS0
+ */
+struct mtk_hsdma_soc {
+	__le32 ddone;
+	__le32 ls0;
+};
+
+/**
+ * struct mtk_hsdma_device - This is the struct holding info describing HSDMA
+ *			     device
+ * @ddev:		     An instance for struct dma_device
+ * @base:		     The mapped register I/O base
+ * @clk:		     The clock the device uses internally
+ * @irq:		     The IRQ the device is using
+ * @dma_requests:	     The number of VCs the device supports
+ * @vc:			     The pointer to all available VCs
+ * @pc:			     The pointer to the underlying PC
+ * @pc_refcnt:		     Track how many VCs are using the PC
+ * @lock:		     Lock protecting against multiple VCs accessing
+ *			     the PC
+ * @soc:		     The pointer to the area holding differences
+ *			     among various platforms
+ */
+struct mtk_hsdma_device {
+	struct dma_device ddev;
+	void __iomem *base;
+	struct clk *clk;
+	u32 irq;
+
+	u32 dma_requests;
+	struct mtk_hsdma_vchan *vc;
+	struct mtk_hsdma_pchan *pc;
+	refcount_t pc_refcnt;
+
+	/* Lock used to protect against multiple VCs accessing the PC */
+	spinlock_t lock;
+
+	const struct mtk_hsdma_soc *soc;
+};
+
+static struct mtk_hsdma_device *to_hsdma_dev(struct dma_chan *chan)
+{
+	return container_of(chan->device, struct mtk_hsdma_device, ddev);
+}
+
+static inline struct mtk_hsdma_vchan *to_hsdma_vchan(struct dma_chan *chan)
+{
+	return container_of(chan, struct mtk_hsdma_vchan, vc.chan);
+}
+
+static struct mtk_hsdma_vdesc *to_hsdma_vdesc(struct virt_dma_desc *vd)
+{
+	return container_of(vd, struct mtk_hsdma_vdesc, vd);
+}
+
+static struct device *hsdma2dev(struct mtk_hsdma_device *hsdma)
+{
+	return hsdma->ddev.dev;
+}
+
+static u32 mtk_dma_read(struct mtk_hsdma_device *hsdma, u32 reg)
+{
+	return readl(hsdma->base + reg);
+}
+
+static void mtk_dma_write(struct mtk_hsdma_device *hsdma, u32 reg, u32 val)
+{
+	writel(val, hsdma->base + reg);
+}
+
+static void mtk_dma_rmw(struct mtk_hsdma_device *hsdma, u32 reg,
+			u32 mask, u32 set)
+{
+	u32 val;
+
+	val = mtk_dma_read(hsdma, reg);
+	val &= ~mask;
+	val |= set;
+	mtk_dma_write(hsdma, reg, val);
+}
+
+static void mtk_dma_set(struct mtk_hsdma_device *hsdma, u32 reg, u32 val)
+{
+	mtk_dma_rmw(hsdma, reg, 0, val);
+}
+
+static void mtk_dma_clr(struct mtk_hsdma_device *hsdma, u32 reg, u32 val)
+{
+	mtk_dma_rmw(hsdma, reg, val, 0);
+}
+
+static void mtk_hsdma_vdesc_free(struct virt_dma_desc *vd)
+{
+	kfree(container_of(vd, struct mtk_hsdma_vdesc, vd));
+}
+
+static int mtk_hsdma_busy_wait(struct mtk_hsdma_device *hsdma)
+{
+	u32 status = 0;
+
+	return readl_poll_timeout(hsdma->base + MTK_HSDMA_GLO, status,
+				  !(status & MTK_HSDMA_GLO_BUSY),
+				  MTK_HSDMA_USEC_POLL,
+				  MTK_HSDMA_TIMEOUT_POLL);
+}
+
+static int mtk_hsdma_alloc_pchan(struct mtk_hsdma_device *hsdma,
+				 struct mtk_hsdma_pchan *pc)
+{
+	struct mtk_hsdma_ring *ring = &pc->ring;
+	int err;
+
+	memset(pc, 0, sizeof(*pc));
+
+	/*
+	 * Allocate ring space where [0 ... MTK_DMA_SIZE - 1] is for TX ring
+	 * and [MTK_DMA_SIZE ... 2 * MTK_DMA_SIZE - 1] is for RX ring.
+	 */
+	pc->sz_ring = 2 * MTK_DMA_SIZE * sizeof(*ring->txd);
+	ring->txd = dma_zalloc_coherent(hsdma2dev(hsdma), pc->sz_ring,
+					&ring->tphys, GFP_NOWAIT);
+	if (!ring->txd)
+		return -ENOMEM;
+
+	ring->rxd = &ring->txd[MTK_DMA_SIZE];
+	ring->rphys = ring->tphys + MTK_DMA_SIZE * sizeof(*ring->txd);
+	ring->cur_tptr = 0;
+	ring->cur_rptr = MTK_DMA_SIZE - 1;
+
+	ring->cb = kcalloc(MTK_DMA_SIZE, sizeof(*ring->cb), GFP_NOWAIT);
+	if (!ring->cb) {
+		err = -ENOMEM;
+		goto err_free_dma;
+	}
+
+	atomic_set(&pc->nr_free, MTK_DMA_SIZE - 1);
+
+	/* Disable HSDMA and wait for the completion */
+	mtk_dma_clr(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DMA);
+	err = mtk_hsdma_busy_wait(hsdma);
+	if (err)
+		goto err_free_cb;
+
+	/* Reset */
+	mtk_dma_set(hsdma, MTK_HSDMA_RESET,
+		    MTK_HSDMA_RST_TX | MTK_HSDMA_RST_RX);
+	mtk_dma_clr(hsdma, MTK_HSDMA_RESET,
+		    MTK_HSDMA_RST_TX | MTK_HSDMA_RST_RX);
+
+	/* Setup HSDMA initial pointer in the ring */
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_BASE, ring->tphys);
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_CNT, MTK_DMA_SIZE);
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_CPU, ring->cur_tptr);
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_DMA, 0);
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_BASE, ring->rphys);
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_CNT, MTK_DMA_SIZE);
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_CPU, ring->cur_rptr);
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_DMA, 0);
+
+	/* Enable HSDMA */
+	mtk_dma_set(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DMA);
+
+	/* Setup delayed interrupt */
+	mtk_dma_write(hsdma, MTK_HSDMA_DLYINT, MTK_HSDMA_DLYINT_DEFAULT);
+
+	/* Enable interrupt */
+	mtk_dma_set(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE);
+
+	return 0;
+
+err_free_cb:
+	kfree(ring->cb);
+
+err_free_dma:
+	dma_free_coherent(hsdma2dev(hsdma),
+			  pc->sz_ring, ring->txd, ring->tphys);
+	return err;
+}
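+
+/*
+ * Resulting layout of the coherent allocation made above, shown only as
+ * an illustration (indices assume the default MTK_DMA_SIZE of 64):
+ *
+ *   ring->txd[0] .. ring->txd[63]   TX PDs, device address ring->tphys
+ *   ring->rxd[0] .. ring->rxd[63]   RX PDs, device address ring->rphys
+ *                                   (= tphys + 64 * sizeof(*txd))
+ *
+ * cur_tptr starts at 0, cur_rptr at 63, and nr_free is kept one below
+ * MTK_DMA_SIZE so the software and hardware pointers never collide.
+ */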
+
+static void mtk_hsdma_free_pchan(struct mtk_hsdma_device *hsdma,
+				 struct mtk_hsdma_pchan *pc)
+{
+	struct mtk_hsdma_ring *ring = &pc->ring;
+
+	/* Disable HSDMA and then wait for the completion */
+	mtk_dma_clr(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DMA);
+	mtk_hsdma_busy_wait(hsdma);
+
+	/* Reset pointer in the ring */
+	mtk_dma_clr(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE);
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_BASE, 0);
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_CNT, 0);
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_CPU, 0);
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_BASE, 0);
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_CNT, 0);
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_CPU, MTK_DMA_SIZE - 1);
+
+	kfree(ring->cb);
+
+	dma_free_coherent(hsdma2dev(hsdma),
+			  pc->sz_ring, ring->txd, ring->tphys);
+}
+
+static int mtk_hsdma_issue_pending_vdesc(struct mtk_hsdma_device *hsdma,
+					 struct mtk_hsdma_pchan *pc,
+					 struct mtk_hsdma_vdesc *hvd)
+{
+	struct mtk_hsdma_ring *ring = &pc->ring;
+	struct mtk_hsdma_pdesc *txd, *rxd;
+	u16 reserved, prev, tlen, num_sgs;
+	unsigned long flags;
+
+	/* Protect against the PC being accessed by multiple VCs simultaneously */
+	spin_lock_irqsave(&hsdma->lock, flags);
+
+	/*
+	 * Reserve rooms; pc->nr_free tracks how many free rooms are left in
+	 * the ring and is updated from both user and IRQ context.
+	 */
+	num_sgs = DIV_ROUND_UP(hvd->len, MTK_HSDMA_MAX_LEN);
+	reserved = min_t(u16, num_sgs, atomic_read(&pc->nr_free));
+
+	if (!reserved) {
+		spin_unlock_irqrestore(&hsdma->lock, flags);
+		return -ENOSPC;
+	}
+
+	atomic_sub(reserved, &pc->nr_free);
+
+	while (reserved--) {
+		/* Limit size by PD capability for valid data moving */
+		tlen = (hvd->len > MTK_HSDMA_MAX_LEN) ?
+		       MTK_HSDMA_MAX_LEN : hvd->len;
+
+		/*
+		 * Set up PDs using the remaining VD info, mapped onto the
+		 * reserved rooms. Since the RXD is shared memory between the
+		 * host and the device, allocated by the dma_alloc_coherent
+		 * call, the WRITE_ONCE helper ensures the data is really
+		 * written out to RAM.
+		 */
+		txd = &ring->txd[ring->cur_tptr];
+		WRITE_ONCE(txd->desc1, hvd->src);
+		WRITE_ONCE(txd->desc2,
+			   hsdma->soc->ls0 | MTK_HSDMA_DESC_PLEN(tlen));
+
+		rxd = &ring->rxd[ring->cur_tptr];
+		WRITE_ONCE(rxd->desc1, hvd->dest);
+		WRITE_ONCE(rxd->desc2, MTK_HSDMA_DESC_PLEN(tlen));
+
+		/* Associate the PD with the VD it belongs to */
+		ring->cb[ring->cur_tptr].vd = &hvd->vd;
+
+		/* Move forward the pointer of TX ring */
+		ring->cur_tptr = MTK_HSDMA_NEXT_DESP_IDX(ring->cur_tptr,
+							 MTK_DMA_SIZE);
+
+		/* Update VD with remaining data */
+		hvd->src  += tlen;
+		hvd->dest += tlen;
+		hvd->len  -= tlen;
+	}
+
+	/*
+	 * Tag the last PD of the VD: completing that PD is what completes
+	 * the VD.
+	 */
+	if (!hvd->len) {
+		prev = MTK_HSDMA_LAST_DESP_IDX(ring->cur_tptr, MTK_DMA_SIZE);
+		ring->cb[prev].flag = MTK_HSDMA_VDESC_FINISHED;
+	}
+
+	/* Ensure all changes are indeed done before we go on */
+	wmb();
+
+	/*
+	 * Writing the TX ring pointer to the hardware lets the HSDMA act on
+	 * the pending PDs.
+	 */
+	mtk_dma_write(hsdma, MTK_HSDMA_TX_CPU, ring->cur_tptr);
+
+	spin_unlock_irqrestore(&hsdma->lock, flags);
+
+	return 0;
+}
+
+static void mtk_hsdma_issue_vchan_pending(struct mtk_hsdma_device *hsdma,
+					  struct mtk_hsdma_vchan *hvc)
+{
+	struct virt_dma_desc *vd, *vd2;
+	int err;
+
+	lockdep_assert_held(&hvc->vc.lock);
+
+	list_for_each_entry_safe(vd, vd2, &hvc->vc.desc_issued, node) {
+		struct mtk_hsdma_vdesc *hvd;
+
+		hvd = to_hsdma_vdesc(vd);
+
+		/* Map the VD onto the PC; all VCs share a single PC */
+		err = mtk_hsdma_issue_pending_vdesc(hsdma, hsdma->pc, hvd);
+
+		/*
+		 * Move the VD from desc_issued to desc_hw_processing when the
+		 * entire VD fits into the available PDs. Otherwise, the
+		 * uncompleted VD stays on desc_issued and its processing
+		 * restarts as soon as the underlying ring space is freed.
+		 */
+		if (err == -ENOSPC || hvd->len > 0)
+			break;
+
+		/*
+		 * The extra list desc_hw_processing is needed because the
+		 * hardware can't tell us which VDs are still in flight on the
+		 * underlying ring. The additional list lets us implement
+		 * terminate_all, residue calculation and anything else that
+		 * needs detailed descriptor status from the hardware.
+		 */
+		list_move_tail(&vd->node, &hvc->desc_hw_processing);
+	}
+}
+
+static void mtk_hsdma_free_rooms_in_ring(struct mtk_hsdma_device *hsdma)
+{
+	struct mtk_hsdma_vchan *hvc;
+	struct mtk_hsdma_pdesc *rxd;
+	struct mtk_hsdma_vdesc *hvd;
+	struct mtk_hsdma_pchan *pc;
+	struct mtk_hsdma_cb *cb;
+	int i = MTK_DMA_SIZE;
+	__le32 desc2;
+	u32 status;
+	u16 next;
+
+	/* Read IRQ status */
+	status = mtk_dma_read(hsdma, MTK_HSDMA_INT_STATUS);
+	if (unlikely(!(status & MTK_HSDMA_INT_RXDONE)))
+		goto rx_done;
+
+	pc = hsdma->pc;
+
+	/*
+	 * Use a fail-safe loop of at most MTK_DMA_SIZE iterations to reclaim
+	 * the finished descriptors: the ISR never has more than MTK_DMA_SIZE
+	 * PDs to handle at one time, so we use that as the iteration limit
+	 * instead of a dangerous unbounded poll.
+	 */
+	while (i--) {
+		next = MTK_HSDMA_NEXT_DESP_IDX(pc->ring.cur_rptr,
+					       MTK_DMA_SIZE);
+		rxd = &pc->ring.rxd[next];
+
+		/*
+		 * If MTK_HSDMA_DESC_DDONE is not set, the data transfer for
+		 * this PD is still in progress.
+		 */
+		desc2 = READ_ONCE(rxd->desc2);
+		if (!(desc2 & hsdma->soc->ddone))
+			break;
+
+		cb = &pc->ring.cb[next];
+		if (unlikely(!cb->vd)) {
+			dev_err(hsdma2dev(hsdma), "cb->vd cannot be null\n");
+			break;
+		}
+
+		/* Update the residue of the VD the completed PD belongs to */
+		hvd = to_hsdma_vdesc(cb->vd);
+		hvd->residue -= MTK_HSDMA_DESC_PLEN_GET(rxd->desc2);
+
+		/* Complete the VD only once its last PD has finished */
+		if (IS_MTK_HSDMA_VDESC_FINISHED(cb->flag)) {
+			hvc = to_hsdma_vchan(cb->vd->tx.chan);
+
+			spin_lock(&hvc->vc.lock);
+
+			/* Remove VD from list desc_hw_processing */
+			list_del(&cb->vd->node);
+
+			/* Add VD into list desc_completed */
+			vchan_cookie_complete(cb->vd);
+
+			if (hvc->issue_synchronize &&
+			    list_empty(&hvc->desc_hw_processing)) {
+				complete(&hvc->issue_completion);
+				hvc->issue_synchronize = false;
+			}
+			spin_unlock(&hvc->vc.lock);
+
+			cb->flag = 0;
+		}
+
+		cb->vd = NULL;
+
+		/*
+		 * Recycle the RXD using the WRITE_ONCE helper to ensure the
+		 * data is really written back to RAM.
+		 */
+		WRITE_ONCE(rxd->desc1, 0);
+		WRITE_ONCE(rxd->desc2, 0);
+		pc->ring.cur_rptr = next;
+
+		/* Release rooms */
+		atomic_inc(&pc->nr_free);
+	}
+
+	/* Ensure all changes are indeed done before we go on */
+	wmb();
+
+	/* Update CPU pointer for those completed PDs */
+	mtk_dma_write(hsdma, MTK_HSDMA_RX_CPU, pc->ring.cur_rptr);
+
+	/*
+	 * Ack the pending IRQ so the hardware no longer keeps the IRQ line
+	 * asserted once software has handled all the finished physical
+	 * descriptors.
+	 */
+	if (atomic_read(&pc->nr_free) >= MTK_DMA_SIZE - 1)
+		mtk_dma_write(hsdma, MTK_HSDMA_INT_STATUS, status);
+
+	/* Handle pending VDs in all VCs as soon as some rooms are freed */
+	for (i = 0; i < hsdma->dma_requests; i++) {
+		hvc = &hsdma->vc[i];
+		spin_lock(&hvc->vc.lock);
+		mtk_hsdma_issue_vchan_pending(hsdma, hvc);
+		spin_unlock(&hvc->vc.lock);
+	}
+
+rx_done:
+	/* All completed PDs are cleaned up, so enable interrupt again */
+	mtk_dma_set(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE);
+}
+
+static irqreturn_t mtk_hsdma_irq(int irq, void *devid)
+{
+	struct mtk_hsdma_device *hsdma = devid;
+
+	/*
+	 * Disable the interrupt until all completed PDs are cleaned up by the
+	 * mtk_hsdma_free_rooms_in_ring() call.
+	 */
+	mtk_dma_clr(hsdma, MTK_HSDMA_INT_ENABLE, MTK_HSDMA_INT_RXDONE);
+
+	mtk_hsdma_free_rooms_in_ring(hsdma);
+
+	return IRQ_HANDLED;
+}
+
+static struct virt_dma_desc *mtk_hsdma_find_active_desc(struct dma_chan *c,
+							dma_cookie_t cookie)
+{
+	struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c);
+	struct virt_dma_desc *vd;
+
+	list_for_each_entry(vd, &hvc->desc_hw_processing, node)
+		if (vd->tx.cookie == cookie)
+			return vd;
+
+	list_for_each_entry(vd, &hvc->vc.desc_issued, node)
+		if (vd->tx.cookie == cookie)
+			return vd;
+
+	return NULL;
+}
+
+static enum dma_status mtk_hsdma_tx_status(struct dma_chan *c,
+					   dma_cookie_t cookie,
+					   struct dma_tx_state *txstate)
+{
+	struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c);
+	struct mtk_hsdma_vdesc *hvd;
+	struct virt_dma_desc *vd;
+	enum dma_status ret;
+	unsigned long flags;
+	size_t bytes = 0;
+
+	ret = dma_cookie_status(c, cookie, txstate);
+	if (ret == DMA_COMPLETE || !txstate)
+		return ret;
+
+	spin_lock_irqsave(&hvc->vc.lock, flags);
+	vd = mtk_hsdma_find_active_desc(c, cookie);
+	spin_unlock_irqrestore(&hvc->vc.lock, flags);
+
+	if (vd) {
+		hvd = to_hsdma_vdesc(vd);
+		bytes = hvd->residue;
+	}
+
+	dma_set_residue(txstate, bytes);
+
+	return ret;
+}
+
+static void mtk_hsdma_issue_pending(struct dma_chan *c)
+{
+	struct mtk_hsdma_device *hsdma = to_hsdma_dev(c);
+	struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c);
+	unsigned long flags;
+
+	spin_lock_irqsave(&hvc->vc.lock, flags);
+
+	if (vchan_issue_pending(&hvc->vc))
+		mtk_hsdma_issue_vchan_pending(hsdma, hvc);
+
+	spin_unlock_irqrestore(&hvc->vc.lock, flags);
+}
+
+static struct dma_async_tx_descriptor *
+mtk_hsdma_prep_dma_memcpy(struct dma_chan *c, dma_addr_t dest,
+			  dma_addr_t src, size_t len, unsigned long flags)
+{
+	struct mtk_hsdma_vdesc *hvd;
+
+	hvd = kzalloc(sizeof(*hvd), GFP_NOWAIT);
+	if (!hvd)
+		return NULL;
+
+	hvd->len = len;
+	hvd->residue = len;
+	hvd->src = src;
+	hvd->dest = dest;
+
+	return vchan_tx_prep(to_virt_chan(c), &hvd->vd, flags);
+}
+
+static int mtk_hsdma_free_inactive_desc(struct dma_chan *c)
+{
+	struct virt_dma_chan *vc = to_virt_chan(c);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&vc->lock, flags);
+	list_splice_tail_init(&vc->desc_allocated, &head);
+	list_splice_tail_init(&vc->desc_submitted, &head);
+	list_splice_tail_init(&vc->desc_issued, &head);
+	spin_unlock_irqrestore(&vc->lock, flags);
+
+	/* At this point, we don't expect users to put descriptors into the VC again */
+	vchan_dma_desc_free_list(vc, &head);
+
+	return 0;
+}
+
+static void mtk_hsdma_free_active_desc(struct dma_chan *c)
+{
+	struct mtk_hsdma_vchan *hvc = to_hsdma_vchan(c);
+	bool sync_needed = false;
+
+	/*
+	 * Once issue_synchronize is set, the synchronization must be
+	 * signalled as soon as the hardware has consumed all of the channel's
+	 * descriptors in the ring.
+	 */
+	spin_lock(&hvc->vc.lock);
+	if (!list_empty(&hvc->desc_hw_processing)) {
+		hvc->issue_synchronize = true;
+		sync_needed = true;
+	}
+	spin_unlock(&hvc->vc.lock);
+
+	if (sync_needed)
+		wait_for_completion(&hvc->issue_completion);
+	/*
+	 * At this point, we expect that all of the channel's remaining
+	 * descriptors in the ring have finished processing.
+	 */
+	WARN_ONCE(!list_empty(&hvc->desc_hw_processing),
+		  "Desc pending still in list desc_hw_processing\n");
+
+	/* Free all descriptors in list desc_completed */
+	vchan_synchronize(&hvc->vc);
+
+	WARN_ONCE(!list_empty(&hvc->vc.desc_completed),
+		  "Desc pending still in list desc_completed\n");
+}
+
+static int mtk_hsdma_terminate_all(struct dma_chan *c)
+{
+	/*
+	 * Free pending descriptors not processed yet by hardware that have
+	 * previously been submitted to the channel.
+	 */
+	mtk_hsdma_free_inactive_desc(c);
+
+	/*
+	 * However, the DMA engine doesn't provide any way to stop descriptors
+	 * that the hardware is currently processing. The only option is to
+	 * wait until they are all processed, which is what the
+	 * mtk_hsdma_free_active_desc call does.
+	 */
+	mtk_hsdma_free_active_desc(c);
+
+	return 0;
+}
+
+static int mtk_hsdma_alloc_chan_resources(struct dma_chan *c)
+{
+	struct mtk_hsdma_device *hsdma = to_hsdma_dev(c);
+	int err;
+
+	/*
+	 * Since the HSDMA has only one PC, its resources are allocated when
+	 * the first VC is created; all other VCs then run on the same PC.
+	 */
+	if (!refcount_read(&hsdma->pc_refcnt)) {
+		err = mtk_hsdma_alloc_pchan(hsdma, hsdma->pc);
+		if (err)
+			return err;
+		/*
+		 * refcount_inc() complains about incrementing from 0 (a
+		 * potential use-after-free), so explicitly set the count to 1
+		 * for the first user.
+		 */
+		refcount_set(&hsdma->pc_refcnt, 1);
+	} else {
+		refcount_inc(&hsdma->pc_refcnt);
+	}
+
+	return 0;
+}
+
+static void mtk_hsdma_free_chan_resources(struct dma_chan *c)
+{
+	struct mtk_hsdma_device *hsdma = to_hsdma_dev(c);
+
+	/* Free all descriptors in all lists on the VC */
+	mtk_hsdma_terminate_all(c);
+
+	/* The resource for PC is not freed until all the VCs are destroyed */
+	if (!refcount_dec_and_test(&hsdma->pc_refcnt))
+		return;
+
+	mtk_hsdma_free_pchan(hsdma, hsdma->pc);
+}
+
+static int mtk_hsdma_hw_init(struct mtk_hsdma_device *hsdma)
+{
+	int err;
+
+	pm_runtime_enable(hsdma2dev(hsdma));
+	pm_runtime_get_sync(hsdma2dev(hsdma));
+
+	err = clk_prepare_enable(hsdma->clk);
+	if (err)
+		return err;
+
+	mtk_dma_write(hsdma, MTK_HSDMA_INT_ENABLE, 0);
+	mtk_dma_write(hsdma, MTK_HSDMA_GLO, MTK_HSDMA_GLO_DEFAULT);
+
+	return 0;
+}
+
+static int mtk_hsdma_hw_deinit(struct mtk_hsdma_device *hsdma)
+{
+	mtk_dma_write(hsdma, MTK_HSDMA_GLO, 0);
+
+	clk_disable_unprepare(hsdma->clk);
+
+	pm_runtime_put_sync(hsdma2dev(hsdma));
+	pm_runtime_disable(hsdma2dev(hsdma));
+
+	return 0;
+}
+
+static const struct mtk_hsdma_soc mt7623_soc = {
+	.ddone = BIT(31),
+	.ls0 = BIT(30),
+};
+
+static const struct mtk_hsdma_soc mt7622_soc = {
+	.ddone = BIT(15),
+	.ls0 = BIT(14),
+};
+
+static const struct of_device_id mtk_hsdma_match[] = {
+	{ .compatible = "mediatek,mt7623-hsdma", .data = &mt7623_soc},
+	{ .compatible = "mediatek,mt7622-hsdma", .data = &mt7622_soc},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, mtk_hsdma_match);
+
+static int mtk_hsdma_probe(struct platform_device *pdev)
+{
+	struct mtk_hsdma_device *hsdma;
+	struct mtk_hsdma_vchan *vc;
+	struct dma_device *dd;
+	struct resource *res;
+	int i, err;
+
+	hsdma = devm_kzalloc(&pdev->dev, sizeof(*hsdma), GFP_KERNEL);
+	if (!hsdma)
+		return -ENOMEM;
+
+	dd = &hsdma->ddev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	hsdma->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(hsdma->base))
+		return PTR_ERR(hsdma->base);
+
+	hsdma->soc = of_device_get_match_data(&pdev->dev);
+	if (!hsdma->soc) {
+		dev_err(&pdev->dev, "No device match found\n");
+		return -ENODEV;
+	}
+
+	hsdma->clk = devm_clk_get(&pdev->dev, "hsdma");
+	if (IS_ERR(hsdma->clk)) {
+		dev_err(&pdev->dev, "No clock for %s\n",
+			dev_name(&pdev->dev));
+		return PTR_ERR(hsdma->clk);
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "No irq resource for %s\n",
+			dev_name(&pdev->dev));
+		return -EINVAL;
+	}
+	hsdma->irq = res->start;
+
+	refcount_set(&hsdma->pc_refcnt, 0);
+	spin_lock_init(&hsdma->lock);
+
+	dma_cap_set(DMA_MEMCPY, dd->cap_mask);
+
+	dd->copy_align = MTK_HSDMA_ALIGN_SIZE;
+	dd->device_alloc_chan_resources = mtk_hsdma_alloc_chan_resources;
+	dd->device_free_chan_resources = mtk_hsdma_free_chan_resources;
+	dd->device_tx_status = mtk_hsdma_tx_status;
+	dd->device_issue_pending = mtk_hsdma_issue_pending;
+	dd->device_prep_dma_memcpy = mtk_hsdma_prep_dma_memcpy;
+	dd->device_terminate_all = mtk_hsdma_terminate_all;
+	dd->src_addr_widths = MTK_HSDMA_DMA_BUSWIDTHS;
+	dd->dst_addr_widths = MTK_HSDMA_DMA_BUSWIDTHS;
+	dd->directions = BIT(DMA_MEM_TO_MEM);
+	dd->residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+	dd->dev = &pdev->dev;
+	INIT_LIST_HEAD(&dd->channels);
+
+	hsdma->dma_requests = MTK_HSDMA_NR_VCHANS;
+	if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node,
+						      "dma-requests",
+						      &hsdma->dma_requests)) {
+		dev_info(&pdev->dev,
+			 "Using %u since the dma-requests property is missing\n",
+			 MTK_HSDMA_NR_VCHANS);
+	}
+
+	hsdma->pc = devm_kcalloc(&pdev->dev, MTK_HSDMA_NR_MAX_PCHANS,
+				 sizeof(*hsdma->pc), GFP_KERNEL);
+	if (!hsdma->pc)
+		return -ENOMEM;
+
+	hsdma->vc = devm_kcalloc(&pdev->dev, hsdma->dma_requests,
+				 sizeof(*hsdma->vc), GFP_KERNEL);
+	if (!hsdma->vc)
+		return -ENOMEM;
+
+	for (i = 0; i < hsdma->dma_requests; i++) {
+		vc = &hsdma->vc[i];
+		vc->vc.desc_free = mtk_hsdma_vdesc_free;
+		vchan_init(&vc->vc, dd);
+		init_completion(&vc->issue_completion);
+		INIT_LIST_HEAD(&vc->desc_hw_processing);
+	}
+
+	err = dma_async_device_register(dd);
+	if (err)
+		return err;
+
+	err = of_dma_controller_register(pdev->dev.of_node,
+					 of_dma_xlate_by_chan_id, hsdma);
+	if (err) {
+		dev_err(&pdev->dev,
+			"MediaTek HSDMA OF registration failed %d\n", err);
+		goto err_unregister;
+	}
+
+	mtk_hsdma_hw_init(hsdma);
+
+	err = devm_request_irq(&pdev->dev, hsdma->irq,
+			       mtk_hsdma_irq, 0,
+			       dev_name(&pdev->dev), hsdma);
+	if (err) {
+		dev_err(&pdev->dev,
+			"request_irq failed with err %d\n", err);
+		goto err_unregister;
+	}
+
+	platform_set_drvdata(pdev, hsdma);
+
+	dev_info(&pdev->dev, "MediaTek HSDMA driver registered\n");
+
+	return 0;
+
+err_unregister:
+	dma_async_device_unregister(dd);
+
+	return err;
+}
+
+static int mtk_hsdma_remove(struct platform_device *pdev)
+{
+	struct mtk_hsdma_device *hsdma = platform_get_drvdata(pdev);
+	struct mtk_hsdma_vchan *vc;
+	int i;
+
+	/* Kill VC task */
+	for (i = 0; i < hsdma->dma_requests; i++) {
+		vc = &hsdma->vc[i];
+
+		list_del(&vc->vc.chan.device_node);
+		tasklet_kill(&vc->vc.task);
+	}
+
+	/* Disable DMA interrupt */
+	mtk_dma_write(hsdma, MTK_HSDMA_INT_ENABLE, 0);
+
+	/* Wait for any pending IRQ handlers to complete */
+	synchronize_irq(hsdma->irq);
+
+	/* Disable hardware */
+	mtk_hsdma_hw_deinit(hsdma);
+
+	dma_async_device_unregister(&hsdma->ddev);
+	of_dma_controller_free(pdev->dev.of_node);
+
+	return 0;
+}
+
+static struct platform_driver mtk_hsdma_driver = {
+	.probe		= mtk_hsdma_probe,
+	.remove		= mtk_hsdma_remove,
+	.driver = {
+		.name		= KBUILD_MODNAME,
+		.of_match_table	= mtk_hsdma_match,
+	},
+};
+module_platform_driver(mtk_hsdma_driver);
+
+MODULE_DESCRIPTION("MediaTek High-Speed DMA Controller Driver");
+MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/mic_x100_dma.c b/drivers/dma/mic_x100_dma.c
new file mode 100644
index 0000000..adfd316
--- /dev/null
+++ b/drivers/dma/mic_x100_dma.c
@@ -0,0 +1,799 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC X100 DMA Driver.
+ *
+ * Adapted from IOAT dma driver.
+ */
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/seq_file.h>
+#include <linux/vmalloc.h>
+
+#include "mic_x100_dma.h"
+
+#define MIC_DMA_MAX_XFER_SIZE_CARD  (1 * 1024 * 1024 -\
+				       MIC_DMA_ALIGN_BYTES)
+#define MIC_DMA_MAX_XFER_SIZE_HOST  (1 * 1024 * 1024 >> 1)
+#define MIC_DMA_DESC_TYPE_SHIFT	60
+#define MIC_DMA_MEMCPY_LEN_SHIFT 46
+#define MIC_DMA_STAT_INTR_SHIFT 59
+
+/* high-water mark for pushing dma descriptors */
+static int mic_dma_pending_level = 4;
+
+/* Status descriptor is used to write a 64 bit value to a memory location */
+enum mic_dma_desc_format_type {
+	MIC_DMA_MEMCPY = 1,
+	MIC_DMA_STATUS,
+};
+
+static inline u32 mic_dma_hw_ring_inc(u32 val)
+{
+	return (val + 1) % MIC_DMA_DESC_RX_SIZE;
+}
+
+static inline u32 mic_dma_hw_ring_dec(u32 val)
+{
+	return val ? val - 1 : MIC_DMA_DESC_RX_SIZE - 1;
+}
+
+static inline void mic_dma_hw_ring_inc_head(struct mic_dma_chan *ch)
+{
+	ch->head = mic_dma_hw_ring_inc(ch->head);
+}
+
+/* Prepare a memcpy desc */
+static inline void mic_dma_memcpy_desc(struct mic_dma_desc *desc,
+	dma_addr_t src_phys, dma_addr_t dst_phys, u64 size)
+{
+	u64 qw0, qw1;
+
+	qw0 = src_phys;
+	qw0 |= (size >> MIC_DMA_ALIGN_SHIFT) << MIC_DMA_MEMCPY_LEN_SHIFT;
+	qw1 = MIC_DMA_MEMCPY;
+	qw1 <<= MIC_DMA_DESC_TYPE_SHIFT;
+	qw1 |= dst_phys;
+	desc->qw0 = qw0;
+	desc->qw1 = qw1;
+}
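+
+/*
+ * Rough layout of the two qwords built above (MIC_DMA_ALIGN_SHIFT comes
+ * from the header, so the length scaling is only sketched here):
+ *
+ *   qw0: low bits   source physical address
+ *        [63:46]    transfer length in MIC_DMA_ALIGN_BYTES units
+ *   qw1: low bits   destination physical address
+ *        [63:60]    descriptor type (MIC_DMA_MEMCPY)
+ *
+ * The status descriptor below reuses the same type field and adds bit 59
+ * when an interrupt is requested.
+ */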
+
+/* Prepare a status desc. with @data to be written at @dst_phys */
+static inline void mic_dma_prep_status_desc(struct mic_dma_desc *desc, u64 data,
+	dma_addr_t dst_phys, bool generate_intr)
+{
+	u64 qw0, qw1;
+
+	qw0 = data;
+	qw1 = (u64) MIC_DMA_STATUS << MIC_DMA_DESC_TYPE_SHIFT | dst_phys;
+	if (generate_intr)
+		qw1 |= (1ULL << MIC_DMA_STAT_INTR_SHIFT);
+	desc->qw0 = qw0;
+	desc->qw1 = qw1;
+}
+
+static void mic_dma_cleanup(struct mic_dma_chan *ch)
+{
+	struct dma_async_tx_descriptor *tx;
+	u32 tail;
+	u32 last_tail;
+
+	spin_lock(&ch->cleanup_lock);
+	tail = mic_dma_read_cmp_cnt(ch);
+	/*
+	 * This is the barrier pair for the smp_wmb() in
+	 * mic_dma_tx_submit_unlock(). It's required so that we read the
+	 * updated cookie value from tx->cookie.
+	 */
+	smp_rmb();
+	for (last_tail = ch->last_tail; tail != last_tail;) {
+		tx = &ch->tx_array[last_tail];
+		if (tx->cookie) {
+			dma_cookie_complete(tx);
+			dmaengine_desc_get_callback_invoke(tx, NULL);
+			tx->callback = NULL;
+		}
+		last_tail = mic_dma_hw_ring_inc(last_tail);
+	}
+	/* finish all completion callbacks before incrementing tail */
+	smp_mb();
+	ch->last_tail = last_tail;
+	spin_unlock(&ch->cleanup_lock);
+}
+
+static u32 mic_dma_ring_count(u32 head, u32 tail)
+{
+	u32 count;
+
+	if (head >= tail)
+		count = (tail - 0) + (MIC_DMA_DESC_RX_SIZE - head);
+	else
+		count = tail - head;
+	return count - 1;
+}
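+
+/*
+ * Worked example for the ring-space math above, assuming a ring of
+ * MIC_DMA_DESC_RX_SIZE entries (the constant itself lives in the header):
+ * with head = 60 and tail = 5, the free count is 5 + (SIZE - 60) - 1.
+ * The trailing "- 1" keeps one descriptor slot in reserve so a completely
+ * full ring is never confused with an empty one.
+ */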
+
+/* Returns the num. of free descriptors on success, -ENOMEM on failure */
+static int mic_dma_avail_desc_ring_space(struct mic_dma_chan *ch, int required)
+{
+	struct device *dev = mic_dma_ch_to_device(ch);
+	u32 count;
+
+	count = mic_dma_ring_count(ch->head, ch->last_tail);
+	if (count < required) {
+		mic_dma_cleanup(ch);
+		count = mic_dma_ring_count(ch->head, ch->last_tail);
+	}
+
+	if (count < required) {
+		dev_dbg(dev, "Not enough desc space");
+		dev_dbg(dev, "%s %d required=%u, avail=%u\n",
+			__func__, __LINE__, required, count);
+		return -ENOMEM;
+	} else {
+		return count;
+	}
+}
+
+/* Program memcpy descriptors into the descriptor ring and update s/w head ptr */
+static int mic_dma_prog_memcpy_desc(struct mic_dma_chan *ch, dma_addr_t src,
+				    dma_addr_t dst, size_t len)
+{
+	size_t current_transfer_len;
+	size_t max_xfer_size = to_mic_dma_dev(ch)->max_xfer_size;
+	/* 3 is added to make sure we have enough space for status desc */
+	int num_desc = len / max_xfer_size + 3;
+	int ret;
+
+	if (len % max_xfer_size)
+		num_desc++;
+
+	ret = mic_dma_avail_desc_ring_space(ch, num_desc);
+	if (ret < 0)
+		return ret;
+	do {
+		current_transfer_len = min(len, max_xfer_size);
+		mic_dma_memcpy_desc(&ch->desc_ring[ch->head],
+				    src, dst, current_transfer_len);
+		mic_dma_hw_ring_inc_head(ch);
+		len -= current_transfer_len;
+		dst = dst + current_transfer_len;
+		src = src + current_transfer_len;
+	} while (len > 0);
+	return 0;
+}
+
+/* Due to a h/w quirk, the h/w needs 2 status descriptors for every interrupt status descriptor */
+static void mic_dma_prog_intr(struct mic_dma_chan *ch)
+{
+	mic_dma_prep_status_desc(&ch->desc_ring[ch->head], 0,
+				 ch->status_dest_micpa, false);
+	mic_dma_hw_ring_inc_head(ch);
+	mic_dma_prep_status_desc(&ch->desc_ring[ch->head], 0,
+				 ch->status_dest_micpa, true);
+	mic_dma_hw_ring_inc_head(ch);
+}
+
+/* Wrapper function to program memcpy descriptors/status descriptors */
+static int mic_dma_do_dma(struct mic_dma_chan *ch, int flags, dma_addr_t src,
+			  dma_addr_t dst, size_t len)
+{
+	if (len && -ENOMEM == mic_dma_prog_memcpy_desc(ch, src, dst, len)) {
+		return -ENOMEM;
+	} else {
+		/* 3 is the maximum number of status descriptors */
+		int ret = mic_dma_avail_desc_ring_space(ch, 3);
+
+		if (ret < 0)
+			return ret;
+	}
+
+	/* Above mic_dma_prog_memcpy_desc() makes sure we have enough space */
+	if (flags & DMA_PREP_FENCE) {
+		mic_dma_prep_status_desc(&ch->desc_ring[ch->head], 0,
+					 ch->status_dest_micpa, false);
+		mic_dma_hw_ring_inc_head(ch);
+	}
+
+	if (flags & DMA_PREP_INTERRUPT)
+		mic_dma_prog_intr(ch);
+
+	return 0;
+}
+
+static inline void mic_dma_issue_pending(struct dma_chan *ch)
+{
+	struct mic_dma_chan *mic_ch = to_mic_dma_chan(ch);
+
+	spin_lock(&mic_ch->issue_lock);
+	/*
+	 * Write to head triggers h/w to act on the descriptors.
+	 * On MIC, writing the same head value twice causes
+	 * a h/w error. On second write, h/w assumes we filled
+	 * the entire ring & overwrote some of the descriptors.
+	 */
+	if (mic_ch->issued == mic_ch->submitted)
+		goto out;
+	mic_ch->issued = mic_ch->submitted;
+	/*
+	 * make descriptor updates visible before advancing head,
+	 * this is purposefully not smp_wmb() since we are also
+	 * publishing the descriptor updates to a dma device
+	 */
+	wmb();
+	mic_dma_write_reg(mic_ch, MIC_DMA_REG_DHPR, mic_ch->issued);
+out:
+	spin_unlock(&mic_ch->issue_lock);
+}
+
+static inline void mic_dma_update_pending(struct mic_dma_chan *ch)
+{
+	if (mic_dma_ring_count(ch->issued, ch->submitted)
+			> mic_dma_pending_level)
+		mic_dma_issue_pending(&ch->api_ch);
+}
+
+static dma_cookie_t mic_dma_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
+{
+	struct mic_dma_chan *mic_ch = to_mic_dma_chan(tx->chan);
+	dma_cookie_t cookie;
+
+	dma_cookie_assign(tx);
+	cookie = tx->cookie;
+	/*
+	 * We need an smp write barrier here because another CPU might see
+	 * an update to submitted and update h/w head even before we
+	 * assigned a cookie to this tx.
+	 */
+	smp_wmb();
+	mic_ch->submitted = mic_ch->head;
+	spin_unlock(&mic_ch->prep_lock);
+	mic_dma_update_pending(mic_ch);
+	return cookie;
+}
+
+static inline struct dma_async_tx_descriptor *
+allocate_tx(struct mic_dma_chan *ch)
+{
+	u32 idx = mic_dma_hw_ring_dec(ch->head);
+	struct dma_async_tx_descriptor *tx = &ch->tx_array[idx];
+
+	dma_async_tx_descriptor_init(tx, &ch->api_ch);
+	tx->tx_submit = mic_dma_tx_submit_unlock;
+	return tx;
+}
+
+/* Program a status descriptor with dst as address and value to be written */
+static struct dma_async_tx_descriptor *
+mic_dma_prep_status_lock(struct dma_chan *ch, dma_addr_t dst, u64 src_val,
+			 unsigned long flags)
+{
+	struct mic_dma_chan *mic_ch = to_mic_dma_chan(ch);
+	int result;
+
+	spin_lock(&mic_ch->prep_lock);
+	result = mic_dma_avail_desc_ring_space(mic_ch, 4);
+	if (result < 0)
+		goto error;
+	mic_dma_prep_status_desc(&mic_ch->desc_ring[mic_ch->head], src_val, dst,
+				 false);
+	mic_dma_hw_ring_inc_head(mic_ch);
+	result = mic_dma_do_dma(mic_ch, flags, 0, 0, 0);
+	if (result < 0)
+		goto error;
+
+	return allocate_tx(mic_ch);
+error:
+	dev_err(mic_dma_ch_to_device(mic_ch),
+		"Error enqueueing dma status descriptor, error=%d\n", result);
+	spin_unlock(&mic_ch->prep_lock);
+	return NULL;
+}
+
+/*
+ * Prepare a memcpy descriptor to be added to the ring.
+ * Note that using a temporary descriptor would add the extra overhead of
+ * copying it into the ring, so we write directly into the descriptor ring.
+ */
+static struct dma_async_tx_descriptor *
+mic_dma_prep_memcpy_lock(struct dma_chan *ch, dma_addr_t dma_dest,
+			 dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+	struct mic_dma_chan *mic_ch = to_mic_dma_chan(ch);
+	struct device *dev = mic_dma_ch_to_device(mic_ch);
+	int result;
+
+	if (!len && !flags)
+		return NULL;
+
+	spin_lock(&mic_ch->prep_lock);
+	result = mic_dma_do_dma(mic_ch, flags, dma_src, dma_dest, len);
+	if (result >= 0)
+		return allocate_tx(mic_ch);
+	dev_err(dev, "Error enqueueing dma, error=%d\n", result);
+	spin_unlock(&mic_ch->prep_lock);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mic_dma_prep_interrupt_lock(struct dma_chan *ch, unsigned long flags)
+{
+	struct mic_dma_chan *mic_ch = to_mic_dma_chan(ch);
+	int ret;
+
+	spin_lock(&mic_ch->prep_lock);
+	ret = mic_dma_do_dma(mic_ch, flags, 0, 0, 0);
+	if (!ret)
+		return allocate_tx(mic_ch);
+	spin_unlock(&mic_ch->prep_lock);
+	return NULL;
+}
+
+/* Return the status of the transaction */
+static enum dma_status
+mic_dma_tx_status(struct dma_chan *ch, dma_cookie_t cookie,
+		  struct dma_tx_state *txstate)
+{
+	struct mic_dma_chan *mic_ch = to_mic_dma_chan(ch);
+
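+	/*
+	 * If the cookie isn't complete yet, reap finished descriptors and
+	 * then re-read its state.
+	 */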
+	if (DMA_COMPLETE != dma_cookie_status(ch, cookie, txstate))
+		mic_dma_cleanup(mic_ch);
+
+	return dma_cookie_status(ch, cookie, txstate);
+}
+
+static irqreturn_t mic_dma_thread_fn(int irq, void *data)
+{
+	mic_dma_cleanup((struct mic_dma_chan *)data);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t mic_dma_intr_handler(int irq, void *data)
+{
+	struct mic_dma_chan *ch = ((struct mic_dma_chan *)data);
+
+	mic_dma_ack_interrupt(ch);
+	return IRQ_WAKE_THREAD;
+}
+
+static int mic_dma_alloc_desc_ring(struct mic_dma_chan *ch)
+{
+	u64 desc_ring_size = MIC_DMA_DESC_RX_SIZE * sizeof(*ch->desc_ring);
+	struct device *dev = &to_mbus_device(ch)->dev;
+
+	desc_ring_size = ALIGN(desc_ring_size, MIC_DMA_ALIGN_BYTES);
+	ch->desc_ring = kzalloc(desc_ring_size, GFP_KERNEL);
+
+	if (!ch->desc_ring)
+		return -ENOMEM;
+
+	ch->desc_ring_micpa = dma_map_single(dev, ch->desc_ring,
+					     desc_ring_size, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, ch->desc_ring_micpa))
+		goto map_error;
+
+	ch->tx_array = vzalloc(array_size(MIC_DMA_DESC_RX_SIZE,
+					  sizeof(*ch->tx_array)));
+	if (!ch->tx_array)
+		goto tx_error;
+	return 0;
+tx_error:
+	dma_unmap_single(dev, ch->desc_ring_micpa, desc_ring_size,
+			 DMA_BIDIRECTIONAL);
+map_error:
+	kfree(ch->desc_ring);
+	return -ENOMEM;
+}
+
+static void mic_dma_free_desc_ring(struct mic_dma_chan *ch)
+{
+	u64 desc_ring_size = MIC_DMA_DESC_RX_SIZE * sizeof(*ch->desc_ring);
+
+	vfree(ch->tx_array);
+	desc_ring_size = ALIGN(desc_ring_size, MIC_DMA_ALIGN_BYTES);
+	dma_unmap_single(&to_mbus_device(ch)->dev, ch->desc_ring_micpa,
+			 desc_ring_size, DMA_BIDIRECTIONAL);
+	kfree(ch->desc_ring);
+	ch->desc_ring = NULL;
+}
+
+static void mic_dma_free_status_dest(struct mic_dma_chan *ch)
+{
+	dma_unmap_single(&to_mbus_device(ch)->dev, ch->status_dest_micpa,
+			 L1_CACHE_BYTES, DMA_BIDIRECTIONAL);
+	kfree(ch->status_dest);
+}
+
+static int mic_dma_alloc_status_dest(struct mic_dma_chan *ch)
+{
+	struct device *dev = &to_mbus_device(ch)->dev;
+
+	ch->status_dest = kzalloc(L1_CACHE_BYTES, GFP_KERNEL);
+	if (!ch->status_dest)
+		return -ENOMEM;
+	ch->status_dest_micpa = dma_map_single(dev, ch->status_dest,
+					L1_CACHE_BYTES, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, ch->status_dest_micpa)) {
+		kfree(ch->status_dest);
+		ch->status_dest = NULL;
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static int mic_dma_check_chan(struct mic_dma_chan *ch)
+{
+	if (mic_dma_read_reg(ch, MIC_DMA_REG_DCHERR) ||
+	    mic_dma_read_reg(ch, MIC_DMA_REG_DSTAT) & MIC_DMA_CHAN_QUIESCE) {
+		mic_dma_disable_chan(ch);
+		mic_dma_chan_mask_intr(ch);
+		dev_err(mic_dma_ch_to_device(ch),
+			"%s %d error setting up mic dma chan %d\n",
+			__func__, __LINE__, ch->ch_num);
+		return -EBUSY;
+	}
+	return 0;
+}
+
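+/*
+ * Bring a channel up: claim ownership if it is MIC owned, program the
+ * descriptor ring registers and start the s/w head in sync with the h/w
+ * tail pointer.
+ */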
+static int mic_dma_chan_setup(struct mic_dma_chan *ch)
+{
+	if (MIC_DMA_CHAN_MIC == ch->owner)
+		mic_dma_chan_set_owner(ch);
+	mic_dma_disable_chan(ch);
+	mic_dma_chan_mask_intr(ch);
+	mic_dma_write_reg(ch, MIC_DMA_REG_DCHERRMSK, 0);
+	mic_dma_chan_set_desc_ring(ch);
+	ch->last_tail = mic_dma_read_reg(ch, MIC_DMA_REG_DTPR);
+	ch->head = ch->last_tail;
+	ch->issued = 0;
+	mic_dma_chan_unmask_intr(ch);
+	mic_dma_enable_chan(ch);
+	return mic_dma_check_chan(ch);
+}
+
+static void mic_dma_chan_destroy(struct mic_dma_chan *ch)
+{
+	mic_dma_disable_chan(ch);
+	mic_dma_chan_mask_intr(ch);
+}
+
+static int mic_dma_setup_irq(struct mic_dma_chan *ch)
+{
+	ch->cookie =
+		to_mbus_hw_ops(ch)->request_threaded_irq(to_mbus_device(ch),
+			mic_dma_intr_handler, mic_dma_thread_fn,
+			"mic dma_channel", ch, ch->ch_num);
+	return PTR_ERR_OR_ZERO(ch->cookie);
+}
+
+static inline void mic_dma_free_irq(struct mic_dma_chan *ch)
+{
+	to_mbus_hw_ops(ch)->free_irq(to_mbus_device(ch), ch->cookie, ch);
+}
+
+static int mic_dma_chan_init(struct mic_dma_chan *ch)
+{
+	int ret = mic_dma_alloc_desc_ring(ch);
+
+	if (ret)
+		goto ring_error;
+	ret = mic_dma_alloc_status_dest(ch);
+	if (ret)
+		goto status_error;
+	ret = mic_dma_chan_setup(ch);
+	if (ret)
+		goto chan_error;
+	return ret;
+chan_error:
+	mic_dma_free_status_dest(ch);
+status_error:
+	mic_dma_free_desc_ring(ch);
+ring_error:
+	return ret;
+}
+
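+/*
+ * Drain the channel by queueing a zero length, fenced transfer and waiting
+ * for it to complete; once this dummy transfer finishes, everything
+ * submitted before it has been processed by the h/w.
+ */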
+static int mic_dma_drain_chan(struct mic_dma_chan *ch)
+{
+	struct dma_async_tx_descriptor *tx;
+	int err = 0;
+	dma_cookie_t cookie;
+
+	tx = mic_dma_prep_memcpy_lock(&ch->api_ch, 0, 0, 0, DMA_PREP_FENCE);
+	if (!tx) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	cookie = tx->tx_submit(tx);
+	if (dma_submit_error(cookie))
+		err = -ENOMEM;
+	else
+		err = dma_sync_wait(&ch->api_ch, cookie);
+	if (err) {
+		dev_err(mic_dma_ch_to_device(ch), "%s %d TO chan 0x%x\n",
+			__func__, __LINE__, ch->ch_num);
+		err = -EIO;
+	}
+error:
+	mic_dma_cleanup(ch);
+	return err;
+}
+
+static inline void mic_dma_chan_uninit(struct mic_dma_chan *ch)
+{
+	mic_dma_chan_destroy(ch);
+	mic_dma_cleanup(ch);
+	mic_dma_free_status_dest(ch);
+	mic_dma_free_desc_ring(ch);
+}
+
+static int mic_dma_init(struct mic_dma_device *mic_dma_dev,
+			enum mic_dma_chan_owner owner)
+{
+	int i, first_chan = mic_dma_dev->start_ch;
+	struct mic_dma_chan *ch;
+	int ret;
+
+	for (i = first_chan; i < first_chan + MIC_DMA_NUM_CHAN; i++) {
+		ch = &mic_dma_dev->mic_ch[i];
+		ch->ch_num = i;
+		ch->owner = owner;
+		spin_lock_init(&ch->cleanup_lock);
+		spin_lock_init(&ch->prep_lock);
+		spin_lock_init(&ch->issue_lock);
+		ret = mic_dma_setup_irq(ch);
+		if (ret)
+			goto error;
+	}
+	return 0;
+error:
+	for (i = i - 1; i >= first_chan; i--)
+		mic_dma_free_irq(&mic_dma_dev->mic_ch[i]);
+	return ret;
+}
+
+static void mic_dma_uninit(struct mic_dma_device *mic_dma_dev)
+{
+	int i, first_chan = mic_dma_dev->start_ch;
+	struct mic_dma_chan *ch;
+
+	for (i = first_chan; i < first_chan + MIC_DMA_NUM_CHAN; i++) {
+		ch = &mic_dma_dev->mic_ch[i];
+		mic_dma_free_irq(ch);
+	}
+}
+
+static int mic_dma_alloc_chan_resources(struct dma_chan *ch)
+{
+	int ret = mic_dma_chan_init(to_mic_dma_chan(ch));
+	if (ret)
+		return ret;
+	return MIC_DMA_DESC_RX_SIZE;
+}
+
+static void mic_dma_free_chan_resources(struct dma_chan *ch)
+{
+	struct mic_dma_chan *mic_ch = to_mic_dma_chan(ch);
+	mic_dma_drain_chan(mic_ch);
+	mic_dma_chan_uninit(mic_ch);
+}
+
+/* Set the fn. handlers and register the dma device with dma api */
+static int mic_dma_register_dma_device(struct mic_dma_device *mic_dma_dev,
+				       enum mic_dma_chan_owner owner)
+{
+	int i, first_chan = mic_dma_dev->start_ch;
+
+	dma_cap_zero(mic_dma_dev->dma_dev.cap_mask);
+	/*
+	 * This dma engine is not capable of host memory to host memory
+	 * transfers
+	 */
+	dma_cap_set(DMA_MEMCPY, mic_dma_dev->dma_dev.cap_mask);
+
+	if (MIC_DMA_CHAN_HOST == owner)
+		dma_cap_set(DMA_PRIVATE, mic_dma_dev->dma_dev.cap_mask);
+	mic_dma_dev->dma_dev.device_alloc_chan_resources =
+		mic_dma_alloc_chan_resources;
+	mic_dma_dev->dma_dev.device_free_chan_resources =
+		mic_dma_free_chan_resources;
+	mic_dma_dev->dma_dev.device_tx_status = mic_dma_tx_status;
+	mic_dma_dev->dma_dev.device_prep_dma_memcpy = mic_dma_prep_memcpy_lock;
+	mic_dma_dev->dma_dev.device_prep_dma_imm_data =
+		mic_dma_prep_status_lock;
+	mic_dma_dev->dma_dev.device_prep_dma_interrupt =
+		mic_dma_prep_interrupt_lock;
+	mic_dma_dev->dma_dev.device_issue_pending = mic_dma_issue_pending;
+	mic_dma_dev->dma_dev.copy_align = MIC_DMA_ALIGN_SHIFT;
+	INIT_LIST_HEAD(&mic_dma_dev->dma_dev.channels);
+	for (i = first_chan; i < first_chan + MIC_DMA_NUM_CHAN; i++) {
+		mic_dma_dev->mic_ch[i].api_ch.device = &mic_dma_dev->dma_dev;
+		dma_cookie_init(&mic_dma_dev->mic_ch[i].api_ch);
+		list_add_tail(&mic_dma_dev->mic_ch[i].api_ch.device_node,
+			      &mic_dma_dev->dma_dev.channels);
+	}
+	return dmaenginem_async_device_register(&mic_dma_dev->dma_dev);
+}
+
+/*
+ * Initializes dma channels and registers the dma device with the
+ * dma engine api.
+ */
+static struct mic_dma_device *mic_dma_dev_reg(struct mbus_device *mbdev,
+					      enum mic_dma_chan_owner owner)
+{
+	struct mic_dma_device *mic_dma_dev;
+	int ret;
+	struct device *dev = &mbdev->dev;
+
+	mic_dma_dev = devm_kzalloc(dev, sizeof(*mic_dma_dev), GFP_KERNEL);
+	if (!mic_dma_dev) {
+		ret = -ENOMEM;
+		goto alloc_error;
+	}
+	mic_dma_dev->mbdev = mbdev;
+	mic_dma_dev->dma_dev.dev = dev;
+	mic_dma_dev->mmio = mbdev->mmio_va;
+	if (MIC_DMA_CHAN_HOST == owner) {
+		mic_dma_dev->start_ch = 0;
+		mic_dma_dev->max_xfer_size = MIC_DMA_MAX_XFER_SIZE_HOST;
+	} else {
+		mic_dma_dev->start_ch = 4;
+		mic_dma_dev->max_xfer_size = MIC_DMA_MAX_XFER_SIZE_CARD;
+	}
+	ret = mic_dma_init(mic_dma_dev, owner);
+	if (ret)
+		goto init_error;
+	ret = mic_dma_register_dma_device(mic_dma_dev, owner);
+	if (ret)
+		goto reg_error;
+	return mic_dma_dev;
+reg_error:
+	mic_dma_uninit(mic_dma_dev);
+init_error:
+	mic_dma_dev = NULL;
+alloc_error:
+	dev_err(dev, "Error at %s %d ret=%d\n", __func__, __LINE__, ret);
+	return mic_dma_dev;
+}
+
+static void mic_dma_dev_unreg(struct mic_dma_device *mic_dma_dev)
+{
+	mic_dma_uninit(mic_dma_dev);
+}
+
+/* DEBUGFS CODE */
+static int mic_dma_reg_seq_show(struct seq_file *s, void *pos)
+{
+	struct mic_dma_device *mic_dma_dev = s->private;
+	int i, chan_num, first_chan = mic_dma_dev->start_ch;
+	struct mic_dma_chan *ch;
+
+	seq_printf(s, "SBOX_DCR: %#x\n",
+		   mic_dma_mmio_read(&mic_dma_dev->mic_ch[first_chan],
+				     MIC_DMA_SBOX_BASE + MIC_DMA_SBOX_DCR));
+	seq_puts(s, "DMA Channel Registers\n");
+	seq_printf(s, "%-10s| %-10s %-10s %-10s %-10s %-10s",
+		   "Channel", "DCAR", "DTPR", "DHPR", "DRAR_HI", "DRAR_LO");
+	seq_printf(s, " %-11s %-14s %-10s\n", "DCHERR", "DCHERRMSK", "DSTAT");
+	for (i = first_chan; i < first_chan + MIC_DMA_NUM_CHAN; i++) {
+		ch = &mic_dma_dev->mic_ch[i];
+		chan_num = ch->ch_num;
+		seq_printf(s, "%-10i| %-#10x %-#10x %-#10x %-#10x",
+			   chan_num,
+			   mic_dma_read_reg(ch, MIC_DMA_REG_DCAR),
+			   mic_dma_read_reg(ch, MIC_DMA_REG_DTPR),
+			   mic_dma_read_reg(ch, MIC_DMA_REG_DHPR),
+			   mic_dma_read_reg(ch, MIC_DMA_REG_DRAR_HI));
+		seq_printf(s, " %-#10x %-#10x %-#14x %-#10x\n",
+			   mic_dma_read_reg(ch, MIC_DMA_REG_DRAR_LO),
+			   mic_dma_read_reg(ch, MIC_DMA_REG_DCHERR),
+			   mic_dma_read_reg(ch, MIC_DMA_REG_DCHERRMSK),
+			   mic_dma_read_reg(ch, MIC_DMA_REG_DSTAT));
+	}
+	return 0;
+}
+
+static int mic_dma_reg_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mic_dma_reg_seq_show, inode->i_private);
+}
+
+static int mic_dma_reg_debug_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+static const struct file_operations mic_dma_reg_ops = {
+	.owner   = THIS_MODULE,
+	.open    = mic_dma_reg_debug_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = mic_dma_reg_debug_release
+};
+
+/* Debugfs parent dir */
+static struct dentry *mic_dma_dbg;
+
+static int mic_dma_driver_probe(struct mbus_device *mbdev)
+{
+	struct mic_dma_device *mic_dma_dev;
+	enum mic_dma_chan_owner owner;
+
+	if (MBUS_DEV_DMA_MIC == mbdev->id.device)
+		owner = MIC_DMA_CHAN_MIC;
+	else
+		owner = MIC_DMA_CHAN_HOST;
+
+	mic_dma_dev = mic_dma_dev_reg(mbdev, owner);
+	if (!mic_dma_dev)
+		return -ENODEV;
+	dev_set_drvdata(&mbdev->dev, mic_dma_dev);
+
+	if (mic_dma_dbg) {
+		mic_dma_dev->dbg_dir = debugfs_create_dir(dev_name(&mbdev->dev),
+							  mic_dma_dbg);
+		if (mic_dma_dev->dbg_dir)
+			debugfs_create_file("mic_dma_reg", 0444,
+					    mic_dma_dev->dbg_dir, mic_dma_dev,
+					    &mic_dma_reg_ops);
+	}
+	return 0;
+}
+
+static void mic_dma_driver_remove(struct mbus_device *mbdev)
+{
+	struct mic_dma_device *mic_dma_dev;
+
+	mic_dma_dev = dev_get_drvdata(&mbdev->dev);
+	debugfs_remove_recursive(mic_dma_dev->dbg_dir);
+	mic_dma_dev_unreg(mic_dma_dev);
+}
+
+static struct mbus_device_id id_table[] = {
+	{MBUS_DEV_DMA_MIC, MBUS_DEV_ANY_ID},
+	{MBUS_DEV_DMA_HOST, MBUS_DEV_ANY_ID},
+	{0},
+};
+
+static struct mbus_driver mic_dma_driver = {
+	.driver.name =	KBUILD_MODNAME,
+	.driver.owner =	THIS_MODULE,
+	.id_table = id_table,
+	.probe = mic_dma_driver_probe,
+	.remove = mic_dma_driver_remove,
+};
+
+static int __init mic_x100_dma_init(void)
+{
+	int rc = mbus_register_driver(&mic_dma_driver);
+	if (rc)
+		return rc;
+	mic_dma_dbg = debugfs_create_dir(KBUILD_MODNAME, NULL);
+	return 0;
+}
+
+static void __exit mic_x100_dma_exit(void)
+{
+	debugfs_remove_recursive(mic_dma_dbg);
+	mbus_unregister_driver(&mic_dma_driver);
+}
+
+module_init(mic_x100_dma_init);
+module_exit(mic_x100_dma_exit);
+
+MODULE_DEVICE_TABLE(mbus, id_table);
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("Intel(R) MIC X100 DMA Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/mic_x100_dma.h b/drivers/dma/mic_x100_dma.h
new file mode 100644
index 0000000..d899820
--- /dev/null
+++ b/drivers/dma/mic_x100_dma.h
@@ -0,0 +1,286 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC X100 DMA Driver.
+ *
+ * Adapted from IOAT dma driver.
+ */
+#ifndef _MIC_X100_DMA_H_
+#define _MIC_X100_DMA_H_
+
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/mic_bus.h>
+
+#include "dmaengine.h"
+
+/*
+ * MIC has a total of 8 dma channels.
+ * Four channels are assigned for host SW use & the remaining for MIC SW.
+ * MIC DMA transfer size & addresses need to be 64 byte aligned.
+ */
+#define MIC_DMA_MAX_NUM_CHAN	8
+#define MIC_DMA_NUM_CHAN	4
+#define MIC_DMA_ALIGN_SHIFT	DMAENGINE_ALIGN_64_BYTES
+#define MIC_DMA_ALIGN_BYTES	(1 << MIC_DMA_ALIGN_SHIFT)
+#define MIC_DMA_DESC_RX_SIZE	(128 * 1024 - 4)
+
+/*
+ * Register descriptions
+ * All the registers are 32 bit registers.
+ * DCR is a global register and all others are per-channel.
+ * DCR - bits 0, 2, 4, 6, 8, 10, 12, 14 - enable bits for channels 0 to 7
+ *	 bits 1, 3, 5, 7, 9, 11, 13, 15 - owner bits for channels 0 to 7
+ * DCAR - bit 24 & 25 interrupt masks for mic owned & host owned channels
+ * DHPR - head of the descriptor ring updated by s/w
+ * DTPR - tail of the descriptor ring updated by h/w
+ * DRAR_LO - lower 32 bits of descriptor ring's mic address
+ * DRAR_HI - 3:0 - remaining 4 bits of descriptor ring's mic address
+ *	     20:4 descriptor ring size
+ *	     25:21 mic smpt entry number
+ * DSTAT - 16:0 h/w completion count; 31:28 dma engine status
+ * DCHERR - this register is non-zero on error
+ * DCHERRMSK - interrupt mask register
+ */
+#define MIC_DMA_HW_CMP_CNT_MASK		0x1ffff
+#define MIC_DMA_CHAN_QUIESCE		0x20000000
+#define MIC_DMA_SBOX_BASE		0x00010000
+#define MIC_DMA_SBOX_DCR		0x0000A280
+#define MIC_DMA_SBOX_CH_BASE		0x0001A000
+#define MIC_DMA_SBOX_CHAN_OFF		0x40
+#define MIC_DMA_SBOX_DCAR_IM0		(0x1 << 24)
+#define MIC_DMA_SBOX_DCAR_IM1		(0x1 << 25)
+#define MIC_DMA_SBOX_DRARHI_SYS_MASK	(0x1 << 26)
+#define MIC_DMA_REG_DCAR		0
+#define MIC_DMA_REG_DHPR		4
+#define MIC_DMA_REG_DTPR		8
+#define MIC_DMA_REG_DRAR_LO		20
+#define MIC_DMA_REG_DRAR_HI		24
+#define MIC_DMA_REG_DSTAT		32
+#define MIC_DMA_REG_DCHERR		44
+#define MIC_DMA_REG_DCHERRMSK		48
+
+/* HW dma desc */
+struct mic_dma_desc {
+	u64 qw0;
+	u64 qw1;
+};
+
+enum mic_dma_chan_owner {
+	MIC_DMA_CHAN_MIC = 0,
+	MIC_DMA_CHAN_HOST
+};
+
+/*
+ * mic_dma_chan - channel specific information
+ * @ch_num: channel number
+ * @owner: owner of this channel
+ * @last_tail: cached value of descriptor ring tail
+ * @head: index of next descriptor in desc_ring
+ * @issued: hardware notification point
+ * @submitted: index that will be used to submit descriptors to h/w
+ * @api_ch: dma engine api channel
+ * @desc_ring: dma descriptor ring
+ * @desc_ring_micpa: mic physical address of desc_ring
+ * @status_dest: destination for status (fence) descriptor
+ * @status_dest_micpa: mic address for status_dest,
+ *		       DMA controller uses this address
+ * @tx_array: array of async_tx
+ * @cleanup_lock: lock held when processing completed tx
+ * @prep_lock: lock held in prep_memcpy & released in tx_submit
+ * @issue_lock: lock used to synchronize writes to head
+ * @cookie: mic_irq cookie used with mic irq request
+ */
+struct mic_dma_chan {
+	int ch_num;
+	enum mic_dma_chan_owner owner;
+	u32 last_tail;
+	u32 head;
+	u32 issued;
+	u32 submitted;
+	struct dma_chan api_ch;
+	struct mic_dma_desc *desc_ring;
+	dma_addr_t desc_ring_micpa;
+	u64 *status_dest;
+	dma_addr_t status_dest_micpa;
+	struct dma_async_tx_descriptor *tx_array;
+	spinlock_t cleanup_lock;
+	spinlock_t prep_lock;
+	spinlock_t issue_lock;
+	struct mic_irq *cookie;
+};
+
+/*
+ * struct mic_dma_device - per mic device
+ * @mic_ch: dma channels
+ * @dma_dev: underlying dma device
+ * @mbdev: mic bus dma device
+ * @mmio: virtual address of the mmio space
+ * @dbg_dir: debugfs directory
+ * @start_ch: first channel number that can be used
+ * @max_xfer_size: maximum transfer size per dma descriptor
+ */
+struct mic_dma_device {
+	struct mic_dma_chan mic_ch[MIC_DMA_MAX_NUM_CHAN];
+	struct dma_device dma_dev;
+	struct mbus_device *mbdev;
+	void __iomem *mmio;
+	struct dentry *dbg_dir;
+	int start_ch;
+	size_t max_xfer_size;
+};
+
+static inline struct mic_dma_chan *to_mic_dma_chan(struct dma_chan *ch)
+{
+	return container_of(ch, struct mic_dma_chan, api_ch);
+}
+
+static inline struct mic_dma_device *to_mic_dma_dev(struct mic_dma_chan *ch)
+{
+	return
+	container_of((const typeof(((struct mic_dma_device *)0)->mic_ch)*)
+		     (ch - ch->ch_num), struct mic_dma_device, mic_ch);
+}
+
+static inline struct mbus_device *to_mbus_device(struct mic_dma_chan *ch)
+{
+	return to_mic_dma_dev(ch)->mbdev;
+}
+
+static inline struct mbus_hw_ops *to_mbus_hw_ops(struct mic_dma_chan *ch)
+{
+	return to_mbus_device(ch)->hw_ops;
+}
+
+static inline struct device *mic_dma_ch_to_device(struct mic_dma_chan *ch)
+{
+	return to_mic_dma_dev(ch)->dma_dev.dev;
+}
+
+static inline void __iomem *mic_dma_chan_to_mmio(struct mic_dma_chan *ch)
+{
+	return to_mic_dma_dev(ch)->mmio;
+}
+
+static inline u32 mic_dma_read_reg(struct mic_dma_chan *ch, u32 reg)
+{
+	return ioread32(mic_dma_chan_to_mmio(ch) + MIC_DMA_SBOX_CH_BASE +
+			ch->ch_num * MIC_DMA_SBOX_CHAN_OFF + reg);
+}
+
+static inline void mic_dma_write_reg(struct mic_dma_chan *ch, u32 reg, u32 val)
+{
+	iowrite32(val, mic_dma_chan_to_mmio(ch) + MIC_DMA_SBOX_CH_BASE +
+		  ch->ch_num * MIC_DMA_SBOX_CHAN_OFF + reg);
+}
+
+static inline u32 mic_dma_mmio_read(struct mic_dma_chan *ch, u32 offset)
+{
+	return ioread32(mic_dma_chan_to_mmio(ch) + offset);
+}
+
+static inline void mic_dma_mmio_write(struct mic_dma_chan *ch, u32 val,
+				      u32 offset)
+{
+	iowrite32(val, mic_dma_chan_to_mmio(ch) + offset);
+}
+
+static inline u32 mic_dma_read_cmp_cnt(struct mic_dma_chan *ch)
+{
+	return mic_dma_read_reg(ch, MIC_DMA_REG_DSTAT) &
+	       MIC_DMA_HW_CMP_CNT_MASK;
+}
+
+static inline void mic_dma_chan_set_owner(struct mic_dma_chan *ch)
+{
+	u32 dcr = mic_dma_mmio_read(ch, MIC_DMA_SBOX_BASE + MIC_DMA_SBOX_DCR);
+	u32 chan_num = ch->ch_num;
+
+	dcr = (dcr & ~(0x1 << (chan_num * 2))) | (ch->owner << (chan_num * 2));
+	mic_dma_mmio_write(ch, dcr, MIC_DMA_SBOX_BASE + MIC_DMA_SBOX_DCR);
+}
+
+static inline void mic_dma_enable_chan(struct mic_dma_chan *ch)
+{
+	u32 dcr = mic_dma_mmio_read(ch, MIC_DMA_SBOX_BASE + MIC_DMA_SBOX_DCR);
+
+	dcr |= 2 << (ch->ch_num << 1);
+	mic_dma_mmio_write(ch, dcr, MIC_DMA_SBOX_BASE + MIC_DMA_SBOX_DCR);
+}
+
+static inline void mic_dma_disable_chan(struct mic_dma_chan *ch)
+{
+	u32 dcr = mic_dma_mmio_read(ch, MIC_DMA_SBOX_BASE + MIC_DMA_SBOX_DCR);
+
+	dcr &= ~(2 << (ch->ch_num << 1));
+	mic_dma_mmio_write(ch, dcr, MIC_DMA_SBOX_BASE + MIC_DMA_SBOX_DCR);
+}
+
+static void mic_dma_chan_set_desc_ring(struct mic_dma_chan *ch)
+{
+	u32 drar_hi;
+	dma_addr_t desc_ring_micpa = ch->desc_ring_micpa;
+
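+	/*
+	 * DRAR_HI packs the ring size into bits 20:4 plus the upper bits of
+	 * the ring's MIC address (and the SMPT entry for host owned rings),
+	 * per the register description above.
+	 */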
+	drar_hi = (MIC_DMA_DESC_RX_SIZE & 0x1ffff) << 4;
+	if (MIC_DMA_CHAN_MIC == ch->owner) {
+		drar_hi |= (desc_ring_micpa >> 32) & 0xf;
+	} else {
+		drar_hi |= MIC_DMA_SBOX_DRARHI_SYS_MASK;
+		drar_hi |= ((desc_ring_micpa >> 34)
+			    & 0x1f) << 21;
+		drar_hi |= (desc_ring_micpa >> 32) & 0x3;
+	}
+	mic_dma_write_reg(ch, MIC_DMA_REG_DRAR_LO, (u32) desc_ring_micpa);
+	mic_dma_write_reg(ch, MIC_DMA_REG_DRAR_HI, drar_hi);
+}
+
+static inline void mic_dma_chan_mask_intr(struct mic_dma_chan *ch)
+{
+	u32 dcar = mic_dma_read_reg(ch, MIC_DMA_REG_DCAR);
+
+	if (MIC_DMA_CHAN_MIC == ch->owner)
+		dcar |= MIC_DMA_SBOX_DCAR_IM0;
+	else
+		dcar |= MIC_DMA_SBOX_DCAR_IM1;
+	mic_dma_write_reg(ch, MIC_DMA_REG_DCAR, dcar);
+}
+
+static inline void mic_dma_chan_unmask_intr(struct mic_dma_chan *ch)
+{
+	u32 dcar = mic_dma_read_reg(ch, MIC_DMA_REG_DCAR);
+
+	if (MIC_DMA_CHAN_MIC == ch->owner)
+		dcar &= ~MIC_DMA_SBOX_DCAR_IM0;
+	else
+		dcar &= ~MIC_DMA_SBOX_DCAR_IM1;
+	mic_dma_write_reg(ch, MIC_DMA_REG_DCAR, dcar);
+}
+
+static void mic_dma_ack_interrupt(struct mic_dma_chan *ch)
+{
+	if (MIC_DMA_CHAN_MIC == ch->owner) {
+		/* HW errata */
+		mic_dma_chan_mask_intr(ch);
+		mic_dma_chan_unmask_intr(ch);
+	}
+	to_mbus_hw_ops(ch)->ack_interrupt(to_mbus_device(ch), ch->ch_num);
+}
+#endif
diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c
new file mode 100644
index 0000000..eb3a1f4
--- /dev/null
+++ b/drivers/dma/mmp_pdma.c
@@ -0,0 +1,1151 @@
+/*
+ * Copyright 2012 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/platform_data/mmp_dma.h>
+#include <linux/dmapool.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/of.h>
+#include <linux/dma/mmp-pdma.h>
+
+#include "dmaengine.h"
+
+#define DCSR		0x0000
+#define DALGN		0x00a0
+#define DINT		0x00f0
+#define DDADR		0x0200
+#define DSADR(n)	(0x0204 + ((n) << 4))
+#define DTADR(n)	(0x0208 + ((n) << 4))
+#define DCMD		0x020c
+
+#define DCSR_RUN	BIT(31)	/* Run Bit (read / write) */
+#define DCSR_NODESC	BIT(30)	/* No-Descriptor Fetch (read / write) */
+#define DCSR_STOPIRQEN	BIT(29)	/* Stop Interrupt Enable (read / write) */
+#define DCSR_REQPEND	BIT(8)	/* Request Pending (read-only) */
+#define DCSR_STOPSTATE	BIT(3)	/* Stop State (read-only) */
+#define DCSR_ENDINTR	BIT(2)	/* End Interrupt (read / write) */
+#define DCSR_STARTINTR	BIT(1)	/* Start Interrupt (read / write) */
+#define DCSR_BUSERR	BIT(0)	/* Bus Error Interrupt (read / write) */
+
+#define DCSR_EORIRQEN	BIT(28)	/* End of Receive Interrupt Enable (R/W) */
+#define DCSR_EORJMPEN	BIT(27)	/* Jump to next descriptor on EOR */
+#define DCSR_EORSTOPEN	BIT(26)	/* STOP on an EOR */
+#define DCSR_SETCMPST	BIT(25)	/* Set Descriptor Compare Status */
+#define DCSR_CLRCMPST	BIT(24)	/* Clear Descriptor Compare Status */
+#define DCSR_CMPST	BIT(10)	/* The Descriptor Compare Status */
+#define DCSR_EORINTR	BIT(9)	/* The end of Receive */
+
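+/*
+ * DRCMR(n) is the request-to-channel map register for requestor line n;
+ * lines 0-63 start at offset 0x0100 and lines 64 and up at 0x1100, with
+ * one 32-bit register per line.
+ */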
+#define DRCMR(n)	((((n) < 64) ? 0x0100 : 0x1100) + (((n) & 0x3f) << 2))
+#define DRCMR_MAPVLD	BIT(7)	/* Map Valid (read / write) */
+#define DRCMR_CHLNUM	0x1f	/* mask for Channel Number (read / write) */
+
+#define DDADR_DESCADDR	0xfffffff0	/* Address of next descriptor (mask) */
+#define DDADR_STOP	BIT(0)	/* Stop (read / write) */
+
+#define DCMD_INCSRCADDR	BIT(31)	/* Source Address Increment Setting. */
+#define DCMD_INCTRGADDR	BIT(30)	/* Target Address Increment Setting. */
+#define DCMD_FLOWSRC	BIT(29)	/* Flow Control by the source. */
+#define DCMD_FLOWTRG	BIT(28)	/* Flow Control by the target. */
+#define DCMD_STARTIRQEN	BIT(22)	/* Start Interrupt Enable */
+#define DCMD_ENDIRQEN	BIT(21)	/* End Interrupt Enable */
+#define DCMD_ENDIAN	BIT(18)	/* Device Endian-ness. */
+#define DCMD_BURST8	(1 << 16)	/* 8 byte burst */
+#define DCMD_BURST16	(2 << 16)	/* 16 byte burst */
+#define DCMD_BURST32	(3 << 16)	/* 32 byte burst */
+#define DCMD_WIDTH1	(1 << 14)	/* 1 byte width */
+#define DCMD_WIDTH2	(2 << 14)	/* 2 byte width (HalfWord) */
+#define DCMD_WIDTH4	(3 << 14)	/* 4 byte width (Word) */
+#define DCMD_LENGTH	0x01fff		/* length mask (max = 8K - 1) */
+
+#define PDMA_MAX_DESC_BYTES	DCMD_LENGTH
+
+struct mmp_pdma_desc_hw {
+	u32 ddadr;	/* Points to the next descriptor + flags */
+	u32 dsadr;	/* DSADR value for the current transfer */
+	u32 dtadr;	/* DTADR value for the current transfer */
+	u32 dcmd;	/* DCMD value for the current transfer */
+} __aligned(32);
+
+struct mmp_pdma_desc_sw {
+	struct mmp_pdma_desc_hw desc;
+	struct list_head node;
+	struct list_head tx_list;
+	struct dma_async_tx_descriptor async_tx;
+};
+
+struct mmp_pdma_phy;
+
+struct mmp_pdma_chan {
+	struct device *dev;
+	struct dma_chan chan;
+	struct dma_async_tx_descriptor desc;
+	struct mmp_pdma_phy *phy;
+	enum dma_transfer_direction dir;
+
+	struct mmp_pdma_desc_sw *cyclic_first;	/* first desc_sw if channel
+						 * is in cyclic mode */
+
+	/* channel's basic info */
+	struct tasklet_struct tasklet;
+	u32 dcmd;
+	u32 drcmr;
+	u32 dev_addr;
+
+	/* list for desc */
+	spinlock_t desc_lock;		/* Descriptor list lock */
+	struct list_head chain_pending;	/* Link descriptors queue for pending */
+	struct list_head chain_running;	/* Link descriptors queue for running */
+	bool idle;			/* channel state machine */
+	bool byte_align;
+
+	struct dma_pool *desc_pool;	/* Descriptors pool */
+};
+
+struct mmp_pdma_phy {
+	int idx;
+	void __iomem *base;
+	struct mmp_pdma_chan *vchan;
+};
+
+struct mmp_pdma_device {
+	int				dma_channels;
+	void __iomem			*base;
+	struct device			*dev;
+	struct dma_device		device;
+	struct mmp_pdma_phy		*phy;
+	spinlock_t phy_lock; /* protect alloc/free phy channels */
+};
+
+#define tx_to_mmp_pdma_desc(tx)					\
+	container_of(tx, struct mmp_pdma_desc_sw, async_tx)
+#define to_mmp_pdma_desc(lh)					\
+	container_of(lh, struct mmp_pdma_desc_sw, node)
+#define to_mmp_pdma_chan(dchan)					\
+	container_of(dchan, struct mmp_pdma_chan, chan)
+#define to_mmp_pdma_dev(dmadev)					\
+	container_of(dmadev, struct mmp_pdma_device, device)
+
+static void set_desc(struct mmp_pdma_phy *phy, dma_addr_t addr)
+{
+	u32 reg = (phy->idx << 4) + DDADR;
+
+	writel(addr, phy->base + reg);
+}
+
+static void enable_chan(struct mmp_pdma_phy *phy)
+{
+	u32 reg, dalgn;
+
+	if (!phy->vchan)
+		return;
+
+	reg = DRCMR(phy->vchan->drcmr);
+	writel(DRCMR_MAPVLD | phy->idx, phy->base + reg);
+
+	dalgn = readl(phy->base + DALGN);
+	if (phy->vchan->byte_align)
+		dalgn |= 1 << phy->idx;
+	else
+		dalgn &= ~(1 << phy->idx);
+	writel(dalgn, phy->base + DALGN);
+
+	reg = (phy->idx << 2) + DCSR;
+	writel(readl(phy->base + reg) | DCSR_RUN, phy->base + reg);
+}
+
+static void disable_chan(struct mmp_pdma_phy *phy)
+{
+	u32 reg;
+
+	if (!phy)
+		return;
+
+	reg = (phy->idx << 2) + DCSR;
+	writel(readl(phy->base + reg) & ~DCSR_RUN, phy->base + reg);
+}
+
+static int clear_chan_irq(struct mmp_pdma_phy *phy)
+{
+	u32 dcsr;
+	u32 dint = readl(phy->base + DINT);
+	u32 reg = (phy->idx << 2) + DCSR;
+
+	if (!(dint & BIT(phy->idx)))
+		return -EAGAIN;
+
+	/* clear irq */
+	dcsr = readl(phy->base + reg);
+	writel(dcsr, phy->base + reg);
+	if ((dcsr & DCSR_BUSERR) && (phy->vchan))
+		dev_warn(phy->vchan->dev, "DCSR_BUSERR\n");
+
+	return 0;
+}
+
+static irqreturn_t mmp_pdma_chan_handler(int irq, void *dev_id)
+{
+	struct mmp_pdma_phy *phy = dev_id;
+
+	if (clear_chan_irq(phy) != 0)
+		return IRQ_NONE;
+
+	tasklet_schedule(&phy->vchan->tasklet);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t mmp_pdma_int_handler(int irq, void *dev_id)
+{
+	struct mmp_pdma_device *pdev = dev_id;
+	struct mmp_pdma_phy *phy;
+	u32 dint = readl(pdev->base + DINT);
+	int i, ret;
+	int irq_num = 0;
+
+	while (dint) {
+		i = __ffs(dint);
+		/* only handle interrupts belonging to pdma driver */
+		if (i >= pdev->dma_channels)
+			break;
+		dint &= (dint - 1);
+		phy = &pdev->phy[i];
+		ret = mmp_pdma_chan_handler(irq, phy);
+		if (ret == IRQ_HANDLED)
+			irq_num++;
+	}
+
+	if (irq_num)
+		return IRQ_HANDLED;
+
+	return IRQ_NONE;
+}
+
+/* lookup free phy channel as descending priority */
+static struct mmp_pdma_phy *lookup_phy(struct mmp_pdma_chan *pchan)
+{
+	int prio, i;
+	struct mmp_pdma_device *pdev = to_mmp_pdma_dev(pchan->chan.device);
+	struct mmp_pdma_phy *phy, *found = NULL;
+	unsigned long flags;
+
+	/*
+	 * dma channel priorities
+	 * ch 0 - 3,  16 - 19  <--> (0)
+	 * ch 4 - 7,  20 - 23  <--> (1)
+	 * ch 8 - 11, 24 - 27  <--> (2)
+	 * ch 12 - 15, 28 - 31  <--> (3)
+	 */
+
+	spin_lock_irqsave(&pdev->phy_lock, flags);
+	for (prio = 0; prio <= ((pdev->dma_channels - 1) & 0xf) >> 2; prio++) {
+		for (i = 0; i < pdev->dma_channels; i++) {
+			if (prio != (i & 0xf) >> 2)
+				continue;
+			phy = &pdev->phy[i];
+			if (!phy->vchan) {
+				phy->vchan = pchan;
+				found = phy;
+				goto out_unlock;
+			}
+		}
+	}
+
+out_unlock:
+	spin_unlock_irqrestore(&pdev->phy_lock, flags);
+	return found;
+}
+
+static void mmp_pdma_free_phy(struct mmp_pdma_chan *pchan)
+{
+	struct mmp_pdma_device *pdev = to_mmp_pdma_dev(pchan->chan.device);
+	unsigned long flags;
+	u32 reg;
+
+	if (!pchan->phy)
+		return;
+
+	/* clear the channel mapping in DRCMR */
+	reg = DRCMR(pchan->drcmr);
+	writel(0, pchan->phy->base + reg);
+
+	spin_lock_irqsave(&pdev->phy_lock, flags);
+	pchan->phy->vchan = NULL;
+	pchan->phy = NULL;
+	spin_unlock_irqrestore(&pdev->phy_lock, flags);
+}
+
+/**
+ * start_pending_queue - transfer any pending transactions
+ * pending list ==> running list
+ */
+static void start_pending_queue(struct mmp_pdma_chan *chan)
+{
+	struct mmp_pdma_desc_sw *desc;
+
+	/* still running; the irq handler will start the pending list */
+	if (!chan->idle) {
+		dev_dbg(chan->dev, "DMA controller still busy\n");
+		return;
+	}
+
+	if (list_empty(&chan->chain_pending)) {
+		/* chance to re-fetch phy channel with higher prio */
+		mmp_pdma_free_phy(chan);
+		dev_dbg(chan->dev, "no pending list\n");
+		return;
+	}
+
+	if (!chan->phy) {
+		chan->phy = lookup_phy(chan);
+		if (!chan->phy) {
+			dev_dbg(chan->dev, "no free dma channel\n");
+			return;
+		}
+	}
+
+	/*
+	 * pending -> running
+	 * reinitialize the pending list
+	 */
+	desc = list_first_entry(&chan->chain_pending,
+				struct mmp_pdma_desc_sw, node);
+	list_splice_tail_init(&chan->chain_pending, &chan->chain_running);
+
+	/*
+	 * Program the descriptor's address into the DMA controller,
+	 * then start the DMA transaction
+	 */
+	set_desc(chan->phy, desc->async_tx.phys);
+	enable_chan(chan->phy);
+	chan->idle = false;
+}
+
+/* desc->tx_list ==> pending list */
+static dma_cookie_t mmp_pdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(tx->chan);
+	struct mmp_pdma_desc_sw *desc = tx_to_mmp_pdma_desc(tx);
+	struct mmp_pdma_desc_sw *child;
+	unsigned long flags;
+	dma_cookie_t cookie = -EBUSY;
+
+	spin_lock_irqsave(&chan->desc_lock, flags);
+
+	list_for_each_entry(child, &desc->tx_list, node) {
+		cookie = dma_cookie_assign(&child->async_tx);
+	}
+
+	/* queue the descriptors: desc->tx_list ==> pending list */
+	list_splice_tail_init(&desc->tx_list, &chan->chain_pending);
+
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+	return cookie;
+}
+
+static struct mmp_pdma_desc_sw *
+mmp_pdma_alloc_descriptor(struct mmp_pdma_chan *chan)
+{
+	struct mmp_pdma_desc_sw *desc;
+	dma_addr_t pdesc;
+
+	desc = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &pdesc);
+	if (!desc) {
+		dev_err(chan->dev, "out of memory for link descriptor\n");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&desc->tx_list);
+	dma_async_tx_descriptor_init(&desc->async_tx, &chan->chan);
+	/* each descriptor has its own tx_submit callback */
+	desc->async_tx.tx_submit = mmp_pdma_tx_submit;
+	desc->async_tx.phys = pdesc;
+
+	return desc;
+}
+
+/**
+ * mmp_pdma_alloc_chan_resources - Allocate resources for DMA channel.
+ *
+ * This function will create a dma pool for descriptor allocation.
+ * Request irq only when channel is requested
+ * Return - The number of allocated descriptors.
+ */
+
+static int mmp_pdma_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+
+	if (chan->desc_pool)
+		return 1;
+
+	chan->desc_pool = dma_pool_create(dev_name(&dchan->dev->device),
+					  chan->dev,
+					  sizeof(struct mmp_pdma_desc_sw),
+					  __alignof__(struct mmp_pdma_desc_sw),
+					  0);
+	if (!chan->desc_pool) {
+		dev_err(chan->dev, "unable to allocate descriptor pool\n");
+		return -ENOMEM;
+	}
+
+	mmp_pdma_free_phy(chan);
+	chan->idle = true;
+	chan->dev_addr = 0;
+	return 1;
+}
+
+static void mmp_pdma_free_desc_list(struct mmp_pdma_chan *chan,
+				    struct list_head *list)
+{
+	struct mmp_pdma_desc_sw *desc, *_desc;
+
+	list_for_each_entry_safe(desc, _desc, list, node) {
+		list_del(&desc->node);
+		dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
+	}
+}
+
+static void mmp_pdma_free_chan_resources(struct dma_chan *dchan)
+{
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->desc_lock, flags);
+	mmp_pdma_free_desc_list(chan, &chan->chain_pending);
+	mmp_pdma_free_desc_list(chan, &chan->chain_running);
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+	dma_pool_destroy(chan->desc_pool);
+	chan->desc_pool = NULL;
+	chan->idle = true;
+	chan->dev_addr = 0;
+	mmp_pdma_free_phy(chan);
+	return;
+}
+
+static struct dma_async_tx_descriptor *
+mmp_pdma_prep_memcpy(struct dma_chan *dchan,
+		     dma_addr_t dma_dst, dma_addr_t dma_src,
+		     size_t len, unsigned long flags)
+{
+	struct mmp_pdma_chan *chan;
+	struct mmp_pdma_desc_sw *first = NULL, *prev = NULL, *new;
+	size_t copy = 0;
+
+	if (!dchan)
+		return NULL;
+
+	if (!len)
+		return NULL;
+
+	chan = to_mmp_pdma_chan(dchan);
+	chan->byte_align = false;
+
+	if (!chan->dir) {
+		chan->dir = DMA_MEM_TO_MEM;
+		chan->dcmd = DCMD_INCTRGADDR | DCMD_INCSRCADDR;
+		chan->dcmd |= DCMD_BURST32;
+	}
+
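+	/* split the copy into PDMA_MAX_DESC_BYTES sized link descriptors */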
+	do {
+		/* Allocate the link descriptor from DMA pool */
+		new = mmp_pdma_alloc_descriptor(chan);
+		if (!new) {
+			dev_err(chan->dev, "no memory for desc\n");
+			goto fail;
+		}
+
+		copy = min_t(size_t, len, PDMA_MAX_DESC_BYTES);
+		if (dma_src & 0x7 || dma_dst & 0x7)
+			chan->byte_align = true;
+
+		new->desc.dcmd = chan->dcmd | (DCMD_LENGTH & copy);
+		new->desc.dsadr = dma_src;
+		new->desc.dtadr = dma_dst;
+
+		if (!first)
+			first = new;
+		else
+			prev->desc.ddadr = new->async_tx.phys;
+
+		new->async_tx.cookie = 0;
+		async_tx_ack(&new->async_tx);
+
+		prev = new;
+		len -= copy;
+
+		if (chan->dir == DMA_MEM_TO_DEV) {
+			dma_src += copy;
+		} else if (chan->dir == DMA_DEV_TO_MEM) {
+			dma_dst += copy;
+		} else if (chan->dir == DMA_MEM_TO_MEM) {
+			dma_src += copy;
+			dma_dst += copy;
+		}
+
+		/* Insert the link descriptor to the LD ring */
+		list_add_tail(&new->node, &first->tx_list);
+	} while (len);
+
+	first->async_tx.flags = flags; /* client is in control of this ack */
+	first->async_tx.cookie = -EBUSY;
+
+	/* last desc and fire IRQ */
+	new->desc.ddadr = DDADR_STOP;
+	new->desc.dcmd |= DCMD_ENDIRQEN;
+
+	chan->cyclic_first = NULL;
+
+	return &first->async_tx;
+
+fail:
+	if (first)
+		mmp_pdma_free_desc_list(chan, &first->tx_list);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mmp_pdma_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
+		       unsigned int sg_len, enum dma_transfer_direction dir,
+		       unsigned long flags, void *context)
+{
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+	struct mmp_pdma_desc_sw *first = NULL, *prev = NULL, *new = NULL;
+	size_t len, avail;
+	struct scatterlist *sg;
+	dma_addr_t addr;
+	int i;
+
+	if ((sgl == NULL) || (sg_len == 0))
+		return NULL;
+
+	chan->byte_align = false;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		addr = sg_dma_address(sg);
+		avail = sg_dma_len(sg);
+
+		do {
+			len = min_t(size_t, avail, PDMA_MAX_DESC_BYTES);
+			if (addr & 0x7)
+				chan->byte_align = true;
+
+			/* allocate and populate the descriptor */
+			new = mmp_pdma_alloc_descriptor(chan);
+			if (!new) {
+				dev_err(chan->dev, "no memory for desc\n");
+				goto fail;
+			}
+
+			new->desc.dcmd = chan->dcmd | (DCMD_LENGTH & len);
+			if (dir == DMA_MEM_TO_DEV) {
+				new->desc.dsadr = addr;
+				new->desc.dtadr = chan->dev_addr;
+			} else {
+				new->desc.dsadr = chan->dev_addr;
+				new->desc.dtadr = addr;
+			}
+
+			if (!first)
+				first = new;
+			else
+				prev->desc.ddadr = new->async_tx.phys;
+
+			new->async_tx.cookie = 0;
+			async_tx_ack(&new->async_tx);
+			prev = new;
+
+			/* Insert the link descriptor to the LD ring */
+			list_add_tail(&new->node, &first->tx_list);
+
+			/* update metadata */
+			addr += len;
+			avail -= len;
+		} while (avail);
+	}
+
+	first->async_tx.cookie = -EBUSY;
+	first->async_tx.flags = flags;
+
+	/* last desc and fire IRQ */
+	new->desc.ddadr = DDADR_STOP;
+	new->desc.dcmd |= DCMD_ENDIRQEN;
+
+	chan->dir = dir;
+	chan->cyclic_first = NULL;
+
+	return &first->async_tx;
+
+fail:
+	if (first)
+		mmp_pdma_free_desc_list(chan, &first->tx_list);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mmp_pdma_prep_dma_cyclic(struct dma_chan *dchan,
+			 dma_addr_t buf_addr, size_t len, size_t period_len,
+			 enum dma_transfer_direction direction,
+			 unsigned long flags)
+{
+	struct mmp_pdma_chan *chan;
+	struct mmp_pdma_desc_sw *first = NULL, *prev = NULL, *new;
+	dma_addr_t dma_src, dma_dst;
+
+	if (!dchan || !len || !period_len)
+		return NULL;
+
+	/* the buffer length must be a multiple of period_len */
+	if (len % period_len != 0)
+		return NULL;
+
+	if (period_len > PDMA_MAX_DESC_BYTES)
+		return NULL;
+
+	chan = to_mmp_pdma_chan(dchan);
+
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		dma_src = buf_addr;
+		dma_dst = chan->dev_addr;
+		break;
+	case DMA_DEV_TO_MEM:
+		dma_dst = buf_addr;
+		dma_src = chan->dev_addr;
+		break;
+	default:
+		dev_err(chan->dev, "Unsupported direction for cyclic DMA\n");
+		return NULL;
+	}
+
+	chan->dir = direction;
+
+	do {
+		/* Allocate the link descriptor from DMA pool */
+		new = mmp_pdma_alloc_descriptor(chan);
+		if (!new) {
+			dev_err(chan->dev, "no memory for desc\n");
+			goto fail;
+		}
+
+		new->desc.dcmd = (chan->dcmd | DCMD_ENDIRQEN |
+				  (DCMD_LENGTH & period_len));
+		new->desc.dsadr = dma_src;
+		new->desc.dtadr = dma_dst;
+
+		if (!first)
+			first = new;
+		else
+			prev->desc.ddadr = new->async_tx.phys;
+
+		new->async_tx.cookie = 0;
+		async_tx_ack(&new->async_tx);
+
+		prev = new;
+		len -= period_len;
+
+		if (chan->dir == DMA_MEM_TO_DEV)
+			dma_src += period_len;
+		else
+			dma_dst += period_len;
+
+		/* Insert the link descriptor to the LD ring */
+		list_add_tail(&new->node, &first->tx_list);
+	} while (len);
+
+	first->async_tx.flags = flags; /* client is in control of this ack */
+	first->async_tx.cookie = -EBUSY;
+
+	/* make the cyclic link */
+	new->desc.ddadr = first->async_tx.phys;
+	chan->cyclic_first = first;
+
+	return &first->async_tx;
+
+fail:
+	if (first)
+		mmp_pdma_free_desc_list(chan, &first->tx_list);
+	return NULL;
+}
+
+static int mmp_pdma_config(struct dma_chan *dchan,
+			   struct dma_slave_config *cfg)
+{
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+	u32 maxburst = 0, addr = 0;
+	enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED;
+
+	if (!dchan)
+		return -EINVAL;
+
+	if (cfg->direction == DMA_DEV_TO_MEM) {
+		chan->dcmd = DCMD_INCTRGADDR | DCMD_FLOWSRC;
+		maxburst = cfg->src_maxburst;
+		width = cfg->src_addr_width;
+		addr = cfg->src_addr;
+	} else if (cfg->direction == DMA_MEM_TO_DEV) {
+		chan->dcmd = DCMD_INCSRCADDR | DCMD_FLOWTRG;
+		maxburst = cfg->dst_maxburst;
+		width = cfg->dst_addr_width;
+		addr = cfg->dst_addr;
+	}
+
+	if (width == DMA_SLAVE_BUSWIDTH_1_BYTE)
+		chan->dcmd |= DCMD_WIDTH1;
+	else if (width == DMA_SLAVE_BUSWIDTH_2_BYTES)
+		chan->dcmd |= DCMD_WIDTH2;
+	else if (width == DMA_SLAVE_BUSWIDTH_4_BYTES)
+		chan->dcmd |= DCMD_WIDTH4;
+
+	if (maxburst == 8)
+		chan->dcmd |= DCMD_BURST8;
+	else if (maxburst == 16)
+		chan->dcmd |= DCMD_BURST16;
+	else if (maxburst == 32)
+		chan->dcmd |= DCMD_BURST32;
+
+	chan->dir = cfg->direction;
+	chan->dev_addr = addr;
+	/* FIXME: drivers should be ported over to use the filter
+	 * function. Once that's done, the following two lines can
+	 * be removed.
+	 */
+	if (cfg->slave_id)
+		chan->drcmr = cfg->slave_id;
+
+	return 0;
+}
+
+static int mmp_pdma_terminate_all(struct dma_chan *dchan)
+{
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+	unsigned long flags;
+
+	if (!dchan)
+		return -EINVAL;
+
+	disable_chan(chan->phy);
+	mmp_pdma_free_phy(chan);
+	spin_lock_irqsave(&chan->desc_lock, flags);
+	mmp_pdma_free_desc_list(chan, &chan->chain_pending);
+	mmp_pdma_free_desc_list(chan, &chan->chain_running);
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
+	chan->idle = true;
+
+	return 0;
+}
+
+static unsigned int mmp_pdma_residue(struct mmp_pdma_chan *chan,
+				     dma_cookie_t cookie)
+{
+	struct mmp_pdma_desc_sw *sw;
+	u32 curr, residue = 0;
+	bool passed = false;
+	bool cyclic = chan->cyclic_first != NULL;
+
+	/*
+	 * If the channel does not have a phy pointer anymore, it has already
+	 * been completed. Therefore, its residue is 0.
+	 */
+	if (!chan->phy)
+		return 0;
+
+	if (chan->dir == DMA_DEV_TO_MEM)
+		curr = readl(chan->phy->base + DTADR(chan->phy->idx));
+	else
+		curr = readl(chan->phy->base + DSADR(chan->phy->idx));
+
+	list_for_each_entry(sw, &chan->chain_running, node) {
+		u32 start, end, len;
+
+		if (chan->dir == DMA_DEV_TO_MEM)
+			start = sw->desc.dtadr;
+		else
+			start = sw->desc.dsadr;
+
+		len = sw->desc.dcmd & DCMD_LENGTH;
+		end = start + len;
+
+		/*
+		 * 'passed' will be latched once we found the descriptor which
+		 * lies inside the boundaries of the curr pointer. All
+		 * descriptors that occur in the list _after_ we found that
+		 * partially handled descriptor are still to be processed and
+		 * are hence added to the residual bytes counter.
+		 */
+
+		if (passed) {
+			residue += len;
+		} else if (curr >= start && curr <= end) {
+			residue += end - curr;
+			passed = true;
+		}
+
+		/*
+		 * Descriptors that have the ENDIRQEN bit set mark the end of a
+		 * transaction chain, and the cookie assigned with it has been
+		 * returned previously from mmp_pdma_tx_submit().
+		 *
+		 * In case we have multiple transactions in the running chain,
+		 * and the cookie does not match the one the user asked us
+		 * about, reset the state variables and start over.
+		 *
+		 * This logic does not apply to cyclic transactions, where all
+		 * descriptors have the ENDIRQEN bit set, and for which we
+		 * can't have multiple transactions on one channel anyway.
+		 */
+		if (cyclic || !(sw->desc.dcmd & DCMD_ENDIRQEN))
+			continue;
+
+		if (sw->async_tx.cookie == cookie) {
+			return residue;
+		} else {
+			residue = 0;
+			passed = false;
+		}
+	}
+
+	/* We should only get here in case of cyclic transactions */
+	return residue;
+}
+
+static enum dma_status mmp_pdma_tx_status(struct dma_chan *dchan,
+					  dma_cookie_t cookie,
+					  struct dma_tx_state *txstate)
+{
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(dchan, cookie, txstate);
+	if (likely(ret != DMA_ERROR))
+		dma_set_residue(txstate, mmp_pdma_residue(chan, cookie));
+
+	return ret;
+}
+
+/**
+ * mmp_pdma_issue_pending - Issue the DMA start command
+ * pending list ==> running list
+ */
+static void mmp_pdma_issue_pending(struct dma_chan *dchan)
+{
+	struct mmp_pdma_chan *chan = to_mmp_pdma_chan(dchan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->desc_lock, flags);
+	start_pending_queue(chan);
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
+}
+
+/*
+ * dma_do_tasklet
+ * Run the completion callbacks and start any pending transactions.
+ */
+static void dma_do_tasklet(unsigned long data)
+{
+	struct mmp_pdma_chan *chan = (struct mmp_pdma_chan *)data;
+	struct mmp_pdma_desc_sw *desc, *_desc;
+	LIST_HEAD(chain_cleanup);
+	unsigned long flags;
+	struct dmaengine_desc_callback cb;
+
+	if (chan->cyclic_first) {
+		spin_lock_irqsave(&chan->desc_lock, flags);
+		desc = chan->cyclic_first;
+		dmaengine_desc_get_callback(&desc->async_tx, &cb);
+		spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+		dmaengine_desc_callback_invoke(&cb, NULL);
+
+		return;
+	}
+
+	/* submit pending list; callback for each desc; free desc */
+	spin_lock_irqsave(&chan->desc_lock, flags);
+
+	list_for_each_entry_safe(desc, _desc, &chan->chain_running, node) {
+		/*
+		 * move the descriptors to a temporary list so we can drop
+		 * the lock during the entire cleanup operation
+		 */
+		list_move(&desc->node, &chain_cleanup);
+
+		/*
+		 * Look for the first list entry which has the ENDIRQEN flag
+		 * set. That is the descriptor we got an interrupt for, so
+		 * complete that transaction and its cookie.
+		 */
+		if (desc->desc.dcmd & DCMD_ENDIRQEN) {
+			dma_cookie_t cookie = desc->async_tx.cookie;
+			dma_cookie_complete(&desc->async_tx);
+			dev_dbg(chan->dev, "completed_cookie=%d\n", cookie);
+			break;
+		}
+	}
+
+	/*
+	 * The hardware is idle and ready for more when the
+	 * chain_running list is empty.
+	 */
+	chan->idle = list_empty(&chan->chain_running);
+
+	/* Start any pending transactions automatically */
+	start_pending_queue(chan);
+	spin_unlock_irqrestore(&chan->desc_lock, flags);
+
+	/* Run the callback for each descriptor, in order */
+	list_for_each_entry_safe(desc, _desc, &chain_cleanup, node) {
+		struct dma_async_tx_descriptor *txd = &desc->async_tx;
+
+		/* Remove from the list of transactions */
+		list_del(&desc->node);
+		/* Run the link descriptor callback function */
+		dmaengine_desc_get_callback(txd, &cb);
+		dmaengine_desc_callback_invoke(&cb, NULL);
+
+		dma_pool_free(chan->desc_pool, desc, txd->phys);
+	}
+}
+
+static int mmp_pdma_remove(struct platform_device *op)
+{
+	struct mmp_pdma_device *pdev = platform_get_drvdata(op);
+	struct mmp_pdma_phy *phy;
+	int i, irq = 0, irq_num = 0;
+
+	for (i = 0; i < pdev->dma_channels; i++) {
+		if (platform_get_irq(op, i) > 0)
+			irq_num++;
+	}
+
+	if (irq_num != pdev->dma_channels) {
+		irq = platform_get_irq(op, 0);
+		devm_free_irq(&op->dev, irq, pdev);
+	} else {
+		for (i = 0; i < pdev->dma_channels; i++) {
+			phy = &pdev->phy[i];
+			irq = platform_get_irq(op, i);
+			devm_free_irq(&op->dev, irq, phy);
+		}
+	}
+
+	dma_async_device_unregister(&pdev->device);
+	return 0;
+}
+
+static int mmp_pdma_chan_init(struct mmp_pdma_device *pdev, int idx, int irq)
+{
+	struct mmp_pdma_phy *phy  = &pdev->phy[idx];
+	struct mmp_pdma_chan *chan;
+	int ret;
+
+	chan = devm_kzalloc(pdev->dev, sizeof(*chan), GFP_KERNEL);
+	if (chan == NULL)
+		return -ENOMEM;
+
+	phy->idx = idx;
+	phy->base = pdev->base;
+
+	if (irq) {
+		ret = devm_request_irq(pdev->dev, irq, mmp_pdma_chan_handler,
+				       IRQF_SHARED, "pdma", phy);
+		if (ret) {
+			dev_err(pdev->dev, "channel request irq fail!\n");
+			return ret;
+		}
+	}
+
+	spin_lock_init(&chan->desc_lock);
+	chan->dev = pdev->dev;
+	chan->chan.device = &pdev->device;
+	tasklet_init(&chan->tasklet, dma_do_tasklet, (unsigned long)chan);
+	INIT_LIST_HEAD(&chan->chain_pending);
+	INIT_LIST_HEAD(&chan->chain_running);
+
+	/* register virt channel to dma engine */
+	list_add_tail(&chan->chan.device_node, &pdev->device.channels);
+
+	return 0;
+}
+
+static const struct of_device_id mmp_pdma_dt_ids[] = {
+	{ .compatible = "marvell,pdma-1.0", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, mmp_pdma_dt_ids);
+
+static struct dma_chan *mmp_pdma_dma_xlate(struct of_phandle_args *dma_spec,
+					   struct of_dma *ofdma)
+{
+	struct mmp_pdma_device *d = ofdma->of_dma_data;
+	struct dma_chan *chan;
+
+	chan = dma_get_any_slave_channel(&d->device);
+	if (!chan)
+		return NULL;
+
+	to_mmp_pdma_chan(chan)->drcmr = dma_spec->args[0];
+
+	return chan;
+}
+
+static int mmp_pdma_probe(struct platform_device *op)
+{
+	struct mmp_pdma_device *pdev;
+	const struct of_device_id *of_id;
+	struct mmp_dma_platdata *pdata = dev_get_platdata(&op->dev);
+	struct resource *iores;
+	int i, ret, irq = 0;
+	int dma_channels = 0, irq_num = 0;
+	const enum dma_slave_buswidth widths =
+		DMA_SLAVE_BUSWIDTH_1_BYTE   | DMA_SLAVE_BUSWIDTH_2_BYTES |
+		DMA_SLAVE_BUSWIDTH_4_BYTES;
+
+	pdev = devm_kzalloc(&op->dev, sizeof(*pdev), GFP_KERNEL);
+	if (!pdev)
+		return -ENOMEM;
+
+	pdev->dev = &op->dev;
+
+	spin_lock_init(&pdev->phy_lock);
+
+	iores = platform_get_resource(op, IORESOURCE_MEM, 0);
+	pdev->base = devm_ioremap_resource(pdev->dev, iores);
+	if (IS_ERR(pdev->base))
+		return PTR_ERR(pdev->base);
+
+	of_id = of_match_device(mmp_pdma_dt_ids, pdev->dev);
+	if (of_id)
+		of_property_read_u32(pdev->dev->of_node, "#dma-channels",
+				     &dma_channels);
+	else if (pdata && pdata->dma_channels)
+		dma_channels = pdata->dma_channels;
+	else
+		dma_channels = 32;	/* default 32 channel */
+	pdev->dma_channels = dma_channels;
+
+	for (i = 0; i < dma_channels; i++) {
+		if (platform_get_irq(op, i) > 0)
+			irq_num++;
+	}
+
+	pdev->phy = devm_kcalloc(pdev->dev, dma_channels, sizeof(*pdev->phy),
+				 GFP_KERNEL);
+	if (pdev->phy == NULL)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&pdev->device.channels);
+
+	if (irq_num != dma_channels) {
+		/* all chan share one irq, demux inside */
+		irq = platform_get_irq(op, 0);
+		ret = devm_request_irq(pdev->dev, irq, mmp_pdma_int_handler,
+				       IRQF_SHARED, "pdma", pdev);
+		if (ret)
+			return ret;
+	}
+
+	for (i = 0; i < dma_channels; i++) {
+		irq = (irq_num != dma_channels) ? 0 : platform_get_irq(op, i);
+		ret = mmp_pdma_chan_init(pdev, i, irq);
+		if (ret)
+			return ret;
+	}
+
+	dma_cap_set(DMA_SLAVE, pdev->device.cap_mask);
+	dma_cap_set(DMA_MEMCPY, pdev->device.cap_mask);
+	dma_cap_set(DMA_CYCLIC, pdev->device.cap_mask);
+	dma_cap_set(DMA_PRIVATE, pdev->device.cap_mask);
+	pdev->device.dev = &op->dev;
+	pdev->device.device_alloc_chan_resources = mmp_pdma_alloc_chan_resources;
+	pdev->device.device_free_chan_resources = mmp_pdma_free_chan_resources;
+	pdev->device.device_tx_status = mmp_pdma_tx_status;
+	pdev->device.device_prep_dma_memcpy = mmp_pdma_prep_memcpy;
+	pdev->device.device_prep_slave_sg = mmp_pdma_prep_slave_sg;
+	pdev->device.device_prep_dma_cyclic = mmp_pdma_prep_dma_cyclic;
+	pdev->device.device_issue_pending = mmp_pdma_issue_pending;
+	pdev->device.device_config = mmp_pdma_config;
+	pdev->device.device_terminate_all = mmp_pdma_terminate_all;
+	pdev->device.copy_align = DMAENGINE_ALIGN_8_BYTES;
+	pdev->device.src_addr_widths = widths;
+	pdev->device.dst_addr_widths = widths;
+	pdev->device.directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
+	pdev->device.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+
+	if (pdev->dev->coherent_dma_mask)
+		dma_set_mask(pdev->dev, pdev->dev->coherent_dma_mask);
+	else
+		dma_set_mask(pdev->dev, DMA_BIT_MASK(64));
+
+	ret = dma_async_device_register(&pdev->device);
+	if (ret) {
+		dev_err(pdev->device.dev, "unable to register\n");
+		return ret;
+	}
+
+	if (op->dev.of_node) {
+		/* Device-tree DMA controller registration */
+		ret = of_dma_controller_register(op->dev.of_node,
+						 mmp_pdma_dma_xlate, pdev);
+		if (ret < 0) {
+			dev_err(&op->dev, "of_dma_controller_register failed\n");
+			return ret;
+		}
+	}
+
+	platform_set_drvdata(op, pdev);
+	dev_info(pdev->device.dev, "initialized %d channels\n", dma_channels);
+	return 0;
+}
+
+static const struct platform_device_id mmp_pdma_id_table[] = {
+	{ "mmp-pdma", },
+	{ },
+};
+
+static struct platform_driver mmp_pdma_driver = {
+	.driver		= {
+		.name	= "mmp-pdma",
+		.of_match_table = mmp_pdma_dt_ids,
+	},
+	.id_table	= mmp_pdma_id_table,
+	.probe		= mmp_pdma_probe,
+	.remove		= mmp_pdma_remove,
+};
+
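+/*
+ * Filter callback for dma_request_channel(); the parameter points to the
+ * DRCMR requestor line the client wants. A minimal, illustrative use:
+ *
+ *	dma_cap_mask_t mask;
+ *	unsigned int requestor = 47;	// hypothetical requestor line
+ *	struct dma_chan *chan;
+ *
+ *	dma_cap_zero(mask);
+ *	dma_cap_set(DMA_SLAVE, mask);
+ *	chan = dma_request_channel(mask, mmp_pdma_filter_fn, &requestor);
+ */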
+bool mmp_pdma_filter_fn(struct dma_chan *chan, void *param)
+{
+	struct mmp_pdma_chan *c = to_mmp_pdma_chan(chan);
+
+	if (chan->device->dev->driver != &mmp_pdma_driver.driver)
+		return false;
+
+	c->drcmr = *(unsigned int *)param;
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(mmp_pdma_filter_fn);
+
+module_platform_driver(mmp_pdma_driver);
+
+MODULE_DESCRIPTION("MARVELL MMP Peripheral DMA Driver");
+MODULE_AUTHOR("Marvell International Ltd.");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c
new file mode 100644
index 0000000..13c68b6
--- /dev/null
+++ b/drivers/dma/mmp_tdma.c
@@ -0,0 +1,741 @@
+/*
+ * Driver For Marvell Two-channel DMA Engine
+ *
+ * Copyright: Marvell International Ltd.
+ *
+ * The code contained herein is licensed under the GNU General Public
+ * License. You may obtain a copy of the GNU General Public License
+ * Version 2 or later at the following locations:
+ *
+ */
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/platform_data/dma-mmp_tdma.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+
+#include "dmaengine.h"
+
+/*
+ * Two-Channel DMA registers
+ */
+#define TDBCR		0x00	/* Byte Count */
+#define TDSAR		0x10	/* Src Addr */
+#define TDDAR		0x20	/* Dst Addr */
+#define TDNDPR		0x30	/* Next Desc */
+#define TDCR		0x40	/* Control */
+#define TDCP		0x60	/* Priority */
+#define TDCDPR		0x70	/* Current Desc */
+#define TDIMR		0x80	/* Int Mask */
+#define TDISR		0xa0	/* Int Status */
+
+/* Two-Channel DMA Control Register */
+#define TDCR_SSZ_8_BITS		(0x0 << 22)	/* Sample Size */
+#define TDCR_SSZ_12_BITS	(0x1 << 22)
+#define TDCR_SSZ_16_BITS	(0x2 << 22)
+#define TDCR_SSZ_20_BITS	(0x3 << 22)
+#define TDCR_SSZ_24_BITS	(0x4 << 22)
+#define TDCR_SSZ_32_BITS	(0x5 << 22)
+#define TDCR_SSZ_SHIFT		(0x1 << 22)
+#define TDCR_SSZ_MASK		(0x7 << 22)
+#define TDCR_SSPMOD		(0x1 << 21)	/* SSP MOD */
+#define TDCR_ABR		(0x1 << 20)	/* Channel Abort */
+#define TDCR_CDE		(0x1 << 17)	/* Close Desc Enable */
+#define TDCR_PACKMOD		(0x1 << 16)	/* Pack Mode (ADMA Only) */
+#define TDCR_CHANACT		(0x1 << 14)	/* Channel Active */
+#define TDCR_FETCHND		(0x1 << 13)	/* Fetch Next Desc */
+#define TDCR_CHANEN		(0x1 << 12)	/* Channel Enable */
+#define TDCR_INTMODE		(0x1 << 10)	/* Interrupt Mode */
+#define TDCR_CHAINMOD		(0x1 << 9)	/* Chain Mode */
+#define TDCR_BURSTSZ_MSK	(0x7 << 6)	/* Burst Size */
+#define TDCR_BURSTSZ_4B		(0x0 << 6)
+#define TDCR_BURSTSZ_8B		(0x1 << 6)
+#define TDCR_BURSTSZ_16B	(0x3 << 6)
+#define TDCR_BURSTSZ_32B	(0x6 << 6)
+#define TDCR_BURSTSZ_64B	(0x7 << 6)
+#define TDCR_BURSTSZ_SQU_1B		(0x5 << 6)
+#define TDCR_BURSTSZ_SQU_2B		(0x6 << 6)
+#define TDCR_BURSTSZ_SQU_4B		(0x0 << 6)
+#define TDCR_BURSTSZ_SQU_8B		(0x1 << 6)
+#define TDCR_BURSTSZ_SQU_16B	(0x3 << 6)
+#define TDCR_BURSTSZ_SQU_32B	(0x7 << 6)
+#define TDCR_BURSTSZ_128B	(0x5 << 6)
+#define TDCR_DSTDIR_MSK		(0x3 << 4)	/* Dst Direction */
+#define TDCR_DSTDIR_ADDR_HOLD	(0x2 << 4)	/* Dst Addr Hold */
+#define TDCR_DSTDIR_ADDR_INC	(0x0 << 4)	/* Dst Addr Increment */
+#define TDCR_SRCDIR_MSK		(0x3 << 2)	/* Src Direction */
+#define TDCR_SRCDIR_ADDR_HOLD	(0x2 << 2)	/* Src Addr Hold */
+#define TDCR_SRCDIR_ADDR_INC	(0x0 << 2)	/* Src Addr Increment */
+#define TDCR_DSTDESCCONT	(0x1 << 1)
+#define TDCR_SRCDESTCONT	(0x1 << 0)
+
+/* Two-Channel DMA Int Mask Register */
+#define TDIMR_COMP		(0x1 << 0)
+
+/* Two-Channel DMA Int Status Register */
+#define TDISR_COMP		(0x1 << 0)
+
+/*
+ * Two-Channel DMA Descriptor Struct
+ * NOTE: desc's buf must be aligned to 16 bytes.
+ */
+struct mmp_tdma_desc {
+	u32 byte_cnt;
+	u32 src_addr;
+	u32 dst_addr;
+	u32 nxt_desc;
+};
+
+enum mmp_tdma_type {
+	MMP_AUD_TDMA = 0,
+	PXA910_SQU,
+};
+
+#define TDMA_MAX_XFER_BYTES    SZ_64K
+
+struct mmp_tdma_chan {
+	struct device			*dev;
+	struct dma_chan			chan;
+	struct dma_async_tx_descriptor	desc;
+	struct tasklet_struct		tasklet;
+
+	struct mmp_tdma_desc		*desc_arr;
+	dma_addr_t			desc_arr_phys;
+	int				desc_num;
+	enum dma_transfer_direction	dir;
+	dma_addr_t			dev_addr;
+	u32				burst_sz;
+	enum dma_slave_buswidth		buswidth;
+	enum dma_status			status;
+
+	int				idx;
+	enum mmp_tdma_type		type;
+	int				irq;
+	void __iomem			*reg_base;
+
+	size_t				buf_len;
+	size_t				period_len;
+	size_t				pos;
+
+	struct gen_pool			*pool;
+};
+
+#define TDMA_CHANNEL_NUM 2
+struct mmp_tdma_device {
+	struct device			*dev;
+	void __iomem			*base;
+	struct dma_device		device;
+	struct mmp_tdma_chan		*tdmac[TDMA_CHANNEL_NUM];
+};
+
+#define to_mmp_tdma_chan(dchan) container_of(dchan, struct mmp_tdma_chan, chan)
+
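+/* Load the descriptor list address and ask the hardware to fetch it */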
+static void mmp_tdma_chan_set_desc(struct mmp_tdma_chan *tdmac, dma_addr_t phys)
+{
+	writel(phys, tdmac->reg_base + TDNDPR);
+	writel(readl(tdmac->reg_base + TDCR) | TDCR_FETCHND,
+					tdmac->reg_base + TDCR);
+}
+
+static void mmp_tdma_enable_irq(struct mmp_tdma_chan *tdmac, bool enable)
+{
+	if (enable)
+		writel(TDIMR_COMP, tdmac->reg_base + TDIMR);
+	else
+		writel(0, tdmac->reg_base + TDIMR);
+}
+
+static void mmp_tdma_enable_chan(struct mmp_tdma_chan *tdmac)
+{
+	/* enable dma chan */
+	writel(readl(tdmac->reg_base + TDCR) | TDCR_CHANEN,
+					tdmac->reg_base + TDCR);
+	tdmac->status = DMA_IN_PROGRESS;
+}
+
+static int mmp_tdma_disable_chan(struct dma_chan *chan)
+{
+	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+	u32 tdcr;
+
+	tdcr = readl(tdmac->reg_base + TDCR);
+	tdcr |= TDCR_ABR;
+	tdcr &= ~TDCR_CHANEN;
+	writel(tdcr, tdmac->reg_base + TDCR);
+
+	tdmac->status = DMA_COMPLETE;
+
+	return 0;
+}
+
+static int mmp_tdma_resume_chan(struct dma_chan *chan)
+{
+	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+
+	writel(readl(tdmac->reg_base + TDCR) | TDCR_CHANEN,
+					tdmac->reg_base + TDCR);
+	tdmac->status = DMA_IN_PROGRESS;
+
+	return 0;
+}
+
+static int mmp_tdma_pause_chan(struct dma_chan *chan)
+{
+	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+
+	writel(readl(tdmac->reg_base + TDCR) & ~TDCR_CHANEN,
+					tdmac->reg_base + TDCR);
+	tdmac->status = DMA_PAUSED;
+
+	return 0;
+}
+
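+/*
+ * Program TDCR from the direction, burst size and bus width previously
+ * captured by mmp_tdma_config().
+ */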
+static int mmp_tdma_config_chan(struct dma_chan *chan)
+{
+	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+	unsigned int tdcr = 0;
+
+	mmp_tdma_disable_chan(chan);
+
+	if (tdmac->dir == DMA_MEM_TO_DEV)
+		tdcr = TDCR_DSTDIR_ADDR_HOLD | TDCR_SRCDIR_ADDR_INC;
+	else if (tdmac->dir == DMA_DEV_TO_MEM)
+		tdcr = TDCR_SRCDIR_ADDR_HOLD | TDCR_DSTDIR_ADDR_INC;
+
+	if (tdmac->type == MMP_AUD_TDMA) {
+		tdcr |= TDCR_PACKMOD;
+
+		switch (tdmac->burst_sz) {
+		case 4:
+			tdcr |= TDCR_BURSTSZ_4B;
+			break;
+		case 8:
+			tdcr |= TDCR_BURSTSZ_8B;
+			break;
+		case 16:
+			tdcr |= TDCR_BURSTSZ_16B;
+			break;
+		case 32:
+			tdcr |= TDCR_BURSTSZ_32B;
+			break;
+		case 64:
+			tdcr |= TDCR_BURSTSZ_64B;
+			break;
+		case 128:
+			tdcr |= TDCR_BURSTSZ_128B;
+			break;
+		default:
+			dev_err(tdmac->dev, "mmp_tdma: unknown burst size.\n");
+			return -EINVAL;
+		}
+
+		switch (tdmac->buswidth) {
+		case DMA_SLAVE_BUSWIDTH_1_BYTE:
+			tdcr |= TDCR_SSZ_8_BITS;
+			break;
+		case DMA_SLAVE_BUSWIDTH_2_BYTES:
+			tdcr |= TDCR_SSZ_16_BITS;
+			break;
+		case DMA_SLAVE_BUSWIDTH_4_BYTES:
+			tdcr |= TDCR_SSZ_32_BITS;
+			break;
+		default:
+			dev_err(tdmac->dev, "mmp_tdma: unknown bus size.\n");
+			return -EINVAL;
+		}
+	} else if (tdmac->type == PXA910_SQU) {
+		tdcr |= TDCR_SSPMOD;
+
+		switch (tdmac->burst_sz) {
+		case 1:
+			tdcr |= TDCR_BURSTSZ_SQU_1B;
+			break;
+		case 2:
+			tdcr |= TDCR_BURSTSZ_SQU_2B;
+			break;
+		case 4:
+			tdcr |= TDCR_BURSTSZ_SQU_4B;
+			break;
+		case 8:
+			tdcr |= TDCR_BURSTSZ_SQU_8B;
+			break;
+		case 16:
+			tdcr |= TDCR_BURSTSZ_SQU_16B;
+			break;
+		case 32:
+			tdcr |= TDCR_BURSTSZ_SQU_32B;
+			break;
+		default:
+			dev_err(tdmac->dev, "mmp_tdma: unknown burst size.\n");
+			return -EINVAL;
+		}
+	}
+
+	writel(tdcr, tdmac->reg_base + TDCR);
+	return 0;
+}
+
+static int mmp_tdma_clear_chan_irq(struct mmp_tdma_chan *tdmac)
+{
+	u32 reg = readl(tdmac->reg_base + TDISR);
+
+	if (reg & TDISR_COMP) {
+		/* clear irq */
+		reg &= ~TDISR_COMP;
+		writel(reg, tdmac->reg_base + TDISR);
+
+		return 0;
+	}
+	return -EAGAIN;
+}
+
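+/*
+ * Derive the transfer position from the hardware address registers:
+ * channel 0 reports its current source address, channel 1 its current
+ * destination address, relative to the first descriptor.
+ */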
+static size_t mmp_tdma_get_pos(struct mmp_tdma_chan *tdmac)
+{
+	size_t reg;
+
+	if (tdmac->idx == 0) {
+		reg = __raw_readl(tdmac->reg_base + TDSAR);
+		reg -= tdmac->desc_arr[0].src_addr;
+	} else if (tdmac->idx == 1) {
+		reg = __raw_readl(tdmac->reg_base + TDDAR);
+		reg -= tdmac->desc_arr[0].dst_addr;
+	} else
+		return -EINVAL;
+
+	return reg;
+}
+
+static irqreturn_t mmp_tdma_chan_handler(int irq, void *dev_id)
+{
+	struct mmp_tdma_chan *tdmac = dev_id;
+
+	if (mmp_tdma_clear_chan_irq(tdmac) == 0) {
+		tasklet_schedule(&tdmac->tasklet);
+		return IRQ_HANDLED;
+	} else
+		return IRQ_NONE;
+}
+
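+/* Shared interrupt: check both channels and dispatch to the channel handler */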
+static irqreturn_t mmp_tdma_int_handler(int irq, void *dev_id)
+{
+	struct mmp_tdma_device *tdev = dev_id;
+	int i, ret;
+	int irq_num = 0;
+
+	for (i = 0; i < TDMA_CHANNEL_NUM; i++) {
+		struct mmp_tdma_chan *tdmac = tdev->tdmac[i];
+
+		ret = mmp_tdma_chan_handler(irq, tdmac);
+		if (ret == IRQ_HANDLED)
+			irq_num++;
+	}
+
+	if (irq_num)
+		return IRQ_HANDLED;
+	else
+		return IRQ_NONE;
+}
+
+static void dma_do_tasklet(unsigned long data)
+{
+	struct mmp_tdma_chan *tdmac = (struct mmp_tdma_chan *)data;
+
+	dmaengine_desc_get_callback_invoke(&tdmac->desc, NULL);
+}
+
+static void mmp_tdma_free_descriptor(struct mmp_tdma_chan *tdmac)
+{
+	struct gen_pool *gpool;
+	int size = tdmac->desc_num * sizeof(struct mmp_tdma_desc);
+
+	gpool = tdmac->pool;
+	if (gpool && tdmac->desc_arr)
+		gen_pool_free(gpool, (unsigned long)tdmac->desc_arr,
+				size);
+	tdmac->desc_arr = NULL;
+
+	return;
+}
+
+static dma_cookie_t mmp_tdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(tx->chan);
+
+	mmp_tdma_chan_set_desc(tdmac, tdmac->desc_arr_phys);
+
+	return 0;
+}
+
+static int mmp_tdma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+	int ret;
+
+	dma_async_tx_descriptor_init(&tdmac->desc, chan);
+	tdmac->desc.tx_submit = mmp_tdma_tx_submit;
+
+	if (tdmac->irq) {
+		ret = devm_request_irq(tdmac->dev, tdmac->irq,
+			mmp_tdma_chan_handler, 0, "tdma", tdmac);
+		if (ret)
+			return ret;
+	}
+	return 1;
+}
+
+static void mmp_tdma_free_chan_resources(struct dma_chan *chan)
+{
+	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+
+	if (tdmac->irq)
+		devm_free_irq(tdmac->dev, tdmac->irq, tdmac);
+	mmp_tdma_free_descriptor(tdmac);
+	return;
+}
+
+static struct mmp_tdma_desc *mmp_tdma_alloc_descriptor(struct mmp_tdma_chan *tdmac)
+{
+	struct gen_pool *gpool;
+	int size = tdmac->desc_num * sizeof(struct mmp_tdma_desc);
+
+	gpool = tdmac->pool;
+	if (!gpool)
+		return NULL;
+
+	tdmac->desc_arr = gen_pool_dma_alloc(gpool, size, &tdmac->desc_arr_phys);
+
+	return tdmac->desc_arr;
+}
+
+static struct dma_async_tx_descriptor *mmp_tdma_prep_dma_cyclic(
+		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
+		size_t period_len, enum dma_transfer_direction direction,
+		unsigned long flags)
+{
+	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+	struct mmp_tdma_desc *desc;
+	int num_periods = buf_len / period_len;
+	int i = 0, buf = 0;
+
+	if (tdmac->status != DMA_COMPLETE)
+		return NULL;
+
+	if (period_len > TDMA_MAX_XFER_BYTES) {
+		dev_err(tdmac->dev,
+				"maximum period size exceeded: %zu > %d\n",
+				period_len, TDMA_MAX_XFER_BYTES);
+		goto err_out;
+	}
+
+	tdmac->status = DMA_IN_PROGRESS;
+	tdmac->desc_num = num_periods;
+	desc = mmp_tdma_alloc_descriptor(tdmac);
+	if (!desc)
+		goto err_out;
+
+	while (buf < buf_len) {
+		desc = &tdmac->desc_arr[i];
+
+		if (i + 1 == num_periods)
+			desc->nxt_desc = tdmac->desc_arr_phys;
+		else
+			desc->nxt_desc = tdmac->desc_arr_phys +
+				sizeof(*desc) * (i + 1);
+
+		if (direction == DMA_MEM_TO_DEV) {
+			desc->src_addr = dma_addr;
+			desc->dst_addr = tdmac->dev_addr;
+		} else {
+			desc->src_addr = tdmac->dev_addr;
+			desc->dst_addr = dma_addr;
+		}
+		desc->byte_cnt = period_len;
+		dma_addr += period_len;
+		buf += period_len;
+		i++;
+	}
+
+	/* enable interrupt */
+	if (flags & DMA_PREP_INTERRUPT)
+		mmp_tdma_enable_irq(tdmac, true);
+
+	tdmac->buf_len = buf_len;
+	tdmac->period_len = period_len;
+	tdmac->pos = 0;
+
+	return &tdmac->desc;
+
+err_out:
+	tdmac->status = DMA_ERROR;
+	return NULL;
+}
+
+static int mmp_tdma_terminate_all(struct dma_chan *chan)
+{
+	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+
+	mmp_tdma_disable_chan(chan);
+	/* disable interrupt */
+	mmp_tdma_enable_irq(tdmac, false);
+
+	return 0;
+}
+
+static int mmp_tdma_config(struct dma_chan *chan,
+			   struct dma_slave_config *dmaengine_cfg)
+{
+	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+
+	if (dmaengine_cfg->direction == DMA_DEV_TO_MEM) {
+		tdmac->dev_addr = dmaengine_cfg->src_addr;
+		tdmac->burst_sz = dmaengine_cfg->src_maxburst;
+		tdmac->buswidth = dmaengine_cfg->src_addr_width;
+	} else {
+		tdmac->dev_addr = dmaengine_cfg->dst_addr;
+		tdmac->burst_sz = dmaengine_cfg->dst_maxburst;
+		tdmac->buswidth = dmaengine_cfg->dst_addr_width;
+	}
+	tdmac->dir = dmaengine_cfg->direction;
+
+	return mmp_tdma_config_chan(chan);
+}
+
+static enum dma_status mmp_tdma_tx_status(struct dma_chan *chan,
+			dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+
+	tdmac->pos = mmp_tdma_get_pos(tdmac);
+	dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie,
+			 tdmac->buf_len - tdmac->pos);
+
+	return tdmac->status;
+}
+
+static void mmp_tdma_issue_pending(struct dma_chan *chan)
+{
+	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+
+	mmp_tdma_enable_chan(tdmac);
+}
+
+static int mmp_tdma_remove(struct platform_device *pdev)
+{
+	struct mmp_tdma_device *tdev = platform_get_drvdata(pdev);
+
+	dma_async_device_unregister(&tdev->device);
+	return 0;
+}
+
+static int mmp_tdma_chan_init(struct mmp_tdma_device *tdev,
+					int idx, int irq,
+					int type, struct gen_pool *pool)
+{
+	struct mmp_tdma_chan *tdmac;
+
+	if (idx >= TDMA_CHANNEL_NUM) {
+		dev_err(tdev->dev, "too many channels for device!\n");
+		return -EINVAL;
+	}
+
+	/* alloc channel */
+	tdmac = devm_kzalloc(tdev->dev, sizeof(*tdmac), GFP_KERNEL);
+	if (!tdmac)
+		return -ENOMEM;
+
+	if (irq)
+		tdmac->irq = irq;
+	tdmac->dev	   = tdev->dev;
+	tdmac->chan.device = &tdev->device;
+	tdmac->idx	   = idx;
+	tdmac->type	   = type;
+	tdmac->reg_base	   = tdev->base + idx * 4;
+	tdmac->pool	   = pool;
+	tdmac->status = DMA_COMPLETE;
+	tdev->tdmac[tdmac->idx] = tdmac;
+	tasklet_init(&tdmac->tasklet, dma_do_tasklet, (unsigned long)tdmac);
+
+	/* add the channel to tdma_chan list */
+	list_add_tail(&tdmac->chan.device_node,
+			&tdev->device.channels);
+	return 0;
+}
+
+struct mmp_tdma_filter_param {
+	struct device_node *of_node;
+	unsigned int chan_id;
+};
+
+static bool mmp_tdma_filter_fn(struct dma_chan *chan, void *fn_param)
+{
+	struct mmp_tdma_filter_param *param = fn_param;
+	struct mmp_tdma_chan *tdmac = to_mmp_tdma_chan(chan);
+	struct dma_device *pdma_device = tdmac->chan.device;
+
+	if (pdma_device->dev->of_node != param->of_node)
+		return false;
+
+	if (chan->chan_id != param->chan_id)
+		return false;
+
+	return true;
+}
+
+static struct dma_chan *mmp_tdma_xlate(struct of_phandle_args *dma_spec,
+			       struct of_dma *ofdma)
+{
+	struct mmp_tdma_device *tdev = ofdma->of_dma_data;
+	dma_cap_mask_t mask = tdev->device.cap_mask;
+	struct mmp_tdma_filter_param param;
+
+	if (dma_spec->args_count != 1)
+		return NULL;
+
+	param.of_node = ofdma->of_node;
+	param.chan_id = dma_spec->args[0];
+
+	if (param.chan_id >= TDMA_CHANNEL_NUM)
+		return NULL;
+
+	return dma_request_channel(mask, mmp_tdma_filter_fn, &param);
+}
+
+static const struct of_device_id mmp_tdma_dt_ids[] = {
+	{ .compatible = "marvell,adma-1.0", .data = (void *)MMP_AUD_TDMA},
+	{ .compatible = "marvell,pxa910-squ", .data = (void *)PXA910_SQU},
+	{}
+};
+MODULE_DEVICE_TABLE(of, mmp_tdma_dt_ids);
+
+static int mmp_tdma_probe(struct platform_device *pdev)
+{
+	enum mmp_tdma_type type;
+	const struct of_device_id *of_id;
+	struct mmp_tdma_device *tdev;
+	struct resource *iores;
+	int i, ret;
+	int irq = 0, irq_num = 0;
+	int chan_num = TDMA_CHANNEL_NUM;
+	struct gen_pool *pool = NULL;
+
+	of_id = of_match_device(mmp_tdma_dt_ids, &pdev->dev);
+	if (of_id)
+		type = (enum mmp_tdma_type) of_id->data;
+	else
+		type = platform_get_device_id(pdev)->driver_data;
+
+	/* the controller always provides two channels */
+	tdev = devm_kzalloc(&pdev->dev, sizeof(*tdev), GFP_KERNEL);
+	if (!tdev)
+		return -ENOMEM;
+
+	tdev->dev = &pdev->dev;
+
+	for (i = 0; i < chan_num; i++) {
+		if (platform_get_irq(pdev, i) > 0)
+			irq_num++;
+	}
+
+	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tdev->base = devm_ioremap_resource(&pdev->dev, iores);
+	if (IS_ERR(tdev->base))
+		return PTR_ERR(tdev->base);
+
+	INIT_LIST_HEAD(&tdev->device.channels);
+
+	if (pdev->dev.of_node)
+		pool = of_gen_pool_get(pdev->dev.of_node, "asram", 0);
+	else
+		pool = sram_get_gpool("asram");
+	if (!pool) {
+		dev_err(&pdev->dev, "asram pool not available\n");
+		return -ENOMEM;
+	}
+
+	if (irq_num != chan_num) {
+		irq = platform_get_irq(pdev, 0);
+		ret = devm_request_irq(&pdev->dev, irq,
+			mmp_tdma_int_handler, 0, "tdma", tdev);
+		if (ret)
+			return ret;
+	}
+
+	/* initialize channel parameters */
+	for (i = 0; i < chan_num; i++) {
+		irq = (irq_num != chan_num) ? 0 : platform_get_irq(pdev, i);
+		ret = mmp_tdma_chan_init(tdev, i, irq, type, pool);
+		if (ret)
+			return ret;
+	}
+
+	dma_cap_set(DMA_SLAVE, tdev->device.cap_mask);
+	dma_cap_set(DMA_CYCLIC, tdev->device.cap_mask);
+	tdev->device.dev = &pdev->dev;
+	tdev->device.device_alloc_chan_resources =
+					mmp_tdma_alloc_chan_resources;
+	tdev->device.device_free_chan_resources =
+					mmp_tdma_free_chan_resources;
+	tdev->device.device_prep_dma_cyclic = mmp_tdma_prep_dma_cyclic;
+	tdev->device.device_tx_status = mmp_tdma_tx_status;
+	tdev->device.device_issue_pending = mmp_tdma_issue_pending;
+	tdev->device.device_config = mmp_tdma_config;
+	tdev->device.device_pause = mmp_tdma_pause_chan;
+	tdev->device.device_resume = mmp_tdma_resume_chan;
+	tdev->device.device_terminate_all = mmp_tdma_terminate_all;
+	tdev->device.copy_align = DMAENGINE_ALIGN_8_BYTES;
+
+	dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
+	platform_set_drvdata(pdev, tdev);
+
+	ret = dma_async_device_register(&tdev->device);
+	if (ret) {
+		dev_err(tdev->device.dev, "unable to register\n");
+		return ret;
+	}
+
+	if (pdev->dev.of_node) {
+		ret = of_dma_controller_register(pdev->dev.of_node,
+							mmp_tdma_xlate, tdev);
+		if (ret) {
+			dev_err(tdev->device.dev,
+				"failed to register controller\n");
+			dma_async_device_unregister(&tdev->device);
+		}
+	}
+
+	dev_info(tdev->device.dev, "initialized\n");
+	return 0;
+}
+
+static const struct platform_device_id mmp_tdma_id_table[] = {
+	{ "mmp-adma",	MMP_AUD_TDMA },
+	{ "pxa910-squ",	PXA910_SQU },
+	{ },
+};
+
+static struct platform_driver mmp_tdma_driver = {
+	.driver		= {
+		.name	= "mmp-tdma",
+		.of_match_table = mmp_tdma_dt_ids,
+	},
+	.id_table	= mmp_tdma_id_table,
+	.probe		= mmp_tdma_probe,
+	.remove		= mmp_tdma_remove,
+};
+
+module_platform_driver(mmp_tdma_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MMP Two-Channel DMA Driver");
+MODULE_ALIAS("platform:mmp-tdma");
+MODULE_AUTHOR("Leo Yan <leoy@marvell.com>");
+MODULE_AUTHOR("Zhangfei Gao <zhangfei.gao@marvell.com>");
diff --git a/drivers/dma/moxart-dma.c b/drivers/dma/moxart-dma.c
new file mode 100644
index 0000000..e04499c
--- /dev/null
+++ b/drivers/dma/moxart-dma.c
@@ -0,0 +1,682 @@
+/*
+ * MOXA ART SoCs DMA Engine support.
+ *
+ * Copyright (C) 2013 Jonas Jensen
+ *
+ * Jonas Jensen <jonas.jensen@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_dma.h>
+#include <linux/bitops.h>
+
+#include <asm/cacheflush.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+#define APB_DMA_MAX_CHANNEL			4
+
+#define REG_OFF_ADDRESS_SOURCE			0
+#define REG_OFF_ADDRESS_DEST			4
+#define REG_OFF_CYCLES				8
+#define REG_OFF_CTRL				12
+#define REG_OFF_CHAN_SIZE			16
+
+#define APB_DMA_ENABLE				BIT(0)
+#define APB_DMA_FIN_INT_STS			BIT(1)
+#define APB_DMA_FIN_INT_EN			BIT(2)
+#define APB_DMA_BURST_MODE			BIT(3)
+#define APB_DMA_ERR_INT_STS			BIT(4)
+#define APB_DMA_ERR_INT_EN			BIT(5)
+
+/*
+ * Unset: APB
+ * Set:   AHB
+ */
+#define APB_DMA_SOURCE_SELECT			0x40
+#define APB_DMA_DEST_SELECT			0x80
+
+#define APB_DMA_SOURCE				0x100
+#define APB_DMA_DEST				0x1000
+
+#define APB_DMA_SOURCE_MASK			0x700
+#define APB_DMA_DEST_MASK			0x7000
+
+/*
+ * 000: No increment
+ * 001: +1 (Burst=0), +4  (Burst=1)
+ * 010: +2 (Burst=0), +8  (Burst=1)
+ * 011: +4 (Burst=0), +16 (Burst=1)
+ * 101: -1 (Burst=0), -4  (Burst=1)
+ * 110: -2 (Burst=0), -8  (Burst=1)
+ * 111: -4 (Burst=0), -16 (Burst=1)
+ */
+#define APB_DMA_SOURCE_INC_0			0
+#define APB_DMA_SOURCE_INC_1_4			0x100
+#define APB_DMA_SOURCE_INC_2_8			0x200
+#define APB_DMA_SOURCE_INC_4_16			0x300
+#define APB_DMA_SOURCE_DEC_1_4			0x500
+#define APB_DMA_SOURCE_DEC_2_8			0x600
+#define APB_DMA_SOURCE_DEC_4_16			0x700
+#define APB_DMA_DEST_INC_0			0
+#define APB_DMA_DEST_INC_1_4			0x1000
+#define APB_DMA_DEST_INC_2_8			0x2000
+#define APB_DMA_DEST_INC_4_16			0x3000
+#define APB_DMA_DEST_DEC_1_4			0x5000
+#define APB_DMA_DEST_DEC_2_8			0x6000
+#define APB_DMA_DEST_DEC_4_16			0x7000
+
+/*
+ * Request signal select source/destination address for DMA hardware handshake.
+ *
+ * The request line number is a property of the DMA controller itself,
+ * e.g. MMC must always request channels where dma_slave_config->slave_id is 5.
+ *
+ * 0:    No request / Grant signal
+ * 1-15: Request    / Grant signal
+ */
+#define APB_DMA_SOURCE_REQ_NO			0x1000000
+#define APB_DMA_SOURCE_REQ_NO_MASK		0xf000000
+#define APB_DMA_DEST_REQ_NO			0x10000
+#define APB_DMA_DEST_REQ_NO_MASK		0xf0000
+
+#define APB_DMA_DATA_WIDTH			0x100000
+#define APB_DMA_DATA_WIDTH_MASK			0x300000
+/*
+ * Data width of transfer:
+ *
+ * 00: Word
+ * 01: Half
+ * 10: Byte
+ */
+#define APB_DMA_DATA_WIDTH_4			0
+#define APB_DMA_DATA_WIDTH_2			0x100000
+#define APB_DMA_DATA_WIDTH_1			0x200000
+
+#define APB_DMA_CYCLES_MASK			0x00ffffff
+
+#define MOXART_DMA_DATA_TYPE_S8			0x00
+#define MOXART_DMA_DATA_TYPE_S16		0x01
+#define MOXART_DMA_DATA_TYPE_S32		0x02
+
+struct moxart_sg {
+	dma_addr_t addr;
+	uint32_t len;
+};
+
+struct moxart_desc {
+	enum dma_transfer_direction	dma_dir;
+	dma_addr_t			dev_addr;
+	unsigned int			sglen;
+	unsigned int			dma_cycles;
+	struct virt_dma_desc		vd;
+	uint8_t				es;
+	struct moxart_sg		sg[0];
+};
+
+struct moxart_chan {
+	struct virt_dma_chan		vc;
+
+	void __iomem			*base;
+	struct moxart_desc		*desc;
+
+	struct dma_slave_config		cfg;
+
+	bool				allocated;
+	bool				error;
+	int				ch_num;
+	unsigned int			line_reqno;
+	unsigned int			sgidx;
+};
+
+struct moxart_dmadev {
+	struct dma_device		dma_slave;
+	struct moxart_chan		slave_chans[APB_DMA_MAX_CHANNEL];
+	unsigned int			irq;
+};
+
+struct moxart_filter_data {
+	struct moxart_dmadev		*mdc;
+	struct of_phandle_args		*dma_spec;
+};
+
+static const unsigned int es_bytes[] = {
+	[MOXART_DMA_DATA_TYPE_S8] = 1,
+	[MOXART_DMA_DATA_TYPE_S16] = 2,
+	[MOXART_DMA_DATA_TYPE_S32] = 4,
+};
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+	return &chan->dev->device;
+}
+
+static inline struct moxart_chan *to_moxart_dma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct moxart_chan, vc.chan);
+}
+
+static inline struct moxart_desc *to_moxart_dma_desc(
+	struct dma_async_tx_descriptor *t)
+{
+	return container_of(t, struct moxart_desc, vd.tx);
+}
+
+static void moxart_dma_desc_free(struct virt_dma_desc *vd)
+{
+	kfree(container_of(vd, struct moxart_desc, vd));
+}
+
+static int moxart_terminate_all(struct dma_chan *chan)
+{
+	struct moxart_chan *ch = to_moxart_dma_chan(chan);
+	unsigned long flags;
+	LIST_HEAD(head);
+	u32 ctrl;
+
+	dev_dbg(chan2dev(chan), "%s: ch=%p\n", __func__, ch);
+
+	spin_lock_irqsave(&ch->vc.lock, flags);
+
+	if (ch->desc) {
+		moxart_dma_desc_free(&ch->desc->vd);
+		ch->desc = NULL;
+	}
+
+	ctrl = readl(ch->base + REG_OFF_CTRL);
+	ctrl &= ~(APB_DMA_ENABLE | APB_DMA_FIN_INT_EN | APB_DMA_ERR_INT_EN);
+	writel(ctrl, ch->base + REG_OFF_CTRL);
+
+	vchan_get_all_descriptors(&ch->vc, &head);
+	spin_unlock_irqrestore(&ch->vc.lock, flags);
+	vchan_dma_desc_free_list(&ch->vc, &head);
+
+	return 0;
+}
+
+static int moxart_slave_config(struct dma_chan *chan,
+			       struct dma_slave_config *cfg)
+{
+	struct moxart_chan *ch = to_moxart_dma_chan(chan);
+	u32 ctrl;
+
+	ch->cfg = *cfg;
+
+	ctrl = readl(ch->base + REG_OFF_CTRL);
+	ctrl |= APB_DMA_BURST_MODE;
+	ctrl &= ~(APB_DMA_DEST_MASK | APB_DMA_SOURCE_MASK);
+	ctrl &= ~(APB_DMA_DEST_REQ_NO_MASK | APB_DMA_SOURCE_REQ_NO_MASK);
+
+	switch (ch->cfg.src_addr_width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		ctrl |= APB_DMA_DATA_WIDTH_1;
+		if (ch->cfg.direction != DMA_MEM_TO_DEV)
+			ctrl |= APB_DMA_DEST_INC_1_4;
+		else
+			ctrl |= APB_DMA_SOURCE_INC_1_4;
+		break;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		ctrl |= APB_DMA_DATA_WIDTH_2;
+		if (ch->cfg.direction != DMA_MEM_TO_DEV)
+			ctrl |= APB_DMA_DEST_INC_2_8;
+		else
+			ctrl |= APB_DMA_SOURCE_INC_2_8;
+		break;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		ctrl &= ~APB_DMA_DATA_WIDTH;
+		if (ch->cfg.direction != DMA_MEM_TO_DEV)
+			ctrl |= APB_DMA_DEST_INC_4_16;
+		else
+			ctrl |= APB_DMA_SOURCE_INC_4_16;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (ch->cfg.direction == DMA_MEM_TO_DEV) {
+		ctrl &= ~APB_DMA_DEST_SELECT;
+		ctrl |= APB_DMA_SOURCE_SELECT;
+		ctrl |= (ch->line_reqno << 16 &
+			 APB_DMA_DEST_REQ_NO_MASK);
+	} else {
+		ctrl |= APB_DMA_DEST_SELECT;
+		ctrl &= ~APB_DMA_SOURCE_SELECT;
+		ctrl |= (ch->line_reqno << 24 &
+			 APB_DMA_SOURCE_REQ_NO_MASK);
+	}
+
+	writel(ctrl, ch->base + REG_OFF_CTRL);
+
+	return 0;
+}
+
+static struct dma_async_tx_descriptor *moxart_prep_slave_sg(
+	struct dma_chan *chan, struct scatterlist *sgl,
+	unsigned int sg_len, enum dma_transfer_direction dir,
+	unsigned long tx_flags, void *context)
+{
+	struct moxart_chan *ch = to_moxart_dma_chan(chan);
+	struct moxart_desc *d;
+	enum dma_slave_buswidth dev_width;
+	dma_addr_t dev_addr;
+	struct scatterlist *sgent;
+	unsigned int es;
+	unsigned int i;
+
+	if (!is_slave_direction(dir)) {
+		dev_err(chan2dev(chan), "%s: invalid DMA direction\n",
+			__func__);
+		return NULL;
+	}
+
+	if (dir == DMA_DEV_TO_MEM) {
+		dev_addr = ch->cfg.src_addr;
+		dev_width = ch->cfg.src_addr_width;
+	} else {
+		dev_addr = ch->cfg.dst_addr;
+		dev_width = ch->cfg.dst_addr_width;
+	}
+
+	switch (dev_width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		es = MOXART_DMA_DATA_TYPE_S8;
+		break;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		es = MOXART_DMA_DATA_TYPE_S16;
+		break;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		es = MOXART_DMA_DATA_TYPE_S32;
+		break;
+	default:
+		dev_err(chan2dev(chan), "%s: unsupported data width (%u)\n",
+			__func__, dev_width);
+		return NULL;
+	}
+
+	d = kzalloc(struct_size(d, sg, sg_len), GFP_ATOMIC);
+	if (!d)
+		return NULL;
+
+	d->dma_dir = dir;
+	d->dev_addr = dev_addr;
+	d->es = es;
+
+	for_each_sg(sgl, sgent, sg_len, i) {
+		d->sg[i].addr = sg_dma_address(sgent);
+		d->sg[i].len = sg_dma_len(sgent);
+	}
+
+	d->sglen = sg_len;
+
+	ch->error = 0;
+
+	return vchan_tx_prep(&ch->vc, &d->vd, tx_flags);
+}
+
+static struct dma_chan *moxart_of_xlate(struct of_phandle_args *dma_spec,
+					struct of_dma *ofdma)
+{
+	struct moxart_dmadev *mdc = ofdma->of_dma_data;
+	struct dma_chan *chan;
+	struct moxart_chan *ch;
+
+	chan = dma_get_any_slave_channel(&mdc->dma_slave);
+	if (!chan)
+		return NULL;
+
+	ch = to_moxart_dma_chan(chan);
+	ch->line_reqno = dma_spec->args[0];
+
+	return chan;
+}
+
+static int moxart_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct moxart_chan *ch = to_moxart_dma_chan(chan);
+
+	dev_dbg(chan2dev(chan), "%s: allocating channel #%u\n",
+		__func__, ch->ch_num);
+	ch->allocated = 1;
+
+	return 0;
+}
+
+static void moxart_free_chan_resources(struct dma_chan *chan)
+{
+	struct moxart_chan *ch = to_moxart_dma_chan(chan);
+
+	vchan_free_chan_resources(&ch->vc);
+
+	dev_dbg(chan2dev(chan), "%s: freeing channel #%u\n",
+		__func__, ch->ch_num);
+	ch->allocated = 0;
+}
+
+static void moxart_dma_set_params(struct moxart_chan *ch, dma_addr_t src_addr,
+				  dma_addr_t dst_addr)
+{
+	writel(src_addr, ch->base + REG_OFF_ADDRESS_SOURCE);
+	writel(dst_addr, ch->base + REG_OFF_ADDRESS_DEST);
+}
+
+static void moxart_set_transfer_params(struct moxart_chan *ch, unsigned int len)
+{
+	struct moxart_desc *d = ch->desc;
+	unsigned int sglen_div = es_bytes[d->es];
+
+	d->dma_cycles = len >> sglen_div;
+
+	/*
+	 * Four cycles are needed to copy 64 bytes, i.e. one cycle copies
+	 * 16 bytes (when the width is APB_DMA_DATA_WIDTH_4).
+	 */
+	writel(d->dma_cycles, ch->base + REG_OFF_CYCLES);
+
+	dev_dbg(chan2dev(&ch->vc.chan), "%s: set %u DMA cycles (len=%u)\n",
+		__func__, d->dma_cycles, len);
+}
+
+static void moxart_start_dma(struct moxart_chan *ch)
+{
+	u32 ctrl;
+
+	ctrl = readl(ch->base + REG_OFF_CTRL);
+	ctrl |= (APB_DMA_ENABLE | APB_DMA_FIN_INT_EN | APB_DMA_ERR_INT_EN);
+	writel(ctrl, ch->base + REG_OFF_CTRL);
+}
+
+static void moxart_dma_start_sg(struct moxart_chan *ch, unsigned int idx)
+{
+	struct moxart_desc *d = ch->desc;
+	struct moxart_sg *sg = ch->desc->sg + idx;
+
+	if (ch->desc->dma_dir == DMA_MEM_TO_DEV)
+		moxart_dma_set_params(ch, sg->addr, d->dev_addr);
+	else if (ch->desc->dma_dir == DMA_DEV_TO_MEM)
+		moxart_dma_set_params(ch, d->dev_addr, sg->addr);
+
+	moxart_set_transfer_params(ch, sg->len);
+
+	moxart_start_dma(ch);
+}
+
+static void moxart_dma_start_desc(struct dma_chan *chan)
+{
+	struct moxart_chan *ch = to_moxart_dma_chan(chan);
+	struct virt_dma_desc *vd;
+
+	vd = vchan_next_desc(&ch->vc);
+
+	if (!vd) {
+		ch->desc = NULL;
+		return;
+	}
+
+	list_del(&vd->node);
+
+	ch->desc = to_moxart_dma_desc(&vd->tx);
+	ch->sgidx = 0;
+
+	moxart_dma_start_sg(ch, 0);
+}
+
+static void moxart_issue_pending(struct dma_chan *chan)
+{
+	struct moxart_chan *ch = to_moxart_dma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&ch->vc.lock, flags);
+	if (vchan_issue_pending(&ch->vc) && !ch->desc)
+		moxart_dma_start_desc(chan);
+	spin_unlock_irqrestore(&ch->vc.lock, flags);
+}
+
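+/* Sum the lengths of the scatter/gather entries that have not completed yet */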
+static size_t moxart_dma_desc_size(struct moxart_desc *d,
+				   unsigned int completed_sgs)
+{
+	unsigned int i;
+	size_t size;
+
+	for (size = 0, i = completed_sgs; i < d->sglen; i++)
+		size += d->sg[i].len;
+
+	return size;
+}
+
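+/*
+ * Residue of the descriptor currently in flight: size of the remaining
+ * scatter/gather entries minus the bytes already transferred in the
+ * current one (derived from the remaining cycle count).
+ */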
+static size_t moxart_dma_desc_size_in_flight(struct moxart_chan *ch)
+{
+	size_t size;
+	unsigned int completed_cycles, cycles;
+
+	size = moxart_dma_desc_size(ch->desc, ch->sgidx);
+	cycles = readl(ch->base + REG_OFF_CYCLES);
+	completed_cycles = (ch->desc->dma_cycles - cycles);
+	size -= completed_cycles << es_bytes[ch->desc->es];
+
+	dev_dbg(chan2dev(&ch->vc.chan), "%s: size=%zu\n", __func__, size);
+
+	return size;
+}
+
+static enum dma_status moxart_tx_status(struct dma_chan *chan,
+					dma_cookie_t cookie,
+					struct dma_tx_state *txstate)
+{
+	struct moxart_chan *ch = to_moxart_dma_chan(chan);
+	struct virt_dma_desc *vd;
+	struct moxart_desc *d;
+	enum dma_status ret;
+	unsigned long flags;
+
+	/*
+	 * dma_cookie_status() assigns initial residue value.
+	 */
+	ret = dma_cookie_status(chan, cookie, txstate);
+
+	spin_lock_irqsave(&ch->vc.lock, flags);
+	vd = vchan_find_desc(&ch->vc, cookie);
+	if (vd) {
+		d = to_moxart_dma_desc(&vd->tx);
+		txstate->residue = moxart_dma_desc_size(d, 0);
+	} else if (ch->desc && ch->desc->vd.tx.cookie == cookie) {
+		txstate->residue = moxart_dma_desc_size_in_flight(ch);
+	}
+	spin_unlock_irqrestore(&ch->vc.lock, flags);
+
+	if (ch->error)
+		return DMA_ERROR;
+
+	return ret;
+}
+
+static void moxart_dma_init(struct dma_device *dma, struct device *dev)
+{
+	dma->device_prep_slave_sg		= moxart_prep_slave_sg;
+	dma->device_alloc_chan_resources	= moxart_alloc_chan_resources;
+	dma->device_free_chan_resources		= moxart_free_chan_resources;
+	dma->device_issue_pending		= moxart_issue_pending;
+	dma->device_tx_status			= moxart_tx_status;
+	dma->device_config			= moxart_slave_config;
+	dma->device_terminate_all		= moxart_terminate_all;
+	dma->dev				= dev;
+
+	INIT_LIST_HEAD(&dma->channels);
+}
+
+static irqreturn_t moxart_dma_interrupt(int irq, void *devid)
+{
+	struct moxart_dmadev *mc = devid;
+	struct moxart_chan *ch = &mc->slave_chans[0];
+	unsigned int i;
+	unsigned long flags;
+	u32 ctrl;
+
+	dev_dbg(chan2dev(&ch->vc.chan), "%s\n", __func__);
+
+	for (i = 0; i < APB_DMA_MAX_CHANNEL; i++, ch++) {
+		if (!ch->allocated)
+			continue;
+
+		ctrl = readl(ch->base + REG_OFF_CTRL);
+
+		dev_dbg(chan2dev(&ch->vc.chan), "%s: ch=%p ch->base=%p ctrl=%x\n",
+			__func__, ch, ch->base, ctrl);
+
+		if (ctrl & APB_DMA_FIN_INT_STS) {
+			ctrl &= ~APB_DMA_FIN_INT_STS;
+			if (ch->desc) {
+				spin_lock_irqsave(&ch->vc.lock, flags);
+				if (++ch->sgidx < ch->desc->sglen) {
+					moxart_dma_start_sg(ch, ch->sgidx);
+				} else {
+					vchan_cookie_complete(&ch->desc->vd);
+					moxart_dma_start_desc(&ch->vc.chan);
+				}
+				spin_unlock_irqrestore(&ch->vc.lock, flags);
+			}
+		}
+
+		if (ctrl & APB_DMA_ERR_INT_STS) {
+			ctrl &= ~APB_DMA_ERR_INT_STS;
+			ch->error = 1;
+		}
+
+		writel(ctrl, ch->base + REG_OFF_CTRL);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int moxart_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *node = dev->of_node;
+	struct resource *res;
+	static void __iomem *dma_base_addr;
+	int ret, i;
+	unsigned int irq;
+	struct moxart_chan *ch;
+	struct moxart_dmadev *mdc;
+
+	mdc = devm_kzalloc(dev, sizeof(*mdc), GFP_KERNEL);
+	if (!mdc)
+		return -ENOMEM;
+
+	irq = irq_of_parse_and_map(node, 0);
+	if (!irq) {
+		dev_err(dev, "no IRQ resource\n");
+		return -EINVAL;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	dma_base_addr = devm_ioremap_resource(dev, res);
+	if (IS_ERR(dma_base_addr))
+		return PTR_ERR(dma_base_addr);
+
+	dma_cap_zero(mdc->dma_slave.cap_mask);
+	dma_cap_set(DMA_SLAVE, mdc->dma_slave.cap_mask);
+	dma_cap_set(DMA_PRIVATE, mdc->dma_slave.cap_mask);
+
+	moxart_dma_init(&mdc->dma_slave, dev);
+
+	ch = &mdc->slave_chans[0];
+	for (i = 0; i < APB_DMA_MAX_CHANNEL; i++, ch++) {
+		ch->ch_num = i;
+		ch->base = dma_base_addr + i * REG_OFF_CHAN_SIZE;
+		ch->allocated = 0;
+
+		ch->vc.desc_free = moxart_dma_desc_free;
+		vchan_init(&ch->vc, &mdc->dma_slave);
+
+		dev_dbg(dev, "%s: chs[%d]: ch->ch_num=%u ch->base=%p\n",
+			__func__, i, ch->ch_num, ch->base);
+	}
+
+	platform_set_drvdata(pdev, mdc);
+
+	ret = devm_request_irq(dev, irq, moxart_dma_interrupt, 0,
+			       "moxart-dma-engine", mdc);
+	if (ret) {
+		dev_err(dev, "devm_request_irq failed\n");
+		return ret;
+	}
+	mdc->irq = irq;
+
+	ret = dma_async_device_register(&mdc->dma_slave);
+	if (ret) {
+		dev_err(dev, "dma_async_device_register failed\n");
+		return ret;
+	}
+
+	ret = of_dma_controller_register(node, moxart_of_xlate, mdc);
+	if (ret) {
+		dev_err(dev, "of_dma_controller_register failed\n");
+		dma_async_device_unregister(&mdc->dma_slave);
+		return ret;
+	}
+
+	dev_dbg(dev, "%s: IRQ=%u\n", __func__, irq);
+
+	return 0;
+}
+
+static int moxart_remove(struct platform_device *pdev)
+{
+	struct moxart_dmadev *m = platform_get_drvdata(pdev);
+
+	devm_free_irq(&pdev->dev, m->irq, m);
+
+	dma_async_device_unregister(&m->dma_slave);
+
+	if (pdev->dev.of_node)
+		of_dma_controller_free(pdev->dev.of_node);
+
+	return 0;
+}
+
+static const struct of_device_id moxart_dma_match[] = {
+	{ .compatible = "moxa,moxart-dma" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, moxart_dma_match);
+
+static struct platform_driver moxart_driver = {
+	.probe	= moxart_probe,
+	.remove	= moxart_remove,
+	.driver = {
+		.name		= "moxart-dma-engine",
+		.of_match_table	= moxart_dma_match,
+	},
+};
+
+static int moxart_init(void)
+{
+	return platform_driver_register(&moxart_driver);
+}
+subsys_initcall(moxart_init);
+
+static void __exit moxart_exit(void)
+{
+	platform_driver_unregister(&moxart_driver);
+}
+module_exit(moxart_exit);
+
+MODULE_AUTHOR("Jonas Jensen <jonas.jensen@gmail.com>");
+MODULE_DESCRIPTION("MOXART DMA engine driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c
new file mode 100644
index 0000000..dde7134
--- /dev/null
+++ b/drivers/dma/mpc512x_dma.c
@@ -0,0 +1,1136 @@
+/*
+ * Copyright (C) Freescale Semiconductor, Inc. 2007, 2008.
+ * Copyright (C) Semihalf 2009
+ * Copyright (C) Ilya Yanok, Emcraft Systems 2010
+ * Copyright (C) Alexander Popov, Promcontroller 2014
+ * Copyright (C) Mario Six, Guntermann & Drunck GmbH, 2016
+ *
+ * Written by Piotr Ziecik <kosmo@semihalf.com>. Hardware description
+ * (defines, structures and comments) was taken from MPC5121 DMA driver
+ * written by Hongjun Chen <hong-jun.chen@freescale.com>.
+ *
+ * Approved as OSADL project by a majority of OSADL members and funded
+ * by OSADL membership fees in 2009;  for details see www.osadl.org.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * MPC512x and MPC8308 DMA driver. It supports memory to memory data transfers
+ * (tested using dmatest module) and data transfers between memory and
+ * peripheral I/O memory by means of slave scatter/gather with these
+ * limitations:
+ *  - chunked transfers (described by s/g lists with more than one item) are
+ *     refused as long as proper support for scatter/gather is missing
+ *  - transfers on MPC8308 always start from software as this SoC does not have
+ *     external request lines for peripheral flow control
+ *  - memory <-> I/O memory transfer chunk sizes of 1, 2, 4, 16 (for
+ *     MPC512x), and 32 bytes are supported, and, consequently, source
+ *     addresses and destination addresses must be aligned accordingly;
+ *     furthermore, for MPC512x SoCs, the transfer size must be aligned on
+ *     (chunk size * maxburst)
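+ *     (for example, with 4-byte chunks and a maxburst of 16, every
+ *     transfer length must be a multiple of 64 bytes)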
+ */
+
+#include <linux/module.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+
+#include <linux/random.h>
+
+#include "dmaengine.h"
+
+/* Number of DMA Transfer descriptors allocated per channel */
+#define MPC_DMA_DESCRIPTORS	64
+
+/* Macro definitions */
+#define MPC_DMA_TCD_OFFSET	0x1000
+
+/*
+ * Maximum channel counts for individual hardware variants
+ * and the maximum channel count over all supported controllers,
+ * used for data structure size
+ */
+#define MPC8308_DMACHAN_MAX	16
+#define MPC512x_DMACHAN_MAX	64
+#define MPC_DMA_CHANNELS	64
+
+/* Arbitration mode of group and channel */
+#define MPC_DMA_DMACR_EDCG	(1 << 31)
+#define MPC_DMA_DMACR_ERGA	(1 << 3)
+#define MPC_DMA_DMACR_ERCA	(1 << 2)
+
+/* Error codes */
+#define MPC_DMA_DMAES_VLD	(1 << 31)
+#define MPC_DMA_DMAES_GPE	(1 << 15)
+#define MPC_DMA_DMAES_CPE	(1 << 14)
+#define MPC_DMA_DMAES_ERRCHN(err) \
+				(((err) >> 8) & 0x3f)
+#define MPC_DMA_DMAES_SAE	(1 << 7)
+#define MPC_DMA_DMAES_SOE	(1 << 6)
+#define MPC_DMA_DMAES_DAE	(1 << 5)
+#define MPC_DMA_DMAES_DOE	(1 << 4)
+#define MPC_DMA_DMAES_NCE	(1 << 3)
+#define MPC_DMA_DMAES_SGE	(1 << 2)
+#define MPC_DMA_DMAES_SBE	(1 << 1)
+#define MPC_DMA_DMAES_DBE	(1 << 0)
+
+#define MPC_DMA_DMAGPOR_SNOOP_ENABLE	(1 << 6)
+
+#define MPC_DMA_TSIZE_1		0x00
+#define MPC_DMA_TSIZE_2		0x01
+#define MPC_DMA_TSIZE_4		0x02
+#define MPC_DMA_TSIZE_16	0x04
+#define MPC_DMA_TSIZE_32	0x05
+
+/* MPC5121 DMA engine registers */
+struct __attribute__ ((__packed__)) mpc_dma_regs {
+	/* 0x00 */
+	u32 dmacr;		/* DMA control register */
+	u32 dmaes;		/* DMA error status */
+	/* 0x08 */
+	u32 dmaerqh;		/* DMA enable request high(channels 63~32) */
+	u32 dmaerql;		/* DMA enable request low(channels 31~0) */
+	u32 dmaeeih;		/* DMA enable error interrupt high(ch63~32) */
+	u32 dmaeeil;		/* DMA enable error interrupt low(ch31~0) */
+	/* 0x18 */
+	u8 dmaserq;		/* DMA set enable request */
+	u8 dmacerq;		/* DMA clear enable request */
+	u8 dmaseei;		/* DMA set enable error interrupt */
+	u8 dmaceei;		/* DMA clear enable error interrupt */
+	/* 0x1c */
+	u8 dmacint;		/* DMA clear interrupt request */
+	u8 dmacerr;		/* DMA clear error */
+	u8 dmassrt;		/* DMA set start bit */
+	u8 dmacdne;		/* DMA clear DONE status bit */
+	/* 0x20 */
+	u32 dmainth;		/* DMA interrupt request high(ch63~32) */
+	u32 dmaintl;		/* DMA interrupt request low(ch31~0) */
+	u32 dmaerrh;		/* DMA error high(ch63~32) */
+	u32 dmaerrl;		/* DMA error low(ch31~0) */
+	/* 0x30 */
+	u32 dmahrsh;		/* DMA hw request status high(ch63~32) */
+	u32 dmahrsl;		/* DMA hardware request status low(ch31~0) */
+	union {
+		u32 dmaihsa;	/* DMA interrupt high select AXE(ch63~32) */
+		u32 dmagpor;	/* (General purpose register on MPC8308) */
+	};
+	u32 dmailsa;		/* DMA interrupt low select AXE(ch31~0) */
+	/* 0x40 ~ 0xff */
+	u32 reserve0[48];	/* Reserved */
+	/* 0x100 */
+	u8 dchpri[MPC_DMA_CHANNELS];
+	/* DMA channels(0~63) priority */
+};
+
+struct __attribute__ ((__packed__)) mpc_dma_tcd {
+	/* 0x00 */
+	u32 saddr;		/* Source address */
+
+	u32 smod:5;		/* Source address modulo */
+	u32 ssize:3;		/* Source data transfer size */
+	u32 dmod:5;		/* Destination address modulo */
+	u32 dsize:3;		/* Destination data transfer size */
+	u32 soff:16;		/* Signed source address offset */
+
+	/* 0x08 */
+	u32 nbytes;		/* Inner "minor" byte count */
+	u32 slast;		/* Last source address adjustment */
+	u32 daddr;		/* Destination address */
+
+	/* 0x14 */
+	u32 citer_elink:1;	/* Enable channel-to-channel linking on
+				 * minor loop complete
+				 */
+	u32 citer_linkch:6;	/* Link channel for minor loop complete */
+	u32 citer:9;		/* Current "major" iteration count */
+	u32 doff:16;		/* Signed destination address offset */
+
+	/* 0x18 */
+	u32 dlast_sga;		/* Last Destination address adjustment/scatter
+				 * gather address
+				 */
+
+	/* 0x1c */
+	u32 biter_elink:1;	/* Enable channel-to-channel linking on major
+				 * loop complete
+				 */
+	u32 biter_linkch:6;
+	u32 biter:9;		/* Beginning "major" iteration count */
+	u32 bwc:2;		/* Bandwidth control */
+	u32 major_linkch:6;	/* Link channel number */
+	u32 done:1;		/* Channel done */
+	u32 active:1;		/* Channel active */
+	u32 major_elink:1;	/* Enable channel-to-channel linking on major
+				 * loop complete
+				 */
+	u32 e_sg:1;		/* Enable scatter/gather processing */
+	u32 d_req:1;		/* Disable request */
+	u32 int_half:1;		/* Enable an interrupt when major counter is
+				 * half complete
+				 */
+	u32 int_maj:1;		/* Enable an interrupt when major iteration
+				 * count completes
+				 */
+	u32 start:1;		/* Channel start */
+};
+
+struct mpc_dma_desc {
+	struct dma_async_tx_descriptor	desc;
+	struct mpc_dma_tcd		*tcd;
+	dma_addr_t			tcd_paddr;
+	int				error;
+	struct list_head		node;
+	int				will_access_peripheral;
+};
+
+struct mpc_dma_chan {
+	struct dma_chan			chan;
+	struct list_head		free;
+	struct list_head		prepared;
+	struct list_head		queued;
+	struct list_head		active;
+	struct list_head		completed;
+	struct mpc_dma_tcd		*tcd;
+	dma_addr_t			tcd_paddr;
+
+	/* Settings for access to peripheral FIFO */
+	dma_addr_t			src_per_paddr;
+	u32				src_tcd_nunits;
+	u8				swidth;
+	dma_addr_t			dst_per_paddr;
+	u32				dst_tcd_nunits;
+	u8				dwidth;
+
+	/* Lock for this structure */
+	spinlock_t			lock;
+};
+
+struct mpc_dma {
+	struct dma_device		dma;
+	struct tasklet_struct		tasklet;
+	struct mpc_dma_chan		channels[MPC_DMA_CHANNELS];
+	struct mpc_dma_regs __iomem	*regs;
+	struct mpc_dma_tcd __iomem	*tcd;
+	int				irq;
+	int				irq2;
+	uint				error_status;
+	int				is_mpc8308;
+
+	/* Lock for error_status field in this structure */
+	spinlock_t			error_status_lock;
+};
+
+#define DRV_NAME	"mpc512x_dma"
+
+/* Convert struct dma_chan to struct mpc_dma_chan */
+static inline struct mpc_dma_chan *dma_chan_to_mpc_dma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct mpc_dma_chan, chan);
+}
+
+/* Convert struct dma_chan to struct mpc_dma */
+static inline struct mpc_dma *dma_chan_to_mpc_dma(struct dma_chan *c)
+{
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(c);
+
+	return container_of(mchan, struct mpc_dma, channels[c->chan_id]);
+}
+
+/*
+ * Execute all queued DMA descriptors.
+ *
+ * Following requirements must be met while calling mpc_dma_execute():
+ *	a) mchan->lock is acquired,
+ *	b) mchan->active list is empty,
+ *	c) mchan->queued list contains at least one entry.
+ */
+static void mpc_dma_execute(struct mpc_dma_chan *mchan)
+{
+	struct mpc_dma *mdma = dma_chan_to_mpc_dma(&mchan->chan);
+	struct mpc_dma_desc *first = NULL;
+	struct mpc_dma_desc *prev = NULL;
+	struct mpc_dma_desc *mdesc;
+	int cid = mchan->chan.chan_id;
+
+	while (!list_empty(&mchan->queued)) {
+		mdesc = list_first_entry(&mchan->queued,
+						struct mpc_dma_desc, node);
+		/*
+		 * Grab either several mem-to-mem transfer descriptors
+		 * or one peripheral transfer descriptor,
+		 * don't mix mem-to-mem and peripheral transfer descriptors
+		 * within the same 'active' list.
+		 */
+		if (mdesc->will_access_peripheral) {
+			if (list_empty(&mchan->active))
+				list_move_tail(&mdesc->node, &mchan->active);
+			break;
+		} else {
+			list_move_tail(&mdesc->node, &mchan->active);
+		}
+	}
+
+	/* Chain descriptors into one transaction */
+	list_for_each_entry(mdesc, &mchan->active, node) {
+		if (!first)
+			first = mdesc;
+
+		if (!prev) {
+			prev = mdesc;
+			continue;
+		}
+
+		prev->tcd->dlast_sga = mdesc->tcd_paddr;
+		prev->tcd->e_sg = 1;
+		mdesc->tcd->start = 1;
+
+		prev = mdesc;
+	}
+
+	prev->tcd->int_maj = 1;
+
+	/* Send first descriptor in chain into hardware */
+	memcpy_toio(&mdma->tcd[cid], first->tcd, sizeof(struct mpc_dma_tcd));
+
+	if (first != prev)
+		mdma->tcd[cid].e_sg = 1;
+
+	if (mdma->is_mpc8308) {
+		/* MPC8308, no request lines, software initiated start */
+		out_8(&mdma->regs->dmassrt, cid);
+	} else if (first->will_access_peripheral) {
+		/* Peripherals involved, start by external request signal */
+		out_8(&mdma->regs->dmaserq, cid);
+	} else {
+		/* Memory to memory transfer, software initiated start */
+		out_8(&mdma->regs->dmassrt, cid);
+	}
+}
+
+/* Handle interrupt on one half of DMA controller (32 channels) */
+static void mpc_dma_irq_process(struct mpc_dma *mdma, u32 is, u32 es, int off)
+{
+	struct mpc_dma_chan *mchan;
+	struct mpc_dma_desc *mdesc;
+	u32 status = is | es;
+	int ch;
+
+	while ((ch = fls(status) - 1) >= 0) {
+		status &= ~(1 << ch);
+		mchan = &mdma->channels[ch + off];
+
+		spin_lock(&mchan->lock);
+
+		out_8(&mdma->regs->dmacint, ch + off);
+		out_8(&mdma->regs->dmacerr, ch + off);
+
+		/* Check error status */
+		if (es & (1 << ch))
+			list_for_each_entry(mdesc, &mchan->active, node)
+				mdesc->error = -EIO;
+
+		/* Execute queued descriptors */
+		list_splice_tail_init(&mchan->active, &mchan->completed);
+		if (!list_empty(&mchan->queued))
+			mpc_dma_execute(mchan);
+
+		spin_unlock(&mchan->lock);
+	}
+}
+
+/* Interrupt handler */
+static irqreturn_t mpc_dma_irq(int irq, void *data)
+{
+	struct mpc_dma *mdma = data;
+	uint es;
+
+	/* Save error status register */
+	es = in_be32(&mdma->regs->dmaes);
+	spin_lock(&mdma->error_status_lock);
+	if ((es & MPC_DMA_DMAES_VLD) && mdma->error_status == 0)
+		mdma->error_status = es;
+	spin_unlock(&mdma->error_status_lock);
+
+	/* Handle interrupt on each channel */
+	if (mdma->dma.chancnt > 32) {
+		mpc_dma_irq_process(mdma, in_be32(&mdma->regs->dmainth),
+					in_be32(&mdma->regs->dmaerrh), 32);
+	}
+	mpc_dma_irq_process(mdma, in_be32(&mdma->regs->dmaintl),
+					in_be32(&mdma->regs->dmaerrl), 0);
+
+	/* Schedule tasklet */
+	tasklet_schedule(&mdma->tasklet);
+
+	return IRQ_HANDLED;
+}
+
+/* process completed descriptors */
+static void mpc_dma_process_completed(struct mpc_dma *mdma)
+{
+	dma_cookie_t last_cookie = 0;
+	struct mpc_dma_chan *mchan;
+	struct mpc_dma_desc *mdesc;
+	struct dma_async_tx_descriptor *desc;
+	unsigned long flags;
+	LIST_HEAD(list);
+	int i;
+
+	for (i = 0; i < mdma->dma.chancnt; i++) {
+		mchan = &mdma->channels[i];
+
+		/* Get all completed descriptors */
+		spin_lock_irqsave(&mchan->lock, flags);
+		if (!list_empty(&mchan->completed))
+			list_splice_tail_init(&mchan->completed, &list);
+		spin_unlock_irqrestore(&mchan->lock, flags);
+
+		if (list_empty(&list))
+			continue;
+
+		/* Execute callbacks and run dependencies */
+		list_for_each_entry(mdesc, &list, node) {
+			desc = &mdesc->desc;
+
+			dmaengine_desc_get_callback_invoke(desc, NULL);
+
+			last_cookie = desc->cookie;
+			dma_run_dependencies(desc);
+		}
+
+		/* Free descriptors */
+		spin_lock_irqsave(&mchan->lock, flags);
+		list_splice_tail_init(&list, &mchan->free);
+		mchan->chan.completed_cookie = last_cookie;
+		spin_unlock_irqrestore(&mchan->lock, flags);
+	}
+}
+
+/* DMA Tasklet */
+static void mpc_dma_tasklet(unsigned long data)
+{
+	struct mpc_dma *mdma = (void *)data;
+	unsigned long flags;
+	uint es;
+
+	spin_lock_irqsave(&mdma->error_status_lock, flags);
+	es = mdma->error_status;
+	mdma->error_status = 0;
+	spin_unlock_irqrestore(&mdma->error_status_lock, flags);
+
+	/* Print nice error report */
+	if (es) {
+		dev_err(mdma->dma.dev,
+			"Hardware reported following error(s) on channel %u:\n",
+						      MPC_DMA_DMAES_ERRCHN(es));
+
+		if (es & MPC_DMA_DMAES_GPE)
+			dev_err(mdma->dma.dev, "- Group Priority Error\n");
+		if (es & MPC_DMA_DMAES_CPE)
+			dev_err(mdma->dma.dev, "- Channel Priority Error\n");
+		if (es & MPC_DMA_DMAES_SAE)
+			dev_err(mdma->dma.dev, "- Source Address Error\n");
+		if (es & MPC_DMA_DMAES_SOE)
+			dev_err(mdma->dma.dev, "- Source Offset Configuration Error\n");
+		if (es & MPC_DMA_DMAES_DAE)
+			dev_err(mdma->dma.dev, "- Destination Address Error\n");
+		if (es & MPC_DMA_DMAES_DOE)
+			dev_err(mdma->dma.dev, "- Destination Offset Configuration Error\n");
+		if (es & MPC_DMA_DMAES_NCE)
+			dev_err(mdma->dma.dev, "- NBytes/Citer Configuration Error\n");
+		if (es & MPC_DMA_DMAES_SGE)
+			dev_err(mdma->dma.dev, "- Scatter/Gather Configuration Error\n");
+		if (es & MPC_DMA_DMAES_SBE)
+			dev_err(mdma->dma.dev, "- Source Bus Error\n");
+		if (es & MPC_DMA_DMAES_DBE)
+			dev_err(mdma->dma.dev, "- Destination Bus Error\n");
+	}
+
+	mpc_dma_process_completed(mdma);
+}
+
+/* Submit descriptor to hardware */
+static dma_cookie_t mpc_dma_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(txd->chan);
+	struct mpc_dma_desc *mdesc;
+	unsigned long flags;
+	dma_cookie_t cookie;
+
+	mdesc = container_of(txd, struct mpc_dma_desc, desc);
+
+	spin_lock_irqsave(&mchan->lock, flags);
+
+	/* Move descriptor to queue */
+	list_move_tail(&mdesc->node, &mchan->queued);
+
+	/* If channel is idle, execute all queued descriptors */
+	if (list_empty(&mchan->active))
+		mpc_dma_execute(mchan);
+
+	/* Update cookie */
+	cookie = dma_cookie_assign(txd);
+	spin_unlock_irqrestore(&mchan->lock, flags);
+
+	return cookie;
+}
+
+/* Alloc channel resources */
+static int mpc_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan);
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+	struct mpc_dma_desc *mdesc;
+	struct mpc_dma_tcd *tcd;
+	dma_addr_t tcd_paddr;
+	unsigned long flags;
+	LIST_HEAD(descs);
+	int i;
+
+	/* Alloc DMA memory for Transfer Control Descriptors */
+	tcd = dma_alloc_coherent(mdma->dma.dev,
+			MPC_DMA_DESCRIPTORS * sizeof(struct mpc_dma_tcd),
+							&tcd_paddr, GFP_KERNEL);
+	if (!tcd)
+		return -ENOMEM;
+
+	/* Alloc descriptors for this channel */
+	for (i = 0; i < MPC_DMA_DESCRIPTORS; i++) {
+		mdesc = kzalloc(sizeof(struct mpc_dma_desc), GFP_KERNEL);
+		if (!mdesc) {
+			dev_notice(mdma->dma.dev,
+				"Memory allocation error. Allocated only %u descriptors\n", i);
+			break;
+		}
+
+		dma_async_tx_descriptor_init(&mdesc->desc, chan);
+		mdesc->desc.flags = DMA_CTRL_ACK;
+		mdesc->desc.tx_submit = mpc_dma_tx_submit;
+
+		mdesc->tcd = &tcd[i];
+		mdesc->tcd_paddr = tcd_paddr + (i * sizeof(struct mpc_dma_tcd));
+
+		list_add_tail(&mdesc->node, &descs);
+	}
+
+	/* Return error only if no descriptors were allocated */
+	if (i == 0) {
+		dma_free_coherent(mdma->dma.dev,
+			MPC_DMA_DESCRIPTORS * sizeof(struct mpc_dma_tcd),
+								tcd, tcd_paddr);
+		return -ENOMEM;
+	}
+
+	spin_lock_irqsave(&mchan->lock, flags);
+	mchan->tcd = tcd;
+	mchan->tcd_paddr = tcd_paddr;
+	list_splice_tail_init(&descs, &mchan->free);
+	spin_unlock_irqrestore(&mchan->lock, flags);
+
+	/* Enable Error Interrupt */
+	out_8(&mdma->regs->dmaseei, chan->chan_id);
+
+	return 0;
+}
+
+/* Free channel resources */
+static void mpc_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan);
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+	struct mpc_dma_desc *mdesc, *tmp;
+	struct mpc_dma_tcd *tcd;
+	dma_addr_t tcd_paddr;
+	unsigned long flags;
+	LIST_HEAD(descs);
+
+	spin_lock_irqsave(&mchan->lock, flags);
+
+	/* Channel must be idle */
+	BUG_ON(!list_empty(&mchan->prepared));
+	BUG_ON(!list_empty(&mchan->queued));
+	BUG_ON(!list_empty(&mchan->active));
+	BUG_ON(!list_empty(&mchan->completed));
+
+	/* Move data */
+	list_splice_tail_init(&mchan->free, &descs);
+	tcd = mchan->tcd;
+	tcd_paddr = mchan->tcd_paddr;
+
+	spin_unlock_irqrestore(&mchan->lock, flags);
+
+	/* Free DMA memory used by descriptors */
+	dma_free_coherent(mdma->dma.dev,
+			MPC_DMA_DESCRIPTORS * sizeof(struct mpc_dma_tcd),
+								tcd, tcd_paddr);
+
+	/* Free descriptors */
+	list_for_each_entry_safe(mdesc, tmp, &descs, node)
+		kfree(mdesc);
+
+	/* Disable Error Interrupt */
+	out_8(&mdma->regs->dmaceei, chan->chan_id);
+}
+
+/* Send all pending descriptor to hardware */
+static void mpc_dma_issue_pending(struct dma_chan *chan)
+{
+	/*
+	 * We are posting descriptors to the hardware as soon as
+	 * they are ready, so this function does nothing.
+	 */
+}
+
+/* Check request completion status */
+static enum dma_status
+mpc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+	       struct dma_tx_state *txstate)
+{
+	return dma_cookie_status(chan, cookie, txstate);
+}
+
+/* Prepare descriptor for memory to memory copy */
+static struct dma_async_tx_descriptor *
+mpc_dma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst, dma_addr_t src,
+					size_t len, unsigned long flags)
+{
+	struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan);
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+	struct mpc_dma_desc *mdesc = NULL;
+	struct mpc_dma_tcd *tcd;
+	unsigned long iflags;
+
+	/* Get free descriptor */
+	spin_lock_irqsave(&mchan->lock, iflags);
+	if (!list_empty(&mchan->free)) {
+		mdesc = list_first_entry(&mchan->free, struct mpc_dma_desc,
+									node);
+		list_del(&mdesc->node);
+	}
+	spin_unlock_irqrestore(&mchan->lock, iflags);
+
+	if (!mdesc) {
+		/* try to free completed descriptors */
+		mpc_dma_process_completed(mdma);
+		return NULL;
+	}
+
+	mdesc->error = 0;
+	mdesc->will_access_peripheral = 0;
+	tcd = mdesc->tcd;
+
+	/* Prepare Transfer Control Descriptor for this transaction */
+	memset(tcd, 0, sizeof(struct mpc_dma_tcd));
+
+	if (IS_ALIGNED(src | dst | len, 32)) {
+		tcd->ssize = MPC_DMA_TSIZE_32;
+		tcd->dsize = MPC_DMA_TSIZE_32;
+		tcd->soff = 32;
+		tcd->doff = 32;
+	} else if (!mdma->is_mpc8308 && IS_ALIGNED(src | dst | len, 16)) {
+		/* MPC8308 doesn't support 16 byte transfers */
+		tcd->ssize = MPC_DMA_TSIZE_16;
+		tcd->dsize = MPC_DMA_TSIZE_16;
+		tcd->soff = 16;
+		tcd->doff = 16;
+	} else if (IS_ALIGNED(src | dst | len, 4)) {
+		tcd->ssize = MPC_DMA_TSIZE_4;
+		tcd->dsize = MPC_DMA_TSIZE_4;
+		tcd->soff = 4;
+		tcd->doff = 4;
+	} else if (IS_ALIGNED(src | dst | len, 2)) {
+		tcd->ssize = MPC_DMA_TSIZE_2;
+		tcd->dsize = MPC_DMA_TSIZE_2;
+		tcd->soff = 2;
+		tcd->doff = 2;
+	} else {
+		tcd->ssize = MPC_DMA_TSIZE_1;
+		tcd->dsize = MPC_DMA_TSIZE_1;
+		tcd->soff = 1;
+		tcd->doff = 1;
+	}
+
+	tcd->saddr = src;
+	tcd->daddr = dst;
+	tcd->nbytes = len;
+	tcd->biter = 1;
+	tcd->citer = 1;
+
+	/* Place descriptor in prepared list */
+	spin_lock_irqsave(&mchan->lock, iflags);
+	list_add_tail(&mdesc->node, &mchan->prepared);
+	spin_unlock_irqrestore(&mchan->lock, iflags);
+
+	return &mdesc->desc;
+}
+
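+/* Convert a bus width in bytes to the log2-encoded TCD transfer size */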
+inline u8 buswidth_to_dmatsize(u8 buswidth)
+{
+	u8 res;
+
+	for (res = 0; buswidth > 1; buswidth /= 2)
+		res++;
+	return res;
+}
+
+static struct dma_async_tx_descriptor *
+mpc_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan);
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+	struct mpc_dma_desc *mdesc = NULL;
+	dma_addr_t per_paddr;
+	u32 tcd_nunits;
+	struct mpc_dma_tcd *tcd;
+	unsigned long iflags;
+	struct scatterlist *sg;
+	size_t len;
+	int iter, i;
+
+	/* Currently there is no proper support for scatter/gather */
+	if (sg_len != 1)
+		return NULL;
+
+	if (!is_slave_direction(direction))
+		return NULL;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		spin_lock_irqsave(&mchan->lock, iflags);
+
+		mdesc = list_first_entry(&mchan->free,
+						struct mpc_dma_desc, node);
+		if (!mdesc) {
+			spin_unlock_irqrestore(&mchan->lock, iflags);
+			/* Try to free completed descriptors */
+			mpc_dma_process_completed(mdma);
+			return NULL;
+		}
+
+		list_del(&mdesc->node);
+
+		if (direction == DMA_DEV_TO_MEM) {
+			per_paddr = mchan->src_per_paddr;
+			tcd_nunits = mchan->src_tcd_nunits;
+		} else {
+			per_paddr = mchan->dst_per_paddr;
+			tcd_nunits = mchan->dst_tcd_nunits;
+		}
+
+		spin_unlock_irqrestore(&mchan->lock, iflags);
+
+		if (per_paddr == 0 || tcd_nunits == 0)
+			goto err_prep;
+
+		mdesc->error = 0;
+		mdesc->will_access_peripheral = 1;
+
+		/* Prepare Transfer Control Descriptor for this transaction */
+		tcd = mdesc->tcd;
+
+		memset(tcd, 0, sizeof(struct mpc_dma_tcd));
+
+		if (direction == DMA_DEV_TO_MEM) {
+			tcd->saddr = per_paddr;
+			tcd->daddr = sg_dma_address(sg);
+
+			if (!IS_ALIGNED(sg_dma_address(sg), mchan->dwidth))
+				goto err_prep;
+
+			tcd->soff = 0;
+			tcd->doff = mchan->dwidth;
+		} else {
+			tcd->saddr = sg_dma_address(sg);
+			tcd->daddr = per_paddr;
+
+			if (!IS_ALIGNED(sg_dma_address(sg), mchan->swidth))
+				goto err_prep;
+
+			tcd->soff = mchan->swidth;
+			tcd->doff = 0;
+		}
+
+		tcd->ssize = buswidth_to_dmatsize(mchan->swidth);
+		tcd->dsize = buswidth_to_dmatsize(mchan->dwidth);
+
+		if (mdma->is_mpc8308) {
+			tcd->nbytes = sg_dma_len(sg);
+			if (!IS_ALIGNED(tcd->nbytes, mchan->swidth))
+				goto err_prep;
+
+			/* No major loops for MPC8308 */
+			tcd->biter = 1;
+			tcd->citer = 1;
+		} else {
+			len = sg_dma_len(sg);
+			tcd->nbytes = tcd_nunits * tcd->ssize;
+			if (!IS_ALIGNED(len, tcd->nbytes))
+				goto err_prep;
+
+			iter = len / tcd->nbytes;
+			if (iter >= 1 << 15) {
+				/* len is too big */
+				goto err_prep;
+			}
+			/* citer_linkch contains the high bits of iter */
+			tcd->biter = iter & 0x1ff;
+			tcd->biter_linkch = iter >> 9;
+			tcd->citer = tcd->biter;
+			tcd->citer_linkch = tcd->biter_linkch;
+		}
+
+		tcd->e_sg = 0;
+		tcd->d_req = 1;
+
+		/* Place descriptor in prepared list */
+		spin_lock_irqsave(&mchan->lock, iflags);
+		list_add_tail(&mdesc->node, &mchan->prepared);
+		spin_unlock_irqrestore(&mchan->lock, iflags);
+	}
+
+	return &mdesc->desc;
+
+err_prep:
+	/* Put the descriptor back */
+	spin_lock_irqsave(&mchan->lock, iflags);
+	list_add_tail(&mdesc->node, &mchan->free);
+	spin_unlock_irqrestore(&mchan->lock, iflags);
+
+	return NULL;
+}
+
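+/* Transfer widths of 1, 2, 4 and 32 bytes are valid on both SoCs; 16 bytes is additionally valid on MPC512x only */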
+static inline bool is_buswidth_valid(u8 buswidth, bool is_mpc8308)
+{
+	switch (buswidth) {
+	case 16:
+		if (is_mpc8308)
+			return false;
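+		/* fall through: 16 bytes is valid on MPC512x */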
+	case 1:
+	case 2:
+	case 4:
+	case 32:
+		break;
+	default:
+		return false;
+	}
+
+	return true;
+}
+
+static int mpc_dma_device_config(struct dma_chan *chan,
+				 struct dma_slave_config *cfg)
+{
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+	struct mpc_dma *mdma = dma_chan_to_mpc_dma(&mchan->chan);
+	unsigned long flags;
+
+	/*
+	 * Software constraints:
+	 *  - only transfers between a peripheral device and memory are
+	 *     supported
+	 *  - transfer chunk sizes of 1, 2, 4, 16 (for MPC512x), and 32 bytes
+	 *     are supported, and, consequently, source and destination
+	 *     addresses must be aligned accordingly; furthermore, for MPC512x
+	 *     SoCs, the transfer size must be aligned on (chunk size *
+	 *     maxburst)
+	 *  - during the transfer, the RAM address is incremented by the size
+	 *     of transfer chunk
+	 *  - the peripheral port's address is constant during the transfer.
+	 */
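+	/*
+	 * Illustrative sketch only (not taken from a client driver in this
+	 * tree): a peripheral driver would typically hand in something like
+	 *
+	 *	struct dma_slave_config cfg = {
+	 *		.direction      = DMA_DEV_TO_MEM,
+	 *		.src_addr       = fifo_phys,
+	 *		.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+	 *		.src_maxburst   = 16,
+	 *	};
+	 *	dmaengine_slave_config(chan, &cfg);
+	 *
+	 * where fifo_phys stands for a hypothetical peripheral FIFO bus
+	 * address. This callback translates those values into the per-channel
+	 * TCD parameters below.
+	 */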
+
+	if (!IS_ALIGNED(cfg->src_addr, cfg->src_addr_width) ||
+	    !IS_ALIGNED(cfg->dst_addr, cfg->dst_addr_width)) {
+		return -EINVAL;
+	}
+
+	if (!is_buswidth_valid(cfg->src_addr_width, mdma->is_mpc8308) ||
+	    !is_buswidth_valid(cfg->dst_addr_width, mdma->is_mpc8308))
+		return -EINVAL;
+
+	spin_lock_irqsave(&mchan->lock, flags);
+
+	mchan->src_per_paddr = cfg->src_addr;
+	mchan->src_tcd_nunits = cfg->src_maxburst;
+	mchan->swidth = cfg->src_addr_width;
+	mchan->dst_per_paddr = cfg->dst_addr;
+	mchan->dst_tcd_nunits = cfg->dst_maxburst;
+	mchan->dwidth = cfg->dst_addr_width;
+
+	/* Apply defaults */
+	if (mchan->src_tcd_nunits == 0)
+		mchan->src_tcd_nunits = 1;
+	if (mchan->dst_tcd_nunits == 0)
+		mchan->dst_tcd_nunits = 1;
+
+	spin_unlock_irqrestore(&mchan->lock, flags);
+
+	return 0;
+}
+
+static int mpc_dma_device_terminate_all(struct dma_chan *chan)
+{
+	struct mpc_dma_chan *mchan = dma_chan_to_mpc_dma_chan(chan);
+	struct mpc_dma *mdma = dma_chan_to_mpc_dma(chan);
+	unsigned long flags;
+
+	/* Disable channel requests */
+	spin_lock_irqsave(&mchan->lock, flags);
+
+	out_8(&mdma->regs->dmacerq, chan->chan_id);
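+	/* Return all prepared, queued and active descriptors to the free pool */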
+	list_splice_tail_init(&mchan->prepared, &mchan->free);
+	list_splice_tail_init(&mchan->queued, &mchan->free);
+	list_splice_tail_init(&mchan->active, &mchan->free);
+
+	spin_unlock_irqrestore(&mchan->lock, flags);
+
+	return 0;
+}
+
+static int mpc_dma_probe(struct platform_device *op)
+{
+	struct device_node *dn = op->dev.of_node;
+	struct device *dev = &op->dev;
+	struct dma_device *dma;
+	struct mpc_dma *mdma;
+	struct mpc_dma_chan *mchan;
+	struct resource res;
+	ulong regs_start, regs_size;
+	int retval, i;
+	u8 chancnt;
+
+	mdma = devm_kzalloc(dev, sizeof(struct mpc_dma), GFP_KERNEL);
+	if (!mdma) {
+		retval = -ENOMEM;
+		goto err;
+	}
+
+	mdma->irq = irq_of_parse_and_map(dn, 0);
+	if (!mdma->irq) {
+		dev_err(dev, "Error mapping IRQ!\n");
+		retval = -EINVAL;
+		goto err;
+	}
+
+	if (of_device_is_compatible(dn, "fsl,mpc8308-dma")) {
+		mdma->is_mpc8308 = 1;
+		mdma->irq2 = irq_of_parse_and_map(dn, 1);
+		if (!mdma->irq2) {
+			dev_err(dev, "Error mapping IRQ!\n");
+			retval = -EINVAL;
+			goto err_dispose1;
+		}
+	}
+
+	retval = of_address_to_resource(dn, 0, &res);
+	if (retval) {
+		dev_err(dev, "Error parsing memory region!\n");
+		goto err_dispose2;
+	}
+
+	regs_start = res.start;
+	regs_size = resource_size(&res);
+
+	if (!devm_request_mem_region(dev, regs_start, regs_size, DRV_NAME)) {
+		dev_err(dev, "Error requesting memory region!\n");
+		retval = -EBUSY;
+		goto err_dispose2;
+	}
+
+	mdma->regs = devm_ioremap(dev, regs_start, regs_size);
+	if (!mdma->regs) {
+		dev_err(dev, "Error mapping memory region!\n");
+		retval = -ENOMEM;
+		goto err_dispose2;
+	}
+
+	mdma->tcd = (struct mpc_dma_tcd *)((u8 *)(mdma->regs)
+							+ MPC_DMA_TCD_OFFSET);
+
+	retval = request_irq(mdma->irq, &mpc_dma_irq, 0, DRV_NAME, mdma);
+	if (retval) {
+		dev_err(dev, "Error requesting IRQ!\n");
+		retval = -EINVAL;
+		goto err_dispose2;
+	}
+
+	if (mdma->is_mpc8308) {
+		retval = request_irq(mdma->irq2, &mpc_dma_irq, 0,
+							DRV_NAME, mdma);
+		if (retval) {
+			dev_err(dev, "Error requesting IRQ2!\n");
+			retval = -EINVAL;
+			goto err_free1;
+		}
+	}
+
+	spin_lock_init(&mdma->error_status_lock);
+
+	dma = &mdma->dma;
+	dma->dev = dev;
+	dma->device_alloc_chan_resources = mpc_dma_alloc_chan_resources;
+	dma->device_free_chan_resources = mpc_dma_free_chan_resources;
+	dma->device_issue_pending = mpc_dma_issue_pending;
+	dma->device_tx_status = mpc_dma_tx_status;
+	dma->device_prep_dma_memcpy = mpc_dma_prep_memcpy;
+	dma->device_prep_slave_sg = mpc_dma_prep_slave_sg;
+	dma->device_config = mpc_dma_device_config;
+	dma->device_terminate_all = mpc_dma_device_terminate_all;
+
+	INIT_LIST_HEAD(&dma->channels);
+	dma_cap_set(DMA_MEMCPY, dma->cap_mask);
+	dma_cap_set(DMA_SLAVE, dma->cap_mask);
+
+	if (mdma->is_mpc8308)
+		chancnt = MPC8308_DMACHAN_MAX;
+	else
+		chancnt = MPC512x_DMACHAN_MAX;
+
+	for (i = 0; i < chancnt; i++) {
+		mchan = &mdma->channels[i];
+
+		mchan->chan.device = dma;
+		dma_cookie_init(&mchan->chan);
+
+		INIT_LIST_HEAD(&mchan->free);
+		INIT_LIST_HEAD(&mchan->prepared);
+		INIT_LIST_HEAD(&mchan->queued);
+		INIT_LIST_HEAD(&mchan->active);
+		INIT_LIST_HEAD(&mchan->completed);
+
+		spin_lock_init(&mchan->lock);
+		list_add_tail(&mchan->chan.device_node, &dma->channels);
+	}
+
+	tasklet_init(&mdma->tasklet, mpc_dma_tasklet, (unsigned long)mdma);
+
+	/*
+	 * Configure DMA Engine:
+	 * - Dynamic clock,
+	 * - Round-robin group arbitration,
+	 * - Round-robin channel arbitration.
+	 */
+	if (mdma->is_mpc8308) {
+		/* MPC8308 has 16 channels and lacks some registers */
+		out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_ERCA);
+
+		/* enable snooping */
+		out_be32(&mdma->regs->dmagpor, MPC_DMA_DMAGPOR_SNOOP_ENABLE);
+		/* Disable error interrupts */
+		out_be32(&mdma->regs->dmaeeil, 0);
+
+		/* Clear interrupts status */
+		out_be32(&mdma->regs->dmaintl, 0xFFFF);
+		out_be32(&mdma->regs->dmaerrl, 0xFFFF);
+	} else {
+		out_be32(&mdma->regs->dmacr, MPC_DMA_DMACR_EDCG |
+						MPC_DMA_DMACR_ERGA |
+						MPC_DMA_DMACR_ERCA);
+
+		/* Disable hardware DMA requests */
+		out_be32(&mdma->regs->dmaerqh, 0);
+		out_be32(&mdma->regs->dmaerql, 0);
+
+		/* Disable error interrupts */
+		out_be32(&mdma->regs->dmaeeih, 0);
+		out_be32(&mdma->regs->dmaeeil, 0);
+
+		/* Clear interrupts status */
+		out_be32(&mdma->regs->dmainth, 0xFFFFFFFF);
+		out_be32(&mdma->regs->dmaintl, 0xFFFFFFFF);
+		out_be32(&mdma->regs->dmaerrh, 0xFFFFFFFF);
+		out_be32(&mdma->regs->dmaerrl, 0xFFFFFFFF);
+
+		/* Route interrupts to IPIC */
+		out_be32(&mdma->regs->dmaihsa, 0);
+		out_be32(&mdma->regs->dmailsa, 0);
+	}
+
+	/* Register DMA engine */
+	dev_set_drvdata(dev, mdma);
+	retval = dma_async_device_register(dma);
+	if (retval)
+		goto err_free2;
+
+	/* Register with OF helpers for DMA lookups (nonfatal) */
+	if (dev->of_node) {
+		retval = of_dma_controller_register(dev->of_node,
+						of_dma_xlate_by_chan_id, mdma);
+		if (retval)
+			dev_warn(dev, "Could not register for OF lookup\n");
+	}
+
+	return 0;
+
+err_free2:
+	if (mdma->is_mpc8308)
+		free_irq(mdma->irq2, mdma);
+err_free1:
+	free_irq(mdma->irq, mdma);
+err_dispose2:
+	if (mdma->is_mpc8308)
+		irq_dispose_mapping(mdma->irq2);
+err_dispose1:
+	irq_dispose_mapping(mdma->irq);
+err:
+	return retval;
+}
+
+static int mpc_dma_remove(struct platform_device *op)
+{
+	struct device *dev = &op->dev;
+	struct mpc_dma *mdma = dev_get_drvdata(dev);
+
+	if (dev->of_node)
+		of_dma_controller_free(dev->of_node);
+	dma_async_device_unregister(&mdma->dma);
+	if (mdma->is_mpc8308) {
+		free_irq(mdma->irq2, mdma);
+		irq_dispose_mapping(mdma->irq2);
+	}
+	free_irq(mdma->irq, mdma);
+	irq_dispose_mapping(mdma->irq);
+	tasklet_kill(&mdma->tasklet);
+
+	return 0;
+}
+
+static const struct of_device_id mpc_dma_match[] = {
+	{ .compatible = "fsl,mpc5121-dma", },
+	{ .compatible = "fsl,mpc8308-dma", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, mpc_dma_match);
+
+static struct platform_driver mpc_dma_driver = {
+	.probe		= mpc_dma_probe,
+	.remove		= mpc_dma_remove,
+	.driver = {
+		.name = DRV_NAME,
+		.of_match_table	= mpc_dma_match,
+	},
+};
+
+module_platform_driver(mpc_dma_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Piotr Ziecik <kosmo@semihalf.com>");
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
new file mode 100644
index 0000000..969534c
--- /dev/null
+++ b/drivers/dma/mv_xor.c
@@ -0,0 +1,1474 @@
+/*
+ * offload engine driver for the Marvell XOR engine
+ * Copyright (C) 2007, 2008, Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/memory.h>
+#include <linux/clk.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/cpumask.h>
+#include <linux/platform_data/dma-mv_xor.h>
+
+#include "dmaengine.h"
+#include "mv_xor.h"
+
+enum mv_xor_type {
+	XOR_ORION,
+	XOR_ARMADA_38X,
+	XOR_ARMADA_37XX,
+};
+
+enum mv_xor_mode {
+	XOR_MODE_IN_REG,
+	XOR_MODE_IN_DESC,
+};
+
+static void mv_xor_issue_pending(struct dma_chan *chan);
+
+#define to_mv_xor_chan(chan)		\
+	container_of(chan, struct mv_xor_chan, dmachan)
+
+#define to_mv_xor_slot(tx)		\
+	container_of(tx, struct mv_xor_desc_slot, async_tx)
+
+#define mv_chan_to_devp(chan)           \
+	((chan)->dmadev.dev)
+
+static void mv_desc_init(struct mv_xor_desc_slot *desc,
+			 dma_addr_t addr, u32 byte_count,
+			 enum dma_ctrl_flags flags)
+{
+	struct mv_xor_desc *hw_desc = desc->hw_desc;
+
+	hw_desc->status = XOR_DESC_DMA_OWNED;
+	hw_desc->phy_next_desc = 0;
+	/* Enable end-of-descriptor interrupts only for DMA_PREP_INTERRUPT */
+	hw_desc->desc_command = (flags & DMA_PREP_INTERRUPT) ?
+				XOR_DESC_EOD_INT_EN : 0;
+	hw_desc->phy_dest_addr = addr;
+	hw_desc->byte_count = byte_count;
+}
+
+static void mv_desc_set_mode(struct mv_xor_desc_slot *desc)
+{
+	struct mv_xor_desc *hw_desc = desc->hw_desc;
+
+	switch (desc->type) {
+	case DMA_XOR:
+	case DMA_INTERRUPT:
+		hw_desc->desc_command |= XOR_DESC_OPERATION_XOR;
+		break;
+	case DMA_MEMCPY:
+		hw_desc->desc_command |= XOR_DESC_OPERATION_MEMCPY;
+		break;
+	default:
+		BUG();
+		return;
+	}
+}
+
+static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc,
+				  u32 next_desc_addr)
+{
+	struct mv_xor_desc *hw_desc = desc->hw_desc;
+	BUG_ON(hw_desc->phy_next_desc);
+	hw_desc->phy_next_desc = next_desc_addr;
+}
+
+static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc,
+				 int index, dma_addr_t addr)
+{
+	struct mv_xor_desc *hw_desc = desc->hw_desc;
+	hw_desc->phy_src_addr[mv_phy_src_idx(index)] = addr;
+	if (desc->type == DMA_XOR)
+		hw_desc->desc_command |= (1 << index);
+}
+
+static u32 mv_chan_get_current_desc(struct mv_xor_chan *chan)
+{
+	return readl_relaxed(XOR_CURR_DESC(chan));
+}
+
+static void mv_chan_set_next_descriptor(struct mv_xor_chan *chan,
+					u32 next_desc_addr)
+{
+	writel_relaxed(next_desc_addr, XOR_NEXT_DESC(chan));
+}
+
+static void mv_chan_unmask_interrupts(struct mv_xor_chan *chan)
+{
+	u32 val = readl_relaxed(XOR_INTR_MASK(chan));
+	val |= XOR_INTR_MASK_VALUE << (chan->idx * 16);
+	writel_relaxed(val, XOR_INTR_MASK(chan));
+}
+
+static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan)
+{
+	u32 intr_cause = readl_relaxed(XOR_INTR_CAUSE(chan));
+	intr_cause = (intr_cause >> (chan->idx * 16)) & 0xFFFF;
+	return intr_cause;
+}
+
+static void mv_chan_clear_eoc_cause(struct mv_xor_chan *chan)
+{
+	u32 val;
+
+	val = XOR_INT_END_OF_DESC | XOR_INT_END_OF_CHAIN | XOR_INT_STOPPED;
+	val = ~(val << (chan->idx * 16));
+	dev_dbg(mv_chan_to_devp(chan), "%s, val 0x%08x\n", __func__, val);
+	writel_relaxed(val, XOR_INTR_CAUSE(chan));
+}
+
+static void mv_chan_clear_err_status(struct mv_xor_chan *chan)
+{
+	u32 val = 0xFFFF0000 >> (chan->idx * 16);
+	writel_relaxed(val, XOR_INTR_CAUSE(chan));
+}
+
+static void mv_chan_set_mode(struct mv_xor_chan *chan,
+			     u32 op_mode)
+{
+	u32 config = readl_relaxed(XOR_CONFIG(chan));
+
+	config &= ~0x7;
+	config |= op_mode;
+
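+	/* On big-endian hosts, let the engine byte-swap descriptors so the in-memory layout matches (see mv_xor.h) */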
+#if defined(__BIG_ENDIAN)
+	config |= XOR_DESCRIPTOR_SWAP;
+#else
+	config &= ~XOR_DESCRIPTOR_SWAP;
+#endif
+
+	writel_relaxed(config, XOR_CONFIG(chan));
+}
+
+static void mv_chan_activate(struct mv_xor_chan *chan)
+{
+	dev_dbg(mv_chan_to_devp(chan), " activate chan.\n");
+
+	/* writel ensures all descriptors are flushed before activation */
+	writel(BIT(0), XOR_ACTIVATION(chan));
+}
+
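+/* Bits 5:4 of the activation register hold the channel state; the channel is treated as busy when they read 1 */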
+static char mv_chan_is_busy(struct mv_xor_chan *chan)
+{
+	u32 state = readl_relaxed(XOR_ACTIVATION(chan));
+
+	state = (state >> 4) & 0x3;
+
+	return (state == 1) ? 1 : 0;
+}
+
+/*
+ * mv_chan_start_new_chain - program the engine to operate on a new
+ * chain headed by sw_desc
+ * Caller must hold &mv_chan->lock while calling this function
+ */
+static void mv_chan_start_new_chain(struct mv_xor_chan *mv_chan,
+				    struct mv_xor_desc_slot *sw_desc)
+{
+	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: sw_desc %p\n",
+		__func__, __LINE__, sw_desc);
+
+	/* set the hardware chain */
+	mv_chan_set_next_descriptor(mv_chan, sw_desc->async_tx.phys);
+
+	mv_chan->pending++;
+	mv_xor_issue_pending(&mv_chan->dmachan);
+}
+
+static dma_cookie_t
+mv_desc_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
+				struct mv_xor_chan *mv_chan,
+				dma_cookie_t cookie)
+{
+	BUG_ON(desc->async_tx.cookie < 0);
+
+	if (desc->async_tx.cookie > 0) {
+		cookie = desc->async_tx.cookie;
+
+		dma_descriptor_unmap(&desc->async_tx);
+		/* call the callback (must not sleep or submit new
+		 * operations to this channel)
+		 */
+		dmaengine_desc_get_callback_invoke(&desc->async_tx, NULL);
+	}
+
+	/* run dependent operations */
+	dma_run_dependencies(&desc->async_tx);
+
+	return cookie;
+}
+
+static int
+mv_chan_clean_completed_slots(struct mv_xor_chan *mv_chan)
+{
+	struct mv_xor_desc_slot *iter, *_iter;
+
+	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__);
+	list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
+				 node) {
+
+		if (async_tx_test_ack(&iter->async_tx)) {
+			list_move_tail(&iter->node, &mv_chan->free_slots);
+			if (!list_empty(&iter->sg_tx_list)) {
+				list_splice_tail_init(&iter->sg_tx_list,
+							&mv_chan->free_slots);
+			}
+		}
+	}
+	return 0;
+}
+
+static int
+mv_desc_clean_slot(struct mv_xor_desc_slot *desc,
+		   struct mv_xor_chan *mv_chan)
+{
+	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: desc %p flags %d\n",
+		__func__, __LINE__, desc, desc->async_tx.flags);
+
+	/* the client is allowed to attach dependent operations
+	 * until 'ack' is set
+	 */
+	if (!async_tx_test_ack(&desc->async_tx)) {
+		/* move this slot to the completed_slots */
+		list_move_tail(&desc->node, &mv_chan->completed_slots);
+		if (!list_empty(&desc->sg_tx_list)) {
+			list_splice_tail_init(&desc->sg_tx_list,
+					      &mv_chan->completed_slots);
+		}
+	} else {
+		list_move_tail(&desc->node, &mv_chan->free_slots);
+		if (!list_empty(&desc->sg_tx_list)) {
+			list_splice_tail_init(&desc->sg_tx_list,
+					      &mv_chan->free_slots);
+		}
+	}
+
+	return 0;
+}
+
+/* This function must be called with the mv_xor_chan spinlock held */
+static void mv_chan_slot_cleanup(struct mv_xor_chan *mv_chan)
+{
+	struct mv_xor_desc_slot *iter, *_iter;
+	dma_cookie_t cookie = 0;
+	int busy = mv_chan_is_busy(mv_chan);
+	u32 current_desc = mv_chan_get_current_desc(mv_chan);
+	int current_cleaned = 0;
+	struct mv_xor_desc *hw_desc;
+
+	dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__);
+	dev_dbg(mv_chan_to_devp(mv_chan), "current_desc %x\n", current_desc);
+	mv_chan_clean_completed_slots(mv_chan);
+
+	/* free completed slots from the chain starting with
+	 * the oldest descriptor
+	 */
+
+	list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
+				 node) {
+
+		/* clean finished descriptors */
+		hw_desc = iter->hw_desc;
+		if (hw_desc->status & XOR_DESC_SUCCESS) {
+			cookie = mv_desc_run_tx_complete_actions(iter, mv_chan,
+								 cookie);
+
+			/* done processing desc, clean slot */
+			mv_desc_clean_slot(iter, mv_chan);
+
+			/* break if we cleaned the current descriptor */
+			if (iter->async_tx.phys == current_desc) {
+				current_cleaned = 1;
+				break;
+			}
+		} else {
+			if (iter->async_tx.phys == current_desc) {
+				current_cleaned = 0;
+				break;
+			}
+		}
+	}
+
+	if ((busy == 0) && !list_empty(&mv_chan->chain)) {
+		if (current_cleaned) {
+			/*
+			 * current descriptor cleaned and removed, run
+			 * from list head
+			 */
+			iter = list_entry(mv_chan->chain.next,
+					  struct mv_xor_desc_slot,
+					  node);
+			mv_chan_start_new_chain(mv_chan, iter);
+		} else {
+			if (!list_is_last(&iter->node, &mv_chan->chain)) {
+				/*
+				 * descriptors are still waiting after
+				 * current, trigger them
+				 */
+				iter = list_entry(iter->node.next,
+						  struct mv_xor_desc_slot,
+						  node);
+				mv_chan_start_new_chain(mv_chan, iter);
+			} else {
+				/*
+				 * some descriptors are still waiting
+				 * to be cleaned
+				 */
+				tasklet_schedule(&mv_chan->irq_tasklet);
+			}
+		}
+	}
+
+	if (cookie > 0)
+		mv_chan->dmachan.completed_cookie = cookie;
+}
+
+static void mv_xor_tasklet(unsigned long data)
+{
+	struct mv_xor_chan *chan = (struct mv_xor_chan *) data;
+
+	spin_lock_bh(&chan->lock);
+	mv_chan_slot_cleanup(chan);
+	spin_unlock_bh(&chan->lock);
+}
+
+static struct mv_xor_desc_slot *
+mv_chan_alloc_slot(struct mv_xor_chan *mv_chan)
+{
+	struct mv_xor_desc_slot *iter;
+
+	spin_lock_bh(&mv_chan->lock);
+
+	if (!list_empty(&mv_chan->free_slots)) {
+		iter = list_first_entry(&mv_chan->free_slots,
+					struct mv_xor_desc_slot,
+					node);
+
+		list_move_tail(&iter->node, &mv_chan->allocated_slots);
+
+		spin_unlock_bh(&mv_chan->lock);
+
+		/* pre-ack descriptor */
+		async_tx_ack(&iter->async_tx);
+		iter->async_tx.cookie = -EBUSY;
+
+		return iter;
+
+	}
+
+	spin_unlock_bh(&mv_chan->lock);
+
+	/* try to free some slots if the allocation fails */
+	tasklet_schedule(&mv_chan->irq_tasklet);
+
+	return NULL;
+}
+
+/************************ DMA engine API functions ****************************/
+static dma_cookie_t
+mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct mv_xor_desc_slot *sw_desc = to_mv_xor_slot(tx);
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(tx->chan);
+	struct mv_xor_desc_slot *old_chain_tail;
+	dma_cookie_t cookie;
+	int new_hw_chain = 1;
+
+	dev_dbg(mv_chan_to_devp(mv_chan),
+		"%s sw_desc %p: async_tx %p\n",
+		__func__, sw_desc, &sw_desc->async_tx);
+
+	spin_lock_bh(&mv_chan->lock);
+	cookie = dma_cookie_assign(tx);
+
+	if (list_empty(&mv_chan->chain))
+		list_move_tail(&sw_desc->node, &mv_chan->chain);
+	else {
+		new_hw_chain = 0;
+
+		old_chain_tail = list_entry(mv_chan->chain.prev,
+					    struct mv_xor_desc_slot,
+					    node);
+		list_move_tail(&sw_desc->node, &mv_chan->chain);
+
+		dev_dbg(mv_chan_to_devp(mv_chan), "Append to last desc %pa\n",
+			&old_chain_tail->async_tx.phys);
+
+		/* fix up the hardware chain */
+		mv_desc_set_next_desc(old_chain_tail, sw_desc->async_tx.phys);
+
+		/* if the channel is not busy */
+		if (!mv_chan_is_busy(mv_chan)) {
+			u32 current_desc = mv_chan_get_current_desc(mv_chan);
+			/*
+			 * and the current desc is the end of the chain before
+			 * the append, then we need to start the channel
+			 */
+			if (current_desc == old_chain_tail->async_tx.phys)
+				new_hw_chain = 1;
+		}
+	}
+
+	if (new_hw_chain)
+		mv_chan_start_new_chain(mv_chan, sw_desc);
+
+	spin_unlock_bh(&mv_chan->lock);
+
+	return cookie;
+}
+
+/* returns the number of allocated descriptors */
+static int mv_xor_alloc_chan_resources(struct dma_chan *chan)
+{
+	void *virt_desc;
+	dma_addr_t dma_desc;
+	int idx;
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+	struct mv_xor_desc_slot *slot = NULL;
+	int num_descs_in_pool = MV_XOR_POOL_SIZE/MV_XOR_SLOT_SIZE;
+
+	/* Allocate descriptor slots */
+	idx = mv_chan->slots_allocated;
+	while (idx < num_descs_in_pool) {
+		slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+		if (!slot) {
+			dev_info(mv_chan_to_devp(mv_chan),
+				 "channel only initialized %d descriptor slots",
+				 idx);
+			break;
+		}
+		virt_desc = mv_chan->dma_desc_pool_virt;
+		slot->hw_desc = virt_desc + idx * MV_XOR_SLOT_SIZE;
+
+		dma_async_tx_descriptor_init(&slot->async_tx, chan);
+		slot->async_tx.tx_submit = mv_xor_tx_submit;
+		INIT_LIST_HEAD(&slot->node);
+		INIT_LIST_HEAD(&slot->sg_tx_list);
+		dma_desc = mv_chan->dma_desc_pool;
+		slot->async_tx.phys = dma_desc + idx * MV_XOR_SLOT_SIZE;
+		slot->idx = idx++;
+
+		spin_lock_bh(&mv_chan->lock);
+		mv_chan->slots_allocated = idx;
+		list_add_tail(&slot->node, &mv_chan->free_slots);
+		spin_unlock_bh(&mv_chan->lock);
+	}
+
+	dev_dbg(mv_chan_to_devp(mv_chan),
+		"allocated %d descriptor slots\n",
+		mv_chan->slots_allocated);
+
+	return mv_chan->slots_allocated ? : -ENOMEM;
+}
+
+/*
+ * Check if the source or destination is a PCIe/IO address (non-SDRAM) and
+ * add a new MBus window if necessary. A cache of the programmed windows is
+ * used so that the memory-mapped registers do not have to be read on every
+ * check, which speeds up this path.
+ */
+static int mv_xor_add_io_win(struct mv_xor_chan *mv_chan, u32 addr)
+{
+	struct mv_xor_device *xordev = mv_chan->xordev;
+	void __iomem *base = mv_chan->mmr_high_base;
+	u32 win_enable;
+	u32 size;
+	u8 target, attr;
+	int ret;
+	int i;
+
+	/* Nothing needs to get done for the Armada 3700 */
+	if (xordev->xor_type == XOR_ARMADA_37XX)
+		return 0;
+
+	/*
+	 * Loop over the cached windows to check whether the requested area
+	 * is already mapped. If that is the case, nothing needs to be done
+	 * and we can return.
+	 */
+	for (i = 0; i < WINDOW_COUNT; i++) {
+		if (addr >= xordev->win_start[i] &&
+		    addr <= xordev->win_end[i]) {
+			/* Window is already mapped */
+			return 0;
+		}
+	}
+
+	/*
+	 * The window is not mapped, so we need to create the new mapping
+	 */
+
+	/* If no IO window is found, the address has to be located in SDRAM */
+	ret = mvebu_mbus_get_io_win_info(addr, &size, &target, &attr);
+	if (ret < 0)
+		return 0;
+
+	/*
+	 * Mask the base addr 'addr' according to 'size' read back from the
+	 * MBus window. Otherwise we might end up with an address located
+	 * somewhere in the middle of this area.
+	 */
+	size -= 1;
+	addr &= ~size;
+
+	/*
+	 * Reading either of the two enable registers is enough, as they are
+	 * always programmed to identical values.
+	 */
+	win_enable = readl(base + WINDOW_BAR_ENABLE(0));
+
+	/* Set 'i' to the first free window to write the new values to */
+	i = ffs(~win_enable) - 1;
+	if (i >= WINDOW_COUNT)
+		return -ENOMEM;
+
+	writel((addr & 0xffff0000) | (attr << 8) | target,
+	       base + WINDOW_BASE(i));
+	writel(size & 0xffff0000, base + WINDOW_SIZE(i));
+
+	/* Fill the caching variables for later use */
+	xordev->win_start[i] = addr;
+	xordev->win_end[i] = addr + size;
+
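+	/* Enable the new window and set its access protection field to full read/write */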
+	win_enable |= (1 << i);
+	win_enable |= 3 << (16 + (2 * i));
+	writel(win_enable, base + WINDOW_BAR_ENABLE(0));
+	writel(win_enable, base + WINDOW_BAR_ENABLE(1));
+
+	return 0;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+		    unsigned int src_cnt, size_t len, unsigned long flags)
+{
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+	struct mv_xor_desc_slot *sw_desc;
+	int ret;
+
+	if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+		return NULL;
+
+	BUG_ON(len > MV_XOR_MAX_BYTE_COUNT);
+
+	dev_dbg(mv_chan_to_devp(mv_chan),
+		"%s src_cnt: %d len: %zu dest %pad flags: %ld\n",
+		__func__, src_cnt, len, &dest, flags);
+
+	/* Check if a new window needs to get added for 'dest' */
+	ret = mv_xor_add_io_win(mv_chan, dest);
+	if (ret)
+		return NULL;
+
+	sw_desc = mv_chan_alloc_slot(mv_chan);
+	if (sw_desc) {
+		sw_desc->type = DMA_XOR;
+		sw_desc->async_tx.flags = flags;
+		mv_desc_init(sw_desc, dest, len, flags);
+		if (mv_chan->op_in_desc == XOR_MODE_IN_DESC)
+			mv_desc_set_mode(sw_desc);
+		while (src_cnt--) {
+			/* Check if a new window needs to get added for 'src' */
+			ret = mv_xor_add_io_win(mv_chan, src[src_cnt]);
+			if (ret)
+				return NULL;
+			mv_desc_set_src_addr(sw_desc, src_cnt, src[src_cnt]);
+		}
+	}
+
+	dev_dbg(mv_chan_to_devp(mv_chan),
+		"%s sw_desc %p async_tx %p \n",
+		__func__, sw_desc, &sw_desc->async_tx);
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	/*
+	 * A MEMCPY operation is identical to an XOR operation with only
+	 * a single source address.
+	 */
+	return mv_xor_prep_dma_xor(chan, dest, &src, 1, len, flags);
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags)
+{
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+	dma_addr_t src, dest;
+	size_t len;
+
+	src = mv_chan->dummy_src_addr;
+	dest = mv_chan->dummy_dst_addr;
+	len = MV_XOR_MIN_BYTE_COUNT;
+
+	/*
+	 * We implement the DMA_INTERRUPT operation as a minimum sized
+	 * XOR operation with a single dummy source address.
+	 */
+	return mv_xor_prep_dma_xor(chan, dest, &src, 1, len, flags);
+}
+
+static void mv_xor_free_chan_resources(struct dma_chan *chan)
+{
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+	struct mv_xor_desc_slot *iter, *_iter;
+	int in_use_descs = 0;
+
+	spin_lock_bh(&mv_chan->lock);
+
+	mv_chan_slot_cleanup(mv_chan);
+
+	list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
+					node) {
+		in_use_descs++;
+		list_move_tail(&iter->node, &mv_chan->free_slots);
+	}
+	list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
+				 node) {
+		in_use_descs++;
+		list_move_tail(&iter->node, &mv_chan->free_slots);
+	}
+	list_for_each_entry_safe(iter, _iter, &mv_chan->allocated_slots,
+				 node) {
+		in_use_descs++;
+		list_move_tail(&iter->node, &mv_chan->free_slots);
+	}
+	list_for_each_entry_safe_reverse(
+		iter, _iter, &mv_chan->free_slots, node) {
+		list_del(&iter->node);
+		kfree(iter);
+		mv_chan->slots_allocated--;
+	}
+
+	dev_dbg(mv_chan_to_devp(mv_chan), "%s slots_allocated %d\n",
+		__func__, mv_chan->slots_allocated);
+	spin_unlock_bh(&mv_chan->lock);
+
+	if (in_use_descs)
+		dev_err(mv_chan_to_devp(mv_chan),
+			"freeing %d in use descriptors!\n", in_use_descs);
+}
+
+/**
+ * mv_xor_status - poll the status of an XOR transaction
+ * @chan: XOR channel handle
+ * @cookie: XOR transaction identifier
+ * @txstate: XOR transactions state holder (or NULL)
+ */
+static enum dma_status mv_xor_status(struct dma_chan *chan,
+					  dma_cookie_t cookie,
+					  struct dma_tx_state *txstate)
+{
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	spin_lock_bh(&mv_chan->lock);
+	mv_chan_slot_cleanup(mv_chan);
+	spin_unlock_bh(&mv_chan->lock);
+
+	return dma_cookie_status(chan, cookie, txstate);
+}
+
+static void mv_chan_dump_regs(struct mv_xor_chan *chan)
+{
+	u32 val;
+
+	val = readl_relaxed(XOR_CONFIG(chan));
+	dev_err(mv_chan_to_devp(chan), "config       0x%08x\n", val);
+
+	val = readl_relaxed(XOR_ACTIVATION(chan));
+	dev_err(mv_chan_to_devp(chan), "activation   0x%08x\n", val);
+
+	val = readl_relaxed(XOR_INTR_CAUSE(chan));
+	dev_err(mv_chan_to_devp(chan), "intr cause   0x%08x\n", val);
+
+	val = readl_relaxed(XOR_INTR_MASK(chan));
+	dev_err(mv_chan_to_devp(chan), "intr mask    0x%08x\n", val);
+
+	val = readl_relaxed(XOR_ERROR_CAUSE(chan));
+	dev_err(mv_chan_to_devp(chan), "error cause  0x%08x\n", val);
+
+	val = readl_relaxed(XOR_ERROR_ADDR(chan));
+	dev_err(mv_chan_to_devp(chan), "error addr   0x%08x\n", val);
+}
+
+static void mv_chan_err_interrupt_handler(struct mv_xor_chan *chan,
+					  u32 intr_cause)
+{
+	if (intr_cause & XOR_INT_ERR_DECODE) {
+		dev_dbg(mv_chan_to_devp(chan), "ignoring address decode error\n");
+		return;
+	}
+
+	dev_err(mv_chan_to_devp(chan), "error on chan %d. intr cause 0x%08x\n",
+		chan->idx, intr_cause);
+
+	mv_chan_dump_regs(chan);
+	WARN_ON(1);
+}
+
+static irqreturn_t mv_xor_interrupt_handler(int irq, void *data)
+{
+	struct mv_xor_chan *chan = data;
+	u32 intr_cause = mv_chan_get_intr_cause(chan);
+
+	dev_dbg(mv_chan_to_devp(chan), "intr cause %x\n", intr_cause);
+
+	if (intr_cause & XOR_INTR_ERRORS)
+		mv_chan_err_interrupt_handler(chan, intr_cause);
+
+	tasklet_schedule(&chan->irq_tasklet);
+
+	mv_chan_clear_eoc_cause(chan);
+
+	return IRQ_HANDLED;
+}
+
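+/* Activate the engine once at least MV_XOR_THRESHOLD (currently 1) pending descriptors have accumulated */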
+static void mv_xor_issue_pending(struct dma_chan *chan)
+{
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+
+	if (mv_chan->pending >= MV_XOR_THRESHOLD) {
+		mv_chan->pending = 0;
+		mv_chan_activate(mv_chan);
+	}
+}
+
+/*
+ * Perform a transaction to verify the HW works.
+ */
+
+static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan)
+{
+	int i, ret;
+	void *src, *dest;
+	dma_addr_t src_dma, dest_dma;
+	struct dma_chan *dma_chan;
+	dma_cookie_t cookie;
+	struct dma_async_tx_descriptor *tx;
+	struct dmaengine_unmap_data *unmap;
+	int err = 0;
+
+	src = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!src)
+		return -ENOMEM;
+
+	dest = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!dest) {
+		kfree(src);
+		return -ENOMEM;
+	}
+
+	/* Fill in src buffer */
+	for (i = 0; i < PAGE_SIZE; i++)
+		((u8 *) src)[i] = (u8)i;
+
+	dma_chan = &mv_chan->dmachan;
+	if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	unmap = dmaengine_get_unmap_data(dma_chan->device->dev, 2, GFP_KERNEL);
+	if (!unmap) {
+		err = -ENOMEM;
+		goto free_resources;
+	}
+
+	src_dma = dma_map_page(dma_chan->device->dev, virt_to_page(src),
+			       offset_in_page(src), PAGE_SIZE,
+			       DMA_TO_DEVICE);
+	unmap->addr[0] = src_dma;
+
+	ret = dma_mapping_error(dma_chan->device->dev, src_dma);
+	if (ret) {
+		err = -ENOMEM;
+		goto free_resources;
+	}
+	unmap->to_cnt = 1;
+
+	dest_dma = dma_map_page(dma_chan->device->dev, virt_to_page(dest),
+				offset_in_page(dest), PAGE_SIZE,
+				DMA_FROM_DEVICE);
+	unmap->addr[1] = dest_dma;
+
+	ret = dma_mapping_error(dma_chan->device->dev, dest_dma);
+	if (ret) {
+		err = -ENOMEM;
+		goto free_resources;
+	}
+	unmap->from_cnt = 1;
+	unmap->len = PAGE_SIZE;
+
+	tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
+				    PAGE_SIZE, 0);
+	if (!tx) {
+		dev_err(dma_chan->device->dev,
+			"Self-test cannot prepare operation, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	cookie = mv_xor_tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		dev_err(dma_chan->device->dev,
+			"Self-test submit error, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	mv_xor_issue_pending(dma_chan);
+	async_tx_ack(tx);
+	msleep(1);
+
+	if (mv_xor_status(dma_chan, cookie, NULL) !=
+	    DMA_COMPLETE) {
+		dev_err(dma_chan->device->dev,
+			"Self-test copy timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	dma_sync_single_for_cpu(dma_chan->device->dev, dest_dma,
+				PAGE_SIZE, DMA_FROM_DEVICE);
+	if (memcmp(src, dest, PAGE_SIZE)) {
+		dev_err(dma_chan->device->dev,
+			"Self-test copy failed compare, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+free_resources:
+	dmaengine_unmap_put(unmap);
+	mv_xor_free_chan_resources(dma_chan);
+out:
+	kfree(src);
+	kfree(dest);
+	return err;
+}
+
+#define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */
+static int
+mv_chan_xor_self_test(struct mv_xor_chan *mv_chan)
+{
+	int i, src_idx, ret;
+	struct page *dest;
+	struct page *xor_srcs[MV_XOR_NUM_SRC_TEST];
+	dma_addr_t dma_srcs[MV_XOR_NUM_SRC_TEST];
+	dma_addr_t dest_dma;
+	struct dma_async_tx_descriptor *tx;
+	struct dmaengine_unmap_data *unmap;
+	struct dma_chan *dma_chan;
+	dma_cookie_t cookie;
+	u8 cmp_byte = 0;
+	u32 cmp_word;
+	int err = 0;
+	int src_count = MV_XOR_NUM_SRC_TEST;
+
+	for (src_idx = 0; src_idx < src_count; src_idx++) {
+		xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+		if (!xor_srcs[src_idx]) {
+			while (src_idx--)
+				__free_page(xor_srcs[src_idx]);
+			return -ENOMEM;
+		}
+	}
+
+	dest = alloc_page(GFP_KERNEL);
+	if (!dest) {
+		while (src_idx--)
+			__free_page(xor_srcs[src_idx]);
+		return -ENOMEM;
+	}
+
+	/* Fill in src buffers */
+	for (src_idx = 0; src_idx < src_count; src_idx++) {
+		u8 *ptr = page_address(xor_srcs[src_idx]);
+		for (i = 0; i < PAGE_SIZE; i++)
+			ptr[i] = (1 << src_idx);
+	}
+
+	for (src_idx = 0; src_idx < src_count; src_idx++)
+		cmp_byte ^= (u8) (1 << src_idx);
+
+	cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+		(cmp_byte << 8) | cmp_byte;
+
+	memset(page_address(dest), 0, PAGE_SIZE);
+
+	dma_chan = &mv_chan->dmachan;
+	if (mv_xor_alloc_chan_resources(dma_chan) < 1) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	unmap = dmaengine_get_unmap_data(dma_chan->device->dev, src_count + 1,
+					 GFP_KERNEL);
+	if (!unmap) {
+		err = -ENOMEM;
+		goto free_resources;
+	}
+
+	/* test xor */
+	for (i = 0; i < src_count; i++) {
+		unmap->addr[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
+					      0, PAGE_SIZE, DMA_TO_DEVICE);
+		dma_srcs[i] = unmap->addr[i];
+		ret = dma_mapping_error(dma_chan->device->dev, unmap->addr[i]);
+		if (ret) {
+			err = -ENOMEM;
+			goto free_resources;
+		}
+		unmap->to_cnt++;
+	}
+
+	unmap->addr[src_count] = dma_map_page(dma_chan->device->dev, dest, 0, PAGE_SIZE,
+				      DMA_FROM_DEVICE);
+	dest_dma = unmap->addr[src_count];
+	ret = dma_mapping_error(dma_chan->device->dev, unmap->addr[src_count]);
+	if (ret) {
+		err = -ENOMEM;
+		goto free_resources;
+	}
+	unmap->from_cnt = 1;
+	unmap->len = PAGE_SIZE;
+
+	tx = mv_xor_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+				 src_count, PAGE_SIZE, 0);
+	if (!tx) {
+		dev_err(dma_chan->device->dev,
+			"Self-test cannot prepare operation, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	cookie = mv_xor_tx_submit(tx);
+	if (dma_submit_error(cookie)) {
+		dev_err(dma_chan->device->dev,
+			"Self-test submit error, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	mv_xor_issue_pending(dma_chan);
+	async_tx_ack(tx);
+	msleep(8);
+
+	if (mv_xor_status(dma_chan, cookie, NULL) !=
+	    DMA_COMPLETE) {
+		dev_err(dma_chan->device->dev,
+			"Self-test xor timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	dma_sync_single_for_cpu(dma_chan->device->dev, dest_dma,
+				PAGE_SIZE, DMA_FROM_DEVICE);
+	for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+		u32 *ptr = page_address(dest);
+		if (ptr[i] != cmp_word) {
+			dev_err(dma_chan->device->dev,
+				"Self-test xor failed compare, disabling. index %d, data %x, expected %x\n",
+				i, ptr[i], cmp_word);
+			err = -ENODEV;
+			goto free_resources;
+		}
+	}
+
+free_resources:
+	dmaengine_unmap_put(unmap);
+	mv_xor_free_chan_resources(dma_chan);
+out:
+	src_idx = src_count;
+	while (src_idx--)
+		__free_page(xor_srcs[src_idx]);
+	__free_page(dest);
+	return err;
+}
+
+static int mv_xor_channel_remove(struct mv_xor_chan *mv_chan)
+{
+	struct dma_chan *chan, *_chan;
+	struct device *dev = mv_chan->dmadev.dev;
+
+	dma_async_device_unregister(&mv_chan->dmadev);
+
+	dma_free_coherent(dev, MV_XOR_POOL_SIZE,
+			  mv_chan->dma_desc_pool_virt, mv_chan->dma_desc_pool);
+	dma_unmap_single(dev, mv_chan->dummy_src_addr,
+			 MV_XOR_MIN_BYTE_COUNT, DMA_FROM_DEVICE);
+	dma_unmap_single(dev, mv_chan->dummy_dst_addr,
+			 MV_XOR_MIN_BYTE_COUNT, DMA_TO_DEVICE);
+
+	list_for_each_entry_safe(chan, _chan, &mv_chan->dmadev.channels,
+				 device_node) {
+		list_del(&chan->device_node);
+	}
+
+	free_irq(mv_chan->irq, mv_chan);
+
+	return 0;
+}
+
+static struct mv_xor_chan *
+mv_xor_channel_add(struct mv_xor_device *xordev,
+		   struct platform_device *pdev,
+		   int idx, dma_cap_mask_t cap_mask, int irq)
+{
+	int ret = 0;
+	struct mv_xor_chan *mv_chan;
+	struct dma_device *dma_dev;
+
+	mv_chan = devm_kzalloc(&pdev->dev, sizeof(*mv_chan), GFP_KERNEL);
+	if (!mv_chan)
+		return ERR_PTR(-ENOMEM);
+
+	mv_chan->idx = idx;
+	mv_chan->irq = irq;
+	if (xordev->xor_type == XOR_ORION)
+		mv_chan->op_in_desc = XOR_MODE_IN_REG;
+	else
+		mv_chan->op_in_desc = XOR_MODE_IN_DESC;
+
+	dma_dev = &mv_chan->dmadev;
+	mv_chan->xordev = xordev;
+
+	/*
+	 * These source and destination dummy buffers are used to implement
+	 * a DMA_INTERRUPT operation as a minimum-sized XOR operation.
+	 * Hence, we only need to map the buffers at initialization time.
+	 */
+	mv_chan->dummy_src_addr = dma_map_single(dma_dev->dev,
+		mv_chan->dummy_src, MV_XOR_MIN_BYTE_COUNT, DMA_FROM_DEVICE);
+	mv_chan->dummy_dst_addr = dma_map_single(dma_dev->dev,
+		mv_chan->dummy_dst, MV_XOR_MIN_BYTE_COUNT, DMA_TO_DEVICE);
+
+	/* allocate coherent memory for hardware descriptors
+	 * note: writecombine gives slightly better performance, but
+	 * requires that we explicitly flush the writes
+	 */
+	mv_chan->dma_desc_pool_virt =
+	  dma_alloc_wc(&pdev->dev, MV_XOR_POOL_SIZE, &mv_chan->dma_desc_pool,
+		       GFP_KERNEL);
+	if (!mv_chan->dma_desc_pool_virt)
+		return ERR_PTR(-ENOMEM);
+
+	/* discover transaction capabilities from the platform data */
+	dma_dev->cap_mask = cap_mask;
+
+	INIT_LIST_HEAD(&dma_dev->channels);
+
+	/* set base routines */
+	dma_dev->device_alloc_chan_resources = mv_xor_alloc_chan_resources;
+	dma_dev->device_free_chan_resources = mv_xor_free_chan_resources;
+	dma_dev->device_tx_status = mv_xor_status;
+	dma_dev->device_issue_pending = mv_xor_issue_pending;
+	dma_dev->dev = &pdev->dev;
+
+	/* set prep routines based on capability */
+	if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_interrupt = mv_xor_prep_dma_interrupt;
+	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_memcpy = mv_xor_prep_dma_memcpy;
+	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+		dma_dev->max_xor = 8;
+		dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor;
+	}
+
+	mv_chan->mmr_base = xordev->xor_base;
+	mv_chan->mmr_high_base = xordev->xor_high_base;
+	tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned long)
+		     mv_chan);
+
+	/* clear errors before enabling interrupts */
+	mv_chan_clear_err_status(mv_chan);
+
+	ret = request_irq(mv_chan->irq, mv_xor_interrupt_handler,
+			  0, dev_name(&pdev->dev), mv_chan);
+	if (ret)
+		goto err_free_dma;
+
+	mv_chan_unmask_interrupts(mv_chan);
+
+	if (mv_chan->op_in_desc == XOR_MODE_IN_DESC)
+		mv_chan_set_mode(mv_chan, XOR_OPERATION_MODE_IN_DESC);
+	else
+		mv_chan_set_mode(mv_chan, XOR_OPERATION_MODE_XOR);
+
+	spin_lock_init(&mv_chan->lock);
+	INIT_LIST_HEAD(&mv_chan->chain);
+	INIT_LIST_HEAD(&mv_chan->completed_slots);
+	INIT_LIST_HEAD(&mv_chan->free_slots);
+	INIT_LIST_HEAD(&mv_chan->allocated_slots);
+	mv_chan->dmachan.device = dma_dev;
+	dma_cookie_init(&mv_chan->dmachan);
+
+	list_add_tail(&mv_chan->dmachan.device_node, &dma_dev->channels);
+
+	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+		ret = mv_chan_memcpy_self_test(mv_chan);
+		dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
+		if (ret)
+			goto err_free_irq;
+	}
+
+	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+		ret = mv_chan_xor_self_test(mv_chan);
+		dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
+		if (ret)
+			goto err_free_irq;
+	}
+
+	dev_info(&pdev->dev, "Marvell XOR (%s): ( %s%s%s)\n",
+		 mv_chan->op_in_desc ? "Descriptor Mode" : "Registers Mode",
+		 dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
+		 dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
+		 dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
+
+	dma_async_device_register(dma_dev);
+	return mv_chan;
+
+err_free_irq:
+	free_irq(mv_chan->irq, mv_chan);
+err_free_dma:
+	dma_free_coherent(&pdev->dev, MV_XOR_POOL_SIZE,
+			  mv_chan->dma_desc_pool_virt, mv_chan->dma_desc_pool);
+	return ERR_PTR(ret);
+}
+
+static void
+mv_xor_conf_mbus_windows(struct mv_xor_device *xordev,
+			 const struct mbus_dram_target_info *dram)
+{
+	void __iomem *base = xordev->xor_high_base;
+	u32 win_enable = 0;
+	int i;
+
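+	/* Disable and clear all windows before programming the DRAM chip-select windows */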
+	for (i = 0; i < 8; i++) {
+		writel(0, base + WINDOW_BASE(i));
+		writel(0, base + WINDOW_SIZE(i));
+		if (i < 4)
+			writel(0, base + WINDOW_REMAP_HIGH(i));
+	}
+
+	for (i = 0; i < dram->num_cs; i++) {
+		const struct mbus_dram_window *cs = dram->cs + i;
+
+		writel((cs->base & 0xffff0000) |
+		       (cs->mbus_attr << 8) |
+		       dram->mbus_dram_target_id, base + WINDOW_BASE(i));
+		writel((cs->size - 1) & 0xffff0000, base + WINDOW_SIZE(i));
+
+		/* Fill the caching variables for later use */
+		xordev->win_start[i] = cs->base;
+		xordev->win_end[i] = cs->base + cs->size - 1;
+
+		win_enable |= (1 << i);
+		win_enable |= 3 << (16 + (2 * i));
+	}
+
+	writel(win_enable, base + WINDOW_BAR_ENABLE(0));
+	writel(win_enable, base + WINDOW_BAR_ENABLE(1));
+	writel(0, base + WINDOW_OVERRIDE_CTRL(0));
+	writel(0, base + WINDOW_OVERRIDE_CTRL(1));
+}
+
+static void
+mv_xor_conf_mbus_windows_a3700(struct mv_xor_device *xordev)
+{
+	void __iomem *base = xordev->xor_high_base;
+	u32 win_enable = 0;
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		writel(0, base + WINDOW_BASE(i));
+		writel(0, base + WINDOW_SIZE(i));
+		if (i < 4)
+			writel(0, base + WINDOW_REMAP_HIGH(i));
+	}
+	/*
+	 * For the Armada 3700, open a default 4GB MBus window. The
+	 * DRAM-related configuration is done at the AXIS level.
+	 */
+	writel(0xffff0000, base + WINDOW_SIZE(0));
+	win_enable |= 1;
+	win_enable |= 3 << 16;
+
+	writel(win_enable, base + WINDOW_BAR_ENABLE(0));
+	writel(win_enable, base + WINDOW_BAR_ENABLE(1));
+	writel(0, base + WINDOW_OVERRIDE_CTRL(0));
+	writel(0, base + WINDOW_OVERRIDE_CTRL(1));
+}
+
+/*
+ * Since this XOR driver is basically used only for RAID5, we don't
+ * need to care about synchronizing ->suspend with DMA activity,
+ * because the DMA engine will naturally be quiet due to the block
+ * devices being suspended.
+ */
+static int mv_xor_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct mv_xor_device *xordev = platform_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) {
+		struct mv_xor_chan *mv_chan = xordev->channels[i];
+
+		if (!mv_chan)
+			continue;
+
+		mv_chan->saved_config_reg =
+			readl_relaxed(XOR_CONFIG(mv_chan));
+		mv_chan->saved_int_mask_reg =
+			readl_relaxed(XOR_INTR_MASK(mv_chan));
+	}
+
+	return 0;
+}
+
+static int mv_xor_resume(struct platform_device *dev)
+{
+	struct mv_xor_device *xordev = platform_get_drvdata(dev);
+	const struct mbus_dram_target_info *dram;
+	int i;
+
+	for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) {
+		struct mv_xor_chan *mv_chan = xordev->channels[i];
+
+		if (!mv_chan)
+			continue;
+
+		writel_relaxed(mv_chan->saved_config_reg,
+			       XOR_CONFIG(mv_chan));
+		writel_relaxed(mv_chan->saved_int_mask_reg,
+			       XOR_INTR_MASK(mv_chan));
+	}
+
+	if (xordev->xor_type == XOR_ARMADA_37XX) {
+		mv_xor_conf_mbus_windows_a3700(xordev);
+		return 0;
+	}
+
+	dram = mv_mbus_dram_info();
+	if (dram)
+		mv_xor_conf_mbus_windows(xordev, dram);
+
+	return 0;
+}
+
+static const struct of_device_id mv_xor_dt_ids[] = {
+	{ .compatible = "marvell,orion-xor", .data = (void *)XOR_ORION },
+	{ .compatible = "marvell,armada-380-xor", .data = (void *)XOR_ARMADA_38X },
+	{ .compatible = "marvell,armada-3700-xor", .data = (void *)XOR_ARMADA_37XX },
+	{},
+};
+
+static unsigned int mv_xor_engine_count;
+
+static int mv_xor_probe(struct platform_device *pdev)
+{
+	const struct mbus_dram_target_info *dram;
+	struct mv_xor_device *xordev;
+	struct mv_xor_platform_data *pdata = dev_get_platdata(&pdev->dev);
+	struct resource *res;
+	unsigned int max_engines, max_channels;
+	int i, ret;
+
+	dev_notice(&pdev->dev, "Marvell shared XOR driver\n");
+
+	xordev = devm_kzalloc(&pdev->dev, sizeof(*xordev), GFP_KERNEL);
+	if (!xordev)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	xordev->xor_base = devm_ioremap(&pdev->dev, res->start,
+					resource_size(res));
+	if (!xordev->xor_base)
+		return -EBUSY;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!res)
+		return -ENODEV;
+
+	xordev->xor_high_base = devm_ioremap(&pdev->dev, res->start,
+					     resource_size(res));
+	if (!xordev->xor_high_base)
+		return -EBUSY;
+
+	platform_set_drvdata(pdev, xordev);
+
+
+	/*
+	 * We need to know which type of XOR device we use before
+	 * setting up. In the non-DT case it can only be the legacy one.
+	 */
+	xordev->xor_type = XOR_ORION;
+	if (pdev->dev.of_node) {
+		const struct of_device_id *of_id =
+			of_match_device(mv_xor_dt_ids,
+					&pdev->dev);
+
+		xordev->xor_type = (uintptr_t)of_id->data;
+	}
+
+	/*
+	 * (Re-)program MBUS remapping windows if we are asked to.
+	 */
+	if (xordev->xor_type == XOR_ARMADA_37XX) {
+		mv_xor_conf_mbus_windows_a3700(xordev);
+	} else {
+		dram = mv_mbus_dram_info();
+		if (dram)
+			mv_xor_conf_mbus_windows(xordev, dram);
+	}
+
+	/* Not all platforms can gate the clock, so it is not
+	 * an error if the clock does not exist.
+	 */
+	xordev->clk = clk_get(&pdev->dev, NULL);
+	if (!IS_ERR(xordev->clk))
+		clk_prepare_enable(xordev->clk);
+
+	/*
+	 * We don't want more than one channel per CPU, so that async_tx
+	 * performs well. The number of engines and channels is therefore
+	 * limited, and channels from separate engines are used when
+	 * possible. For the dual-CPU Armada 3700 SoC, which has a single
+	 * XOR engine, both of its channels may be used.
+	 */
+	max_engines = num_present_cpus();
+	if (xordev->xor_type == XOR_ARMADA_37XX)
+		max_channels =	num_present_cpus();
+	else
+		max_channels = min_t(unsigned int,
+				     MV_XOR_MAX_CHANNELS,
+				     DIV_ROUND_UP(num_present_cpus(), 2));
+
+	if (mv_xor_engine_count >= max_engines)
+		return 0;
+
+	if (pdev->dev.of_node) {
+		struct device_node *np;
+		int i = 0;
+
+		for_each_child_of_node(pdev->dev.of_node, np) {
+			struct mv_xor_chan *chan;
+			dma_cap_mask_t cap_mask;
+			int irq;
+
+			if (i >= max_channels)
+				continue;
+
+			dma_cap_zero(cap_mask);
+			dma_cap_set(DMA_MEMCPY, cap_mask);
+			dma_cap_set(DMA_XOR, cap_mask);
+			dma_cap_set(DMA_INTERRUPT, cap_mask);
+
+			irq = irq_of_parse_and_map(np, 0);
+			if (!irq) {
+				ret = -ENODEV;
+				goto err_channel_add;
+			}
+
+			chan = mv_xor_channel_add(xordev, pdev, i,
+						  cap_mask, irq);
+			if (IS_ERR(chan)) {
+				ret = PTR_ERR(chan);
+				irq_dispose_mapping(irq);
+				goto err_channel_add;
+			}
+
+			xordev->channels[i] = chan;
+			i++;
+		}
+	} else if (pdata && pdata->channels) {
+		for (i = 0; i < max_channels; i++) {
+			struct mv_xor_channel_data *cd;
+			struct mv_xor_chan *chan;
+			int irq;
+
+			cd = &pdata->channels[i];
+			irq = platform_get_irq(pdev, i);
+			if (irq < 0) {
+				ret = irq;
+				goto err_channel_add;
+			}
+
+			chan = mv_xor_channel_add(xordev, pdev, i,
+						  cd->cap_mask, irq);
+			if (IS_ERR(chan)) {
+				ret = PTR_ERR(chan);
+				goto err_channel_add;
+			}
+
+			xordev->channels[i] = chan;
+		}
+	}
+
+	return 0;
+
+err_channel_add:
+	for (i = 0; i < MV_XOR_MAX_CHANNELS; i++)
+		if (xordev->channels[i]) {
+			mv_xor_channel_remove(xordev->channels[i]);
+			if (pdev->dev.of_node)
+				irq_dispose_mapping(xordev->channels[i]->irq);
+		}
+
+	if (!IS_ERR(xordev->clk)) {
+		clk_disable_unprepare(xordev->clk);
+		clk_put(xordev->clk);
+	}
+
+	return ret;
+}
+
+static struct platform_driver mv_xor_driver = {
+	.probe		= mv_xor_probe,
+	.suspend        = mv_xor_suspend,
+	.resume         = mv_xor_resume,
+	.driver		= {
+		.name	        = MV_XOR_NAME,
+		.of_match_table = of_match_ptr(mv_xor_dt_ids),
+	},
+};
+
+builtin_platform_driver(mv_xor_driver);
+
+/*
+MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>");
+MODULE_DESCRIPTION("DMA engine driver for Marvell's XOR engine");
+MODULE_LICENSE("GPL");
+*/
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
new file mode 100644
index 0000000..cf921dd
--- /dev/null
+++ b/drivers/dma/mv_xor.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2007, 2008, Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ */
+
+#ifndef MV_XOR_H
+#define MV_XOR_H
+
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+
+#define MV_XOR_POOL_SIZE		(MV_XOR_SLOT_SIZE * 3072)
+#define MV_XOR_SLOT_SIZE		64
+#define MV_XOR_THRESHOLD		1
+#define MV_XOR_MAX_CHANNELS             2
+
+#define MV_XOR_MIN_BYTE_COUNT		SZ_128
+#define MV_XOR_MAX_BYTE_COUNT		(SZ_16M - 1)
+
+/* Values for the XOR_CONFIG register */
+#define XOR_OPERATION_MODE_XOR		0
+#define XOR_OPERATION_MODE_MEMCPY	2
+#define XOR_OPERATION_MODE_IN_DESC      7
+#define XOR_DESCRIPTOR_SWAP		BIT(14)
+#define XOR_DESC_SUCCESS		0x40000000
+
+#define XOR_DESC_OPERATION_XOR          (0 << 24)
+#define XOR_DESC_OPERATION_CRC32C       (1 << 24)
+#define XOR_DESC_OPERATION_MEMCPY       (2 << 24)
+
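+/* Both are bit 31, but of different descriptor words: DMA_OWNED lives in 'status', EOD_INT_EN in 'desc_command' */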
+#define XOR_DESC_DMA_OWNED		BIT(31)
+#define XOR_DESC_EOD_INT_EN		BIT(31)
+
+#define XOR_CURR_DESC(chan)	(chan->mmr_high_base + 0x10 + (chan->idx * 4))
+#define XOR_NEXT_DESC(chan)	(chan->mmr_high_base + 0x00 + (chan->idx * 4))
+#define XOR_BYTE_COUNT(chan)	(chan->mmr_high_base + 0x20 + (chan->idx * 4))
+#define XOR_DEST_POINTER(chan)	(chan->mmr_high_base + 0xB0 + (chan->idx * 4))
+#define XOR_BLOCK_SIZE(chan)	(chan->mmr_high_base + 0xC0 + (chan->idx * 4))
+#define XOR_INIT_VALUE_LOW(chan)	(chan->mmr_high_base + 0xE0)
+#define XOR_INIT_VALUE_HIGH(chan)	(chan->mmr_high_base + 0xE4)
+
+#define XOR_CONFIG(chan)	(chan->mmr_base + 0x10 + (chan->idx * 4))
+#define XOR_ACTIVATION(chan)	(chan->mmr_base + 0x20 + (chan->idx * 4))
+#define XOR_INTR_CAUSE(chan)	(chan->mmr_base + 0x30)
+#define XOR_INTR_MASK(chan)	(chan->mmr_base + 0x40)
+#define XOR_ERROR_CAUSE(chan)	(chan->mmr_base + 0x50)
+#define XOR_ERROR_ADDR(chan)	(chan->mmr_base + 0x60)
+
+#define XOR_INT_END_OF_DESC	BIT(0)
+#define XOR_INT_END_OF_CHAIN	BIT(1)
+#define XOR_INT_STOPPED		BIT(2)
+#define XOR_INT_PAUSED		BIT(3)
+#define XOR_INT_ERR_DECODE	BIT(4)
+#define XOR_INT_ERR_RDPROT	BIT(5)
+#define XOR_INT_ERR_WRPROT	BIT(6)
+#define XOR_INT_ERR_OWN		BIT(7)
+#define XOR_INT_ERR_PAR		BIT(8)
+#define XOR_INT_ERR_MBUS	BIT(9)
+
+#define XOR_INTR_ERRORS		(XOR_INT_ERR_DECODE | XOR_INT_ERR_RDPROT | \
+				 XOR_INT_ERR_WRPROT | XOR_INT_ERR_OWN    | \
+				 XOR_INT_ERR_PAR    | XOR_INT_ERR_MBUS)
+
+#define XOR_INTR_MASK_VALUE	(XOR_INT_END_OF_DESC | XOR_INT_END_OF_CHAIN | \
+				 XOR_INT_STOPPED     | XOR_INTR_ERRORS)
+
+#define WINDOW_BASE(w)		(0x50 + ((w) << 2))
+#define WINDOW_SIZE(w)		(0x70 + ((w) << 2))
+#define WINDOW_REMAP_HIGH(w)	(0x90 + ((w) << 2))
+#define WINDOW_BAR_ENABLE(chan)	(0x40 + ((chan) << 2))
+#define WINDOW_OVERRIDE_CTRL(chan)	(0xA0 + ((chan) << 2))
+
+#define WINDOW_COUNT		8
+
+struct mv_xor_device {
+	void __iomem	     *xor_base;
+	void __iomem	     *xor_high_base;
+	struct clk	     *clk;
+	struct mv_xor_chan   *channels[MV_XOR_MAX_CHANNELS];
+	int		     xor_type;
+
+	u32                  win_start[WINDOW_COUNT];
+	u32                  win_end[WINDOW_COUNT];
+};
+
+/**
+ * struct mv_xor_chan - internal representation of a XOR channel
+ * @pending: allows batching of hardware operations
+ * @lock: serializes enqueue/dequeue operations to the descriptors pool
+ * @mmr_base: memory mapped register base
+ * @idx: the index of the xor channel
+ * @chain: device chain view of the descriptors
+ * @free_slots: free slots usable by the channel
+ * @allocated_slots: slots allocated by the driver
+ * @completed_slots: slots completed by HW that still need to be acked
+ * @dmadev: dmaengine device registered for this channel
+ * @dmachan: common dmaengine channel object
+ * @slots_allocated: records the actual size of the descriptor slot pool
+ * @irq_tasklet: bottom half where mv_chan_slot_cleanup runs
+ * @op_in_desc: new driver mode where each operation is written to its descriptor
+ */
+struct mv_xor_chan {
+	int			pending;
+	spinlock_t		lock; /* protects the descriptor slot pool */
+	void __iomem		*mmr_base;
+	void __iomem		*mmr_high_base;
+	unsigned int		idx;
+	int                     irq;
+	struct list_head	chain;
+	struct list_head	free_slots;
+	struct list_head	allocated_slots;
+	struct list_head	completed_slots;
+	dma_addr_t		dma_desc_pool;
+	void			*dma_desc_pool_virt;
+	size_t                  pool_size;
+	struct dma_device	dmadev;
+	struct dma_chan		dmachan;
+	int			slots_allocated;
+	struct tasklet_struct	irq_tasklet;
+	int                     op_in_desc;
+	char			dummy_src[MV_XOR_MIN_BYTE_COUNT];
+	char			dummy_dst[MV_XOR_MIN_BYTE_COUNT];
+	dma_addr_t		dummy_src_addr, dummy_dst_addr;
+	u32                     saved_config_reg, saved_int_mask_reg;
+
+	struct mv_xor_device	*xordev;
+};
+
+/**
+ * struct mv_xor_desc_slot - software descriptor
+ * @node: node on the mv_xor_chan lists
+ * @sg_tx_list: list of slots that make up a multi-descriptor transaction
+ * @type: transaction type carried by this descriptor
+ * @hw_desc: virtual address of the hardware descriptor
+ * @idx: pool index
+ * @async_tx: support for the async_tx api
+ */
+struct mv_xor_desc_slot {
+	struct list_head	node;
+	struct list_head	sg_tx_list;
+	enum dma_transaction_type	type;
+	void			*hw_desc;
+	u16			idx;
+	struct dma_async_tx_descriptor	async_tx;
+};
+
+/*
+ * This structure describes the 64-byte XOR hardware descriptor. The
+ * mv_phy_src_idx() macro must be used when indexing the values of the
+ * phy_src_addr[] array. This is due to the fact that the 'descriptor
+ * swap' feature, used on big-endian systems, swaps descriptor data
+ * within blocks of 8 bytes. So two consecutive values of the
+ * phy_src_addr[] array are actually swapped in big-endian, which
+ * explains the different mv_phy_src_idx() implementation.
+ */
+#if defined(__LITTLE_ENDIAN)
+struct mv_xor_desc {
+	u32 status;		/* descriptor execution status */
+	u32 crc32_result;	/* result of CRC-32 calculation */
+	u32 desc_command;	/* type of operation to be carried out */
+	u32 phy_next_desc;	/* next descriptor address pointer */
+	u32 byte_count;		/* size of src/dst blocks in bytes */
+	u32 phy_dest_addr;	/* destination block address */
+	u32 phy_src_addr[8];	/* source block addresses */
+	u32 reserved0;
+	u32 reserved1;
+};
+#define mv_phy_src_idx(src_idx) (src_idx)
+#else
+struct mv_xor_desc {
+	u32 crc32_result;	/* result of CRC-32 calculation */
+	u32 status;		/* descriptor execution status */
+	u32 phy_next_desc;	/* next descriptor address pointer */
+	u32 desc_command;	/* type of operation to be carried out */
+	u32 phy_dest_addr;	/* destination block address */
+	u32 byte_count;		/* size of src/dst blocks in bytes */
+	u32 phy_src_addr[8];	/* source block addresses */
+	u32 reserved1;
+	u32 reserved0;
+};
+#define mv_phy_src_idx(src_idx) (src_idx ^ 1)
+#endif
+
+#define to_mv_sw_desc(addr_hw_desc)		\
+	container_of(addr_hw_desc, struct mv_xor_desc_slot, hw_desc)
+
+#define mv_hw_desc_slot_idx(hw_desc, idx)	\
+	((void *)(((unsigned long)hw_desc) + ((idx) << 5)))
+
+#endif
diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c
new file mode 100644
index 0000000..8dc0aa4
--- /dev/null
+++ b/drivers/dma/mv_xor_v2.c
@@ -0,0 +1,933 @@
+/*
+ * Copyright (C) 2015-2016 Marvell International Ltd.
+ *
+ * This program is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, either version 2 of the
+ * License, or any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+
+#include "dmaengine.h"
+
+/* DMA Engine Registers */
+#define MV_XOR_V2_DMA_DESQ_BALR_OFF			0x000
+#define MV_XOR_V2_DMA_DESQ_BAHR_OFF			0x004
+#define MV_XOR_V2_DMA_DESQ_SIZE_OFF			0x008
+#define MV_XOR_V2_DMA_DESQ_DONE_OFF			0x00C
+#define   MV_XOR_V2_DMA_DESQ_DONE_PENDING_MASK		0x7FFF
+#define   MV_XOR_V2_DMA_DESQ_DONE_PENDING_SHIFT		0
+#define   MV_XOR_V2_DMA_DESQ_DONE_READ_PTR_MASK		0x1FFF
+#define   MV_XOR_V2_DMA_DESQ_DONE_READ_PTR_SHIFT	16
+#define MV_XOR_V2_DMA_DESQ_ARATTR_OFF			0x010
+#define   MV_XOR_V2_DMA_DESQ_ATTR_CACHE_MASK		0x3F3F
+#define   MV_XOR_V2_DMA_DESQ_ATTR_OUTER_SHAREABLE	0x202
+#define   MV_XOR_V2_DMA_DESQ_ATTR_CACHEABLE		0x3C3C
+#define MV_XOR_V2_DMA_IMSG_CDAT_OFF			0x014
+#define MV_XOR_V2_DMA_IMSG_THRD_OFF			0x018
+#define   MV_XOR_V2_DMA_IMSG_THRD_MASK			0x7FFF
+#define   MV_XOR_V2_DMA_IMSG_THRD_SHIFT			0x0
+#define   MV_XOR_V2_DMA_IMSG_TIMER_EN			BIT(18)
+#define MV_XOR_V2_DMA_DESQ_AWATTR_OFF			0x01C
+  /* Same flags as MV_XOR_V2_DMA_DESQ_ARATTR_OFF */
+#define MV_XOR_V2_DMA_DESQ_ALLOC_OFF			0x04C
+#define   MV_XOR_V2_DMA_DESQ_ALLOC_WRPTR_MASK		0xFFFF
+#define   MV_XOR_V2_DMA_DESQ_ALLOC_WRPTR_SHIFT		16
+#define MV_XOR_V2_DMA_IMSG_BALR_OFF			0x050
+#define MV_XOR_V2_DMA_IMSG_BAHR_OFF			0x054
+#define MV_XOR_V2_DMA_DESQ_CTRL_OFF			0x100
+#define	  MV_XOR_V2_DMA_DESQ_CTRL_32B			1
+#define   MV_XOR_V2_DMA_DESQ_CTRL_128B			7
+#define MV_XOR_V2_DMA_DESQ_STOP_OFF			0x800
+#define MV_XOR_V2_DMA_DESQ_DEALLOC_OFF			0x804
+#define MV_XOR_V2_DMA_DESQ_ADD_OFF			0x808
+#define MV_XOR_V2_DMA_IMSG_TMOT				0x810
+#define   MV_XOR_V2_DMA_IMSG_TIMER_THRD_MASK		0x1FFF
+#define   MV_XOR_V2_DMA_IMSG_TIMER_THRD_SHIFT		0
+
+/* XOR Global registers */
+#define MV_XOR_V2_GLOB_BW_CTRL				0x4
+#define   MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_RD_SHIFT	0
+#define   MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_RD_VAL	64
+#define   MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_WR_SHIFT	8
+#define   MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_WR_VAL	8
+#define   MV_XOR_V2_GLOB_BW_CTRL_RD_BURST_LEN_SHIFT	12
+#define   MV_XOR_V2_GLOB_BW_CTRL_RD_BURST_LEN_VAL	4
+#define   MV_XOR_V2_GLOB_BW_CTRL_WR_BURST_LEN_SHIFT	16
+#define	  MV_XOR_V2_GLOB_BW_CTRL_WR_BURST_LEN_VAL	4
+#define MV_XOR_V2_GLOB_PAUSE				0x014
+#define   MV_XOR_V2_GLOB_PAUSE_AXI_TIME_DIS_VAL		0x8
+#define MV_XOR_V2_GLOB_SYS_INT_CAUSE			0x200
+#define MV_XOR_V2_GLOB_SYS_INT_MASK			0x204
+#define MV_XOR_V2_GLOB_MEM_INT_CAUSE			0x220
+#define MV_XOR_V2_GLOB_MEM_INT_MASK			0x224
+
+#define MV_XOR_V2_MIN_DESC_SIZE				32
+#define MV_XOR_V2_EXT_DESC_SIZE				128
+
+#define MV_XOR_V2_DESC_RESERVED_SIZE			12
+#define MV_XOR_V2_DESC_BUFF_D_ADDR_SIZE			12
+
+#define MV_XOR_V2_CMD_LINE_NUM_MAX_D_BUF		8
+
+/*
+ * Descriptors queue size. With 32-byte descriptors, up to 2^14
+ * descriptors are allowed; with 128-byte descriptors, up to 2^12
+ * descriptors are allowed. This driver uses 128-byte descriptors,
+ * but experimentation has shown that a set of 1024 descriptors is
+ * sufficient to reach a good level of performance.
+ */
+#define MV_XOR_V2_DESC_NUM				1024
+
+/*
+ * Threshold values for descriptors and timeout, determined by
+ * experimentation as giving a good level of performance.
+ */
+#define MV_XOR_V2_DONE_IMSG_THRD  0x14
+#define MV_XOR_V2_TIMER_THRD      0xB0
+
+/**
+ * struct mv_xor_v2_descriptor - DMA HW descriptor
+ * @desc_id: used by S/W and is not affected by H/W.
+ * @flags: error and status flags
+ * @crc32_result: CRC32 calculation result
+ * @desc_ctrl: operation mode and control flags
+ * @buff_size: amount of bytes to be processed
+ * @fill_pattern_src_addr: Fill-Pattern or Source-Address and
+ * AW-Attributes
+ * @data_buff_addr: Source (and might be RAID6 destination)
+ * addresses of data buffers in RAID5 and RAID6
+ * @reserved: reserved
+ */
+struct mv_xor_v2_descriptor {
+	u16 desc_id;
+	u16 flags;
+	u32 crc32_result;
+	u32 desc_ctrl;
+
+	/* Definitions for desc_ctrl */
+#define DESC_NUM_ACTIVE_D_BUF_SHIFT	22
+#define DESC_OP_MODE_SHIFT		28
+#define DESC_OP_MODE_NOP		0	/* Idle operation */
+#define DESC_OP_MODE_MEMCPY		1	/* Pure-DMA operation */
+#define DESC_OP_MODE_MEMSET		2	/* Mem-Fill operation */
+#define DESC_OP_MODE_MEMINIT		3	/* Mem-Init operation */
+#define DESC_OP_MODE_MEM_COMPARE	4	/* Mem-Compare operation */
+#define DESC_OP_MODE_CRC32		5	/* CRC32 calculation */
+#define DESC_OP_MODE_XOR		6	/* RAID5 (XOR) operation */
+#define DESC_OP_MODE_RAID6		7	/* RAID6 P&Q-generation */
+#define DESC_OP_MODE_RAID6_REC		8	/* RAID6 Recovery */
+#define DESC_Q_BUFFER_ENABLE		BIT(16)
+#define DESC_P_BUFFER_ENABLE		BIT(17)
+#define DESC_IOD			BIT(27)
+
+	u32 buff_size;
+	u32 fill_pattern_src_addr[4];
+	u32 data_buff_addr[MV_XOR_V2_DESC_BUFF_D_ADDR_SIZE];
+	u32 reserved[MV_XOR_V2_DESC_RESERVED_SIZE];
+};
+
+/**
+ * struct mv_xor_v2_device - implements a xor device
+ * @lock: lock for the engine
+ * @dma_base: memory mapped DMA register base
+ * @glob_base: memory mapped global register base
+ * @clk: reference to the functional clock of the device
+ * @reg_clk: reference to the optional register clock of the device
+ * @irq_tasklet: tasklet handling descriptor completion callbacks
+ * @free_sw_desc: linked list of free SW descriptors
+ * @dmadev: dma device
+ * @dmachan: dma channel
+ * @hw_desq: HW descriptors queue
+ * @hw_desq_virt: virtual address of DESCQ
+ * @sw_desq: SW descriptors queue
+ * @desc_size: HW descriptor size
+ * @npendings: number of pending descriptors (for which tx_submit has
+ * been called, but not yet issue_pending)
+ * @hw_queue_idx: next free slot in the HW descriptors queue
+ * @msi_desc: MSI descriptor used to request the completion interrupt
+ */
+struct mv_xor_v2_device {
+	spinlock_t lock;
+	void __iomem *dma_base;
+	void __iomem *glob_base;
+	struct clk *clk;
+	struct clk *reg_clk;
+	struct tasklet_struct irq_tasklet;
+	struct list_head free_sw_desc;
+	struct dma_device dmadev;
+	struct dma_chan	dmachan;
+	dma_addr_t hw_desq;
+	struct mv_xor_v2_descriptor *hw_desq_virt;
+	struct mv_xor_v2_sw_desc *sw_desq;
+	int desc_size;
+	unsigned int npendings;
+	unsigned int hw_queue_idx;
+	struct msi_desc *msi_desc;
+};
+
+/**
+ * struct mv_xor_v2_sw_desc - implements a xor SW descriptor
+ * @idx: descriptor index
+ * @async_tx: support for the async_tx api
+ * @hw_desc: associated HW descriptor
+ * @free_list: node of the free SW descriptors list
+ */
+struct mv_xor_v2_sw_desc {
+	int idx;
+	struct dma_async_tx_descriptor async_tx;
+	struct mv_xor_v2_descriptor hw_desc;
+	struct list_head free_list;
+};
+
+/*
+ * Fill one source buffer address into a HW descriptor
+ */
+static void mv_xor_v2_set_data_buffers(struct mv_xor_v2_device *xor_dev,
+					struct mv_xor_v2_descriptor *desc,
+					dma_addr_t src, int index)
+{
+	int arr_index = ((index >> 1) * 3);
+
+	/*
+	 * Fill the buffer's addresses to the descriptor.
+	 *
+	 * The format of the buffers address for 2 sequential buffers
+	 * X and X + 1:
+	 *
+	 *  First word:  Buffer-DX-Address-Low[31:0]
+	 *  Second word: Buffer-DX+1-Address-Low[31:0]
+	 *  Third word:  DX+1-Buffer-Address-High[47:32] [31:16]
+	 *		 DX-Buffer-Address-High[47:32] [15:0]
+	 */
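+	/*
+	 * For example, sources 2 and 3 share arr_index 3: their low 32 bits
+	 * land in data_buff_addr[3] and data_buff_addr[4], and their upper
+	 * 16 bits fill the low and high halves of data_buff_addr[5].
+	 */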
+	if ((index & 0x1) == 0) {
+		desc->data_buff_addr[arr_index] = lower_32_bits(src);
+
+		desc->data_buff_addr[arr_index + 2] &= ~0xFFFF;
+		desc->data_buff_addr[arr_index + 2] |=
+			upper_32_bits(src) & 0xFFFF;
+	} else {
+		desc->data_buff_addr[arr_index + 1] =
+			lower_32_bits(src);
+
+		desc->data_buff_addr[arr_index + 2] &= ~0xFFFF0000;
+		desc->data_buff_addr[arr_index + 2] |=
+			(upper_32_bits(src) & 0xFFFF) << 16;
+	}
+}
+
+/*
+ * notify the engine of new descriptors, and update the available index.
+ */
+static void mv_xor_v2_add_desc_to_desq(struct mv_xor_v2_device *xor_dev,
+				       int num_of_desc)
+{
+	/* write the number of new descriptors in the DESQ. */
+	writel(num_of_desc, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_ADD_OFF);
+}
+
+/*
+ * free HW descriptors
+ */
+static void mv_xor_v2_free_desc_from_desq(struct mv_xor_v2_device *xor_dev,
+					  int num_of_desc)
+{
+	/* write the number of descriptors to free from the DESQ. */
+	writel(num_of_desc, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_DEALLOC_OFF);
+}
+
+/*
+ * Set descriptor size
+ * Return the HW descriptor size in bytes
+ */
+static int mv_xor_v2_set_desc_size(struct mv_xor_v2_device *xor_dev)
+{
+	writel(MV_XOR_V2_DMA_DESQ_CTRL_128B,
+	       xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_CTRL_OFF);
+
+	return MV_XOR_V2_EXT_DESC_SIZE;
+}
+
+/*
+ * Set the IMSG threshold
+ */
+static inline
+void mv_xor_v2_enable_imsg_thrd(struct mv_xor_v2_device *xor_dev)
+{
+	u32 reg;
+
+	/* Configure threshold of number of descriptors, and enable timer */
+	reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_THRD_OFF);
+	reg &= (~MV_XOR_V2_DMA_IMSG_THRD_MASK << MV_XOR_V2_DMA_IMSG_THRD_SHIFT);
+	reg |= (MV_XOR_V2_DONE_IMSG_THRD << MV_XOR_V2_DMA_IMSG_THRD_SHIFT);
+	reg |= MV_XOR_V2_DMA_IMSG_TIMER_EN;
+	writel(reg, xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_THRD_OFF);
+
+	/* Configure Timer Threshold */
+	reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_TMOT);
+	reg &= (~MV_XOR_V2_DMA_IMSG_TIMER_THRD_MASK <<
+		MV_XOR_V2_DMA_IMSG_TIMER_THRD_SHIFT);
+	reg |= (MV_XOR_V2_TIMER_THRD << MV_XOR_V2_DMA_IMSG_TIMER_THRD_SHIFT);
+	writel(reg, xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_TMOT);
+}
+
+static irqreturn_t mv_xor_v2_interrupt_handler(int irq, void *data)
+{
+	struct mv_xor_v2_device *xor_dev = data;
+	unsigned int ndescs;
+	u32 reg;
+
+	reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_DONE_OFF);
+
+	ndescs = ((reg >> MV_XOR_V2_DMA_DESQ_DONE_PENDING_SHIFT) &
+		  MV_XOR_V2_DMA_DESQ_DONE_PENDING_MASK);
+
+	/* No descriptors to process */
+	if (!ndescs)
+		return IRQ_NONE;
+
+	/* schedule a tasklet to handle descriptors callbacks */
+	tasklet_schedule(&xor_dev->irq_tasklet);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * submit a descriptor to the DMA engine
+ */
+static dma_cookie_t
+mv_xor_v2_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	void *dest_hw_desc;
+	dma_cookie_t cookie;
+	struct mv_xor_v2_sw_desc *sw_desc =
+		container_of(tx, struct mv_xor_v2_sw_desc, async_tx);
+	struct mv_xor_v2_device *xor_dev =
+		container_of(tx->chan, struct mv_xor_v2_device, dmachan);
+
+	dev_dbg(xor_dev->dmadev.dev,
+		"%s sw_desc %p: async_tx %p\n",
+		__func__, sw_desc, &sw_desc->async_tx);
+
+	/* assign cookie */
+	spin_lock_bh(&xor_dev->lock);
+	cookie = dma_cookie_assign(tx);
+
+	/* copy the HW descriptor from the SW descriptor to the DESQ */
+	dest_hw_desc = xor_dev->hw_desq_virt + xor_dev->hw_queue_idx;
+
+	memcpy(dest_hw_desc, &sw_desc->hw_desc, xor_dev->desc_size);
+
+	xor_dev->npendings++;
+	xor_dev->hw_queue_idx++;
+	if (xor_dev->hw_queue_idx >= MV_XOR_V2_DESC_NUM)
+		xor_dev->hw_queue_idx = 0;
+
+	spin_unlock_bh(&xor_dev->lock);
+
+	return cookie;
+}
+
+/*
+ * Prepare a SW descriptor
+ */
+static struct mv_xor_v2_sw_desc	*
+mv_xor_v2_prep_sw_desc(struct mv_xor_v2_device *xor_dev)
+{
+	struct mv_xor_v2_sw_desc *sw_desc;
+	bool found = false;
+
+	/* Lock the channel */
+	spin_lock_bh(&xor_dev->lock);
+
+	if (list_empty(&xor_dev->free_sw_desc)) {
+		spin_unlock_bh(&xor_dev->lock);
+		/* schedule tasklet to free some descriptors */
+		tasklet_schedule(&xor_dev->irq_tasklet);
+		return NULL;
+	}
+
+	list_for_each_entry(sw_desc, &xor_dev->free_sw_desc, free_list) {
+		if (async_tx_test_ack(&sw_desc->async_tx)) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		spin_unlock_bh(&xor_dev->lock);
+		return NULL;
+	}
+
+	list_del(&sw_desc->free_list);
+
+	/* Release the channel */
+	spin_unlock_bh(&xor_dev->lock);
+
+	return sw_desc;
+}
+
+/*
+ * Prepare a HW descriptor for a memcpy operation
+ */
+static struct dma_async_tx_descriptor *
+mv_xor_v2_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest,
+			  dma_addr_t src, size_t len, unsigned long flags)
+{
+	struct mv_xor_v2_sw_desc *sw_desc;
+	struct mv_xor_v2_descriptor *hw_descriptor;
+	struct mv_xor_v2_device	*xor_dev;
+
+	xor_dev = container_of(chan, struct mv_xor_v2_device, dmachan);
+
+	dev_dbg(xor_dev->dmadev.dev,
+		"%s len: %zu src %pad dest %pad flags: %ld\n",
+		__func__, len, &src, &dest, flags);
+
+	sw_desc = mv_xor_v2_prep_sw_desc(xor_dev);
+	if (!sw_desc)
+		return NULL;
+
+	sw_desc->async_tx.flags = flags;
+
+	/* set the HW descriptor */
+	hw_descriptor = &sw_desc->hw_desc;
+
+	/* save the SW descriptor ID to restore when operation is done */
+	hw_descriptor->desc_id = sw_desc->idx;
+
+	/* Set the MEMCPY control word */
+	hw_descriptor->desc_ctrl =
+		DESC_OP_MODE_MEMCPY << DESC_OP_MODE_SHIFT;
+
+	if (flags & DMA_PREP_INTERRUPT)
+		hw_descriptor->desc_ctrl |= DESC_IOD;
+
+	/* Set source address */
+	hw_descriptor->fill_pattern_src_addr[0] = lower_32_bits(src);
+	hw_descriptor->fill_pattern_src_addr[1] =
+		upper_32_bits(src) & 0xFFFF;
+
+	/* Set Destination address */
+	hw_descriptor->fill_pattern_src_addr[2] = lower_32_bits(dest);
+	hw_descriptor->fill_pattern_src_addr[3] =
+		upper_32_bits(dest) & 0xFFFF;
+
+	/* Set buffers size */
+	hw_descriptor->buff_size = len;
+
+	/* return the async tx descriptor */
+	return &sw_desc->async_tx;
+}
+
+/*
+ * Prepare a HW descriptor for a XOR operation
+ */
+static struct dma_async_tx_descriptor *
+mv_xor_v2_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+		       unsigned int src_cnt, size_t len, unsigned long flags)
+{
+	struct mv_xor_v2_sw_desc *sw_desc;
+	struct mv_xor_v2_descriptor *hw_descriptor;
+	struct mv_xor_v2_device	*xor_dev =
+		container_of(chan, struct mv_xor_v2_device, dmachan);
+	int i;
+
+	if (src_cnt > MV_XOR_V2_CMD_LINE_NUM_MAX_D_BUF || src_cnt < 1)
+		return NULL;
+
+	dev_dbg(xor_dev->dmadev.dev,
+		"%s src_cnt: %d len: %zu dest %pad flags: %ld\n",
+		__func__, src_cnt, len, &dest, flags);
+
+	sw_desc = mv_xor_v2_prep_sw_desc(xor_dev);
+	if (!sw_desc)
+		return NULL;
+
+	sw_desc->async_tx.flags = flags;
+
+	/* set the HW descriptor */
+	hw_descriptor = &sw_desc->hw_desc;
+
+	/* save the SW descriptor ID to restore when operation is done */
+	hw_descriptor->desc_id = sw_desc->idx;
+
+	/* Set the XOR control word */
+	hw_descriptor->desc_ctrl =
+		DESC_OP_MODE_XOR << DESC_OP_MODE_SHIFT;
+	hw_descriptor->desc_ctrl |= DESC_P_BUFFER_ENABLE;
+
+	if (flags & DMA_PREP_INTERRUPT)
+		hw_descriptor->desc_ctrl |= DESC_IOD;
+
+	/* Set the data buffers */
+	for (i = 0; i < src_cnt; i++)
+		mv_xor_v2_set_data_buffers(xor_dev, hw_descriptor, src[i], i);
+
+	hw_descriptor->desc_ctrl |=
+		src_cnt << DESC_NUM_ACTIVE_D_BUF_SHIFT;
+
+	/* Set Destination address */
+	hw_descriptor->fill_pattern_src_addr[2] = lower_32_bits(dest);
+	hw_descriptor->fill_pattern_src_addr[3] =
+		upper_32_bits(dest) & 0xFFFF;
+
+	/* Set buffers size */
+	hw_descriptor->buff_size = len;
+
+	/* return the async tx descriptor */
+	return &sw_desc->async_tx;
+}
+
+/*
+ * Prepare a HW descriptor for interrupt operation.
+ */
+static struct dma_async_tx_descriptor *
+mv_xor_v2_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags)
+{
+	struct mv_xor_v2_sw_desc *sw_desc;
+	struct mv_xor_v2_descriptor *hw_descriptor;
+	struct mv_xor_v2_device	*xor_dev =
+		container_of(chan, struct mv_xor_v2_device, dmachan);
+
+	sw_desc = mv_xor_v2_prep_sw_desc(xor_dev);
+	if (!sw_desc)
+		return NULL;
+
+	/* set the HW descriptor */
+	hw_descriptor = &sw_desc->hw_desc;
+
+	/* save the SW descriptor ID to restore when operation is done */
+	hw_descriptor->desc_id = sw_desc->idx;
+
+	/* Set the INTERRUPT control word */
+	hw_descriptor->desc_ctrl =
+		DESC_OP_MODE_NOP << DESC_OP_MODE_SHIFT;
+	hw_descriptor->desc_ctrl |= DESC_IOD;
+
+	/* return the async tx descriptor */
+	return &sw_desc->async_tx;
+}
+
+/*
+ * push pending transactions to hardware
+ */
+static void mv_xor_v2_issue_pending(struct dma_chan *chan)
+{
+	struct mv_xor_v2_device *xor_dev =
+		container_of(chan, struct mv_xor_v2_device, dmachan);
+
+	spin_lock_bh(&xor_dev->lock);
+
+	/*
+	 * update the engine with the number of descriptors to
+	 * process
+	 */
+	mv_xor_v2_add_desc_to_desq(xor_dev, xor_dev->npendings);
+	xor_dev->npendings = 0;
+
+	spin_unlock_bh(&xor_dev->lock);
+}
+
+static inline
+int mv_xor_v2_get_pending_params(struct mv_xor_v2_device *xor_dev,
+				 int *pending_ptr)
+{
+	u32 reg;
+
+	reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_DONE_OFF);
+
+	/* get the next pending descriptor index */
+	*pending_ptr = ((reg >> MV_XOR_V2_DMA_DESQ_DONE_READ_PTR_SHIFT) &
+			MV_XOR_V2_DMA_DESQ_DONE_READ_PTR_MASK);
+
+	/* get the number of descriptors pending handle */
+	return ((reg >> MV_XOR_V2_DMA_DESQ_DONE_PENDING_SHIFT) &
+		MV_XOR_V2_DMA_DESQ_DONE_PENDING_MASK);
+}
+
+/*
+ * handle the descriptors after HW process
+ */
+static void mv_xor_v2_tasklet(unsigned long data)
+{
+	struct mv_xor_v2_device *xor_dev = (struct mv_xor_v2_device *) data;
+	int pending_ptr, num_of_pending, i;
+	struct mv_xor_v2_sw_desc *next_pending_sw_desc = NULL;
+
+	dev_dbg(xor_dev->dmadev.dev, "%s %d\n", __func__, __LINE__);
+
+	/* get the pending descriptors parameters */
+	num_of_pending = mv_xor_v2_get_pending_params(xor_dev, &pending_ptr);
+
+	/* loop over free descriptors */
+	for (i = 0; i < num_of_pending; i++) {
+		struct mv_xor_v2_descriptor *next_pending_hw_desc =
+			xor_dev->hw_desq_virt + pending_ptr;
+
+		/* get the SW descriptor related to the HW descriptor */
+		next_pending_sw_desc =
+			&xor_dev->sw_desq[next_pending_hw_desc->desc_id];
+
+		/* call the callback */
+		if (next_pending_sw_desc->async_tx.cookie > 0) {
+			/*
+			 * update the channel's completed cookie - no
+			 * lock is required; the IMSG threshold
+			 * provides the serialization
+			 */
+			dma_cookie_complete(&next_pending_sw_desc->async_tx);
+
+			dma_descriptor_unmap(&next_pending_sw_desc->async_tx);
+			dmaengine_desc_get_callback_invoke(
+					&next_pending_sw_desc->async_tx, NULL);
+		}
+
+		dma_run_dependencies(&next_pending_sw_desc->async_tx);
+
+		/* Lock the channel */
+		spin_lock_bh(&xor_dev->lock);
+
+		/* add the SW descriptor to the free descriptors list */
+		list_add(&next_pending_sw_desc->free_list,
+			 &xor_dev->free_sw_desc);
+
+		/* Release the channel */
+		spin_unlock_bh(&xor_dev->lock);
+
+		/* increment the next descriptor */
+		pending_ptr++;
+		if (pending_ptr >= MV_XOR_V2_DESC_NUM)
+			pending_ptr = 0;
+	}
+
+	if (num_of_pending != 0) {
+		/* free the descriptors */
+		mv_xor_v2_free_desc_from_desq(xor_dev, num_of_pending);
+	}
+}
+
+/*
+ *	Set DMA Interrupt-message (IMSG) parameters
+ */
+static void mv_xor_v2_set_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
+{
+	struct mv_xor_v2_device *xor_dev = dev_get_drvdata(desc->dev);
+
+	writel(msg->address_lo,
+	       xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_BALR_OFF);
+	writel(msg->address_hi & 0xFFFF,
+	       xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_BAHR_OFF);
+	writel(msg->data,
+	       xor_dev->dma_base + MV_XOR_V2_DMA_IMSG_CDAT_OFF);
+}
+
+static int mv_xor_v2_descq_init(struct mv_xor_v2_device *xor_dev)
+{
+	u32 reg;
+
+	/* write the DESQ size to the DMA engine */
+	writel(MV_XOR_V2_DESC_NUM,
+	       xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_SIZE_OFF);
+
+	/* write the DESQ address to the DMA engine */
+	writel(lower_32_bits(xor_dev->hw_desq),
+	       xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_BALR_OFF);
+	writel(upper_32_bits(xor_dev->hw_desq),
+	       xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_BAHR_OFF);
+
+	/*
+	 * This is a temporary solution, until we activate the
+	 * SMMU. Set the attributes for reading & writing data buffers
+	 * & descriptors to:
+	 *
+	 *  - OuterShareable - Snoops will be performed on CPU caches
+	 *  - Enable cacheable - Bufferable, Modifiable, Other Allocate
+	 *    and Allocate
+	 */
+	reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_ARATTR_OFF);
+	reg &= ~MV_XOR_V2_DMA_DESQ_ATTR_CACHE_MASK;
+	reg |= MV_XOR_V2_DMA_DESQ_ATTR_OUTER_SHAREABLE |
+		MV_XOR_V2_DMA_DESQ_ATTR_CACHEABLE;
+	writel(reg, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_ARATTR_OFF);
+
+	reg = readl(xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_AWATTR_OFF);
+	reg &= ~MV_XOR_V2_DMA_DESQ_ATTR_CACHE_MASK;
+	reg |= MV_XOR_V2_DMA_DESQ_ATTR_OUTER_SHAREABLE |
+		MV_XOR_V2_DMA_DESQ_ATTR_CACHEABLE;
+	writel(reg, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_AWATTR_OFF);
+
+	/* BW CTRL - set values to optimize the XOR performance:
+	 *
+	 *  - Set WrBurstLen & RdBurstLen - the unit will issue
+	 *    a maximum of 256B write/read transactions.
+	 *  - Limit the number of outstanding write & read data
+	 *    (OBB/IBB) requests to the maximal value.
+	 */
+	reg = ((MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_RD_VAL <<
+		MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_RD_SHIFT) |
+	       (MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_WR_VAL  <<
+		MV_XOR_V2_GLOB_BW_CTRL_NUM_OSTD_WR_SHIFT) |
+	       (MV_XOR_V2_GLOB_BW_CTRL_RD_BURST_LEN_VAL <<
+		MV_XOR_V2_GLOB_BW_CTRL_RD_BURST_LEN_SHIFT) |
+	       (MV_XOR_V2_GLOB_BW_CTRL_WR_BURST_LEN_VAL <<
+		MV_XOR_V2_GLOB_BW_CTRL_WR_BURST_LEN_SHIFT));
+	writel(reg, xor_dev->glob_base + MV_XOR_V2_GLOB_BW_CTRL);
+
+	/* Disable the AXI timer feature */
+	reg = readl(xor_dev->glob_base + MV_XOR_V2_GLOB_PAUSE);
+	reg |= MV_XOR_V2_GLOB_PAUSE_AXI_TIME_DIS_VAL;
+	writel(reg, xor_dev->glob_base + MV_XOR_V2_GLOB_PAUSE);
+
+	/* enable the DMA engine */
+	writel(0, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_STOP_OFF);
+
+	return 0;
+}
+
+static int mv_xor_v2_suspend(struct platform_device *dev, pm_message_t state)
+{
+	struct mv_xor_v2_device *xor_dev = platform_get_drvdata(dev);
+
+	/* Set the DESQ stop bit to disable the XOR unit. */
+	writel(0x1, xor_dev->dma_base + MV_XOR_V2_DMA_DESQ_STOP_OFF);
+
+	return 0;
+}
+
+static int mv_xor_v2_resume(struct platform_device *dev)
+{
+	struct mv_xor_v2_device *xor_dev = platform_get_drvdata(dev);
+
+	mv_xor_v2_set_desc_size(xor_dev);
+	mv_xor_v2_enable_imsg_thrd(xor_dev);
+	mv_xor_v2_descq_init(xor_dev);
+
+	return 0;
+}
+
+static int mv_xor_v2_probe(struct platform_device *pdev)
+{
+	struct mv_xor_v2_device *xor_dev;
+	struct resource *res;
+	int i, ret = 0;
+	struct dma_device *dma_dev;
+	struct mv_xor_v2_sw_desc *sw_desc;
+	struct msi_desc *msi_desc;
+
+	BUILD_BUG_ON(sizeof(struct mv_xor_v2_descriptor) !=
+		     MV_XOR_V2_EXT_DESC_SIZE);
+
+	xor_dev = devm_kzalloc(&pdev->dev, sizeof(*xor_dev), GFP_KERNEL);
+	if (!xor_dev)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	xor_dev->dma_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(xor_dev->dma_base))
+		return PTR_ERR(xor_dev->dma_base);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	xor_dev->glob_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(xor_dev->glob_base))
+		return PTR_ERR(xor_dev->glob_base);
+
+	platform_set_drvdata(pdev, xor_dev);
+
+	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40));
+	if (ret)
+		return ret;
+
+	xor_dev->reg_clk = devm_clk_get(&pdev->dev, "reg");
+	if (PTR_ERR(xor_dev->reg_clk) != -ENOENT) {
+		if (!IS_ERR(xor_dev->reg_clk)) {
+			ret = clk_prepare_enable(xor_dev->reg_clk);
+			if (ret)
+				return ret;
+		} else {
+			return PTR_ERR(xor_dev->reg_clk);
+		}
+	}
+
+	xor_dev->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(xor_dev->clk) && PTR_ERR(xor_dev->clk) == -EPROBE_DEFER) {
+		ret = -EPROBE_DEFER;
+		goto disable_reg_clk;
+	}
+	if (!IS_ERR(xor_dev->clk)) {
+		ret = clk_prepare_enable(xor_dev->clk);
+		if (ret)
+			goto disable_reg_clk;
+	}
+
+	ret = platform_msi_domain_alloc_irqs(&pdev->dev, 1,
+					     mv_xor_v2_set_msi_msg);
+	if (ret)
+		goto disable_clk;
+
+	msi_desc = first_msi_entry(&pdev->dev);
+	if (!msi_desc) {
+		ret = -ENODEV;
+		goto free_msi_irqs;
+	}
+	xor_dev->msi_desc = msi_desc;
+
+	ret = devm_request_irq(&pdev->dev, msi_desc->irq,
+			       mv_xor_v2_interrupt_handler, 0,
+			       dev_name(&pdev->dev), xor_dev);
+	if (ret)
+		goto free_msi_irqs;
+
+	tasklet_init(&xor_dev->irq_tasklet, mv_xor_v2_tasklet,
+		     (unsigned long) xor_dev);
+
+	xor_dev->desc_size = mv_xor_v2_set_desc_size(xor_dev);
+
+	dma_cookie_init(&xor_dev->dmachan);
+
+	/*
+	 * allocate coherent memory for hardware descriptors
+	 * note: writecombine gives slightly better performance, but
+	 * requires that we explicitly flush the writes
+	 */
+	xor_dev->hw_desq_virt =
+		dma_alloc_coherent(&pdev->dev,
+				   xor_dev->desc_size * MV_XOR_V2_DESC_NUM,
+				   &xor_dev->hw_desq, GFP_KERNEL);
+	if (!xor_dev->hw_desq_virt) {
+		ret = -ENOMEM;
+		goto free_msi_irqs;
+	}
+
+	/* alloc memory for the SW descriptors */
+	xor_dev->sw_desq = devm_kcalloc(&pdev->dev,
+					MV_XOR_V2_DESC_NUM, sizeof(*sw_desc),
+					GFP_KERNEL);
+	if (!xor_dev->sw_desq) {
+		ret = -ENOMEM;
+		goto free_hw_desq;
+	}
+
+	spin_lock_init(&xor_dev->lock);
+
+	/* init the free SW descriptors list */
+	INIT_LIST_HEAD(&xor_dev->free_sw_desc);
+
+	/* add all SW descriptors to the free list */
+	for (i = 0; i < MV_XOR_V2_DESC_NUM; i++) {
+		struct mv_xor_v2_sw_desc *sw_desc =
+			xor_dev->sw_desq + i;
+		sw_desc->idx = i;
+		dma_async_tx_descriptor_init(&sw_desc->async_tx,
+					     &xor_dev->dmachan);
+		sw_desc->async_tx.tx_submit = mv_xor_v2_tx_submit;
+		async_tx_ack(&sw_desc->async_tx);
+
+		list_add(&sw_desc->free_list,
+			 &xor_dev->free_sw_desc);
+	}
+
+	dma_dev = &xor_dev->dmadev;
+
+	/* set DMA capabilities */
+	dma_cap_zero(dma_dev->cap_mask);
+	dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+	dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+	dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask);
+
+	/* init dma link list */
+	INIT_LIST_HEAD(&dma_dev->channels);
+
+	/* set base routines */
+	dma_dev->device_tx_status = dma_cookie_status;
+	dma_dev->device_issue_pending = mv_xor_v2_issue_pending;
+	dma_dev->dev = &pdev->dev;
+
+	dma_dev->device_prep_dma_memcpy = mv_xor_v2_prep_dma_memcpy;
+	dma_dev->device_prep_dma_interrupt = mv_xor_v2_prep_dma_interrupt;
+	dma_dev->max_xor = 8;
+	dma_dev->device_prep_dma_xor = mv_xor_v2_prep_dma_xor;
+
+	xor_dev->dmachan.device = dma_dev;
+
+	list_add_tail(&xor_dev->dmachan.device_node,
+		      &dma_dev->channels);
+
+	mv_xor_v2_enable_imsg_thrd(xor_dev);
+
+	mv_xor_v2_descq_init(xor_dev);
+
+	ret = dma_async_device_register(dma_dev);
+	if (ret)
+		goto free_hw_desq;
+
+	dev_notice(&pdev->dev, "Marvell Version 2 XOR driver\n");
+
+	return 0;
+
+free_hw_desq:
+	dma_free_coherent(&pdev->dev,
+			  xor_dev->desc_size * MV_XOR_V2_DESC_NUM,
+			  xor_dev->hw_desq_virt, xor_dev->hw_desq);
+free_msi_irqs:
+	platform_msi_domain_free_irqs(&pdev->dev);
+disable_clk:
+	clk_disable_unprepare(xor_dev->clk);
+disable_reg_clk:
+	clk_disable_unprepare(xor_dev->reg_clk);
+	return ret;
+}
+
+static int mv_xor_v2_remove(struct platform_device *pdev)
+{
+	struct mv_xor_v2_device *xor_dev = platform_get_drvdata(pdev);
+
+	dma_async_device_unregister(&xor_dev->dmadev);
+
+	dma_free_coherent(&pdev->dev,
+			  xor_dev->desc_size * MV_XOR_V2_DESC_NUM,
+			  xor_dev->hw_desq_virt, xor_dev->hw_desq);
+
+	devm_free_irq(&pdev->dev, xor_dev->msi_desc->irq, xor_dev);
+
+	platform_msi_domain_free_irqs(&pdev->dev);
+
+	tasklet_kill(&xor_dev->irq_tasklet);
+
+	clk_disable_unprepare(xor_dev->clk);
+
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id mv_xor_v2_dt_ids[] = {
+	{ .compatible = "marvell,xor-v2", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, mv_xor_v2_dt_ids);
+#endif
+
+static struct platform_driver mv_xor_v2_driver = {
+	.probe		= mv_xor_v2_probe,
+	.suspend	= mv_xor_v2_suspend,
+	.resume		= mv_xor_v2_resume,
+	.remove		= mv_xor_v2_remove,
+	.driver		= {
+		.name	= "mv_xor_v2",
+		.of_match_table = of_match_ptr(mv_xor_v2_dt_ids),
+	},
+};
+
+module_platform_driver(mv_xor_v2_driver);
+
+MODULE_DESCRIPTION("DMA engine driver for Marvell's Version 2 of XOR engine");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/mxs-dma.c b/drivers/dma/mxs-dma.c
new file mode 100644
index 0000000..ae5182f
--- /dev/null
+++ b/drivers/dma/mxs-dma.c
@@ -0,0 +1,880 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright 2011 Freescale Semiconductor, Inc. All Rights Reserved.
+//
+// Refer to drivers/dma/imx-sdma.c
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/semaphore.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/platform_device.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/stmp_device.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/list.h>
+
+#include <asm/irq.h>
+
+#include "dmaengine.h"
+
+/*
+ * NOTE: The term "PIO" throughout the mxs-dma implementation means
+ * PIO mode of mxs apbh-dma and apbx-dma.  With this working mode,
+ * dma can program the controller registers of peripheral devices.
+ */
+
+#define dma_is_apbh(mxs_dma)	((mxs_dma)->type == MXS_DMA_APBH)
+#define apbh_is_old(mxs_dma)	((mxs_dma)->dev_id == IMX23_DMA)
+
+#define HW_APBHX_CTRL0				0x000
+#define BM_APBH_CTRL0_APB_BURST8_EN		(1 << 29)
+#define BM_APBH_CTRL0_APB_BURST_EN		(1 << 28)
+#define BP_APBH_CTRL0_RESET_CHANNEL		16
+#define HW_APBHX_CTRL1				0x010
+#define HW_APBHX_CTRL2				0x020
+#define HW_APBHX_CHANNEL_CTRL			0x030
+#define BP_APBHX_CHANNEL_CTRL_RESET_CHANNEL	16
+/*
+ * The offset of the NXTCMDAR register differs with both DMA type and version,
+ * while the stride between channels is always 0x70.
+ */
+#define HW_APBHX_CHn_NXTCMDAR(d, n) \
+	(((dma_is_apbh(d) && apbh_is_old(d)) ? 0x050 : 0x110) + (n) * 0x70)
+#define HW_APBHX_CHn_SEMA(d, n) \
+	(((dma_is_apbh(d) && apbh_is_old(d)) ? 0x080 : 0x140) + (n) * 0x70)
+#define HW_APBHX_CHn_BAR(d, n) \
+	(((dma_is_apbh(d) && apbh_is_old(d)) ? 0x070 : 0x130) + (n) * 0x70)
+#define HW_APBX_CHn_DEBUG1(d, n) (0x150 + (n) * 0x70)
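+/*
+ * For example, HW_APBHX_CHn_NXTCMDAR(d, 2) evaluates to 0x050 + 2 * 0x70 =
+ * 0x130 on the old i.MX23 APBH, and to 0x110 + 2 * 0x70 = 0x1f0 otherwise.
+ */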
+
+/*
+ * ccw bits definitions
+ *
+ * COMMAND:		0..1	(2)
+ * CHAIN:		2	(1)
+ * IRQ:			3	(1)
+ * NAND_LOCK:		4	(1) - not implemented
+ * NAND_WAIT4READY:	5	(1) - not implemented
+ * DEC_SEM:		6	(1)
+ * WAIT4END:		7	(1)
+ * HALT_ON_TERMINATE:	8	(1)
+ * TERMINATE_FLUSH:	9	(1)
+ * RESERVED:		10..11	(2)
+ * PIO_NUM:		12..15	(4)
+ */
+#define BP_CCW_COMMAND		0
+#define BM_CCW_COMMAND		(3 << 0)
+#define CCW_CHAIN		(1 << 2)
+#define CCW_IRQ			(1 << 3)
+#define CCW_DEC_SEM		(1 << 6)
+#define CCW_WAIT4END		(1 << 7)
+#define CCW_HALT_ON_TERM	(1 << 8)
+#define CCW_TERM_FLUSH		(1 << 9)
+#define BP_CCW_PIO_NUM		12
+#define BM_CCW_PIO_NUM		(0xf << 12)
+
+#define BF_CCW(value, field)	(((value) << BP_CCW_##field) & BM_CCW_##field)
+
+#define MXS_DMA_CMD_NO_XFER	0
+#define MXS_DMA_CMD_WRITE	1
+#define MXS_DMA_CMD_READ	2
+#define MXS_DMA_CMD_DMA_SENSE	3	/* not implemented */
+
+struct mxs_dma_ccw {
+	u32		next;
+	u16		bits;
+	u16		xfer_bytes;
+#define MAX_XFER_BYTES	0xff00
+	u32		bufaddr;
+#define MXS_PIO_WORDS	16
+	u32		pio_words[MXS_PIO_WORDS];
+};
+
+#define CCW_BLOCK_SIZE	(4 * PAGE_SIZE)
+#define NUM_CCW	(int)(CCW_BLOCK_SIZE / sizeof(struct mxs_dma_ccw))
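+/*
+ * A worked example, assuming 4 KiB pages: CCW_BLOCK_SIZE is 16 KiB and
+ * sizeof(struct mxs_dma_ccw) is 76 bytes, so NUM_CCW evaluates to 215
+ * command words per channel.
+ */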
+
+struct mxs_dma_chan {
+	struct mxs_dma_engine		*mxs_dma;
+	struct dma_chan			chan;
+	struct dma_async_tx_descriptor	desc;
+	struct tasklet_struct		tasklet;
+	unsigned int			chan_irq;
+	struct mxs_dma_ccw		*ccw;
+	dma_addr_t			ccw_phys;
+	int				desc_count;
+	enum dma_status			status;
+	unsigned int			flags;
+	bool				reset;
+#define MXS_DMA_SG_LOOP			(1 << 0)
+#define MXS_DMA_USE_SEMAPHORE		(1 << 1)
+};
+
+#define MXS_DMA_CHANNELS		16
+#define MXS_DMA_CHANNELS_MASK		0xffff
+
+enum mxs_dma_devtype {
+	MXS_DMA_APBH,
+	MXS_DMA_APBX,
+};
+
+enum mxs_dma_id {
+	IMX23_DMA,
+	IMX28_DMA,
+};
+
+struct mxs_dma_engine {
+	enum mxs_dma_id			dev_id;
+	enum mxs_dma_devtype		type;
+	void __iomem			*base;
+	struct clk			*clk;
+	struct dma_device		dma_device;
+	struct device_dma_parameters	dma_parms;
+	struct mxs_dma_chan		mxs_chans[MXS_DMA_CHANNELS];
+	struct platform_device		*pdev;
+	unsigned int			nr_channels;
+};
+
+struct mxs_dma_type {
+	enum mxs_dma_id id;
+	enum mxs_dma_devtype type;
+};
+
+static struct mxs_dma_type mxs_dma_types[] = {
+	{
+		.id = IMX23_DMA,
+		.type = MXS_DMA_APBH,
+	}, {
+		.id = IMX23_DMA,
+		.type = MXS_DMA_APBX,
+	}, {
+		.id = IMX28_DMA,
+		.type = MXS_DMA_APBH,
+	}, {
+		.id = IMX28_DMA,
+		.type = MXS_DMA_APBX,
+	}
+};
+
+static const struct platform_device_id mxs_dma_ids[] = {
+	{
+		.name = "imx23-dma-apbh",
+		.driver_data = (kernel_ulong_t) &mxs_dma_types[0],
+	}, {
+		.name = "imx23-dma-apbx",
+		.driver_data = (kernel_ulong_t) &mxs_dma_types[1],
+	}, {
+		.name = "imx28-dma-apbh",
+		.driver_data = (kernel_ulong_t) &mxs_dma_types[2],
+	}, {
+		.name = "imx28-dma-apbx",
+		.driver_data = (kernel_ulong_t) &mxs_dma_types[3],
+	}, {
+		/* end of list */
+	}
+};
+
+static const struct of_device_id mxs_dma_dt_ids[] = {
+	{ .compatible = "fsl,imx23-dma-apbh", .data = &mxs_dma_ids[0], },
+	{ .compatible = "fsl,imx23-dma-apbx", .data = &mxs_dma_ids[1], },
+	{ .compatible = "fsl,imx28-dma-apbh", .data = &mxs_dma_ids[2], },
+	{ .compatible = "fsl,imx28-dma-apbx", .data = &mxs_dma_ids[3], },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, mxs_dma_dt_ids);
+
+static struct mxs_dma_chan *to_mxs_dma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct mxs_dma_chan, chan);
+}
+
+static void mxs_dma_reset_chan(struct dma_chan *chan)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	int chan_id = mxs_chan->chan.chan_id;
+
+	/*
+	 * mxs dma channel resets can cause a channel stall. To recover from a
+	 * channel stall, we have to reset the whole DMA engine. To avoid this,
+	 * we use cyclic DMA with semaphores, which are incremented in
+	 * mxs_dma_int_handler. To reset the channel, we can simply stop writing
+	 * into the semaphore counter.
+	 */
+	if (mxs_chan->flags & MXS_DMA_USE_SEMAPHORE &&
+			mxs_chan->flags & MXS_DMA_SG_LOOP) {
+		mxs_chan->reset = true;
+	} else if (dma_is_apbh(mxs_dma) && apbh_is_old(mxs_dma)) {
+		writel(1 << (chan_id + BP_APBH_CTRL0_RESET_CHANNEL),
+			mxs_dma->base + HW_APBHX_CTRL0 + STMP_OFFSET_REG_SET);
+	} else {
+		unsigned long elapsed = 0;
+		const unsigned long max_wait = 50000; /* 50ms */
+		void __iomem *reg_dbg1 = mxs_dma->base +
+				HW_APBX_CHn_DEBUG1(mxs_dma, chan_id);
+
+		/*
+		 * On i.MX28 APBX, the DMA channel can stop working if we reset
+		 * the channel while it is in READ_FLUSH (0x08) state.
+		 * We wait here until the channel leaves this state, then
+		 * trigger the reset. The wait is bounded to 50ms so the
+		 * kernel cannot get stuck here.
+		 */
+		while ((readl(reg_dbg1) & 0xf) == 0x8 && elapsed < max_wait) {
+			udelay(100);
+			elapsed += 100;
+		}
+
+		if (elapsed >= max_wait)
+			dev_err(&mxs_chan->mxs_dma->pdev->dev,
+					"Failed waiting for the DMA channel %d to leave state READ_FLUSH, trying to reset channel in READ_FLUSH state now\n",
+					chan_id);
+
+		writel(1 << (chan_id + BP_APBHX_CHANNEL_CTRL_RESET_CHANNEL),
+			mxs_dma->base + HW_APBHX_CHANNEL_CTRL + STMP_OFFSET_REG_SET);
+	}
+
+	mxs_chan->status = DMA_COMPLETE;
+}
+
+static void mxs_dma_enable_chan(struct dma_chan *chan)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	int chan_id = mxs_chan->chan.chan_id;
+
+	/* set cmd_addr up */
+	writel(mxs_chan->ccw_phys,
+		mxs_dma->base + HW_APBHX_CHn_NXTCMDAR(mxs_dma, chan_id));
+
+	/* write 1 to SEMA to kick off the channel */
+	if (mxs_chan->flags & MXS_DMA_USE_SEMAPHORE &&
+			mxs_chan->flags & MXS_DMA_SG_LOOP) {
+		/*
+		 * A cyclic DMA consists of at least 2 segments, so initialize
+		 * the semaphore with 2 so that we have enough time to add 1
+		 * to the semaphore if we need to.
+		 */
+		writel(2, mxs_dma->base + HW_APBHX_CHn_SEMA(mxs_dma, chan_id));
+	} else {
+		writel(1, mxs_dma->base + HW_APBHX_CHn_SEMA(mxs_dma, chan_id));
+	}
+	mxs_chan->reset = false;
+}
+
+static void mxs_dma_disable_chan(struct dma_chan *chan)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+
+	mxs_chan->status = DMA_COMPLETE;
+}
+
+static int mxs_dma_pause_chan(struct dma_chan *chan)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	int chan_id = mxs_chan->chan.chan_id;
+
+	/* freeze the channel */
+	if (dma_is_apbh(mxs_dma) && apbh_is_old(mxs_dma))
+		writel(1 << chan_id,
+			mxs_dma->base + HW_APBHX_CTRL0 + STMP_OFFSET_REG_SET);
+	else
+		writel(1 << chan_id,
+			mxs_dma->base + HW_APBHX_CHANNEL_CTRL + STMP_OFFSET_REG_SET);
+
+	mxs_chan->status = DMA_PAUSED;
+	return 0;
+}
+
+static int mxs_dma_resume_chan(struct dma_chan *chan)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	int chan_id = mxs_chan->chan.chan_id;
+
+	/* unfreeze the channel */
+	if (dma_is_apbh(mxs_dma) && apbh_is_old(mxs_dma))
+		writel(1 << chan_id,
+			mxs_dma->base + HW_APBHX_CTRL0 + STMP_OFFSET_REG_CLR);
+	else
+		writel(1 << chan_id,
+			mxs_dma->base + HW_APBHX_CHANNEL_CTRL + STMP_OFFSET_REG_CLR);
+
+	mxs_chan->status = DMA_IN_PROGRESS;
+	return 0;
+}
+
+static dma_cookie_t mxs_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	return dma_cookie_assign(tx);
+}
+
+static void mxs_dma_tasklet(unsigned long data)
+{
+	struct mxs_dma_chan *mxs_chan = (struct mxs_dma_chan *) data;
+
+	dmaengine_desc_get_callback_invoke(&mxs_chan->desc, NULL);
+}
+
+static int mxs_dma_irq_to_chan(struct mxs_dma_engine *mxs_dma, int irq)
+{
+	int i;
+
+	for (i = 0; i != mxs_dma->nr_channels; ++i)
+		if (mxs_dma->mxs_chans[i].chan_irq == irq)
+			return i;
+
+	return -EINVAL;
+}
+
+static irqreturn_t mxs_dma_int_handler(int irq, void *dev_id)
+{
+	struct mxs_dma_engine *mxs_dma = dev_id;
+	struct mxs_dma_chan *mxs_chan;
+	u32 completed;
+	u32 err;
+	int chan = mxs_dma_irq_to_chan(mxs_dma, irq);
+
+	if (chan < 0)
+		return IRQ_NONE;
+
+	/* completion status */
+	completed = readl(mxs_dma->base + HW_APBHX_CTRL1);
+	completed = (completed >> chan) & 0x1;
+
+	/* Clear interrupt */
+	writel((1 << chan),
+			mxs_dma->base + HW_APBHX_CTRL1 + STMP_OFFSET_REG_CLR);
+
+	/* error status */
+	err = readl(mxs_dma->base + HW_APBHX_CTRL2);
+	err &= (1 << (MXS_DMA_CHANNELS + chan)) | (1 << chan);
+
+	/*
+	 * error status bit is in the upper 16 bits, error irq bit in the lower
+	 * 16 bits. We transform it into a simpler error code:
+	 * err: 0x00 = no error, 0x01 = TERMINATION, 0x02 = BUS_ERROR
+	 */
+	err = (err >> (MXS_DMA_CHANNELS + chan)) + (err >> chan);
+
+	/* Clear error irq */
+	writel((1 << chan),
+			mxs_dma->base + HW_APBHX_CTRL2 + STMP_OFFSET_REG_CLR);
+
+	/*
+	 * When the completion bit and the termination error bit are set at
+	 * the same time, we do not treat it as an error.  IOW, we only need
+	 * to handle an error here if it is a bus error, or a termination
+	 * error without completion. 0x01 is the termination error code, so
+	 * subtracting (err & completed) removes that case.
+	 */
+	err -= err & completed;
+
+	mxs_chan = &mxs_dma->mxs_chans[chan];
+
+	if (err) {
+		dev_dbg(mxs_dma->dma_device.dev,
+			"%s: error in channel %d\n", __func__,
+			chan);
+		mxs_chan->status = DMA_ERROR;
+		mxs_dma_reset_chan(&mxs_chan->chan);
+	} else if (mxs_chan->status != DMA_COMPLETE) {
+		if (mxs_chan->flags & MXS_DMA_SG_LOOP) {
+			mxs_chan->status = DMA_IN_PROGRESS;
+			if (mxs_chan->flags & MXS_DMA_USE_SEMAPHORE)
+				writel(1, mxs_dma->base +
+					HW_APBHX_CHn_SEMA(mxs_dma, chan));
+		} else {
+			mxs_chan->status = DMA_COMPLETE;
+		}
+	}
+
+	if (mxs_chan->status == DMA_COMPLETE) {
+		if (mxs_chan->reset)
+			return IRQ_HANDLED;
+		dma_cookie_complete(&mxs_chan->desc);
+	}
+
+	/* schedule tasklet on this channel */
+	tasklet_schedule(&mxs_chan->tasklet);
+
+	return IRQ_HANDLED;
+}
+
+static int mxs_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	int ret;
+
+	mxs_chan->ccw = dma_zalloc_coherent(mxs_dma->dma_device.dev,
+					    CCW_BLOCK_SIZE,
+					    &mxs_chan->ccw_phys, GFP_KERNEL);
+	if (!mxs_chan->ccw) {
+		ret = -ENOMEM;
+		goto err_alloc;
+	}
+
+	ret = request_irq(mxs_chan->chan_irq, mxs_dma_int_handler,
+			  0, "mxs-dma", mxs_dma);
+	if (ret)
+		goto err_irq;
+
+	ret = clk_prepare_enable(mxs_dma->clk);
+	if (ret)
+		goto err_clk;
+
+	mxs_dma_reset_chan(chan);
+
+	dma_async_tx_descriptor_init(&mxs_chan->desc, chan);
+	mxs_chan->desc.tx_submit = mxs_dma_tx_submit;
+
+	/* the descriptor is ready */
+	async_tx_ack(&mxs_chan->desc);
+
+	return 0;
+
+err_clk:
+	free_irq(mxs_chan->chan_irq, mxs_dma);
+err_irq:
+	dma_free_coherent(mxs_dma->dma_device.dev, CCW_BLOCK_SIZE,
+			mxs_chan->ccw, mxs_chan->ccw_phys);
+err_alloc:
+	return ret;
+}
+
+static void mxs_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+
+	mxs_dma_disable_chan(chan);
+
+	free_irq(mxs_chan->chan_irq, mxs_dma);
+
+	dma_free_coherent(mxs_dma->dma_device.dev, CCW_BLOCK_SIZE,
+			mxs_chan->ccw, mxs_chan->ccw_phys);
+
+	clk_disable_unprepare(mxs_dma->clk);
+}
+
+/*
+ * How to use the flags for ->device_prep_slave_sg() :
+ *    [1] If there is only one DMA command in the DMA chain, the code should be:
+ *            ......
+ *            ->device_prep_slave_sg(DMA_CTRL_ACK);
+ *            ......
+ *    [2] If there are two DMA commands in the DMA chain, the code should be
+ *            ......
+ *            ->device_prep_slave_sg(0);
+ *            ......
+ *            ->device_prep_slave_sg(DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ *            ......
+ *    [3] If there are more than two DMA commands in the DMA chain, the code
+ *        should be:
+ *            ......
+ *            ->device_prep_slave_sg(0);                                // First
+ *            ......
+ *            ->device_prep_slave_sg(DMA_PREP_INTERRUPT [| DMA_CTRL_ACK]);
+ *            ......
+ *            ->device_prep_slave_sg(DMA_PREP_INTERRUPT | DMA_CTRL_ACK); // Last
+ *            ......
+ */
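+/*
+ * A minimal client-side sketch of case [2] above (names are illustrative,
+ * assuming the channel was obtained via dma_request_chan()):
+ *
+ *	desc = dmaengine_prep_slave_sg(chan, (struct scatterlist *)pio_words,
+ *				       pio_word_count, DMA_TRANS_NONE, 0);
+ *	desc = dmaengine_prep_slave_sg(chan, data_sgl, data_sg_len,
+ *				       DMA_DEV_TO_MEM,
+ *				       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ *	dmaengine_submit(desc);
+ *	dma_async_issue_pending(chan);
+ */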
+static struct dma_async_tx_descriptor *mxs_dma_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	struct mxs_dma_ccw *ccw;
+	struct scatterlist *sg;
+	u32 i, j;
+	u32 *pio;
+	bool append = flags & DMA_PREP_INTERRUPT;
+	int idx = append ? mxs_chan->desc_count : 0;
+
+	if (mxs_chan->status == DMA_IN_PROGRESS && !append)
+		return NULL;
+
+	if (sg_len + (append ? idx : 0) > NUM_CCW) {
+		dev_err(mxs_dma->dma_device.dev,
+				"maximum number of sg exceeded: %d > %d\n",
+				sg_len, NUM_CCW);
+		goto err_out;
+	}
+
+	mxs_chan->status = DMA_IN_PROGRESS;
+	mxs_chan->flags = 0;
+
+	/*
+	 * If the sg is prepared with append flag set, the sg
+	 * will be appended to the last prepared sg.
+	 */
+	if (append) {
+		BUG_ON(idx < 1);
+		ccw = &mxs_chan->ccw[idx - 1];
+		ccw->next = mxs_chan->ccw_phys + sizeof(*ccw) * idx;
+		ccw->bits |= CCW_CHAIN;
+		ccw->bits &= ~CCW_IRQ;
+		ccw->bits &= ~CCW_DEC_SEM;
+	} else {
+		idx = 0;
+	}
+
+	if (direction == DMA_TRANS_NONE) {
+		ccw = &mxs_chan->ccw[idx++];
+		pio = (u32 *) sgl;
+
+		for (j = 0; j < sg_len;)
+			ccw->pio_words[j++] = *pio++;
+
+		ccw->bits = 0;
+		ccw->bits |= CCW_IRQ;
+		ccw->bits |= CCW_DEC_SEM;
+		if (flags & DMA_CTRL_ACK)
+			ccw->bits |= CCW_WAIT4END;
+		ccw->bits |= CCW_HALT_ON_TERM;
+		ccw->bits |= CCW_TERM_FLUSH;
+		ccw->bits |= BF_CCW(sg_len, PIO_NUM);
+		ccw->bits |= BF_CCW(MXS_DMA_CMD_NO_XFER, COMMAND);
+	} else {
+		for_each_sg(sgl, sg, sg_len, i) {
+			if (sg_dma_len(sg) > MAX_XFER_BYTES) {
+				dev_err(mxs_dma->dma_device.dev, "maximum bytes for sg entry exceeded: %d > %d\n",
+						sg_dma_len(sg), MAX_XFER_BYTES);
+				goto err_out;
+			}
+
+			ccw = &mxs_chan->ccw[idx++];
+
+			ccw->next = mxs_chan->ccw_phys + sizeof(*ccw) * idx;
+			ccw->bufaddr = sg->dma_address;
+			ccw->xfer_bytes = sg_dma_len(sg);
+
+			ccw->bits = 0;
+			ccw->bits |= CCW_CHAIN;
+			ccw->bits |= CCW_HALT_ON_TERM;
+			ccw->bits |= CCW_TERM_FLUSH;
+			ccw->bits |= BF_CCW(direction == DMA_DEV_TO_MEM ?
+					MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ,
+					COMMAND);
+
+			if (i + 1 == sg_len) {
+				ccw->bits &= ~CCW_CHAIN;
+				ccw->bits |= CCW_IRQ;
+				ccw->bits |= CCW_DEC_SEM;
+				if (flags & DMA_CTRL_ACK)
+					ccw->bits |= CCW_WAIT4END;
+			}
+		}
+	}
+	mxs_chan->desc_count = idx;
+
+	return &mxs_chan->desc;
+
+err_out:
+	mxs_chan->status = DMA_ERROR;
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *mxs_dma_prep_dma_cyclic(
+		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
+		size_t period_len, enum dma_transfer_direction direction,
+		unsigned long flags)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	u32 num_periods = buf_len / period_len;
+	u32 i = 0, buf = 0;
+
+	if (mxs_chan->status == DMA_IN_PROGRESS)
+		return NULL;
+
+	mxs_chan->status = DMA_IN_PROGRESS;
+	mxs_chan->flags |= MXS_DMA_SG_LOOP;
+	mxs_chan->flags |= MXS_DMA_USE_SEMAPHORE;
+
+	if (num_periods > NUM_CCW) {
+		dev_err(mxs_dma->dma_device.dev,
+				"maximum number of sg exceeded: %d > %d\n",
+				num_periods, NUM_CCW);
+		goto err_out;
+	}
+
+	if (period_len > MAX_XFER_BYTES) {
+		dev_err(mxs_dma->dma_device.dev,
+				"maximum period size exceeded: %zu > %d\n",
+				period_len, MAX_XFER_BYTES);
+		goto err_out;
+	}
+
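+	/*
+	 * Build one ccw per period; the last ccw points back to the first,
+	 * so the hardware keeps looping over the ring until the channel is
+	 * reset.
+	 */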
+	while (buf < buf_len) {
+		struct mxs_dma_ccw *ccw = &mxs_chan->ccw[i];
+
+		if (i + 1 == num_periods)
+			ccw->next = mxs_chan->ccw_phys;
+		else
+			ccw->next = mxs_chan->ccw_phys + sizeof(*ccw) * (i + 1);
+
+		ccw->bufaddr = dma_addr;
+		ccw->xfer_bytes = period_len;
+
+		ccw->bits = 0;
+		ccw->bits |= CCW_CHAIN;
+		ccw->bits |= CCW_IRQ;
+		ccw->bits |= CCW_HALT_ON_TERM;
+		ccw->bits |= CCW_TERM_FLUSH;
+		ccw->bits |= CCW_DEC_SEM;
+		ccw->bits |= BF_CCW(direction == DMA_DEV_TO_MEM ?
+				MXS_DMA_CMD_WRITE : MXS_DMA_CMD_READ, COMMAND);
+
+		dma_addr += period_len;
+		buf += period_len;
+
+		i++;
+	}
+	mxs_chan->desc_count = i;
+
+	return &mxs_chan->desc;
+
+err_out:
+	mxs_chan->status = DMA_ERROR;
+	return NULL;
+}
+
+static int mxs_dma_terminate_all(struct dma_chan *chan)
+{
+	mxs_dma_reset_chan(chan);
+	mxs_dma_disable_chan(chan);
+
+	return 0;
+}
+
+static enum dma_status mxs_dma_tx_status(struct dma_chan *chan,
+			dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	u32 residue = 0;
+
+	if (mxs_chan->status == DMA_IN_PROGRESS &&
+			mxs_chan->flags & MXS_DMA_SG_LOOP) {
+		struct mxs_dma_ccw *last_ccw;
+		u32 bar;
+
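+		/*
+		 * The BAR register holds the bus address the channel is
+		 * currently transferring, so the residue is the distance
+		 * from there to the end of the last (cyclic) segment.
+		 */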
+		last_ccw = &mxs_chan->ccw[mxs_chan->desc_count - 1];
+		residue = last_ccw->xfer_bytes + last_ccw->bufaddr;
+
+		bar = readl(mxs_dma->base +
+				HW_APBHX_CHn_BAR(mxs_dma, chan->chan_id));
+		residue -= bar;
+	}
+
+	dma_set_tx_state(txstate, chan->completed_cookie, chan->cookie,
+			residue);
+
+	return mxs_chan->status;
+}
+
+static int __init mxs_dma_init(struct mxs_dma_engine *mxs_dma)
+{
+	int ret;
+
+	ret = clk_prepare_enable(mxs_dma->clk);
+	if (ret)
+		return ret;
+
+	ret = stmp_reset_block(mxs_dma->base);
+	if (ret)
+		goto err_out;
+
+	/* enable apbh burst */
+	if (dma_is_apbh(mxs_dma)) {
+		writel(BM_APBH_CTRL0_APB_BURST_EN,
+			mxs_dma->base + HW_APBHX_CTRL0 + STMP_OFFSET_REG_SET);
+		writel(BM_APBH_CTRL0_APB_BURST8_EN,
+			mxs_dma->base + HW_APBHX_CTRL0 + STMP_OFFSET_REG_SET);
+	}
+
+	/* enable irq for all the channels */
+	writel(MXS_DMA_CHANNELS_MASK << MXS_DMA_CHANNELS,
+		mxs_dma->base + HW_APBHX_CTRL1 + STMP_OFFSET_REG_SET);
+
+err_out:
+	clk_disable_unprepare(mxs_dma->clk);
+	return ret;
+}
+
+struct mxs_dma_filter_param {
+	struct device_node *of_node;
+	unsigned int chan_id;
+};
+
+static bool mxs_dma_filter_fn(struct dma_chan *chan, void *fn_param)
+{
+	struct mxs_dma_filter_param *param = fn_param;
+	struct mxs_dma_chan *mxs_chan = to_mxs_dma_chan(chan);
+	struct mxs_dma_engine *mxs_dma = mxs_chan->mxs_dma;
+	int chan_irq;
+
+	if (mxs_dma->dma_device.dev->of_node != param->of_node)
+		return false;
+
+	if (chan->chan_id != param->chan_id)
+		return false;
+
+	chan_irq = platform_get_irq(mxs_dma->pdev, param->chan_id);
+	if (chan_irq < 0)
+		return false;
+
+	mxs_chan->chan_irq = chan_irq;
+
+	return true;
+}
+
+static struct dma_chan *mxs_dma_xlate(struct of_phandle_args *dma_spec,
+			       struct of_dma *ofdma)
+{
+	struct mxs_dma_engine *mxs_dma = ofdma->of_dma_data;
+	dma_cap_mask_t mask = mxs_dma->dma_device.cap_mask;
+	struct mxs_dma_filter_param param;
+
+	if (dma_spec->args_count != 1)
+		return NULL;
+
+	param.of_node = ofdma->of_node;
+	param.chan_id = dma_spec->args[0];
+
+	if (param.chan_id >= mxs_dma->nr_channels)
+		return NULL;
+
+	return dma_request_channel(mask, mxs_dma_filter_fn, &param);
+}
+
+static int __init mxs_dma_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	const struct platform_device_id *id_entry;
+	const struct of_device_id *of_id;
+	const struct mxs_dma_type *dma_type;
+	struct mxs_dma_engine *mxs_dma;
+	struct resource *iores;
+	int ret, i;
+
+	mxs_dma = devm_kzalloc(&pdev->dev, sizeof(*mxs_dma), GFP_KERNEL);
+	if (!mxs_dma)
+		return -ENOMEM;
+
+	ret = of_property_read_u32(np, "dma-channels", &mxs_dma->nr_channels);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to read dma-channels\n");
+		return ret;
+	}
+
+	of_id = of_match_device(mxs_dma_dt_ids, &pdev->dev);
+	if (of_id)
+		id_entry = of_id->data;
+	else
+		id_entry = platform_get_device_id(pdev);
+
+	dma_type = (struct mxs_dma_type *)id_entry->driver_data;
+	mxs_dma->type = dma_type->type;
+	mxs_dma->dev_id = dma_type->id;
+
+	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	mxs_dma->base = devm_ioremap_resource(&pdev->dev, iores);
+	if (IS_ERR(mxs_dma->base))
+		return PTR_ERR(mxs_dma->base);
+
+	mxs_dma->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(mxs_dma->clk))
+		return PTR_ERR(mxs_dma->clk);
+
+	dma_cap_set(DMA_SLAVE, mxs_dma->dma_device.cap_mask);
+	dma_cap_set(DMA_CYCLIC, mxs_dma->dma_device.cap_mask);
+
+	INIT_LIST_HEAD(&mxs_dma->dma_device.channels);
+
+	/* Initialize channel parameters */
+	for (i = 0; i < MXS_DMA_CHANNELS; i++) {
+		struct mxs_dma_chan *mxs_chan = &mxs_dma->mxs_chans[i];
+
+		mxs_chan->mxs_dma = mxs_dma;
+		mxs_chan->chan.device = &mxs_dma->dma_device;
+		dma_cookie_init(&mxs_chan->chan);
+
+		tasklet_init(&mxs_chan->tasklet, mxs_dma_tasklet,
+			     (unsigned long) mxs_chan);
+
+		/* Add the channel to mxs_chan list */
+		list_add_tail(&mxs_chan->chan.device_node,
+			&mxs_dma->dma_device.channels);
+	}
+
+	ret = mxs_dma_init(mxs_dma);
+	if (ret)
+		return ret;
+
+	mxs_dma->pdev = pdev;
+	mxs_dma->dma_device.dev = &pdev->dev;
+
+	/* mxs_dma supports at most MAX_XFER_BYTES (0xff00) bytes per sg entry */
+	mxs_dma->dma_device.dev->dma_parms = &mxs_dma->dma_parms;
+	dma_set_max_seg_size(mxs_dma->dma_device.dev, MAX_XFER_BYTES);
+
+	mxs_dma->dma_device.device_alloc_chan_resources = mxs_dma_alloc_chan_resources;
+	mxs_dma->dma_device.device_free_chan_resources = mxs_dma_free_chan_resources;
+	mxs_dma->dma_device.device_tx_status = mxs_dma_tx_status;
+	mxs_dma->dma_device.device_prep_slave_sg = mxs_dma_prep_slave_sg;
+	mxs_dma->dma_device.device_prep_dma_cyclic = mxs_dma_prep_dma_cyclic;
+	mxs_dma->dma_device.device_pause = mxs_dma_pause_chan;
+	mxs_dma->dma_device.device_resume = mxs_dma_resume_chan;
+	mxs_dma->dma_device.device_terminate_all = mxs_dma_terminate_all;
+	mxs_dma->dma_device.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	mxs_dma->dma_device.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	mxs_dma->dma_device.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	mxs_dma->dma_device.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+	mxs_dma->dma_device.device_issue_pending = mxs_dma_enable_chan;
+
+	ret = dma_async_device_register(&mxs_dma->dma_device);
+	if (ret) {
+		dev_err(mxs_dma->dma_device.dev, "unable to register\n");
+		return ret;
+	}
+
+	ret = of_dma_controller_register(np, mxs_dma_xlate, mxs_dma);
+	if (ret) {
+		dev_err(mxs_dma->dma_device.dev,
+			"failed to register controller\n");
+		dma_async_device_unregister(&mxs_dma->dma_device);
+	}
+
+	dev_info(mxs_dma->dma_device.dev, "initialized\n");
+
+	return 0;
+}
+
+static struct platform_driver mxs_dma_driver = {
+	.driver		= {
+		.name	= "mxs-dma",
+		.of_match_table = mxs_dma_dt_ids,
+	},
+	.id_table	= mxs_dma_ids,
+};
+
+static int __init mxs_dma_module_init(void)
+{
+	return platform_driver_probe(&mxs_dma_driver, mxs_dma_probe);
+}
+subsys_initcall(mxs_dma_module_init);
diff --git a/drivers/dma/nbpfaxi.c b/drivers/dma/nbpfaxi.c
new file mode 100644
index 0000000..8c7b2e8
--- /dev/null
+++ b/drivers/dma/nbpfaxi.c
@@ -0,0 +1,1528 @@
+/*
+ * Copyright (C) 2013-2014 Renesas Electronics Europe Ltd.
+ * Author: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/log2.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <dt-bindings/dma/nbpfaxi.h>
+
+#include "dmaengine.h"
+
+#define NBPF_REG_CHAN_OFFSET	0
+#define NBPF_REG_CHAN_SIZE	0x40
+
+/* Channel Current Transaction Byte register */
+#define NBPF_CHAN_CUR_TR_BYTE	0x20
+
+/* Channel Status register */
+#define NBPF_CHAN_STAT	0x24
+#define NBPF_CHAN_STAT_EN	1
+#define NBPF_CHAN_STAT_TACT	4
+#define NBPF_CHAN_STAT_ERR	0x10
+#define NBPF_CHAN_STAT_END	0x20
+#define NBPF_CHAN_STAT_TC	0x40
+#define NBPF_CHAN_STAT_DER	0x400
+
+/* Channel Control register */
+#define NBPF_CHAN_CTRL	0x28
+#define NBPF_CHAN_CTRL_SETEN	1
+#define NBPF_CHAN_CTRL_CLREN	2
+#define NBPF_CHAN_CTRL_STG	4
+#define NBPF_CHAN_CTRL_SWRST	8
+#define NBPF_CHAN_CTRL_CLRRQ	0x10
+#define NBPF_CHAN_CTRL_CLREND	0x20
+#define NBPF_CHAN_CTRL_CLRTC	0x40
+#define NBPF_CHAN_CTRL_SETSUS	0x100
+#define NBPF_CHAN_CTRL_CLRSUS	0x200
+
+/* Channel Configuration register */
+#define NBPF_CHAN_CFG	0x2c
+#define NBPF_CHAN_CFG_SEL	7		/* terminal SELect: 0..7 */
+#define NBPF_CHAN_CFG_REQD	8		/* REQuest Direction: DMAREQ is 0: input, 1: output */
+#define NBPF_CHAN_CFG_LOEN	0x10		/* LOw ENable: low DMA request line is: 0: inactive, 1: active */
+#define NBPF_CHAN_CFG_HIEN	0x20		/* HIgh ENable: high DMA request line is: 0: inactive, 1: active */
+#define NBPF_CHAN_CFG_LVL	0x40		/* LeVeL: DMA request line is sensed as 0: edge, 1: level */
+#define NBPF_CHAN_CFG_AM	0x700		/* ACK Mode: 0: Pulse mode, 1: Level mode, b'1x: Bus Cycle */
+#define NBPF_CHAN_CFG_SDS	0xf000		/* Source Data Size: 0: 8 bits,... , 7: 1024 bits */
+#define NBPF_CHAN_CFG_DDS	0xf0000		/* Destination Data Size: as above */
+#define NBPF_CHAN_CFG_SAD	0x100000	/* Source ADdress counting: 0: increment, 1: fixed */
+#define NBPF_CHAN_CFG_DAD	0x200000	/* Destination ADdress counting: 0: increment, 1: fixed */
+#define NBPF_CHAN_CFG_TM	0x400000	/* Transfer Mode: 0: single, 1: block TM */
+#define NBPF_CHAN_CFG_DEM	0x1000000	/* DMAEND interrupt Mask */
+#define NBPF_CHAN_CFG_TCM	0x2000000	/* DMATCO interrupt Mask */
+#define NBPF_CHAN_CFG_SBE	0x8000000	/* Sweep Buffer Enable */
+#define NBPF_CHAN_CFG_RSEL	0x10000000	/* RM: Register Set sELect */
+#define NBPF_CHAN_CFG_RSW	0x20000000	/* RM: Register Select sWitch */
+#define NBPF_CHAN_CFG_REN	0x40000000	/* RM: Register Set Enable */
+#define NBPF_CHAN_CFG_DMS	0x80000000	/* 0: register mode (RM), 1: link mode (LM) */
+
+#define NBPF_CHAN_NXLA	0x38
+#define NBPF_CHAN_CRLA	0x3c
+
+/* Link Header field */
+#define NBPF_HEADER_LV	1
+#define NBPF_HEADER_LE	2
+#define NBPF_HEADER_WBD	4
+#define NBPF_HEADER_DIM	8
+
+#define NBPF_CTRL	0x300
+#define NBPF_CTRL_PR	1		/* 0: fixed priority, 1: round robin */
+#define NBPF_CTRL_LVINT	2		/* DMAEND and DMAERR signalling: 0: pulse, 1: level */
+
+#define NBPF_DSTAT_ER	0x314
+#define NBPF_DSTAT_END	0x318
+
+#define NBPF_DMA_BUSWIDTHS \
+	(BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \
+	 BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+	 BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+	 BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
+	 BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
+
+struct nbpf_config {
+	int num_channels;
+	int buffer_size;
+};
+
+/*
+ * We've got 3 types of objects used to describe DMA transfers:
+ * 1. a high-level descriptor, containing a struct dma_async_tx_descriptor
+ *	object, used to communicate with the user
+ * 2. hardware DMA link descriptors, that we pass to the DMAC for DMA transfer
+ *	queuing; these must be DMAable, using either the streaming DMA API or
+ *	allocated from coherent memory - one per SG segment
+ * 3. one descriptor per SG segment, used to manage the HW link descriptors
+ *	from (2). They do not have to be DMAable. They can either be (a)
+ *	allocated together with link descriptors as mixed (DMA / CPU) objects,
+ *	or (b) separately. Even if allocated separately it would be best to
+ *	link them to link descriptors once during channel resource allocation
+ *	and always use them as a single object.
+ * Therefore for both cases (a) and (b) at run-time objects (2) and (3) shall
+ * be treated as a single SG segment descriptor.
+ */
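+
+/*
+ * As a rough illustration (not a literal trace of the code below), a
+ * two-segment slave transfer is represented by one object of type (1),
+ * referencing two (3) + (2) pairs:
+ *
+ *	struct nbpf_desc                 (1) handed to the user via async_tx
+ *	 +- struct nbpf_link_desc        (3) CPU-side bookkeeping, on desc->sg
+ *	 |   `- struct nbpf_link_reg     (2) DMAable, fetched by the DMAC
+ *	 `- struct nbpf_link_desc        (3)
+ *	     `- struct nbpf_link_reg     (2)
+ */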
+
+struct nbpf_link_reg {
+	u32	header;
+	u32	src_addr;
+	u32	dst_addr;
+	u32	transaction_size;
+	u32	config;
+	u32	interval;
+	u32	extension;
+	u32	next;
+} __packed;
+
+struct nbpf_device;
+struct nbpf_channel;
+struct nbpf_desc;
+
+struct nbpf_link_desc {
+	struct nbpf_link_reg *hwdesc;
+	dma_addr_t hwdesc_dma_addr;
+	struct nbpf_desc *desc;
+	struct list_head node;
+};
+
+/**
+ * struct nbpf_desc - DMA transfer descriptor
+ * @async_tx:	dmaengine object
+ * @user_wait:	waiting for a user ack
+ * @length:	total transfer length
+ * @chan:	associated DMAC channel
+ * @sg:		list of hardware descriptors, represented by struct nbpf_link_desc
+ * @node:	member in channel descriptor lists
+ */
+struct nbpf_desc {
+	struct dma_async_tx_descriptor async_tx;
+	bool user_wait;
+	size_t length;
+	struct nbpf_channel *chan;
+	struct list_head sg;
+	struct list_head node;
+};
+
+/* Take a wild guess: allocate 4 segments per descriptor */
+#define NBPF_SEGMENTS_PER_DESC 4
+#define NBPF_DESCS_PER_PAGE ((PAGE_SIZE - sizeof(struct list_head)) /	\
+	(sizeof(struct nbpf_desc) +					\
+	 NBPF_SEGMENTS_PER_DESC *					\
+	 (sizeof(struct nbpf_link_desc) + sizeof(struct nbpf_link_reg))))
+#define NBPF_SEGMENTS_PER_PAGE (NBPF_SEGMENTS_PER_DESC * NBPF_DESCS_PER_PAGE)
+
+struct nbpf_desc_page {
+	struct list_head node;
+	struct nbpf_desc desc[NBPF_DESCS_PER_PAGE];
+	struct nbpf_link_desc ldesc[NBPF_SEGMENTS_PER_PAGE];
+	struct nbpf_link_reg hwdesc[NBPF_SEGMENTS_PER_PAGE];
+};
+
+/**
+ * struct nbpf_channel - one DMAC channel
+ * @dma_chan:	standard dmaengine channel object
+ * @tasklet:	bottom half to finish transaction work
+ * @base:	register address base
+ * @nbpf:	DMAC
+ * @name:	IRQ name
+ * @irq:	IRQ number
+ * @slave_src_addr:	source address for slave DMA
+ * @slave_src_width:	source slave data size
+ * @slave_src_burst:	maximum source slave burst size
+ * @slave_dst_addr:	destination address for slave DMA
+ * @slave_dst_width:	destination slave data size
+ * @slave_dst_burst:	maximum destination slave burst size
+ * @terminal:	DMA terminal, assigned to this channel
+ * @dmarq_cfg:	DMA request line configuration - high / low, edge / level for NBPF_CHAN_CFG
+ * @flags:	configuration flags from DT
+ * @lock:	protect descriptor lists
+ * @free_links:	list of free link descriptors
+ * @free:	list of free descriptors
+ * @queued:	list of queued descriptors
+ * @active:	list of descriptors, scheduled for processing
+ * @done:	list of completed descriptors, waiting post-processing
+ * @desc_page:	list of additionally allocated descriptor pages - if any
+ * @running:	linked descriptor of the running transaction
+ * @paused:	are transfers on this channel paused?
+ */
+struct nbpf_channel {
+	struct dma_chan dma_chan;
+	struct tasklet_struct tasklet;
+	void __iomem *base;
+	struct nbpf_device *nbpf;
+	char name[16];
+	int irq;
+	dma_addr_t slave_src_addr;
+	size_t slave_src_width;
+	size_t slave_src_burst;
+	dma_addr_t slave_dst_addr;
+	size_t slave_dst_width;
+	size_t slave_dst_burst;
+	unsigned int terminal;
+	u32 dmarq_cfg;
+	unsigned long flags;
+	spinlock_t lock;
+	struct list_head free_links;
+	struct list_head free;
+	struct list_head queued;
+	struct list_head active;
+	struct list_head done;
+	struct list_head desc_page;
+	struct nbpf_desc *running;
+	bool paused;
+};
+
+struct nbpf_device {
+	struct dma_device dma_dev;
+	void __iomem *base;
+	u32 max_burst_mem_read;
+	u32 max_burst_mem_write;
+	struct clk *clk;
+	const struct nbpf_config *config;
+	unsigned int eirq;
+	struct nbpf_channel chan[];
+};
+
+enum nbpf_model {
+	NBPF1B4,
+	NBPF1B8,
+	NBPF1B16,
+	NBPF4B4,
+	NBPF4B8,
+	NBPF4B16,
+	NBPF8B4,
+	NBPF8B8,
+	NBPF8B16,
+};
+
+static struct nbpf_config nbpf_cfg[] = {
+	[NBPF1B4] = {
+		.num_channels = 1,
+		.buffer_size = 4,
+	},
+	[NBPF1B8] = {
+		.num_channels = 1,
+		.buffer_size = 8,
+	},
+	[NBPF1B16] = {
+		.num_channels = 1,
+		.buffer_size = 16,
+	},
+	[NBPF4B4] = {
+		.num_channels = 4,
+		.buffer_size = 4,
+	},
+	[NBPF4B8] = {
+		.num_channels = 4,
+		.buffer_size = 8,
+	},
+	[NBPF4B16] = {
+		.num_channels = 4,
+		.buffer_size = 16,
+	},
+	[NBPF8B4] = {
+		.num_channels = 8,
+		.buffer_size = 4,
+	},
+	[NBPF8B8] = {
+		.num_channels = 8,
+		.buffer_size = 8,
+	},
+	[NBPF8B16] = {
+		.num_channels = 8,
+		.buffer_size = 16,
+	},
+};
+
+#define nbpf_to_chan(d) container_of(d, struct nbpf_channel, dma_chan)
+
+/*
+ * dmaengine drivers seem to have a lot in common and instead of sharing more
+ * code, they reimplement those common algorithms independently. In this driver
+ * we try to separate the hardware-specific part from the (largely) generic
+ * part. This improves code readability and makes it possible in the future to
+ * reuse the generic code in form of a helper library. That generic code should
+ * be suitable for various DMA controllers, using transfer descriptors in RAM
+ * and pushing one SG list at a time to the DMA controller.
+ */
+
+/*		Hardware-specific part		*/
+
+static inline u32 nbpf_chan_read(struct nbpf_channel *chan,
+				 unsigned int offset)
+{
+	u32 data = ioread32(chan->base + offset);
+	dev_dbg(chan->dma_chan.device->dev, "%s(0x%p + 0x%x) = 0x%x\n",
+		__func__, chan->base, offset, data);
+	return data;
+}
+
+static inline void nbpf_chan_write(struct nbpf_channel *chan,
+				   unsigned int offset, u32 data)
+{
+	iowrite32(data, chan->base + offset);
+	dev_dbg(chan->dma_chan.device->dev, "%s(0x%p + 0x%x) = 0x%x\n",
+		__func__, chan->base, offset, data);
+}
+
+static inline u32 nbpf_read(struct nbpf_device *nbpf,
+			    unsigned int offset)
+{
+	u32 data = ioread32(nbpf->base + offset);
+	dev_dbg(nbpf->dma_dev.dev, "%s(0x%p + 0x%x) = 0x%x\n",
+		__func__, nbpf->base, offset, data);
+	return data;
+}
+
+static inline void nbpf_write(struct nbpf_device *nbpf,
+			      unsigned int offset, u32 data)
+{
+	iowrite32(data, nbpf->base + offset);
+	dev_dbg(nbpf->dma_dev.dev, "%s(0x%p + 0x%x) = 0x%x\n",
+		__func__, nbpf->base, offset, data);
+}
+
+static void nbpf_chan_halt(struct nbpf_channel *chan)
+{
+	nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_CLREN);
+}
+
+static bool nbpf_status_get(struct nbpf_channel *chan)
+{
+	u32 status = nbpf_read(chan->nbpf, NBPF_DSTAT_END);
+
+	return status & BIT(chan - chan->nbpf->chan);
+}
+
+static void nbpf_status_ack(struct nbpf_channel *chan)
+{
+	nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_CLREND);
+}
+
+static u32 nbpf_error_get(struct nbpf_device *nbpf)
+{
+	return nbpf_read(nbpf, NBPF_DSTAT_ER);
+}
+
+static struct nbpf_channel *nbpf_error_get_channel(struct nbpf_device *nbpf, u32 error)
+{
+	return nbpf->chan + __ffs(error);
+}
+
+static void nbpf_error_clear(struct nbpf_channel *chan)
+{
+	u32 status;
+	int i;
+
+	/* Stop the channel, make sure DMA has been aborted */
+	nbpf_chan_halt(chan);
+
+	for (i = 1000; i; i--) {
+		status = nbpf_chan_read(chan, NBPF_CHAN_STAT);
+		if (!(status & NBPF_CHAN_STAT_TACT))
+			break;
+		cpu_relax();
+	}
+
+	if (!i)
+		dev_err(chan->dma_chan.device->dev,
+			"%s(): abort timeout, channel status 0x%x\n", __func__, status);
+
+	nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_SWRST);
+}
+
+static int nbpf_start(struct nbpf_desc *desc)
+{
+	struct nbpf_channel *chan = desc->chan;
+	struct nbpf_link_desc *ldesc = list_first_entry(&desc->sg, struct nbpf_link_desc, node);
+
+	nbpf_chan_write(chan, NBPF_CHAN_NXLA, (u32)ldesc->hwdesc_dma_addr);
+	nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_SETEN | NBPF_CHAN_CTRL_CLRSUS);
+	chan->paused = false;
+
+	/* Software trigger MEMCPY - only MEMCPY uses the block mode */
+	if (ldesc->hwdesc->config & NBPF_CHAN_CFG_TM)
+		nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_STG);
+
+	dev_dbg(chan->nbpf->dma_dev.dev, "%s(): next 0x%x, cur 0x%x\n", __func__,
+		nbpf_chan_read(chan, NBPF_CHAN_NXLA), nbpf_chan_read(chan, NBPF_CHAN_CRLA));
+
+	return 0;
+}
+
+static void nbpf_chan_prepare(struct nbpf_channel *chan)
+{
+	chan->dmarq_cfg = (chan->flags & NBPF_SLAVE_RQ_HIGH ? NBPF_CHAN_CFG_HIEN : 0) |
+		(chan->flags & NBPF_SLAVE_RQ_LOW ? NBPF_CHAN_CFG_LOEN : 0) |
+		(chan->flags & NBPF_SLAVE_RQ_LEVEL ?
+		 NBPF_CHAN_CFG_LVL | (NBPF_CHAN_CFG_AM & 0x200) : 0) |
+		chan->terminal;
+}
+
+static void nbpf_chan_prepare_default(struct nbpf_channel *chan)
+{
+	/* Don't output DMAACK */
+	chan->dmarq_cfg = NBPF_CHAN_CFG_AM & 0x400;
+	chan->terminal = 0;
+	chan->flags = 0;
+}
+
+static void nbpf_chan_configure(struct nbpf_channel *chan)
+{
+	/*
+	 * We assume that only the link mode and DMA request line configuration
+	 * have to be set in the configuration register manually. Dynamic
+	 * per-transfer configuration will be loaded from transfer descriptors.
+	 */
+	nbpf_chan_write(chan, NBPF_CHAN_CFG, NBPF_CHAN_CFG_DMS | chan->dmarq_cfg);
+}
+
+static u32 nbpf_xfer_ds(struct nbpf_device *nbpf, size_t size,
+			enum dma_transfer_direction direction)
+{
+	int max_burst = nbpf->config->buffer_size * 8;
+
+	if (nbpf->max_burst_mem_read || nbpf->max_burst_mem_write) {
+		switch (direction) {
+		case DMA_MEM_TO_MEM:
+			max_burst = min_not_zero(nbpf->max_burst_mem_read,
+						 nbpf->max_burst_mem_write);
+			break;
+		case DMA_MEM_TO_DEV:
+			if (nbpf->max_burst_mem_read)
+				max_burst = nbpf->max_burst_mem_read;
+			break;
+		case DMA_DEV_TO_MEM:
+			if (nbpf->max_burst_mem_write)
+				max_burst = nbpf->max_burst_mem_write;
+			break;
+		case DMA_DEV_TO_DEV:
+		default:
+			break;
+		}
+	}
+
+	/* Maximum supported bursts depend on the buffer size */
+	return min_t(int, __ffs(size), ilog2(max_burst));
+}
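+
+/*
+ * A worked example (assuming the NBPF4B16 configuration, buffer_size = 16,
+ * and no "max-burst-mem-*" limits): the default maximum burst is 16 * 8 =
+ * 128 bytes, so for a 64-byte chunk nbpf_xfer_ds() returns
+ * min(__ffs(64), ilog2(128)) = min(6, 7) = 6, i.e. 2^6 = 64-byte units -
+ * the encoding later placed into the SDS / DDS fields of NBPF_CHAN_CFG.
+ */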
+
+static size_t nbpf_xfer_size(struct nbpf_device *nbpf,
+			     enum dma_slave_buswidth width, u32 burst)
+{
+	size_t size;
+
+	if (!burst)
+		burst = 1;
+
+	switch (width) {
+	case DMA_SLAVE_BUSWIDTH_8_BYTES:
+		size = 8 * burst;
+		break;
+
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		size = 4 * burst;
+		break;
+
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		size = 2 * burst;
+		break;
+
+	default:
+		pr_warn("%s(): invalid bus width %u\n", __func__, width);
+		/* fall through */
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		size = burst;
+	}
+
+	return nbpf_xfer_ds(nbpf, size, DMA_TRANS_NONE);
+}
+
+/*
+ * We need a way to recognise slaves whose data is sent "raw" over the bus,
+ * i.e. where it isn't known in advance how many bytes will be received. For
+ * those the slave driver has to provide a "large enough" buffer and either
+ * read it when it is full, or detect that some data has arrived and, if no
+ * more data arrives within a timeout, receive what is already there. We want
+ * to handle such slaves specially, to keep an optimised mode for other users,
+ * for whom the amount of data is known in advance. So far there is no generic
+ * way to recognise such slaves, so we use a data-width check to distinguish
+ * between the SD host and the PL011 UART.
+ */
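+
+/*
+ * Concretely (see nbpf_prep_one() below): slave_{src,dst}_width holds the
+ * log2 of the configured slave register size, so a check like
+ * "chan->slave_src_width >= 3" treats only 64-bit (8-byte) wide slaves such
+ * as the SD host as burst-capable, while narrower ones like the PL011 UART
+ * are not.
+ */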
+
+static int nbpf_prep_one(struct nbpf_link_desc *ldesc,
+			 enum dma_transfer_direction direction,
+			 dma_addr_t src, dma_addr_t dst, size_t size, bool last)
+{
+	struct nbpf_link_reg *hwdesc = ldesc->hwdesc;
+	struct nbpf_desc *desc = ldesc->desc;
+	struct nbpf_channel *chan = desc->chan;
+	struct device *dev = chan->dma_chan.device->dev;
+	size_t mem_xfer, slave_xfer;
+	bool can_burst;
+
+	hwdesc->header = NBPF_HEADER_WBD | NBPF_HEADER_LV |
+		(last ? NBPF_HEADER_LE : 0);
+
+	hwdesc->src_addr = src;
+	hwdesc->dst_addr = dst;
+	hwdesc->transaction_size = size;
+
+	/*
+	 * set config: SAD, DAD, DDS, SDS, etc.
+	 * Note on transfer sizes: the DMAC can perform unaligned DMA transfers,
+	 * but it is important to have transaction size a multiple of both
+	 * receiver and transmitter transfer sizes. It is also possible to use
+	 * different RAM and device transfer sizes, and it does work well with
+	 * some devices, e.g. with V08R07S01E SD host controllers, which can use
+	 * 128 byte transfers. But this doesn't work with other devices,
+	 * especially when the transaction size is unknown. This is the case,
+	 * e.g. with serial drivers like amba-pl011.c. For reception it sets up
+	 * the transaction size of 4K and if fewer bytes are received, it
+	 * pauses DMA and reads out data received via DMA as well as those left
+	 * in the Rx FIFO. For this to work with the RAM side using burst
+	 * transfers we enable the SBE bit and terminate the transfer in our
+	 * .device_pause handler.
+	 */
+	mem_xfer = nbpf_xfer_ds(chan->nbpf, size, direction);
+
+	switch (direction) {
+	case DMA_DEV_TO_MEM:
+		can_burst = chan->slave_src_width >= 3;
+		slave_xfer = min(mem_xfer, can_burst ?
+				 chan->slave_src_burst : chan->slave_src_width);
+		/*
+		 * Is the slave narrower than 64 bits, i.e. isn't using the full
+		 * bus width and cannot use bursts?
+		 */
+		if (mem_xfer > chan->slave_src_burst && !can_burst)
+			mem_xfer = chan->slave_src_burst;
+		/* Device-to-RAM DMA is unreliable without REQD set */
+		hwdesc->config = NBPF_CHAN_CFG_SAD | (NBPF_CHAN_CFG_DDS & (mem_xfer << 16)) |
+			(NBPF_CHAN_CFG_SDS & (slave_xfer << 12)) | NBPF_CHAN_CFG_REQD |
+			NBPF_CHAN_CFG_SBE;
+		break;
+
+	case DMA_MEM_TO_DEV:
+		slave_xfer = min(mem_xfer, chan->slave_dst_width >= 3 ?
+				 chan->slave_dst_burst : chan->slave_dst_width);
+		hwdesc->config = NBPF_CHAN_CFG_DAD | (NBPF_CHAN_CFG_SDS & (mem_xfer << 12)) |
+			(NBPF_CHAN_CFG_DDS & (slave_xfer << 16)) | NBPF_CHAN_CFG_REQD;
+		break;
+
+	case DMA_MEM_TO_MEM:
+		hwdesc->config = NBPF_CHAN_CFG_TCM | NBPF_CHAN_CFG_TM |
+			(NBPF_CHAN_CFG_SDS & (mem_xfer << 12)) |
+			(NBPF_CHAN_CFG_DDS & (mem_xfer << 16));
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	hwdesc->config |= chan->dmarq_cfg | (last ? 0 : NBPF_CHAN_CFG_DEM) |
+		NBPF_CHAN_CFG_DMS;
+
+	dev_dbg(dev, "%s(): desc @ %pad: hdr 0x%x, cfg 0x%x, %zu @ %pad -> %pad\n",
+		__func__, &ldesc->hwdesc_dma_addr, hwdesc->header,
+		hwdesc->config, size, &src, &dst);
+
+	dma_sync_single_for_device(dev, ldesc->hwdesc_dma_addr, sizeof(*hwdesc),
+				   DMA_TO_DEVICE);
+
+	return 0;
+}
+
+static size_t nbpf_bytes_left(struct nbpf_channel *chan)
+{
+	return nbpf_chan_read(chan, NBPF_CHAN_CUR_TR_BYTE);
+}
+
+static void nbpf_configure(struct nbpf_device *nbpf)
+{
+	nbpf_write(nbpf, NBPF_CTRL, NBPF_CTRL_LVINT);
+}
+
+/*		Generic part			*/
+
+/* DMA ENGINE functions */
+static void nbpf_issue_pending(struct dma_chan *dchan)
+{
+	struct nbpf_channel *chan = nbpf_to_chan(dchan);
+	unsigned long flags;
+
+	dev_dbg(dchan->device->dev, "Entry %s()\n", __func__);
+
+	spin_lock_irqsave(&chan->lock, flags);
+	if (list_empty(&chan->queued))
+		goto unlock;
+
+	list_splice_tail_init(&chan->queued, &chan->active);
+
+	if (!chan->running) {
+		struct nbpf_desc *desc = list_first_entry(&chan->active,
+						struct nbpf_desc, node);
+		if (!nbpf_start(desc))
+			chan->running = desc;
+	}
+
+unlock:
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+static enum dma_status nbpf_tx_status(struct dma_chan *dchan,
+		dma_cookie_t cookie, struct dma_tx_state *state)
+{
+	struct nbpf_channel *chan = nbpf_to_chan(dchan);
+	enum dma_status status = dma_cookie_status(dchan, cookie, state);
+
+	if (state) {
+		dma_cookie_t running;
+		unsigned long flags;
+
+		spin_lock_irqsave(&chan->lock, flags);
+		running = chan->running ? chan->running->async_tx.cookie : -EINVAL;
+
+		if (cookie == running) {
+			state->residue = nbpf_bytes_left(chan);
+			dev_dbg(dchan->device->dev, "%s(): residue %u\n", __func__,
+				state->residue);
+		} else if (status == DMA_IN_PROGRESS) {
+			struct nbpf_desc *desc;
+			bool found = false;
+
+			list_for_each_entry(desc, &chan->active, node)
+				if (desc->async_tx.cookie == cookie) {
+					found = true;
+					break;
+				}
+
+			if (!found)
+				list_for_each_entry(desc, &chan->queued, node)
+					if (desc->async_tx.cookie == cookie) {
+						found = true;
+						break;
+
+					}
+
+			state->residue = found ? desc->length : 0;
+		}
+
+		spin_unlock_irqrestore(&chan->lock, flags);
+	}
+
+	if (chan->paused)
+		status = DMA_PAUSED;
+
+	return status;
+}
+
+static dma_cookie_t nbpf_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct nbpf_desc *desc = container_of(tx, struct nbpf_desc, async_tx);
+	struct nbpf_channel *chan = desc->chan;
+	unsigned long flags;
+	dma_cookie_t cookie;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	cookie = dma_cookie_assign(tx);
+	list_add_tail(&desc->node, &chan->queued);
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	dev_dbg(chan->dma_chan.device->dev, "Entry %s(%d)\n", __func__, cookie);
+
+	return cookie;
+}
+
+static int nbpf_desc_page_alloc(struct nbpf_channel *chan)
+{
+	struct dma_chan *dchan = &chan->dma_chan;
+	struct nbpf_desc_page *dpage = (void *)get_zeroed_page(GFP_KERNEL | GFP_DMA);
+	struct nbpf_link_desc *ldesc;
+	struct nbpf_link_reg *hwdesc;
+	struct nbpf_desc *desc;
+	LIST_HEAD(head);
+	LIST_HEAD(lhead);
+	int i;
+	struct device *dev = dchan->device->dev;
+
+	if (!dpage)
+		return -ENOMEM;
+
+	dev_dbg(dev, "%s(): alloc %lu descriptors, %lu segments, total alloc %zu\n",
+		__func__, NBPF_DESCS_PER_PAGE, NBPF_SEGMENTS_PER_PAGE, sizeof(*dpage));
+
+	for (i = 0, ldesc = dpage->ldesc, hwdesc = dpage->hwdesc;
+	     i < ARRAY_SIZE(dpage->ldesc);
+	     i++, ldesc++, hwdesc++) {
+		ldesc->hwdesc = hwdesc;
+		list_add_tail(&ldesc->node, &lhead);
+		ldesc->hwdesc_dma_addr = dma_map_single(dchan->device->dev,
+					hwdesc, sizeof(*hwdesc), DMA_TO_DEVICE);
+
+		dev_dbg(dev, "%s(): mapped 0x%p to %pad\n", __func__,
+			hwdesc, &ldesc->hwdesc_dma_addr);
+	}
+
+	for (i = 0, desc = dpage->desc;
+	     i < ARRAY_SIZE(dpage->desc);
+	     i++, desc++) {
+		dma_async_tx_descriptor_init(&desc->async_tx, dchan);
+		desc->async_tx.tx_submit = nbpf_tx_submit;
+		desc->chan = chan;
+		INIT_LIST_HEAD(&desc->sg);
+		list_add_tail(&desc->node, &head);
+	}
+
+	/*
+	 * This function cannot be called from interrupt context, so no need to
+	 * save flags
+	 */
+	spin_lock_irq(&chan->lock);
+	list_splice_tail(&lhead, &chan->free_links);
+	list_splice_tail(&head, &chan->free);
+	list_add(&dpage->node, &chan->desc_page);
+	spin_unlock_irq(&chan->lock);
+
+	return ARRAY_SIZE(dpage->desc);
+}
+
+static void nbpf_desc_put(struct nbpf_desc *desc)
+{
+	struct nbpf_channel *chan = desc->chan;
+	struct nbpf_link_desc *ldesc, *tmp;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	list_for_each_entry_safe(ldesc, tmp, &desc->sg, node)
+		list_move(&ldesc->node, &chan->free_links);
+
+	list_add(&desc->node, &chan->free);
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+static void nbpf_scan_acked(struct nbpf_channel *chan)
+{
+	struct nbpf_desc *desc, *tmp;
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&chan->lock, flags);
+	list_for_each_entry_safe(desc, tmp, &chan->done, node)
+		if (async_tx_test_ack(&desc->async_tx) && desc->user_wait) {
+			list_move(&desc->node, &head);
+			desc->user_wait = false;
+		}
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	list_for_each_entry_safe(desc, tmp, &head, node) {
+		list_del(&desc->node);
+		nbpf_desc_put(desc);
+	}
+}
+
+/*
+ * We have to allocate descriptors with the channel lock dropped. This means
+ * that before we re-acquire the lock, buffers can already have been taken by
+ * others, so we have to re-check after re-acquiring the lock and possibly
+ * retry, if buffers are gone again.
+ */
+static struct nbpf_desc *nbpf_desc_get(struct nbpf_channel *chan, size_t len)
+{
+	struct nbpf_desc *desc = NULL;
+	struct nbpf_link_desc *ldesc, *prev = NULL;
+
+	nbpf_scan_acked(chan);
+
+	spin_lock_irq(&chan->lock);
+
+	do {
+		int i = 0, ret;
+
+		if (list_empty(&chan->free)) {
+			/* No more free descriptors */
+			spin_unlock_irq(&chan->lock);
+			ret = nbpf_desc_page_alloc(chan);
+			if (ret < 0)
+				return NULL;
+			spin_lock_irq(&chan->lock);
+			continue;
+		}
+		desc = list_first_entry(&chan->free, struct nbpf_desc, node);
+		list_del(&desc->node);
+
+		do {
+			if (list_empty(&chan->free_links)) {
+				/* No more free link descriptors */
+				spin_unlock_irq(&chan->lock);
+				ret = nbpf_desc_page_alloc(chan);
+				if (ret < 0) {
+					nbpf_desc_put(desc);
+					return NULL;
+				}
+				spin_lock_irq(&chan->lock);
+				continue;
+			}
+
+			ldesc = list_first_entry(&chan->free_links,
+						 struct nbpf_link_desc, node);
+			ldesc->desc = desc;
+			if (prev)
+				prev->hwdesc->next = (u32)ldesc->hwdesc_dma_addr;
+
+			prev = ldesc;
+			list_move_tail(&ldesc->node, &desc->sg);
+
+			i++;
+		} while (i < len);
+	} while (!desc);
+
+	prev->hwdesc->next = 0;
+
+	spin_unlock_irq(&chan->lock);
+
+	return desc;
+}
+
+static void nbpf_chan_idle(struct nbpf_channel *chan)
+{
+	struct nbpf_desc *desc, *tmp;
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	list_splice_init(&chan->done, &head);
+	list_splice_init(&chan->active, &head);
+	list_splice_init(&chan->queued, &head);
+
+	chan->running = NULL;
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	list_for_each_entry_safe(desc, tmp, &head, node) {
+		dev_dbg(chan->nbpf->dma_dev.dev, "%s(): force-free desc %p cookie %d\n",
+			__func__, desc, desc->async_tx.cookie);
+		list_del(&desc->node);
+		nbpf_desc_put(desc);
+	}
+}
+
+static int nbpf_pause(struct dma_chan *dchan)
+{
+	struct nbpf_channel *chan = nbpf_to_chan(dchan);
+
+	dev_dbg(dchan->device->dev, "Entry %s\n", __func__);
+
+	chan->paused = true;
+	nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_SETSUS);
+	/* See comment in nbpf_prep_one() */
+	nbpf_chan_write(chan, NBPF_CHAN_CTRL, NBPF_CHAN_CTRL_CLREN);
+
+	return 0;
+}
+
+static int nbpf_terminate_all(struct dma_chan *dchan)
+{
+	struct nbpf_channel *chan = nbpf_to_chan(dchan);
+
+	dev_dbg(dchan->device->dev, "Entry %s\n", __func__);
+	dev_dbg(dchan->device->dev, "Terminating\n");
+
+	nbpf_chan_halt(chan);
+	nbpf_chan_idle(chan);
+
+	return 0;
+}
+
+static int nbpf_config(struct dma_chan *dchan,
+		       struct dma_slave_config *config)
+{
+	struct nbpf_channel *chan = nbpf_to_chan(dchan);
+
+	dev_dbg(dchan->device->dev, "Entry %s\n", __func__);
+
+	/*
+	 * We could check config->slave_id to match chan->terminal here,
+	 * but with DT they would be coming from the same source, so
+	 * such a check would be superfluous
+	 */
+
+	chan->slave_dst_addr = config->dst_addr;
+	chan->slave_dst_width = nbpf_xfer_size(chan->nbpf,
+					       config->dst_addr_width, 1);
+	chan->slave_dst_burst = nbpf_xfer_size(chan->nbpf,
+					       config->dst_addr_width,
+					       config->dst_maxburst);
+	chan->slave_src_addr = config->src_addr;
+	chan->slave_src_width = nbpf_xfer_size(chan->nbpf,
+					       config->src_addr_width, 1);
+	chan->slave_src_burst = nbpf_xfer_size(chan->nbpf,
+					       config->src_addr_width,
+					       config->src_maxburst);
+
+	return 0;
+}
+
+static struct dma_async_tx_descriptor *nbpf_prep_sg(struct nbpf_channel *chan,
+		struct scatterlist *src_sg, struct scatterlist *dst_sg,
+		size_t len, enum dma_transfer_direction direction,
+		unsigned long flags)
+{
+	struct nbpf_link_desc *ldesc;
+	struct scatterlist *mem_sg;
+	struct nbpf_desc *desc;
+	bool inc_src, inc_dst;
+	size_t data_len = 0;
+	int i = 0;
+
+	switch (direction) {
+	case DMA_DEV_TO_MEM:
+		mem_sg = dst_sg;
+		inc_src = false;
+		inc_dst = true;
+		break;
+
+	case DMA_MEM_TO_DEV:
+		mem_sg = src_sg;
+		inc_src = true;
+		inc_dst = false;
+		break;
+
+	default:
+	case DMA_MEM_TO_MEM:
+		mem_sg = src_sg;
+		inc_src = true;
+		inc_dst = true;
+	}
+
+	desc = nbpf_desc_get(chan, len);
+	if (!desc)
+		return NULL;
+
+	desc->async_tx.flags = flags;
+	desc->async_tx.cookie = -EBUSY;
+	desc->user_wait = false;
+
+	/*
+	 * This is a private descriptor list, and we own the descriptor. No need
+	 * to lock.
+	 */
+	list_for_each_entry(ldesc, &desc->sg, node) {
+		int ret = nbpf_prep_one(ldesc, direction,
+					sg_dma_address(src_sg),
+					sg_dma_address(dst_sg),
+					sg_dma_len(mem_sg),
+					i == len - 1);
+		if (ret < 0) {
+			nbpf_desc_put(desc);
+			return NULL;
+		}
+		data_len += sg_dma_len(mem_sg);
+		if (inc_src)
+			src_sg = sg_next(src_sg);
+		if (inc_dst)
+			dst_sg = sg_next(dst_sg);
+		mem_sg = direction == DMA_DEV_TO_MEM ? dst_sg : src_sg;
+		i++;
+	}
+
+	desc->length = data_len;
+
+	/* The user has to return the descriptor to us ASAP via .tx_submit() */
+	return &desc->async_tx;
+}
+
+static struct dma_async_tx_descriptor *nbpf_prep_memcpy(
+	struct dma_chan *dchan, dma_addr_t dst, dma_addr_t src,
+	size_t len, unsigned long flags)
+{
+	struct nbpf_channel *chan = nbpf_to_chan(dchan);
+	struct scatterlist dst_sg;
+	struct scatterlist src_sg;
+
+	sg_init_table(&dst_sg, 1);
+	sg_init_table(&src_sg, 1);
+
+	sg_dma_address(&dst_sg) = dst;
+	sg_dma_address(&src_sg) = src;
+
+	sg_dma_len(&dst_sg) = len;
+	sg_dma_len(&src_sg) = len;
+
+	dev_dbg(dchan->device->dev, "%s(): %zu @ %pad -> %pad\n",
+		__func__, len, &src, &dst);
+
+	return nbpf_prep_sg(chan, &src_sg, &dst_sg, 1,
+			    DMA_MEM_TO_MEM, flags);
+}
+
+static struct dma_async_tx_descriptor *nbpf_prep_slave_sg(
+	struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len,
+	enum dma_transfer_direction direction, unsigned long flags, void *context)
+{
+	struct nbpf_channel *chan = nbpf_to_chan(dchan);
+	struct scatterlist slave_sg;
+
+	dev_dbg(dchan->device->dev, "Entry %s()\n", __func__);
+
+	sg_init_table(&slave_sg, 1);
+
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		sg_dma_address(&slave_sg) = chan->slave_dst_addr;
+		return nbpf_prep_sg(chan, sgl, &slave_sg, sg_len,
+				    direction, flags);
+
+	case DMA_DEV_TO_MEM:
+		sg_dma_address(&slave_sg) = chan->slave_src_addr;
+		return nbpf_prep_sg(chan, &slave_sg, sgl, sg_len,
+				    direction, flags);
+
+	default:
+		return NULL;
+	}
+}
+
+static int nbpf_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct nbpf_channel *chan = nbpf_to_chan(dchan);
+	int ret;
+
+	INIT_LIST_HEAD(&chan->free);
+	INIT_LIST_HEAD(&chan->free_links);
+	INIT_LIST_HEAD(&chan->queued);
+	INIT_LIST_HEAD(&chan->active);
+	INIT_LIST_HEAD(&chan->done);
+
+	ret = nbpf_desc_page_alloc(chan);
+	if (ret < 0)
+		return ret;
+
+	dev_dbg(dchan->device->dev, "Entry %s(): terminal %u\n", __func__,
+		chan->terminal);
+
+	nbpf_chan_configure(chan);
+
+	return ret;
+}
+
+static void nbpf_free_chan_resources(struct dma_chan *dchan)
+{
+	struct nbpf_channel *chan = nbpf_to_chan(dchan);
+	struct nbpf_desc_page *dpage, *tmp;
+
+	dev_dbg(dchan->device->dev, "Entry %s()\n", __func__);
+
+	nbpf_chan_halt(chan);
+	nbpf_chan_idle(chan);
+	/* Clean up in case a channel is re-used for MEMCPY after slave DMA */
+	nbpf_chan_prepare_default(chan);
+
+	list_for_each_entry_safe(dpage, tmp, &chan->desc_page, node) {
+		struct nbpf_link_desc *ldesc;
+		int i;
+		list_del(&dpage->node);
+		for (i = 0, ldesc = dpage->ldesc;
+		     i < ARRAY_SIZE(dpage->ldesc);
+		     i++, ldesc++)
+			dma_unmap_single(dchan->device->dev, ldesc->hwdesc_dma_addr,
+					 sizeof(*ldesc->hwdesc), DMA_TO_DEVICE);
+		free_page((unsigned long)dpage);
+	}
+}
+
+static struct dma_chan *nbpf_of_xlate(struct of_phandle_args *dma_spec,
+				      struct of_dma *ofdma)
+{
+	struct nbpf_device *nbpf = ofdma->of_dma_data;
+	struct dma_chan *dchan;
+	struct nbpf_channel *chan;
+
+	if (dma_spec->args_count != 2)
+		return NULL;
+
+	dchan = dma_get_any_slave_channel(&nbpf->dma_dev);
+	if (!dchan)
+		return NULL;
+
+	dev_dbg(dchan->device->dev, "Entry %s(%s)\n", __func__,
+		dma_spec->np->name);
+
+	chan = nbpf_to_chan(dchan);
+
+	chan->terminal = dma_spec->args[0];
+	chan->flags = dma_spec->args[1];
+
+	nbpf_chan_prepare(chan);
+	nbpf_chan_configure(chan);
+
+	return dchan;
+}
+
+static void nbpf_chan_tasklet(unsigned long data)
+{
+	struct nbpf_channel *chan = (struct nbpf_channel *)data;
+	struct nbpf_desc *desc, *tmp;
+	struct dmaengine_desc_callback cb;
+
+	while (!list_empty(&chan->done)) {
+		bool found = false, must_put, recycling = false;
+
+		spin_lock_irq(&chan->lock);
+
+		list_for_each_entry_safe(desc, tmp, &chan->done, node) {
+			if (!desc->user_wait) {
+				/* Newly completed descriptor, have to process */
+				found = true;
+				break;
+			} else if (async_tx_test_ack(&desc->async_tx)) {
+				/*
+				 * This descriptor was waiting for a user ACK,
+				 * it can be recycled now.
+				 */
+				list_del(&desc->node);
+				spin_unlock_irq(&chan->lock);
+				nbpf_desc_put(desc);
+				recycling = true;
+				break;
+			}
+		}
+
+		if (recycling)
+			continue;
+
+		if (!found) {
+			/* This can happen if TERMINATE_ALL has been called */
+			spin_unlock_irq(&chan->lock);
+			break;
+		}
+
+		dma_cookie_complete(&desc->async_tx);
+
+		/*
+		 * Once the lock is released we may no longer dereference desc,
+		 * as it may still be on the "done" list
+		 */
+		if (async_tx_test_ack(&desc->async_tx)) {
+			list_del(&desc->node);
+			must_put = true;
+		} else {
+			desc->user_wait = true;
+			must_put = false;
+		}
+
+		dmaengine_desc_get_callback(&desc->async_tx, &cb);
+
+		/* ack and callback completed descriptor */
+		spin_unlock_irq(&chan->lock);
+
+		dmaengine_desc_callback_invoke(&cb, NULL);
+
+		if (must_put)
+			nbpf_desc_put(desc);
+	}
+}
+
+static irqreturn_t nbpf_chan_irq(int irq, void *dev)
+{
+	struct nbpf_channel *chan = dev;
+	bool done = nbpf_status_get(chan);
+	struct nbpf_desc *desc;
+	irqreturn_t ret;
+	bool bh = false;
+
+	if (!done)
+		return IRQ_NONE;
+
+	nbpf_status_ack(chan);
+
+	dev_dbg(&chan->dma_chan.dev->device, "%s()\n", __func__);
+
+	spin_lock(&chan->lock);
+	desc = chan->running;
+	if (WARN_ON(!desc)) {
+		ret = IRQ_NONE;
+		goto unlock;
+	} else {
+		ret = IRQ_HANDLED;
+		bh = true;
+	}
+
+	list_move_tail(&desc->node, &chan->done);
+	chan->running = NULL;
+
+	if (!list_empty(&chan->active)) {
+		desc = list_first_entry(&chan->active,
+					struct nbpf_desc, node);
+		if (!nbpf_start(desc))
+			chan->running = desc;
+	}
+
+unlock:
+	spin_unlock(&chan->lock);
+
+	if (bh)
+		tasklet_schedule(&chan->tasklet);
+
+	return ret;
+}
+
+static irqreturn_t nbpf_err_irq(int irq, void *dev)
+{
+	struct nbpf_device *nbpf = dev;
+	u32 error = nbpf_error_get(nbpf);
+
+	dev_warn(nbpf->dma_dev.dev, "DMA error IRQ %u\n", irq);
+
+	if (!error)
+		return IRQ_NONE;
+
+	do {
+		struct nbpf_channel *chan = nbpf_error_get_channel(nbpf, error);
+		/* On error: abort all queued transfers, no callback */
+		nbpf_error_clear(chan);
+		nbpf_chan_idle(chan);
+		error = nbpf_error_get(nbpf);
+	} while (error);
+
+	return IRQ_HANDLED;
+}
+
+static int nbpf_chan_probe(struct nbpf_device *nbpf, int n)
+{
+	struct dma_device *dma_dev = &nbpf->dma_dev;
+	struct nbpf_channel *chan = nbpf->chan + n;
+	int ret;
+
+	chan->nbpf = nbpf;
+	chan->base = nbpf->base + NBPF_REG_CHAN_OFFSET + NBPF_REG_CHAN_SIZE * n;
+	INIT_LIST_HEAD(&chan->desc_page);
+	spin_lock_init(&chan->lock);
+	chan->dma_chan.device = dma_dev;
+	dma_cookie_init(&chan->dma_chan);
+	nbpf_chan_prepare_default(chan);
+
+	dev_dbg(dma_dev->dev, "%s(): channel %d: -> %p\n", __func__, n, chan->base);
+
+	snprintf(chan->name, sizeof(chan->name), "nbpf %d", n);
+
+	tasklet_init(&chan->tasklet, nbpf_chan_tasklet, (unsigned long)chan);
+	ret = devm_request_irq(dma_dev->dev, chan->irq,
+			nbpf_chan_irq, IRQF_SHARED,
+			chan->name, chan);
+	if (ret < 0)
+		return ret;
+
+	/* Add the channel to DMA device channel list */
+	list_add_tail(&chan->dma_chan.device_node,
+		      &dma_dev->channels);
+
+	return 0;
+}
+
+static const struct of_device_id nbpf_match[] = {
+	{.compatible = "renesas,nbpfaxi64dmac1b4",	.data = &nbpf_cfg[NBPF1B4]},
+	{.compatible = "renesas,nbpfaxi64dmac1b8",	.data = &nbpf_cfg[NBPF1B8]},
+	{.compatible = "renesas,nbpfaxi64dmac1b16",	.data = &nbpf_cfg[NBPF1B16]},
+	{.compatible = "renesas,nbpfaxi64dmac4b4",	.data = &nbpf_cfg[NBPF4B4]},
+	{.compatible = "renesas,nbpfaxi64dmac4b8",	.data = &nbpf_cfg[NBPF4B8]},
+	{.compatible = "renesas,nbpfaxi64dmac4b16",	.data = &nbpf_cfg[NBPF4B16]},
+	{.compatible = "renesas,nbpfaxi64dmac8b4",	.data = &nbpf_cfg[NBPF8B4]},
+	{.compatible = "renesas,nbpfaxi64dmac8b8",	.data = &nbpf_cfg[NBPF8B8]},
+	{.compatible = "renesas,nbpfaxi64dmac8b16",	.data = &nbpf_cfg[NBPF8B16]},
+	{}
+};
+MODULE_DEVICE_TABLE(of, nbpf_match);
+
+static int nbpf_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct nbpf_device *nbpf;
+	struct dma_device *dma_dev;
+	struct resource *iomem, *irq_res;
+	const struct nbpf_config *cfg;
+	int num_channels;
+	int ret, irq, eirq, i;
+	int irqbuf[9] /* maximum 8 channels + error IRQ */;
+	unsigned int irqs = 0;
+
+	BUILD_BUG_ON(sizeof(struct nbpf_desc_page) > PAGE_SIZE);
+
+	/* DT only */
+	if (!np)
+		return -ENODEV;
+
+	cfg = of_device_get_match_data(dev);
+	num_channels = cfg->num_channels;
+
+	nbpf = devm_kzalloc(dev, struct_size(nbpf, chan, num_channels),
+			    GFP_KERNEL);
+	if (!nbpf)
+		return -ENOMEM;
+
+	dma_dev = &nbpf->dma_dev;
+	dma_dev->dev = dev;
+
+	iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	nbpf->base = devm_ioremap_resource(dev, iomem);
+	if (IS_ERR(nbpf->base))
+		return PTR_ERR(nbpf->base);
+
+	nbpf->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(nbpf->clk))
+		return PTR_ERR(nbpf->clk);
+
+	of_property_read_u32(np, "max-burst-mem-read",
+			     &nbpf->max_burst_mem_read);
+	of_property_read_u32(np, "max-burst-mem-write",
+			     &nbpf->max_burst_mem_write);
+
+	nbpf->config = cfg;
+
+	for (i = 0; irqs < ARRAY_SIZE(irqbuf); i++) {
+		irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+		if (!irq_res)
+			break;
+
+		for (irq = irq_res->start; irq <= irq_res->end;
+		     irq++, irqs++)
+			irqbuf[irqs] = irq;
+	}
+
+	/*
+	 * 3 IRQ resource schemes are supported:
+	 * 1. 1 shared IRQ for error and all channels
+	 * 2. 2 IRQs: one for error and one shared for all channels
+	 * 3. 1 IRQ for error and an own IRQ for each channel
+	 */
+	if (irqs != 1 && irqs != 2 && irqs != num_channels + 1)
+		return -ENXIO;
+
+	if (irqs == 1) {
+		eirq = irqbuf[0];
+
+		for (i = 0; i < num_channels; i++)
+			nbpf->chan[i].irq = irqbuf[0];
+	} else {
+		eirq = platform_get_irq_byname(pdev, "error");
+		if (eirq < 0)
+			return eirq;
+
+		if (irqs == num_channels + 1) {
+			struct nbpf_channel *chan;
+
+			for (i = 0, chan = nbpf->chan; i <= num_channels;
+			     i++, chan++) {
+				/* Skip the error IRQ */
+				if (irqbuf[i] == eirq)
+					i++;
+				chan->irq = irqbuf[i];
+			}
+
+			if (chan != nbpf->chan + num_channels)
+				return -EINVAL;
+		} else {
+			/* 2 IRQs and more than one channel */
+			if (irqbuf[0] == eirq)
+				irq = irqbuf[1];
+			else
+				irq = irqbuf[0];
+
+			for (i = 0; i < num_channels; i++)
+				nbpf->chan[i].irq = irq;
+		}
+	}
+
+	ret = devm_request_irq(dev, eirq, nbpf_err_irq,
+			       IRQF_SHARED, "dma error", nbpf);
+	if (ret < 0)
+		return ret;
+	nbpf->eirq = eirq;
+
+	INIT_LIST_HEAD(&dma_dev->channels);
+
+	/* Create DMA Channel */
+	for (i = 0; i < num_channels; i++) {
+		ret = nbpf_chan_probe(nbpf, i);
+		if (ret < 0)
+			return ret;
+	}
+
+	dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+	dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
+	dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);
+
+	/* Common and MEMCPY operations */
+	dma_dev->device_alloc_chan_resources
+		= nbpf_alloc_chan_resources;
+	dma_dev->device_free_chan_resources = nbpf_free_chan_resources;
+	dma_dev->device_prep_dma_memcpy = nbpf_prep_memcpy;
+	dma_dev->device_tx_status = nbpf_tx_status;
+	dma_dev->device_issue_pending = nbpf_issue_pending;
+
+	/*
+	 * If we drop support for unaligned MEMCPY buffer addresses and / or
+	 * lengths by setting
+	 * dma_dev->copy_align = 4;
+	 * then we can set transfer length to 4 bytes in nbpf_prep_one() for
+	 * DMA_MEM_TO_MEM
+	 */
+
+	/* Compulsory for DMA_SLAVE fields */
+	dma_dev->device_prep_slave_sg = nbpf_prep_slave_sg;
+	dma_dev->device_config = nbpf_config;
+	dma_dev->device_pause = nbpf_pause;
+	dma_dev->device_terminate_all = nbpf_terminate_all;
+
+	dma_dev->src_addr_widths = NBPF_DMA_BUSWIDTHS;
+	dma_dev->dst_addr_widths = NBPF_DMA_BUSWIDTHS;
+	dma_dev->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+
+	platform_set_drvdata(pdev, nbpf);
+
+	ret = clk_prepare_enable(nbpf->clk);
+	if (ret < 0)
+		return ret;
+
+	nbpf_configure(nbpf);
+
+	ret = dma_async_device_register(dma_dev);
+	if (ret < 0)
+		goto e_clk_off;
+
+	ret = of_dma_controller_register(np, nbpf_of_xlate, nbpf);
+	if (ret < 0)
+		goto e_dma_dev_unreg;
+
+	return 0;
+
+e_dma_dev_unreg:
+	dma_async_device_unregister(dma_dev);
+e_clk_off:
+	clk_disable_unprepare(nbpf->clk);
+
+	return ret;
+}
+
+static int nbpf_remove(struct platform_device *pdev)
+{
+	struct nbpf_device *nbpf = platform_get_drvdata(pdev);
+	int i;
+
+	devm_free_irq(&pdev->dev, nbpf->eirq, nbpf);
+
+	for (i = 0; i < nbpf->config->num_channels; i++) {
+		struct nbpf_channel *chan = nbpf->chan + i;
+
+		devm_free_irq(&pdev->dev, chan->irq, chan);
+
+		tasklet_kill(&chan->tasklet);
+	}
+
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&nbpf->dma_dev);
+	clk_disable_unprepare(nbpf->clk);
+
+	return 0;
+}
+
+static const struct platform_device_id nbpf_ids[] = {
+	{"nbpfaxi64dmac1b4",	(kernel_ulong_t)&nbpf_cfg[NBPF1B4]},
+	{"nbpfaxi64dmac1b8",	(kernel_ulong_t)&nbpf_cfg[NBPF1B8]},
+	{"nbpfaxi64dmac1b16",	(kernel_ulong_t)&nbpf_cfg[NBPF1B16]},
+	{"nbpfaxi64dmac4b4",	(kernel_ulong_t)&nbpf_cfg[NBPF4B4]},
+	{"nbpfaxi64dmac4b8",	(kernel_ulong_t)&nbpf_cfg[NBPF4B8]},
+	{"nbpfaxi64dmac4b16",	(kernel_ulong_t)&nbpf_cfg[NBPF4B16]},
+	{"nbpfaxi64dmac8b4",	(kernel_ulong_t)&nbpf_cfg[NBPF8B4]},
+	{"nbpfaxi64dmac8b8",	(kernel_ulong_t)&nbpf_cfg[NBPF8B8]},
+	{"nbpfaxi64dmac8b16",	(kernel_ulong_t)&nbpf_cfg[NBPF8B16]},
+	{},
+};
+MODULE_DEVICE_TABLE(platform, nbpf_ids);
+
+#ifdef CONFIG_PM
+static int nbpf_runtime_suspend(struct device *dev)
+{
+	struct nbpf_device *nbpf = platform_get_drvdata(to_platform_device(dev));
+	clk_disable_unprepare(nbpf->clk);
+	return 0;
+}
+
+static int nbpf_runtime_resume(struct device *dev)
+{
+	struct nbpf_device *nbpf = platform_get_drvdata(to_platform_device(dev));
+	return clk_prepare_enable(nbpf->clk);
+}
+#endif
+
+static const struct dev_pm_ops nbpf_pm_ops = {
+	SET_RUNTIME_PM_OPS(nbpf_runtime_suspend, nbpf_runtime_resume, NULL)
+};
+
+static struct platform_driver nbpf_driver = {
+	.driver = {
+		.name = "dma-nbpf",
+		.of_match_table = nbpf_match,
+		.pm = &nbpf_pm_ops,
+	},
+	.id_table = nbpf_ids,
+	.probe = nbpf_probe,
+	.remove = nbpf_remove,
+};
+
+module_platform_driver(nbpf_driver);
+
+MODULE_AUTHOR("Guennadi Liakhovetski <g.liakhovetski@gmx.de>");
+MODULE_DESCRIPTION("dmaengine driver for NBPFAXI64* DMACs");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c
new file mode 100644
index 0000000..91fd395
--- /dev/null
+++ b/drivers/dma/of-dma.c
@@ -0,0 +1,357 @@
+/*
+ * Device tree helpers for DMA request / controller
+ *
+ * Based on of_gpio.c
+ *
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+
+static LIST_HEAD(of_dma_list);
+static DEFINE_MUTEX(of_dma_lock);
+
+/**
+ * of_dma_find_controller - Get a DMA controller in DT DMA helpers list
+ * @dma_spec:	pointer to DMA specifier as found in the device tree
+ *
+ * Finds a DMA controller with a matching device node and number of dma cells
+ * in a list of registered DMA controllers. If a match is found a valid pointer
+ * to the DMA data stored is returned. A NULL pointer is returned if no match is
+ * found.
+ */
+static struct of_dma *of_dma_find_controller(struct of_phandle_args *dma_spec)
+{
+	struct of_dma *ofdma;
+
+	list_for_each_entry(ofdma, &of_dma_list, of_dma_controllers)
+		if (ofdma->of_node == dma_spec->np)
+			return ofdma;
+
+	pr_debug("%s: can't find DMA controller %pOF\n", __func__,
+		 dma_spec->np);
+
+	return NULL;
+}
+
+/**
+ * of_dma_router_xlate - translation function for router devices
+ * @dma_spec:	pointer to DMA specifier as found in the device tree
+ * @of_dma:	pointer to DMA controller data (router information)
+ *
+ * The function creates a new dma_spec to be passed to the router driver's
+ * of_dma_route_allocate() function to prepare a dma_spec which will be used
+ * to request a channel from the real DMA controller.
+ */
+static struct dma_chan *of_dma_router_xlate(struct of_phandle_args *dma_spec,
+					    struct of_dma *ofdma)
+{
+	struct dma_chan		*chan;
+	struct of_dma		*ofdma_target;
+	struct of_phandle_args	dma_spec_target;
+	void			*route_data;
+
+	/* translate the request for the real DMA controller */
+	memcpy(&dma_spec_target, dma_spec, sizeof(dma_spec_target));
+	route_data = ofdma->of_dma_route_allocate(&dma_spec_target, ofdma);
+	if (IS_ERR(route_data))
+		return NULL;
+
+	ofdma_target = of_dma_find_controller(&dma_spec_target);
+	if (!ofdma_target)
+		return NULL;
+
+	chan = ofdma_target->of_dma_xlate(&dma_spec_target, ofdma_target);
+	if (chan) {
+		chan->router = ofdma->dma_router;
+		chan->route_data = route_data;
+	} else {
+		ofdma->dma_router->route_free(ofdma->dma_router->dev,
+					      route_data);
+	}
+
+	/*
+	 * Need to put the node back since the ofdma->of_dma_route_allocate
+	 * has taken it for generating the new, translated dma_spec
+	 */
+	of_node_put(dma_spec_target.np);
+	return chan;
+}
+
+/**
+ * of_dma_controller_register - Register a DMA controller to DT DMA helpers
+ * @np:			device node of DMA controller
+ * @of_dma_xlate:	translation function which converts a phandle
+ *			arguments list into a dma_chan structure
+ * @data:		pointer to controller specific data to be used by
+ *			translation function
+ *
+ * Returns 0 on success or appropriate errno value on error.
+ *
+ * Allocated memory should be freed with appropriate of_dma_controller_free()
+ * call.
+ */
+int of_dma_controller_register(struct device_node *np,
+				struct dma_chan *(*of_dma_xlate)
+				(struct of_phandle_args *, struct of_dma *),
+				void *data)
+{
+	struct of_dma	*ofdma;
+
+	if (!np || !of_dma_xlate) {
+		pr_err("%s: not enough information provided\n", __func__);
+		return -EINVAL;
+	}
+
+	ofdma = kzalloc(sizeof(*ofdma), GFP_KERNEL);
+	if (!ofdma)
+		return -ENOMEM;
+
+	ofdma->of_node = np;
+	ofdma->of_dma_xlate = of_dma_xlate;
+	ofdma->of_dma_data = data;
+
+	/* Now queue of_dma controller structure in list */
+	mutex_lock(&of_dma_lock);
+	list_add_tail(&ofdma->of_dma_controllers, &of_dma_list);
+	mutex_unlock(&of_dma_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(of_dma_controller_register);
+
+/**
+ * of_dma_controller_free - Remove a DMA controller from DT DMA helpers list
+ * @np:		device node of DMA controller
+ *
+ * Memory allocated by of_dma_controller_register() is freed here.
+ */
+void of_dma_controller_free(struct device_node *np)
+{
+	struct of_dma *ofdma;
+
+	mutex_lock(&of_dma_lock);
+
+	list_for_each_entry(ofdma, &of_dma_list, of_dma_controllers)
+		if (ofdma->of_node == np) {
+			list_del(&ofdma->of_dma_controllers);
+			kfree(ofdma);
+			break;
+		}
+
+	mutex_unlock(&of_dma_lock);
+}
+EXPORT_SYMBOL_GPL(of_dma_controller_free);
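+
+/*
+ * A minimal usage sketch (the "foo" names are hypothetical, not a real
+ * driver): a DMA controller driver registers its translation callback once
+ * its dma_device is ready, and unregisters it on removal:
+ *
+ *	ret = of_dma_controller_register(pdev->dev.of_node,
+ *					 foo_of_xlate, foo_device);
+ *	if (ret)
+ *		return ret;
+ *	...
+ *	of_dma_controller_free(pdev->dev.of_node);
+ */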
+
+/**
+ * of_dma_router_register - Register a DMA router to DT DMA helpers as a
+ *			    controller
+ * @np:				device node of DMA router
+ * @of_dma_route_allocate:	setup function for the router which needs to
+ *				modify the dma_spec for the DMA controller to
+ *				use and to set up the requested route.
+ * @dma_router:			pointer to dma_router structure to be used when
+ *				the route needs to be freed.
+ *
+ * Returns 0 on success or appropriate errno value on error.
+ *
+ * Allocated memory should be freed with appropriate of_dma_controller_free()
+ * call.
+ */
+int of_dma_router_register(struct device_node *np,
+			   void *(*of_dma_route_allocate)
+			   (struct of_phandle_args *, struct of_dma *),
+			   struct dma_router *dma_router)
+{
+	struct of_dma	*ofdma;
+
+	if (!np || !of_dma_route_allocate || !dma_router) {
+		pr_err("%s: not enough information provided\n", __func__);
+		return -EINVAL;
+	}
+
+	ofdma = kzalloc(sizeof(*ofdma), GFP_KERNEL);
+	if (!ofdma)
+		return -ENOMEM;
+
+	ofdma->of_node = np;
+	ofdma->of_dma_xlate = of_dma_router_xlate;
+	ofdma->of_dma_route_allocate = of_dma_route_allocate;
+	ofdma->dma_router = dma_router;
+
+	/* Now queue of_dma controller structure in list */
+	mutex_lock(&of_dma_lock);
+	list_add_tail(&ofdma->of_dma_controllers, &of_dma_list);
+	mutex_unlock(&of_dma_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(of_dma_router_register);
+
+/**
+ * of_dma_match_channel - Check if a DMA specifier matches name
+ * @np:		device node to look for DMA channels
+ * @name:	channel name to be matched
+ * @index:	index of DMA specifier in list of DMA specifiers
+ * @dma_spec:	pointer to DMA specifier as found in the device tree
+ *
+ * Check if the DMA specifier pointed to by the index in a list of DMA
+ * specifiers matches the name provided. Returns 0 if the name matches and
+ * a valid pointer to the DMA specifier is found. Otherwise returns -ENODEV.
+ */
+static int of_dma_match_channel(struct device_node *np, const char *name,
+				int index, struct of_phandle_args *dma_spec)
+{
+	const char *s;
+
+	if (of_property_read_string_index(np, "dma-names", index, &s))
+		return -ENODEV;
+
+	if (strcmp(name, s))
+		return -ENODEV;
+
+	if (of_parse_phandle_with_args(np, "dmas", "#dma-cells", index,
+				       dma_spec))
+		return -ENODEV;
+
+	return 0;
+}
+
+/**
+ * of_dma_request_slave_channel - Get the DMA slave channel
+ * @np:		device node to get DMA request from
+ * @name:	name of desired channel
+ *
+ * Returns pointer to appropriate DMA channel on success or an error pointer.
+ */
+struct dma_chan *of_dma_request_slave_channel(struct device_node *np,
+					      const char *name)
+{
+	struct of_phandle_args	dma_spec;
+	struct of_dma		*ofdma;
+	struct dma_chan		*chan;
+	int			count, i, start;
+	int			ret_no_channel = -ENODEV;
+	static atomic_t		last_index;
+
+	if (!np || !name) {
+		pr_err("%s: not enough information provided\n", __func__);
+		return ERR_PTR(-ENODEV);
+	}
+
+	/* Silently fail if there is not even the "dmas" property */
+	if (!of_find_property(np, "dmas", NULL))
+		return ERR_PTR(-ENODEV);
+
+	count = of_property_count_strings(np, "dma-names");
+	if (count < 0) {
+		pr_err("%s: dma-names property of node '%pOF' missing or empty\n",
+			__func__, np);
+		return ERR_PTR(-ENODEV);
+	}
+
+	/*
+	 * approximate an average distribution across multiple
+	 * entries with the same name
+	 */
+	start = atomic_inc_return(&last_index);
+	for (i = 0; i < count; i++) {
+		if (of_dma_match_channel(np, name,
+					 (i + start) % count,
+					 &dma_spec))
+			continue;
+
+		mutex_lock(&of_dma_lock);
+		ofdma = of_dma_find_controller(&dma_spec);
+
+		if (ofdma) {
+			chan = ofdma->of_dma_xlate(&dma_spec, ofdma);
+		} else {
+			ret_no_channel = -EPROBE_DEFER;
+			chan = NULL;
+		}
+
+		mutex_unlock(&of_dma_lock);
+
+		of_node_put(dma_spec.np);
+
+		if (chan)
+			return chan;
+	}
+
+	return ERR_PTR(ret_no_channel);
+}
+EXPORT_SYMBOL_GPL(of_dma_request_slave_channel);
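+
+/*
+ * Illustration only (the node and specifier values are made up): with a
+ * client node such as
+ *
+ *	uart0: serial@12340000 {
+ *		...
+ *		dmas = <&dmac 17>, <&dmac 18>;
+ *		dma-names = "tx", "rx";
+ *	};
+ *
+ * of_dma_request_slave_channel(np, "rx") looks up the "rx" entry in
+ * "dma-names", parses the matching "dmas" specifier and hands it to the
+ * controller's registered xlate callback.
+ */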
+
+/**
+ * of_dma_simple_xlate - Simple DMA engine translation function
+ * @dma_spec:	pointer to DMA specifier as found in the device tree
+ * @of_dma:	pointer to DMA controller data
+ *
+ * A simple translation function for devices that use a 32-bit value for the
+ * filter_param when calling the DMA engine dma_request_channel() function.
+ * Note that this translation function requires that #dma-cells is equal to 1
+ * and the argument of the dma specifier is the 32-bit filter_param. Returns
+ * pointer to appropriate dma channel on success or NULL on error.
+ */
+struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec,
+						struct of_dma *ofdma)
+{
+	int count = dma_spec->args_count;
+	struct of_dma_filter_info *info = ofdma->of_dma_data;
+
+	if (!info || !info->filter_fn)
+		return NULL;
+
+	if (count != 1)
+		return NULL;
+
+	return dma_request_channel(info->dma_cap, info->filter_fn,
+			&dma_spec->args[0]);
+}
+EXPORT_SYMBOL_GPL(of_dma_simple_xlate);
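+
+/*
+ * A minimal sketch of using this xlate (the "foo" filter function is an
+ * assumption, not a real driver):
+ *
+ *	static struct of_dma_filter_info foo_info;
+ *
+ *	foo_info.filter_fn = foo_dma_filter;
+ *	dma_cap_zero(foo_info.dma_cap);
+ *	dma_cap_set(DMA_SLAVE, foo_info.dma_cap);
+ *	of_dma_controller_register(np, of_dma_simple_xlate, &foo_info);
+ */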
+
+/**
+ * of_dma_xlate_by_chan_id - Translate dt property to DMA channel by channel id
+ * @dma_spec:	pointer to DMA specifier as found in the device tree
+ * @of_dma:	pointer to DMA controller data
+ *
+ * This function can be used as the of xlate callback for a DMA driver which
+ * wants to match the channel based on the channel id. When using this xlate
+ * function the #dma-cells property of the DMA controller dt node needs to be
+ * set to 1.
+ * The data parameter of of_dma_controller_register must be a pointer to the
+ * dma_device struct the function should match upon.
+ *
+ * Returns pointer to appropriate dma channel on success or NULL on error.
+ */
+struct dma_chan *of_dma_xlate_by_chan_id(struct of_phandle_args *dma_spec,
+					 struct of_dma *ofdma)
+{
+	struct dma_device *dev = ofdma->of_dma_data;
+	struct dma_chan *chan, *candidate = NULL;
+
+	if (!dev || dma_spec->args_count != 1)
+		return NULL;
+
+	list_for_each_entry(chan, &dev->channels, device_node)
+		if (chan->chan_id == dma_spec->args[0]) {
+			candidate = chan;
+			break;
+		}
+
+	if (!candidate)
+		return NULL;
+
+	return dma_get_slave_channel(candidate);
+}
+EXPORT_SYMBOL_GPL(of_dma_xlate_by_chan_id);
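+
+/*
+ * Sketch of typical use: a driver that simply numbers its channels passes its
+ * dma_device as the xlate data,
+ *
+ *	of_dma_controller_register(np, of_dma_xlate_by_chan_id, dma_dev);
+ *
+ * so that a DT entry like "dmas = <&dmac 3>" resolves to channel 3.
+ */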
diff --git a/drivers/dma/owl-dma.c b/drivers/dma/owl-dma.c
new file mode 100644
index 0000000..7812a63
--- /dev/null
+++ b/drivers/dma/owl-dma.c
@@ -0,0 +1,971 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Actions Semi Owl SoCs DMA driver
+//
+// Copyright (c) 2014 Actions Semi Inc.
+// Author: David Liu <liuwei@actions-semi.com>
+//
+// Copyright (c) 2018 Linaro Ltd.
+// Author: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/slab.h>
+#include "virt-dma.h"
+
+#define OWL_DMA_FRAME_MAX_LENGTH		0xfffff
+
+/* Global DMA Controller Registers */
+#define OWL_DMA_IRQ_PD0				0x00
+#define OWL_DMA_IRQ_PD1				0x04
+#define OWL_DMA_IRQ_PD2				0x08
+#define OWL_DMA_IRQ_PD3				0x0C
+#define OWL_DMA_IRQ_EN0				0x10
+#define OWL_DMA_IRQ_EN1				0x14
+#define OWL_DMA_IRQ_EN2				0x18
+#define OWL_DMA_IRQ_EN3				0x1C
+#define OWL_DMA_SECURE_ACCESS_CTL		0x20
+#define OWL_DMA_NIC_QOS				0x24
+#define OWL_DMA_DBGSEL				0x28
+#define OWL_DMA_IDLE_STAT			0x2C
+
+/* Channel Registers */
+#define OWL_DMA_CHAN_BASE(i)			(0x100 + (i) * 0x100)
+#define OWL_DMAX_MODE				0x00
+#define OWL_DMAX_SOURCE				0x04
+#define OWL_DMAX_DESTINATION			0x08
+#define OWL_DMAX_FRAME_LEN			0x0C
+#define OWL_DMAX_FRAME_CNT			0x10
+#define OWL_DMAX_REMAIN_FRAME_CNT		0x14
+#define OWL_DMAX_REMAIN_CNT			0x18
+#define OWL_DMAX_SOURCE_STRIDE			0x1C
+#define OWL_DMAX_DESTINATION_STRIDE		0x20
+#define OWL_DMAX_START				0x24
+#define OWL_DMAX_PAUSE				0x28
+#define OWL_DMAX_CHAINED_CTL			0x2C
+#define OWL_DMAX_CONSTANT			0x30
+#define OWL_DMAX_LINKLIST_CTL			0x34
+#define OWL_DMAX_NEXT_DESCRIPTOR		0x38
+#define OWL_DMAX_CURRENT_DESCRIPTOR_NUM		0x3C
+#define OWL_DMAX_INT_CTL			0x40
+#define OWL_DMAX_INT_STATUS			0x44
+#define OWL_DMAX_CURRENT_SOURCE_POINTER		0x48
+#define OWL_DMAX_CURRENT_DESTINATION_POINTER	0x4C
+
+/* OWL_DMAX_MODE Bits */
+#define OWL_DMA_MODE_TS(x)			(((x) & GENMASK(5, 0)) << 0)
+#define OWL_DMA_MODE_ST(x)			(((x) & GENMASK(1, 0)) << 8)
+#define	OWL_DMA_MODE_ST_DEV			OWL_DMA_MODE_ST(0)
+#define	OWL_DMA_MODE_ST_DCU			OWL_DMA_MODE_ST(2)
+#define	OWL_DMA_MODE_ST_SRAM			OWL_DMA_MODE_ST(3)
+#define OWL_DMA_MODE_DT(x)			(((x) & GENMASK(1, 0)) << 10)
+#define	OWL_DMA_MODE_DT_DEV			OWL_DMA_MODE_DT(0)
+#define	OWL_DMA_MODE_DT_DCU			OWL_DMA_MODE_DT(2)
+#define	OWL_DMA_MODE_DT_SRAM			OWL_DMA_MODE_DT(3)
+#define OWL_DMA_MODE_SAM(x)			(((x) & GENMASK(1, 0)) << 16)
+#define	OWL_DMA_MODE_SAM_CONST			OWL_DMA_MODE_SAM(0)
+#define	OWL_DMA_MODE_SAM_INC			OWL_DMA_MODE_SAM(1)
+#define	OWL_DMA_MODE_SAM_STRIDE			OWL_DMA_MODE_SAM(2)
+#define OWL_DMA_MODE_DAM(x)			(((x) & GENMASK(1, 0)) << 18)
+#define	OWL_DMA_MODE_DAM_CONST			OWL_DMA_MODE_DAM(0)
+#define	OWL_DMA_MODE_DAM_INC			OWL_DMA_MODE_DAM(1)
+#define	OWL_DMA_MODE_DAM_STRIDE			OWL_DMA_MODE_DAM(2)
+#define OWL_DMA_MODE_PW(x)			(((x) & GENMASK(2, 0)) << 20)
+#define OWL_DMA_MODE_CB				BIT(23)
+#define OWL_DMA_MODE_NDDBW(x)			(((x) & 0x1) << 28)
+#define	OWL_DMA_MODE_NDDBW_32BIT		OWL_DMA_MODE_NDDBW(0)
+#define	OWL_DMA_MODE_NDDBW_8BIT			OWL_DMA_MODE_NDDBW(1)
+#define OWL_DMA_MODE_CFE			BIT(29)
+#define OWL_DMA_MODE_LME			BIT(30)
+#define OWL_DMA_MODE_CME			BIT(31)
+
+/* OWL_DMAX_LINKLIST_CTL Bits */
+#define OWL_DMA_LLC_SAV(x)			(((x) & GENMASK(1, 0)) << 8)
+#define	OWL_DMA_LLC_SAV_INC			OWL_DMA_LLC_SAV(0)
+#define	OWL_DMA_LLC_SAV_LOAD_NEXT		OWL_DMA_LLC_SAV(1)
+#define	OWL_DMA_LLC_SAV_LOAD_PREV		OWL_DMA_LLC_SAV(2)
+#define OWL_DMA_LLC_DAV(x)			(((x) & GENMASK(1, 0)) << 10)
+#define	OWL_DMA_LLC_DAV_INC			OWL_DMA_LLC_DAV(0)
+#define	OWL_DMA_LLC_DAV_LOAD_NEXT		OWL_DMA_LLC_DAV(1)
+#define	OWL_DMA_LLC_DAV_LOAD_PREV		OWL_DMA_LLC_DAV(2)
+#define OWL_DMA_LLC_SUSPEND			BIT(16)
+
+/* OWL_DMAX_INT_CTL Bits */
+#define OWL_DMA_INTCTL_BLOCK			BIT(0)
+#define OWL_DMA_INTCTL_SUPER_BLOCK		BIT(1)
+#define OWL_DMA_INTCTL_FRAME			BIT(2)
+#define OWL_DMA_INTCTL_HALF_FRAME		BIT(3)
+#define OWL_DMA_INTCTL_LAST_FRAME		BIT(4)
+
+/* OWL_DMAX_INT_STATUS Bits */
+#define OWL_DMA_INTSTAT_BLOCK			BIT(0)
+#define OWL_DMA_INTSTAT_SUPER_BLOCK		BIT(1)
+#define OWL_DMA_INTSTAT_FRAME			BIT(2)
+#define OWL_DMA_INTSTAT_HALF_FRAME		BIT(3)
+#define OWL_DMA_INTSTAT_LAST_FRAME		BIT(4)
+
+/* Pack shift and newshift in a single word */
+#define BIT_FIELD(val, width, shift, newshift)	\
+		((((val) >> (shift)) & ((BIT(width)) - 1)) << (newshift))
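+
+/*
+ * For example, BIT_FIELD(x, 2, 10, 8) takes the two bits x[11:10] and places
+ * them at bits [9:8] of the result; llc_hw_ctrla() below uses this to repack
+ * OWL_DMAX_MODE and OWL_DMAX_LINKLIST_CTL settings into the ctrla word of a
+ * hardware link list entry.
+ */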
+
+/**
+ * struct owl_dma_lli_hw - Hardware link list for dma transfer
+ * @next_lli: physical address of the next link list
+ * @saddr: source physical address
+ * @daddr: destination physical address
+ * @flen: frame length
+ * @fcnt: frame count
+ * @src_stride: source stride
+ * @dst_stride: destination stride
+ * @ctrla: dma_mode and linklist ctrl config
+ * @ctrlb: interrupt config
+ * @const_num: data for constant fill
+ */
+struct owl_dma_lli_hw {
+	u32	next_lli;
+	u32	saddr;
+	u32	daddr;
+	u32	flen:20;
+	u32	fcnt:12;
+	u32	src_stride;
+	u32	dst_stride;
+	u32	ctrla;
+	u32	ctrlb;
+	u32	const_num;
+};
+
+/**
+ * struct owl_dma_lli - Link list for dma transfer
+ * @hw: hardware link list
+ * @phys: physical address of hardware link list
+ * @node: node for txd's lli_list
+ */
+struct owl_dma_lli {
+	struct  owl_dma_lli_hw	hw;
+	dma_addr_t		phys;
+	struct list_head	node;
+};
+
+/**
+ * struct owl_dma_txd - Wrapper for struct dma_async_tx_descriptor
+ * @vd: virtual DMA descriptor
+ * @lli_list: link list of lli nodes
+ */
+struct owl_dma_txd {
+	struct virt_dma_desc	vd;
+	struct list_head	lli_list;
+};
+
+/**
+ * struct owl_dma_pchan - Holder for the physical channels
+ * @id: physical index to this channel
+ * @base: virtual memory base for the dma channel
+ * @vchan: the virtual channel currently being served by this physical channel
+ * @lock: a lock to use when altering an instance of this struct
+ */
+struct owl_dma_pchan {
+	u32			id;
+	void __iomem		*base;
+	struct owl_dma_vchan	*vchan;
+	spinlock_t		lock;
+};
+
+/**
+ * struct owl_dma_vchan - Wrapper for DMA ENGINE channel
+ * @vc: wrapped virtual channel
+ * @pchan: the physical channel utilized by this channel
+ * @txd: active transaction on this channel
+ */
+struct owl_dma_vchan {
+	struct virt_dma_chan	vc;
+	struct owl_dma_pchan	*pchan;
+	struct owl_dma_txd	*txd;
+};
+
+/**
+ * struct owl_dma - Holder for the Owl DMA controller
+ * @dma: dma engine for this instance
+ * @base: virtual memory base for the DMA controller
+ * @clk: clock for the DMA controller
+ * @lock: a lock to use when changing DMA controller global registers
+ * @lli_pool: a pool for the LLI descriptors
+ * @irq: interrupt line for the DMA controller
+ * @nr_pchans: the number of physical channels
+ * @pchans: array of data for the physical channels
+ * @nr_vchans: the number of virtual channels
+ * @vchans: array of data for the virtual channels
+ */
+struct owl_dma {
+	struct dma_device	dma;
+	void __iomem		*base;
+	struct clk		*clk;
+	spinlock_t		lock;
+	struct dma_pool		*lli_pool;
+	int			irq;
+
+	unsigned int		nr_pchans;
+	struct owl_dma_pchan	*pchans;
+
+	unsigned int		nr_vchans;
+	struct owl_dma_vchan	*vchans;
+};
+
+static void pchan_update(struct owl_dma_pchan *pchan, u32 reg,
+			 u32 val, bool state)
+{
+	u32 regval;
+
+	regval = readl(pchan->base + reg);
+
+	if (state)
+		regval |= val;
+	else
+		regval &= ~val;
+
+	writel(regval, pchan->base + reg);
+}
+
+static void pchan_writel(struct owl_dma_pchan *pchan, u32 reg, u32 data)
+{
+	writel(data, pchan->base + reg);
+}
+
+static u32 pchan_readl(struct owl_dma_pchan *pchan, u32 reg)
+{
+	return readl(pchan->base + reg);
+}
+
+static void dma_update(struct owl_dma *od, u32 reg, u32 val, bool state)
+{
+	u32 regval;
+
+	regval = readl(od->base + reg);
+
+	if (state)
+		regval |= val;
+	else
+		regval &= ~val;
+
+	writel(regval, od->base + reg);
+}
+
+static void dma_writel(struct owl_dma *od, u32 reg, u32 data)
+{
+	writel(data, od->base + reg);
+}
+
+static u32 dma_readl(struct owl_dma *od, u32 reg)
+{
+	return readl(od->base + reg);
+}
+
+static inline struct owl_dma *to_owl_dma(struct dma_device *dd)
+{
+	return container_of(dd, struct owl_dma, dma);
+}
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+	return &chan->dev->device;
+}
+
+static inline struct owl_dma_vchan *to_owl_vchan(struct dma_chan *chan)
+{
+	return container_of(chan, struct owl_dma_vchan, vc.chan);
+}
+
+static inline struct owl_dma_txd *to_owl_txd(struct dma_async_tx_descriptor *tx)
+{
+	return container_of(tx, struct owl_dma_txd, vd.tx);
+}
+
+static inline u32 llc_hw_ctrla(u32 mode, u32 llc_ctl)
+{
+	u32 ctl;
+
+	ctl = BIT_FIELD(mode, 4, 28, 28) |
+	      BIT_FIELD(mode, 8, 16, 20) |
+	      BIT_FIELD(mode, 4, 8, 16) |
+	      BIT_FIELD(mode, 6, 0, 10) |
+	      BIT_FIELD(llc_ctl, 2, 10, 8) |
+	      BIT_FIELD(llc_ctl, 2, 8, 6);
+
+	return ctl;
+}
+
+static inline u32 llc_hw_ctrlb(u32 int_ctl)
+{
+	u32 ctl;
+
+	ctl = BIT_FIELD(int_ctl, 7, 0, 18);
+
+	return ctl;
+}
+
+static void owl_dma_free_lli(struct owl_dma *od,
+			     struct owl_dma_lli *lli)
+{
+	list_del(&lli->node);
+	dma_pool_free(od->lli_pool, lli, lli->phys);
+}
+
+static struct owl_dma_lli *owl_dma_alloc_lli(struct owl_dma *od)
+{
+	struct owl_dma_lli *lli;
+	dma_addr_t phys;
+
+	lli = dma_pool_alloc(od->lli_pool, GFP_NOWAIT, &phys);
+	if (!lli)
+		return NULL;
+
+	INIT_LIST_HEAD(&lli->node);
+	lli->phys = phys;
+
+	return lli;
+}
+
+static struct owl_dma_lli *owl_dma_add_lli(struct owl_dma_txd *txd,
+					   struct owl_dma_lli *prev,
+					   struct owl_dma_lli *next)
+{
+	list_add_tail(&next->node, &txd->lli_list);
+
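+	/* Chain the previous node to this one and set its link-mode (LME) bits */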
+	if (prev) {
+		prev->hw.next_lli = next->phys;
+		prev->hw.ctrla |= llc_hw_ctrla(OWL_DMA_MODE_LME, 0);
+	}
+
+	return next;
+}
+
+static inline int owl_dma_cfg_lli(struct owl_dma_vchan *vchan,
+				  struct owl_dma_lli *lli,
+				  dma_addr_t src, dma_addr_t dst,
+				  u32 len, enum dma_transfer_direction dir)
+{
+	struct owl_dma_lli_hw *hw = &lli->hw;
+	u32 mode;
+
+	mode = OWL_DMA_MODE_PW(0);
+
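+	/*
+	 * Only memory-to-memory is supported here: both ends are routed
+	 * through the DCU with incrementing source/destination addresses.
+	 */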
+	switch (dir) {
+	case DMA_MEM_TO_MEM:
+		mode |= OWL_DMA_MODE_TS(0) | OWL_DMA_MODE_ST_DCU |
+			OWL_DMA_MODE_DT_DCU | OWL_DMA_MODE_SAM_INC |
+			OWL_DMA_MODE_DAM_INC;
+
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	hw->next_lli = 0; /* One link list by default */
+	hw->saddr = src;
+	hw->daddr = dst;
+
+	hw->fcnt = 1; /* Frame count fixed as 1 */
+	hw->flen = len; /* Max frame length is 1MB */
+	hw->src_stride = 0;
+	hw->dst_stride = 0;
+	hw->ctrla = llc_hw_ctrla(mode,
+				 OWL_DMA_LLC_SAV_LOAD_NEXT |
+				 OWL_DMA_LLC_DAV_LOAD_NEXT);
+
+	hw->ctrlb = llc_hw_ctrlb(OWL_DMA_INTCTL_SUPER_BLOCK);
+
+	return 0;
+}
+
+static struct owl_dma_pchan *owl_dma_get_pchan(struct owl_dma *od,
+					       struct owl_dma_vchan *vchan)
+{
+	struct owl_dma_pchan *pchan = NULL;
+	unsigned long flags;
+	int i;
+
+	for (i = 0; i < od->nr_pchans; i++) {
+		pchan = &od->pchans[i];
+
+		spin_lock_irqsave(&pchan->lock, flags);
+		if (!pchan->vchan) {
+			pchan->vchan = vchan;
+			spin_unlock_irqrestore(&pchan->lock, flags);
+			break;
+		}
+
+		spin_unlock_irqrestore(&pchan->lock, flags);
+	}
+
+	return pchan;
+}
+
+static int owl_dma_pchan_busy(struct owl_dma *od, struct owl_dma_pchan *pchan)
+{
+	unsigned int val;
+
+	val = dma_readl(od, OWL_DMA_IDLE_STAT);
+
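+	/* A set bit in OWL_DMA_IDLE_STAT means the channel is idle */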
+	return !(val & (1 << pchan->id));
+}
+
+static void owl_dma_terminate_pchan(struct owl_dma *od,
+				    struct owl_dma_pchan *pchan)
+{
+	unsigned long flags;
+	u32 irq_pd;
+
+	pchan_writel(pchan, OWL_DMAX_START, 0);
+	pchan_update(pchan, OWL_DMAX_INT_STATUS, 0xff, false);
+
+	spin_lock_irqsave(&od->lock, flags);
+	dma_update(od, OWL_DMA_IRQ_EN0, (1 << pchan->id), false);
+
+	irq_pd = dma_readl(od, OWL_DMA_IRQ_PD0);
+	if (irq_pd & (1 << pchan->id)) {
+		dev_warn(od->dma.dev,
+			 "terminating pchan %d that still has pending irq\n",
+			 pchan->id);
+		dma_writel(od, OWL_DMA_IRQ_PD0, (1 << pchan->id));
+	}
+
+	pchan->vchan = NULL;
+
+	spin_unlock_irqrestore(&od->lock, flags);
+}
+
+static int owl_dma_start_next_txd(struct owl_dma_vchan *vchan)
+{
+	struct owl_dma *od = to_owl_dma(vchan->vc.chan.device);
+	struct virt_dma_desc *vd = vchan_next_desc(&vchan->vc);
+	struct owl_dma_pchan *pchan = vchan->pchan;
+	struct owl_dma_txd *txd = to_owl_txd(&vd->tx);
+	struct owl_dma_lli *lli;
+	unsigned long flags;
+	u32 int_ctl;
+
+	list_del(&vd->node);
+
+	vchan->txd = txd;
+
+	/* Wait for channel inactive */
+	while (owl_dma_pchan_busy(od, pchan))
+		cpu_relax();
+
+	lli = list_first_entry(&txd->lli_list,
+			       struct owl_dma_lli, node);
+
+	int_ctl = OWL_DMA_INTCTL_SUPER_BLOCK;
+
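+	/* Put the channel in link-list mode and point it at the first node */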
+	pchan_writel(pchan, OWL_DMAX_MODE, OWL_DMA_MODE_LME);
+	pchan_writel(pchan, OWL_DMAX_LINKLIST_CTL,
+		     OWL_DMA_LLC_SAV_LOAD_NEXT | OWL_DMA_LLC_DAV_LOAD_NEXT);
+	pchan_writel(pchan, OWL_DMAX_NEXT_DESCRIPTOR, lli->phys);
+	pchan_writel(pchan, OWL_DMAX_INT_CTL, int_ctl);
+
+	/* Clear IRQ status for this pchan */
+	pchan_update(pchan, OWL_DMAX_INT_STATUS, 0xff, false);
+
+	spin_lock_irqsave(&od->lock, flags);
+
+	dma_update(od, OWL_DMA_IRQ_EN0, (1 << pchan->id), true);
+
+	spin_unlock_irqrestore(&od->lock, flags);
+
+	dev_dbg(chan2dev(&vchan->vc.chan), "starting pchan %d\n", pchan->id);
+
+	/* Start DMA transfer for this pchan */
+	pchan_writel(pchan, OWL_DMAX_START, 0x1);
+
+	return 0;
+}
+
+static void owl_dma_phy_free(struct owl_dma *od, struct owl_dma_vchan *vchan)
+{
+	/* Ensure that the physical channel is stopped */
+	owl_dma_terminate_pchan(od, vchan->pchan);
+
+	vchan->pchan = NULL;
+}
+
+static irqreturn_t owl_dma_interrupt(int irq, void *dev_id)
+{
+	struct owl_dma *od = dev_id;
+	struct owl_dma_vchan *vchan;
+	struct owl_dma_pchan *pchan;
+	unsigned long pending;
+	int i;
+	unsigned int global_irq_pending, chan_irq_pending;
+
+	spin_lock(&od->lock);
+
+	pending = dma_readl(od, OWL_DMA_IRQ_PD0);
+
+	/* Clear IRQ status for each pchan */
+	for_each_set_bit(i, &pending, od->nr_pchans) {
+		pchan = &od->pchans[i];
+		pchan_update(pchan, OWL_DMAX_INT_STATUS, 0xff, false);
+	}
+
+	/* Clear pending IRQ */
+	dma_writel(od, OWL_DMA_IRQ_PD0, pending);
+
+	/* Check missed pending IRQ */
+	for (i = 0; i < od->nr_pchans; i++) {
+		pchan = &od->pchans[i];
+		chan_irq_pending = pchan_readl(pchan, OWL_DMAX_INT_CTL) &
+				   pchan_readl(pchan, OWL_DMAX_INT_STATUS);
+
+		/* Dummy read to ensure OWL_DMA_IRQ_PD0 value is updated */
+		dma_readl(od, OWL_DMA_IRQ_PD0);
+
+		global_irq_pending = dma_readl(od, OWL_DMA_IRQ_PD0);
+
+		if (chan_irq_pending && !(global_irq_pending & BIT(i)))	{
+			dev_dbg(od->dma.dev,
+				"global and channel IRQ pending match err\n");
+
+			/* Clear IRQ status for this pchan */
+			pchan_update(pchan, OWL_DMAX_INT_STATUS,
+				     0xff, false);
+
+			/* Update global IRQ pending */
+			pending |= BIT(i);
+		}
+	}
+
+	spin_unlock(&od->lock);
+
+	for_each_set_bit(i, &pending, od->nr_pchans) {
+		struct owl_dma_txd *txd;
+
+		pchan = &od->pchans[i];
+
+		vchan = pchan->vchan;
+		if (!vchan) {
+			dev_warn(od->dma.dev, "no vchan attached on pchan %d\n",
+				 pchan->id);
+			continue;
+		}
+
+		spin_lock(&vchan->vc.lock);
+
+		txd = vchan->txd;
+		if (txd) {
+			vchan->txd = NULL;
+
+			vchan_cookie_complete(&txd->vd);
+
+			/*
+			 * Start the next descriptor (if any),
+			 * otherwise free this channel.
+			 */
+			if (vchan_next_desc(&vchan->vc))
+				owl_dma_start_next_txd(vchan);
+			else
+				owl_dma_phy_free(od, vchan);
+		}
+
+		spin_unlock(&vchan->vc.lock);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void owl_dma_free_txd(struct owl_dma *od, struct owl_dma_txd *txd)
+{
+	struct owl_dma_lli *lli, *_lli;
+
+	if (unlikely(!txd))
+		return;
+
+	list_for_each_entry_safe(lli, _lli, &txd->lli_list, node)
+		owl_dma_free_lli(od, lli);
+
+	kfree(txd);
+}
+
+static void owl_dma_desc_free(struct virt_dma_desc *vd)
+{
+	struct owl_dma *od = to_owl_dma(vd->tx.chan->device);
+	struct owl_dma_txd *txd = to_owl_txd(&vd->tx);
+
+	owl_dma_free_txd(od, txd);
+}
+
+static int owl_dma_terminate_all(struct dma_chan *chan)
+{
+	struct owl_dma *od = to_owl_dma(chan->device);
+	struct owl_dma_vchan *vchan = to_owl_vchan(chan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&vchan->vc.lock, flags);
+
+	if (vchan->pchan)
+		owl_dma_phy_free(od, vchan);
+
+	if (vchan->txd) {
+		owl_dma_desc_free(&vchan->txd->vd);
+		vchan->txd = NULL;
+	}
+
+	vchan_get_all_descriptors(&vchan->vc, &head);
+	vchan_dma_desc_free_list(&vchan->vc, &head);
+
+	spin_unlock_irqrestore(&vchan->vc.lock, flags);
+
+	return 0;
+}
+
+static u32 owl_dma_getbytes_chan(struct owl_dma_vchan *vchan)
+{
+	struct owl_dma_pchan *pchan;
+	struct owl_dma_txd *txd;
+	struct owl_dma_lli *lli;
+	unsigned int next_lli_phy;
+	size_t bytes;
+
+	pchan = vchan->pchan;
+	txd = vchan->txd;
+
+	if (!pchan || !txd)
+		return 0;
+
+	/* Get the remaining byte count of the current node in the link list */
+	bytes = pchan_readl(pchan, OWL_DMAX_REMAIN_CNT);
+
+	/* Loop through the preceding nodes to get total remaining bytes */
+	if (pchan_readl(pchan, OWL_DMAX_MODE) & OWL_DMA_MODE_LME) {
+		next_lli_phy = pchan_readl(pchan, OWL_DMAX_NEXT_DESCRIPTOR);
+		list_for_each_entry(lli, &txd->lli_list, node) {
+			/* Start from the next active node */
+			if (lli->phys == next_lli_phy) {
+				list_for_each_entry(lli, &txd->lli_list, node)
+					bytes += lli->hw.flen;
+				break;
+			}
+		}
+	}
+
+	return bytes;
+}
+
+static enum dma_status owl_dma_tx_status(struct dma_chan *chan,
+					 dma_cookie_t cookie,
+					 struct dma_tx_state *state)
+{
+	struct owl_dma_vchan *vchan = to_owl_vchan(chan);
+	struct owl_dma_lli *lli;
+	struct virt_dma_desc *vd;
+	struct owl_dma_txd *txd;
+	enum dma_status ret;
+	unsigned long flags;
+	size_t bytes = 0;
+
+	ret = dma_cookie_status(chan, cookie, state);
+	if (ret == DMA_COMPLETE || !state)
+		return ret;
+
+	spin_lock_irqsave(&vchan->vc.lock, flags);
+
+	vd = vchan_find_desc(&vchan->vc, cookie);
+	if (vd) {
+		txd = to_owl_txd(&vd->tx);
+		list_for_each_entry(lli, &txd->lli_list, node)
+			bytes += lli->hw.flen;
+	} else {
+		bytes = owl_dma_getbytes_chan(vchan);
+	}
+
+	spin_unlock_irqrestore(&vchan->vc.lock, flags);
+
+	dma_set_residue(state, bytes);
+
+	return ret;
+}
+
+static void owl_dma_phy_alloc_and_start(struct owl_dma_vchan *vchan)
+{
+	struct owl_dma *od = to_owl_dma(vchan->vc.chan.device);
+	struct owl_dma_pchan *pchan;
+
+	pchan = owl_dma_get_pchan(od, vchan);
+	if (!pchan)
+		return;
+
+	dev_dbg(od->dma.dev, "allocated pchan %d\n", pchan->id);
+
+	vchan->pchan = pchan;
+	owl_dma_start_next_txd(vchan);
+}
+
+static void owl_dma_issue_pending(struct dma_chan *chan)
+{
+	struct owl_dma_vchan *vchan = to_owl_vchan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&vchan->vc.lock, flags);
+	if (vchan_issue_pending(&vchan->vc)) {
+		if (!vchan->pchan)
+			owl_dma_phy_alloc_and_start(vchan);
+	}
+	spin_unlock_irqrestore(&vchan->vc.lock, flags);
+}
+
+static struct dma_async_tx_descriptor
+		*owl_dma_prep_memcpy(struct dma_chan *chan,
+				     dma_addr_t dst, dma_addr_t src,
+				     size_t len, unsigned long flags)
+{
+	struct owl_dma *od = to_owl_dma(chan->device);
+	struct owl_dma_vchan *vchan = to_owl_vchan(chan);
+	struct owl_dma_txd *txd;
+	struct owl_dma_lli *lli, *prev = NULL;
+	size_t offset, bytes;
+	int ret;
+
+	if (!len)
+		return NULL;
+
+	txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+	if (!txd)
+		return NULL;
+
+	INIT_LIST_HEAD(&txd->lli_list);
+
+	/* Process the transfer frame by frame */
+	for (offset = 0; offset < len; offset += bytes) {
+		lli = owl_dma_alloc_lli(od);
+		if (!lli) {
+			dev_warn(chan2dev(chan), "failed to allocate lli\n");
+			goto err_txd_free;
+		}
+
+		bytes = min_t(size_t, (len - offset), OWL_DMA_FRAME_MAX_LENGTH);
+
+		ret = owl_dma_cfg_lli(vchan, lli, src + offset, dst + offset,
+				      bytes, DMA_MEM_TO_MEM);
+		if (ret) {
+			dev_warn(chan2dev(chan), "failed to config lli\n");
+			goto err_txd_free;
+		}
+
+		prev = owl_dma_add_lli(txd, prev, lli);
+	}
+
+	return vchan_tx_prep(&vchan->vc, &txd->vd, flags);
+
+err_txd_free:
+	owl_dma_free_txd(od, txd);
+	return NULL;
+}
+
+static void owl_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct owl_dma_vchan *vchan = to_owl_vchan(chan);
+
+	/* Ensure all queued descriptors are freed */
+	vchan_free_chan_resources(&vchan->vc);
+}
+
+static inline void owl_dma_free(struct owl_dma *od)
+{
+	struct owl_dma_vchan *vchan = NULL;
+	struct owl_dma_vchan *next;
+
+	list_for_each_entry_safe(vchan,
+				 next, &od->dma.channels, vc.chan.device_node) {
+		list_del(&vchan->vc.chan.device_node);
+		tasklet_kill(&vchan->vc.task);
+	}
+}
+
+static int owl_dma_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct owl_dma *od;
+	struct resource *res;
+	int ret, i, nr_channels, nr_requests;
+
+	od = devm_kzalloc(&pdev->dev, sizeof(*od), GFP_KERNEL);
+	if (!od)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -EINVAL;
+
+	od->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(od->base))
+		return PTR_ERR(od->base);
+
+	ret = of_property_read_u32(np, "dma-channels", &nr_channels);
+	if (ret) {
+		dev_err(&pdev->dev, "can't get dma-channels\n");
+		return ret;
+	}
+
+	ret = of_property_read_u32(np, "dma-requests", &nr_requests);
+	if (ret) {
+		dev_err(&pdev->dev, "can't get dma-requests\n");
+		return ret;
+	}
+
+	dev_info(&pdev->dev, "dma-channels %d, dma-requests %d\n",
+		 nr_channels, nr_requests);
+
+	od->nr_pchans = nr_channels;
+	od->nr_vchans = nr_requests;
+
+	pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
+
+	platform_set_drvdata(pdev, od);
+	spin_lock_init(&od->lock);
+
+	dma_cap_set(DMA_MEMCPY, od->dma.cap_mask);
+
+	od->dma.dev = &pdev->dev;
+	od->dma.device_free_chan_resources = owl_dma_free_chan_resources;
+	od->dma.device_tx_status = owl_dma_tx_status;
+	od->dma.device_issue_pending = owl_dma_issue_pending;
+	od->dma.device_prep_dma_memcpy = owl_dma_prep_memcpy;
+	od->dma.device_terminate_all = owl_dma_terminate_all;
+	od->dma.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	od->dma.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	od->dma.directions = BIT(DMA_MEM_TO_MEM);
+	od->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+	INIT_LIST_HEAD(&od->dma.channels);
+
+	od->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(od->clk)) {
+		dev_err(&pdev->dev, "unable to get clock\n");
+		return PTR_ERR(od->clk);
+	}
+
+	/*
+	 * Even though the DMA controller is capable of generating 4
+	 * IRQs for the DMA priority feature, we only use 1 IRQ for
+	 * simplicity.
+	 */
+	od->irq = platform_get_irq(pdev, 0);
+	ret = devm_request_irq(&pdev->dev, od->irq, owl_dma_interrupt, 0,
+			       dev_name(&pdev->dev), od);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to request IRQ\n");
+		return ret;
+	}
+
+	/* Init physical channel */
+	od->pchans = devm_kcalloc(&pdev->dev, od->nr_pchans,
+				  sizeof(struct owl_dma_pchan), GFP_KERNEL);
+	if (!od->pchans)
+		return -ENOMEM;
+
+	for (i = 0; i < od->nr_pchans; i++) {
+		struct owl_dma_pchan *pchan = &od->pchans[i];
+
+		pchan->id = i;
+		pchan->base = od->base + OWL_DMA_CHAN_BASE(i);
+	}
+
+	/* Init virtual channel */
+	od->vchans = devm_kcalloc(&pdev->dev, od->nr_vchans,
+				  sizeof(struct owl_dma_vchan), GFP_KERNEL);
+	if (!od->vchans)
+		return -ENOMEM;
+
+	for (i = 0; i < od->nr_vchans; i++) {
+		struct owl_dma_vchan *vchan = &od->vchans[i];
+
+		vchan->vc.desc_free = owl_dma_desc_free;
+		vchan_init(&vchan->vc, &od->dma);
+	}
+
+	/* Create a pool of consistent memory blocks for hardware descriptors */
+	od->lli_pool = dma_pool_create(dev_name(od->dma.dev), od->dma.dev,
+				       sizeof(struct owl_dma_lli),
+				       __alignof__(struct owl_dma_lli),
+				       0);
+	if (!od->lli_pool) {
+		dev_err(&pdev->dev, "unable to allocate DMA descriptor pool\n");
+		return -ENOMEM;
+	}
+
+	clk_prepare_enable(od->clk);
+
+	ret = dma_async_device_register(&od->dma);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to register DMA engine device\n");
+		goto err_pool_free;
+	}
+
+	return 0;
+
+err_pool_free:
+	clk_disable_unprepare(od->clk);
+	dma_pool_destroy(od->lli_pool);
+
+	return ret;
+}
+
+static int owl_dma_remove(struct platform_device *pdev)
+{
+	struct owl_dma *od = platform_get_drvdata(pdev);
+
+	dma_async_device_unregister(&od->dma);
+
+	/* Mask all interrupts for this execution environment */
+	dma_writel(od, OWL_DMA_IRQ_EN0, 0x0);
+
+	/* Make sure we won't have any further interrupts */
+	devm_free_irq(od->dma.dev, od->irq, od);
+
+	owl_dma_free(od);
+
+	clk_disable_unprepare(od->clk);
+
+	return 0;
+}
+
+static const struct of_device_id owl_dma_match[] = {
+	{ .compatible = "actions,s900-dma", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, owl_dma_match);
+
+static struct platform_driver owl_dma_driver = {
+	.probe	= owl_dma_probe,
+	.remove	= owl_dma_remove,
+	.driver = {
+		.name = "dma-owl",
+		.of_match_table = of_match_ptr(owl_dma_match),
+	},
+};
+
+static int owl_dma_init(void)
+{
+	return platform_driver_register(&owl_dma_driver);
+}
+subsys_initcall(owl_dma_init);
+
+static void __exit owl_dma_exit(void)
+{
+	platform_driver_unregister(&owl_dma_driver);
+}
+module_exit(owl_dma_exit);
+
+MODULE_AUTHOR("David Liu <liuwei@actions-semi.com>");
+MODULE_AUTHOR("Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>");
+MODULE_DESCRIPTION("Actions Semi Owl SoCs DMA driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c
new file mode 100644
index 0000000..afd8f27
--- /dev/null
+++ b/drivers/dma/pch_dma.c
@@ -0,0 +1,1022 @@
+/*
+ * Topcliff PCH DMA controller driver
+ * Copyright (c) 2010 Intel Corporation
+ * Copyright (C) 2011 LAPIS Semiconductor Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pch_dma.h>
+
+#include "dmaengine.h"
+
+#define DRV_NAME "pch-dma"
+
+#define DMA_CTL0_DISABLE		0x0
+#define DMA_CTL0_SG			0x1
+#define DMA_CTL0_ONESHOT		0x2
+#define DMA_CTL0_MODE_MASK_BITS		0x3
+#define DMA_CTL0_DIR_SHIFT_BITS		2
+#define DMA_CTL0_BITS_PER_CH		4
+
+#define DMA_CTL2_START_SHIFT_BITS	8
+#define DMA_CTL2_IRQ_ENABLE_MASK	((1UL << DMA_CTL2_START_SHIFT_BITS) - 1)
+
+#define DMA_STATUS_IDLE			0x0
+#define DMA_STATUS_DESC_READ		0x1
+#define DMA_STATUS_WAIT			0x2
+#define DMA_STATUS_ACCESS		0x3
+#define DMA_STATUS_BITS_PER_CH		2
+#define DMA_STATUS_MASK_BITS		0x3
+#define DMA_STATUS_SHIFT_BITS		16
+#define DMA_STATUS_IRQ(x)		(0x1 << (x))
+#define DMA_STATUS0_ERR(x)		(0x1 << ((x) + 8))
+#define DMA_STATUS2_ERR(x)		(0x1 << (x))
+
+#define DMA_DESC_WIDTH_SHIFT_BITS	12
+#define DMA_DESC_WIDTH_1_BYTE		(0x3 << DMA_DESC_WIDTH_SHIFT_BITS)
+#define DMA_DESC_WIDTH_2_BYTES		(0x2 << DMA_DESC_WIDTH_SHIFT_BITS)
+#define DMA_DESC_WIDTH_4_BYTES		(0x0 << DMA_DESC_WIDTH_SHIFT_BITS)
+#define DMA_DESC_MAX_COUNT_1_BYTE	0x3FF
+#define DMA_DESC_MAX_COUNT_2_BYTES	0x3FF
+#define DMA_DESC_MAX_COUNT_4_BYTES	0x7FF
+#define DMA_DESC_END_WITHOUT_IRQ	0x0
+#define DMA_DESC_END_WITH_IRQ		0x1
+#define DMA_DESC_FOLLOW_WITHOUT_IRQ	0x2
+#define DMA_DESC_FOLLOW_WITH_IRQ	0x3
+
+#define MAX_CHAN_NR			12
+
+#define DMA_MASK_CTL0_MODE	0x33333333
+#define DMA_MASK_CTL2_MODE	0x00003333
+
+static unsigned int init_nr_desc_per_channel = 64;
+module_param(init_nr_desc_per_channel, uint, 0644);
+MODULE_PARM_DESC(init_nr_desc_per_channel,
+		 "initial descriptors per channel (default: 64)");
+
+struct pch_dma_desc_regs {
+	u32	dev_addr;
+	u32	mem_addr;
+	u32	size;
+	u32	next;
+};
+
+struct pch_dma_regs {
+	u32	dma_ctl0;
+	u32	dma_ctl1;
+	u32	dma_ctl2;
+	u32	dma_ctl3;
+	u32	dma_sts0;
+	u32	dma_sts1;
+	u32	dma_sts2;
+	u32	reserved3;
+	struct pch_dma_desc_regs desc[MAX_CHAN_NR];
+};
+
+struct pch_dma_desc {
+	struct pch_dma_desc_regs regs;
+	struct dma_async_tx_descriptor txd;
+	struct list_head	desc_node;
+	struct list_head	tx_list;
+};
+
+struct pch_dma_chan {
+	struct dma_chan		chan;
+	void __iomem *membase;
+	enum dma_transfer_direction dir;
+	struct tasklet_struct	tasklet;
+	unsigned long		err_status;
+
+	spinlock_t		lock;
+
+	struct list_head	active_list;
+	struct list_head	queue;
+	struct list_head	free_list;
+	unsigned int		descs_allocated;
+};
+
+#define PDC_DEV_ADDR	0x00
+#define PDC_MEM_ADDR	0x04
+#define PDC_SIZE	0x08
+#define PDC_NEXT	0x0C
+
+#define channel_readl(pdc, name) \
+	readl((pdc)->membase + PDC_##name)
+#define channel_writel(pdc, name, val) \
+	writel((val), (pdc)->membase + PDC_##name)
+
+struct pch_dma {
+	struct dma_device	dma;
+	void __iomem *membase;
+	struct dma_pool		*pool;
+	struct pch_dma_regs	regs;
+	struct pch_dma_desc_regs ch_regs[MAX_CHAN_NR];
+	struct pch_dma_chan	channels[MAX_CHAN_NR];
+};
+
+#define PCH_DMA_CTL0	0x00
+#define PCH_DMA_CTL1	0x04
+#define PCH_DMA_CTL2	0x08
+#define PCH_DMA_CTL3	0x0C
+#define PCH_DMA_STS0	0x10
+#define PCH_DMA_STS1	0x14
+#define PCH_DMA_STS2	0x18
+
+#define dma_readl(pd, name) \
+	readl((pd)->membase + PCH_DMA_##name)
+#define dma_writel(pd, name, val) \
+	writel((val), (pd)->membase + PCH_DMA_##name)
+
+static inline
+struct pch_dma_desc *to_pd_desc(struct dma_async_tx_descriptor *txd)
+{
+	return container_of(txd, struct pch_dma_desc, txd);
+}
+
+static inline struct pch_dma_chan *to_pd_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct pch_dma_chan, chan);
+}
+
+static inline struct pch_dma *to_pd(struct dma_device *ddev)
+{
+	return container_of(ddev, struct pch_dma, dma);
+}
+
+static inline struct device *chan2dev(struct dma_chan *chan)
+{
+	return &chan->dev->device;
+}
+
+static inline struct device *chan2parent(struct dma_chan *chan)
+{
+	return chan->dev->device.parent;
+}
+
+static inline
+struct pch_dma_desc *pdc_first_active(struct pch_dma_chan *pd_chan)
+{
+	return list_first_entry(&pd_chan->active_list,
+				struct pch_dma_desc, desc_node);
+}
+
+static inline
+struct pch_dma_desc *pdc_first_queued(struct pch_dma_chan *pd_chan)
+{
+	return list_first_entry(&pd_chan->queue,
+				struct pch_dma_desc, desc_node);
+}
+
+static void pdc_enable_irq(struct dma_chan *chan, int enable)
+{
+	struct pch_dma *pd = to_pd(chan->device);
+	u32 val;
+	int pos;
+
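+	/* Channels 0-7 use CTL2 bits 0-7; channels 8-11 use bits 16-19 */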
+	if (chan->chan_id < 8)
+		pos = chan->chan_id;
+	else
+		pos = chan->chan_id + 8;
+
+	val = dma_readl(pd, CTL2);
+
+	if (enable)
+		val |= 0x1 << pos;
+	else
+		val &= ~(0x1 << pos);
+
+	dma_writel(pd, CTL2, val);
+
+	dev_dbg(chan2dev(chan), "pdc_enable_irq: chan %d -> %x\n",
+		chan->chan_id, val);
+}
+
+static void pdc_set_dir(struct dma_chan *chan)
+{
+	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
+	struct pch_dma *pd = to_pd(chan->device);
+	u32 val;
+	u32 mask_mode;
+	u32 mask_ctl;
+
+	if (chan->chan_id < 8) {
+		val = dma_readl(pd, CTL0);
+
+		mask_mode = DMA_CTL0_MODE_MASK_BITS <<
+					(DMA_CTL0_BITS_PER_CH * chan->chan_id);
+		mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
+				       (DMA_CTL0_BITS_PER_CH * chan->chan_id));
+		val &= mask_mode;
+		if (pd_chan->dir == DMA_MEM_TO_DEV)
+			val |= 0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +
+				       DMA_CTL0_DIR_SHIFT_BITS);
+		else
+			val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +
+					 DMA_CTL0_DIR_SHIFT_BITS));
+
+		val |= mask_ctl;
+		dma_writel(pd, CTL0, val);
+	} else {
+		int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */
+		val = dma_readl(pd, CTL3);
+
+		mask_mode = DMA_CTL0_MODE_MASK_BITS <<
+						(DMA_CTL0_BITS_PER_CH * ch);
+		mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
+						 (DMA_CTL0_BITS_PER_CH * ch));
+		val &= mask_mode;
+		if (pd_chan->dir == DMA_MEM_TO_DEV)
+			val |= 0x1 << (DMA_CTL0_BITS_PER_CH * ch +
+				       DMA_CTL0_DIR_SHIFT_BITS);
+		else
+			val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * ch +
+					 DMA_CTL0_DIR_SHIFT_BITS));
+		val |= mask_ctl;
+		dma_writel(pd, CTL3, val);
+	}
+
+	dev_dbg(chan2dev(chan), "pdc_set_dir: chan %d -> %x\n",
+		chan->chan_id, val);
+}
+
+static void pdc_set_mode(struct dma_chan *chan, u32 mode)
+{
+	struct pch_dma *pd = to_pd(chan->device);
+	u32 val;
+	u32 mask_ctl;
+	u32 mask_dir;
+
+	if (chan->chan_id < 8) {
+		mask_ctl = DMA_MASK_CTL0_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
+			   (DMA_CTL0_BITS_PER_CH * chan->chan_id));
+		mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id +\
+				 DMA_CTL0_DIR_SHIFT_BITS);
+		val = dma_readl(pd, CTL0);
+		val &= mask_dir;
+		val |= mode << (DMA_CTL0_BITS_PER_CH * chan->chan_id);
+		val |= mask_ctl;
+		dma_writel(pd, CTL0, val);
+	} else {
+		int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */
+		mask_ctl = DMA_MASK_CTL2_MODE & ~(DMA_CTL0_MODE_MASK_BITS <<
+						 (DMA_CTL0_BITS_PER_CH * ch));
+		mask_dir = 1 << (DMA_CTL0_BITS_PER_CH * ch +\
+				 DMA_CTL0_DIR_SHIFT_BITS);
+		val = dma_readl(pd, CTL3);
+		val &= mask_dir;
+		val |= mode << (DMA_CTL0_BITS_PER_CH * ch);
+		val |= mask_ctl;
+		dma_writel(pd, CTL3, val);
+	}
+
+	dev_dbg(chan2dev(chan), "pdc_set_mode: chan %d -> %x\n",
+		chan->chan_id, val);
+}
+
+static u32 pdc_get_status0(struct pch_dma_chan *pd_chan)
+{
+	struct pch_dma *pd = to_pd(pd_chan->chan.device);
+	u32 val;
+
+	val = dma_readl(pd, STS0);
+	return DMA_STATUS_MASK_BITS & (val >> (DMA_STATUS_SHIFT_BITS +
+			DMA_STATUS_BITS_PER_CH * pd_chan->chan.chan_id));
+}
+
+static u32 pdc_get_status2(struct pch_dma_chan *pd_chan)
+{
+	struct pch_dma *pd = to_pd(pd_chan->chan.device);
+	u32 val;
+
+	val = dma_readl(pd, STS2);
+	return DMA_STATUS_MASK_BITS & (val >> (DMA_STATUS_SHIFT_BITS +
+			DMA_STATUS_BITS_PER_CH * (pd_chan->chan.chan_id - 8)));
+}
+
+static bool pdc_is_idle(struct pch_dma_chan *pd_chan)
+{
+	u32 sts;
+
+	if (pd_chan->chan.chan_id < 8)
+		sts = pdc_get_status0(pd_chan);
+	else
+		sts = pdc_get_status2(pd_chan);
+
+	return sts == DMA_STATUS_IDLE;
+}
+
+static void pdc_dostart(struct pch_dma_chan *pd_chan, struct pch_dma_desc* desc)
+{
+	if (!pdc_is_idle(pd_chan)) {
+		dev_err(chan2dev(&pd_chan->chan),
+			"BUG: Attempt to start non-idle channel\n");
+		return;
+	}
+
+	dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> dev_addr: %x\n",
+		pd_chan->chan.chan_id, desc->regs.dev_addr);
+	dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> mem_addr: %x\n",
+		pd_chan->chan.chan_id, desc->regs.mem_addr);
+	dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> size: %x\n",
+		pd_chan->chan.chan_id, desc->regs.size);
+	dev_dbg(chan2dev(&pd_chan->chan), "chan %d -> next: %x\n",
+		pd_chan->chan.chan_id, desc->regs.next);
+
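+	/*
+	 * A lone descriptor is programmed directly into the channel registers
+	 * (one-shot mode); a chain is handed to the hardware via NEXT and run
+	 * in scatter-gather mode.
+	 */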
+	if (list_empty(&desc->tx_list)) {
+		channel_writel(pd_chan, DEV_ADDR, desc->regs.dev_addr);
+		channel_writel(pd_chan, MEM_ADDR, desc->regs.mem_addr);
+		channel_writel(pd_chan, SIZE, desc->regs.size);
+		channel_writel(pd_chan, NEXT, desc->regs.next);
+		pdc_set_mode(&pd_chan->chan, DMA_CTL0_ONESHOT);
+	} else {
+		channel_writel(pd_chan, NEXT, desc->txd.phys);
+		pdc_set_mode(&pd_chan->chan, DMA_CTL0_SG);
+	}
+}
+
+static void pdc_chain_complete(struct pch_dma_chan *pd_chan,
+			       struct pch_dma_desc *desc)
+{
+	struct dma_async_tx_descriptor *txd = &desc->txd;
+	struct dmaengine_desc_callback cb;
+
+	dmaengine_desc_get_callback(txd, &cb);
+	list_splice_init(&desc->tx_list, &pd_chan->free_list);
+	list_move(&desc->desc_node, &pd_chan->free_list);
+
+	dmaengine_desc_callback_invoke(&cb, NULL);
+}
+
+static void pdc_complete_all(struct pch_dma_chan *pd_chan)
+{
+	struct pch_dma_desc *desc, *_d;
+	LIST_HEAD(list);
+
+	BUG_ON(!pdc_is_idle(pd_chan));
+
+	if (!list_empty(&pd_chan->queue))
+		pdc_dostart(pd_chan, pdc_first_queued(pd_chan));
+
+	list_splice_init(&pd_chan->active_list, &list);
+	list_splice_init(&pd_chan->queue, &pd_chan->active_list);
+
+	list_for_each_entry_safe(desc, _d, &list, desc_node)
+		pdc_chain_complete(pd_chan, desc);
+}
+
+static void pdc_handle_error(struct pch_dma_chan *pd_chan)
+{
+	struct pch_dma_desc *bad_desc;
+
+	bad_desc = pdc_first_active(pd_chan);
+	list_del(&bad_desc->desc_node);
+
+	list_splice_init(&pd_chan->queue, pd_chan->active_list.prev);
+
+	if (!list_empty(&pd_chan->active_list))
+		pdc_dostart(pd_chan, pdc_first_active(pd_chan));
+
+	dev_crit(chan2dev(&pd_chan->chan), "Bad descriptor submitted\n");
+	dev_crit(chan2dev(&pd_chan->chan), "descriptor cookie: %d\n",
+		 bad_desc->txd.cookie);
+
+	pdc_chain_complete(pd_chan, bad_desc);
+}
+
+static void pdc_advance_work(struct pch_dma_chan *pd_chan)
+{
+	if (list_empty(&pd_chan->active_list) ||
+		list_is_singular(&pd_chan->active_list)) {
+		pdc_complete_all(pd_chan);
+	} else {
+		pdc_chain_complete(pd_chan, pdc_first_active(pd_chan));
+		pdc_dostart(pd_chan, pdc_first_active(pd_chan));
+	}
+}
+
+static dma_cookie_t pd_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+	struct pch_dma_desc *desc = to_pd_desc(txd);
+	struct pch_dma_chan *pd_chan = to_pd_chan(txd->chan);
+
+	spin_lock(&pd_chan->lock);
+
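+	/* Start immediately if nothing is active, otherwise queue behind it */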
+	if (list_empty(&pd_chan->active_list)) {
+		list_add_tail(&desc->desc_node, &pd_chan->active_list);
+		pdc_dostart(pd_chan, desc);
+	} else {
+		list_add_tail(&desc->desc_node, &pd_chan->queue);
+	}
+
+	spin_unlock(&pd_chan->lock);
+	return 0;
+}
+
+static struct pch_dma_desc *pdc_alloc_desc(struct dma_chan *chan, gfp_t flags)
+{
+	struct pch_dma_desc *desc = NULL;
+	struct pch_dma *pd = to_pd(chan->device);
+	dma_addr_t addr;
+
+	desc = dma_pool_zalloc(pd->pool, flags, &addr);
+	if (desc) {
+		INIT_LIST_HEAD(&desc->tx_list);
+		dma_async_tx_descriptor_init(&desc->txd, chan);
+		desc->txd.tx_submit = pd_tx_submit;
+		desc->txd.flags = DMA_CTRL_ACK;
+		desc->txd.phys = addr;
+	}
+
+	return desc;
+}
+
+static struct pch_dma_desc *pdc_desc_get(struct pch_dma_chan *pd_chan)
+{
+	struct pch_dma_desc *desc, *_d;
+	struct pch_dma_desc *ret = NULL;
+	int i = 0;
+
+	spin_lock(&pd_chan->lock);
+	list_for_each_entry_safe(desc, _d, &pd_chan->free_list, desc_node) {
+		i++;
+		if (async_tx_test_ack(&desc->txd)) {
+			list_del(&desc->desc_node);
+			ret = desc;
+			break;
+		}
+		dev_dbg(chan2dev(&pd_chan->chan), "desc %p not ACKed\n", desc);
+	}
+	spin_unlock(&pd_chan->lock);
+	dev_dbg(chan2dev(&pd_chan->chan), "scanned %d descriptors\n", i);
+
+	if (!ret) {
+		ret = pdc_alloc_desc(&pd_chan->chan, GFP_ATOMIC);
+		if (ret) {
+			spin_lock(&pd_chan->lock);
+			pd_chan->descs_allocated++;
+			spin_unlock(&pd_chan->lock);
+		} else {
+			dev_err(chan2dev(&pd_chan->chan),
+				"failed to alloc desc\n");
+		}
+	}
+
+	return ret;
+}
+
+static void pdc_desc_put(struct pch_dma_chan *pd_chan,
+			 struct pch_dma_desc *desc)
+{
+	if (desc) {
+		spin_lock(&pd_chan->lock);
+		list_splice_init(&desc->tx_list, &pd_chan->free_list);
+		list_add(&desc->desc_node, &pd_chan->free_list);
+		spin_unlock(&pd_chan->lock);
+	}
+}
+
+static int pd_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
+	struct pch_dma_desc *desc;
+	LIST_HEAD(tmp_list);
+	int i;
+
+	if (!pdc_is_idle(pd_chan)) {
+		dev_dbg(chan2dev(chan), "DMA channel not idle ?\n");
+		return -EIO;
+	}
+
+	if (!list_empty(&pd_chan->free_list))
+		return pd_chan->descs_allocated;
+
+	for (i = 0; i < init_nr_desc_per_channel; i++) {
+		desc = pdc_alloc_desc(chan, GFP_KERNEL);
+
+		if (!desc) {
+			dev_warn(chan2dev(chan),
+				"Only allocated %d initial descriptors\n", i);
+			break;
+		}
+
+		list_add_tail(&desc->desc_node, &tmp_list);
+	}
+
+	spin_lock_irq(&pd_chan->lock);
+	list_splice(&tmp_list, &pd_chan->free_list);
+	pd_chan->descs_allocated = i;
+	dma_cookie_init(chan);
+	spin_unlock_irq(&pd_chan->lock);
+
+	pdc_enable_irq(chan, 1);
+
+	return pd_chan->descs_allocated;
+}
+
+static void pd_free_chan_resources(struct dma_chan *chan)
+{
+	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
+	struct pch_dma *pd = to_pd(chan->device);
+	struct pch_dma_desc *desc, *_d;
+	LIST_HEAD(tmp_list);
+
+	BUG_ON(!pdc_is_idle(pd_chan));
+	BUG_ON(!list_empty(&pd_chan->active_list));
+	BUG_ON(!list_empty(&pd_chan->queue));
+
+	spin_lock_irq(&pd_chan->lock);
+	list_splice_init(&pd_chan->free_list, &tmp_list);
+	pd_chan->descs_allocated = 0;
+	spin_unlock_irq(&pd_chan->lock);
+
+	list_for_each_entry_safe(desc, _d, &tmp_list, desc_node)
+		dma_pool_free(pd->pool, desc, desc->txd.phys);
+
+	pdc_enable_irq(chan, 0);
+}
+
+static enum dma_status pd_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+				    struct dma_tx_state *txstate)
+{
+	return dma_cookie_status(chan, cookie, txstate);
+}
+
+static void pd_issue_pending(struct dma_chan *chan)
+{
+	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
+
+	if (pdc_is_idle(pd_chan)) {
+		spin_lock(&pd_chan->lock);
+		pdc_advance_work(pd_chan);
+		spin_unlock(&pd_chan->lock);
+	}
+}
+
+static struct dma_async_tx_descriptor *pd_prep_slave_sg(struct dma_chan *chan,
+			struct scatterlist *sgl, unsigned int sg_len,
+			enum dma_transfer_direction direction, unsigned long flags,
+			void *context)
+{
+	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
+	struct pch_dma_slave *pd_slave = chan->private;
+	struct pch_dma_desc *first = NULL;
+	struct pch_dma_desc *prev = NULL;
+	struct pch_dma_desc *desc = NULL;
+	struct scatterlist *sg;
+	dma_addr_t reg;
+	int i;
+
+	if (unlikely(!sg_len)) {
+		dev_info(chan2dev(chan), "prep_slave_sg: length is zero!\n");
+		return NULL;
+	}
+
+	if (direction == DMA_DEV_TO_MEM)
+		reg = pd_slave->rx_reg;
+	else if (direction == DMA_MEM_TO_DEV)
+		reg = pd_slave->tx_reg;
+	else
+		return NULL;
+
+	pd_chan->dir = direction;
+	pdc_set_dir(chan);
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		desc = pdc_desc_get(pd_chan);
+
+		if (!desc)
+			goto err_desc_get;
+
+		desc->regs.dev_addr = reg;
+		desc->regs.mem_addr = sg_dma_address(sg);
+		desc->regs.size = sg_dma_len(sg);
+		desc->regs.next = DMA_DESC_FOLLOW_WITHOUT_IRQ;
+
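+		/*
+		 * The access width is encoded in the upper bits of the size
+		 * register, and each width has its own maximum transfer count.
+		 */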
+		switch (pd_slave->width) {
+		case PCH_DMA_WIDTH_1_BYTE:
+			if (desc->regs.size > DMA_DESC_MAX_COUNT_1_BYTE)
+				goto err_desc_get;
+			desc->regs.size |= DMA_DESC_WIDTH_1_BYTE;
+			break;
+		case PCH_DMA_WIDTH_2_BYTES:
+			if (desc->regs.size > DMA_DESC_MAX_COUNT_2_BYTES)
+				goto err_desc_get;
+			desc->regs.size |= DMA_DESC_WIDTH_2_BYTES;
+			break;
+		case PCH_DMA_WIDTH_4_BYTES:
+			if (desc->regs.size > DMA_DESC_MAX_COUNT_4_BYTES)
+				goto err_desc_get;
+			desc->regs.size |= DMA_DESC_WIDTH_4_BYTES;
+			break;
+		default:
+			goto err_desc_get;
+		}
+
+		if (!first) {
+			first = desc;
+		} else {
+			prev->regs.next |= desc->txd.phys;
+			list_add_tail(&desc->desc_node, &first->tx_list);
+		}
+
+		prev = desc;
+	}
+
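+	/* Mark the last descriptor as the end of the chain, with or without an IRQ */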
+	if (flags & DMA_PREP_INTERRUPT)
+		desc->regs.next = DMA_DESC_END_WITH_IRQ;
+	else
+		desc->regs.next = DMA_DESC_END_WITHOUT_IRQ;
+
+	first->txd.cookie = -EBUSY;
+	desc->txd.flags = flags;
+
+	return &first->txd;
+
+err_desc_get:
+	dev_err(chan2dev(chan), "failed to get desc or wrong parameters\n");
+	pdc_desc_put(pd_chan, first);
+	return NULL;
+}
+
+static int pd_device_terminate_all(struct dma_chan *chan)
+{
+	struct pch_dma_chan *pd_chan = to_pd_chan(chan);
+	struct pch_dma_desc *desc, *_d;
+	LIST_HEAD(list);
+
+	spin_lock_irq(&pd_chan->lock);
+
+	pdc_set_mode(&pd_chan->chan, DMA_CTL0_DISABLE);
+
+	list_splice_init(&pd_chan->active_list, &list);
+	list_splice_init(&pd_chan->queue, &list);
+
+	list_for_each_entry_safe(desc, _d, &list, desc_node)
+		pdc_chain_complete(pd_chan, desc);
+
+	spin_unlock_irq(&pd_chan->lock);
+
+	return 0;
+}
+
+static void pdc_tasklet(unsigned long data)
+{
+	struct pch_dma_chan *pd_chan = (struct pch_dma_chan *)data;
+	unsigned long flags;
+
+	if (!pdc_is_idle(pd_chan)) {
+		dev_err(chan2dev(&pd_chan->chan),
+			"BUG: handle non-idle channel in tasklet\n");
+		return;
+	}
+
+	spin_lock_irqsave(&pd_chan->lock, flags);
+	if (test_and_clear_bit(0, &pd_chan->err_status))
+		pdc_handle_error(pd_chan);
+	else
+		pdc_advance_work(pd_chan);
+	spin_unlock_irqrestore(&pd_chan->lock, flags);
+}
+
+static irqreturn_t pd_irq(int irq, void *devid)
+{
+	struct pch_dma *pd = (struct pch_dma *)devid;
+	struct pch_dma_chan *pd_chan;
+	u32 sts0;
+	u32 sts2;
+	int i;
+	int ret0 = IRQ_NONE;
+	int ret2 = IRQ_NONE;
+
+	sts0 = dma_readl(pd, STS0);
+	sts2 = dma_readl(pd, STS2);
+
+	dev_dbg(pd->dma.dev, "pd_irq sts0: %x\n", sts0);
+
+	for (i = 0; i < pd->dma.chancnt; i++) {
+		pd_chan = &pd->channels[i];
+
+		if (i < 8) {
+			if (sts0 & DMA_STATUS_IRQ(i)) {
+				if (sts0 & DMA_STATUS0_ERR(i))
+					set_bit(0, &pd_chan->err_status);
+
+				tasklet_schedule(&pd_chan->tasklet);
+				ret0 = IRQ_HANDLED;
+			}
+		} else {
+			if (sts2 & DMA_STATUS_IRQ(i - 8)) {
+				if (sts2 & DMA_STATUS2_ERR(i))
+					set_bit(0, &pd_chan->err_status);
+
+				tasklet_schedule(&pd_chan->tasklet);
+				ret2 = IRQ_HANDLED;
+			}
+		}
+	}
+
+	/* clear interrupt bits in status register */
+	if (ret0)
+		dma_writel(pd, STS0, sts0);
+	if (ret2)
+		dma_writel(pd, STS2, sts2);
+
+	return ret0 | ret2;
+}
+
+#ifdef	CONFIG_PM
+static void pch_dma_save_regs(struct pch_dma *pd)
+{
+	struct pch_dma_chan *pd_chan;
+	struct dma_chan *chan, *_c;
+	int i = 0;
+
+	pd->regs.dma_ctl0 = dma_readl(pd, CTL0);
+	pd->regs.dma_ctl1 = dma_readl(pd, CTL1);
+	pd->regs.dma_ctl2 = dma_readl(pd, CTL2);
+	pd->regs.dma_ctl3 = dma_readl(pd, CTL3);
+
+	list_for_each_entry_safe(chan, _c, &pd->dma.channels, device_node) {
+		pd_chan = to_pd_chan(chan);
+
+		pd->ch_regs[i].dev_addr = channel_readl(pd_chan, DEV_ADDR);
+		pd->ch_regs[i].mem_addr = channel_readl(pd_chan, MEM_ADDR);
+		pd->ch_regs[i].size = channel_readl(pd_chan, SIZE);
+		pd->ch_regs[i].next = channel_readl(pd_chan, NEXT);
+
+		i++;
+	}
+}
+
+static void pch_dma_restore_regs(struct pch_dma *pd)
+{
+	struct pch_dma_chan *pd_chan;
+	struct dma_chan *chan, *_c;
+	int i = 0;
+
+	dma_writel(pd, CTL0, pd->regs.dma_ctl0);
+	dma_writel(pd, CTL1, pd->regs.dma_ctl1);
+	dma_writel(pd, CTL2, pd->regs.dma_ctl2);
+	dma_writel(pd, CTL3, pd->regs.dma_ctl3);
+
+	list_for_each_entry_safe(chan, _c, &pd->dma.channels, device_node) {
+		pd_chan = to_pd_chan(chan);
+
+		channel_writel(pd_chan, DEV_ADDR, pd->ch_regs[i].dev_addr);
+		channel_writel(pd_chan, MEM_ADDR, pd->ch_regs[i].mem_addr);
+		channel_writel(pd_chan, SIZE, pd->ch_regs[i].size);
+		channel_writel(pd_chan, NEXT, pd->ch_regs[i].next);
+
+		i++;
+	}
+}
+
+static int pch_dma_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct pch_dma *pd = pci_get_drvdata(pdev);
+
+	if (pd)
+		pch_dma_save_regs(pd);
+
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+	pci_set_power_state(pdev, pci_choose_state(pdev, state));
+
+	return 0;
+}
+
+static int pch_dma_resume(struct pci_dev *pdev)
+{
+	struct pch_dma *pd = pci_get_drvdata(pdev);
+	int err;
+
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		dev_dbg(&pdev->dev, "failed to enable device\n");
+		return err;
+	}
+
+	if (pd)
+		pch_dma_restore_regs(pd);
+
+	return 0;
+}
+#endif
+
+static int pch_dma_probe(struct pci_dev *pdev,
+				   const struct pci_device_id *id)
+{
+	struct pch_dma *pd;
+	struct pch_dma_regs *regs;
+	unsigned int nr_channels;
+	int err;
+	int i;
+
+	nr_channels = id->driver_data;
+	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return -ENOMEM;
+
+	pci_set_drvdata(pdev, pd);
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "Cannot enable PCI device\n");
+		goto err_free_mem;
+	}
+
+	if (!(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) {
+		dev_err(&pdev->dev, "Cannot find proper base address\n");
+		err = -ENODEV;
+		goto err_disable_pdev;
+	}
+
+	err = pci_request_regions(pdev, DRV_NAME);
+	if (err) {
+		dev_err(&pdev->dev, "Cannot obtain PCI resources\n");
+		goto err_disable_pdev;
+	}
+
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+	if (err) {
+		dev_err(&pdev->dev, "Cannot set proper DMA config\n");
+		goto err_free_res;
+	}
+
+	regs = pd->membase = pci_iomap(pdev, 1, 0);
+	if (!pd->membase) {
+		dev_err(&pdev->dev, "Cannot map MMIO registers\n");
+		err = -ENOMEM;
+		goto err_free_res;
+	}
+
+	pci_set_master(pdev);
+
+	err = request_irq(pdev->irq, pd_irq, IRQF_SHARED, DRV_NAME, pd);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to request IRQ\n");
+		goto err_iounmap;
+	}
+
+	pd->pool = dma_pool_create("pch_dma_desc_pool", &pdev->dev,
+				   sizeof(struct pch_dma_desc), 4, 0);
+	if (!pd->pool) {
+		dev_err(&pdev->dev, "Failed to alloc DMA descriptors\n");
+		err = -ENOMEM;
+		goto err_free_irq;
+	}
+
+	pd->dma.dev = &pdev->dev;
+
+	INIT_LIST_HEAD(&pd->dma.channels);
+
+	for (i = 0; i < nr_channels; i++) {
+		struct pch_dma_chan *pd_chan = &pd->channels[i];
+
+		pd_chan->chan.device = &pd->dma;
+		dma_cookie_init(&pd_chan->chan);
+
+		pd_chan->membase = &regs->desc[i];
+
+		spin_lock_init(&pd_chan->lock);
+
+		INIT_LIST_HEAD(&pd_chan->active_list);
+		INIT_LIST_HEAD(&pd_chan->queue);
+		INIT_LIST_HEAD(&pd_chan->free_list);
+
+		tasklet_init(&pd_chan->tasklet, pdc_tasklet,
+			     (unsigned long)pd_chan);
+		list_add_tail(&pd_chan->chan.device_node, &pd->dma.channels);
+	}
+
+	dma_cap_zero(pd->dma.cap_mask);
+	dma_cap_set(DMA_PRIVATE, pd->dma.cap_mask);
+	dma_cap_set(DMA_SLAVE, pd->dma.cap_mask);
+
+	pd->dma.device_alloc_chan_resources = pd_alloc_chan_resources;
+	pd->dma.device_free_chan_resources = pd_free_chan_resources;
+	pd->dma.device_tx_status = pd_tx_status;
+	pd->dma.device_issue_pending = pd_issue_pending;
+	pd->dma.device_prep_slave_sg = pd_prep_slave_sg;
+	pd->dma.device_terminate_all = pd_device_terminate_all;
+
+	err = dma_async_device_register(&pd->dma);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to register DMA device\n");
+		goto err_free_pool;
+	}
+
+	return 0;
+
+err_free_pool:
+	dma_pool_destroy(pd->pool);
+err_free_irq:
+	free_irq(pdev->irq, pd);
+err_iounmap:
+	pci_iounmap(pdev, pd->membase);
+err_free_res:
+	pci_release_regions(pdev);
+err_disable_pdev:
+	pci_disable_device(pdev);
+err_free_mem:
+	kfree(pd);
+	return err;
+}
+
+static void pch_dma_remove(struct pci_dev *pdev)
+{
+	struct pch_dma *pd = pci_get_drvdata(pdev);
+	struct pch_dma_chan *pd_chan;
+	struct dma_chan *chan, *_c;
+
+	if (pd) {
+		dma_async_device_unregister(&pd->dma);
+
+		free_irq(pdev->irq, pd);
+
+		list_for_each_entry_safe(chan, _c, &pd->dma.channels,
+					 device_node) {
+			pd_chan = to_pd_chan(chan);
+
+			tasklet_kill(&pd_chan->tasklet);
+		}
+
+		dma_pool_destroy(pd->pool);
+		pci_iounmap(pdev, pd->membase);
+		pci_release_regions(pdev);
+		pci_disable_device(pdev);
+		kfree(pd);
+	}
+}
+
+/* PCI Device ID of DMA device */
+#define PCI_VENDOR_ID_ROHM             0x10DB
+#define PCI_DEVICE_ID_EG20T_PCH_DMA_8CH        0x8810
+#define PCI_DEVICE_ID_EG20T_PCH_DMA_4CH        0x8815
+#define PCI_DEVICE_ID_ML7213_DMA1_8CH	0x8026
+#define PCI_DEVICE_ID_ML7213_DMA2_8CH	0x802B
+#define PCI_DEVICE_ID_ML7213_DMA3_4CH	0x8034
+#define PCI_DEVICE_ID_ML7213_DMA4_12CH	0x8032
+#define PCI_DEVICE_ID_ML7223_DMA1_4CH	0x800B
+#define PCI_DEVICE_ID_ML7223_DMA2_4CH	0x800E
+#define PCI_DEVICE_ID_ML7223_DMA3_4CH	0x8017
+#define PCI_DEVICE_ID_ML7223_DMA4_4CH	0x803B
+#define PCI_DEVICE_ID_ML7831_DMA1_8CH	0x8810
+#define PCI_DEVICE_ID_ML7831_DMA2_4CH	0x8815
+
+static const struct pci_device_id pch_dma_id_table[] = {
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_8CH), 8 },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_4CH), 4 },
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA1_8CH), 8}, /* UART Video */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA2_8CH), 8}, /* PCMIF SPI */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA3_4CH), 4}, /* FPGA */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA4_12CH), 12}, /* I2S */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA1_4CH), 4}, /* UART */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA2_4CH), 4}, /* Video SPI */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA3_4CH), 4}, /* Security */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA4_4CH), 4}, /* FPGA */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7831_DMA1_8CH), 8}, /* UART */
+	{ PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7831_DMA2_4CH), 4}, /* SPI */
+	{ 0, },
+};
+
+static struct pci_driver pch_dma_driver = {
+	.name		= DRV_NAME,
+	.id_table	= pch_dma_id_table,
+	.probe		= pch_dma_probe,
+	.remove		= pch_dma_remove,
+#ifdef CONFIG_PM
+	.suspend	= pch_dma_suspend,
+	.resume		= pch_dma_resume,
+#endif
+};
+
+module_pci_driver(pch_dma_driver);
+
+MODULE_DESCRIPTION("Intel EG20T PCH / LAPIS Semicon ML7213/ML7223/ML7831 IOH "
+		   "DMA controller driver");
+MODULE_AUTHOR("Yong Wang <yong.y.wang@intel.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DEVICE_TABLE(pci, pch_dma_id_table);
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
new file mode 100644
index 0000000..88750a3
--- /dev/null
+++ b/drivers/dma/pl330.c
@@ -0,0 +1,3163 @@
+/*
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com
+ *
+ * Copyright (C) 2010 Samsung Electronics Co. Ltd.
+ *	Jaswinder Singh <jassi.brar@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/amba/bus.h>
+#include <linux/scatterlist.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/err.h>
+#include <linux/pm_runtime.h>
+#include <linux/bug.h>
+
+#include "dmaengine.h"
+#define PL330_MAX_CHAN		8
+#define PL330_MAX_IRQS		32
+#define PL330_MAX_PERI		32
+#define PL330_MAX_BURST         16
+
+#define PL330_QUIRK_BROKEN_NO_FLUSHP BIT(0)
+
+enum pl330_cachectrl {
+	CCTRL0,		/* Noncacheable and nonbufferable */
+	CCTRL1,		/* Bufferable only */
+	CCTRL2,		/* Cacheable, but do not allocate */
+	CCTRL3,		/* Cacheable and bufferable, but do not allocate */
+	INVALID1,	/* AWCACHE = 0x1000 */
+	INVALID2,
+	CCTRL6,		/* Cacheable write-through, allocate on writes only */
+	CCTRL7,		/* Cacheable write-back, allocate on writes only */
+};
+
+enum pl330_byteswap {
+	SWAP_NO,
+	SWAP_2,
+	SWAP_4,
+	SWAP_8,
+	SWAP_16,
+};
+
+/* Register and Bit field Definitions */
+#define DS			0x0
+#define DS_ST_STOP		0x0
+#define DS_ST_EXEC		0x1
+#define DS_ST_CMISS		0x2
+#define DS_ST_UPDTPC		0x3
+#define DS_ST_WFE		0x4
+#define DS_ST_ATBRR		0x5
+#define DS_ST_QBUSY		0x6
+#define DS_ST_WFP		0x7
+#define DS_ST_KILL		0x8
+#define DS_ST_CMPLT		0x9
+#define DS_ST_FLTCMP		0xe
+#define DS_ST_FAULT		0xf
+
+#define DPC			0x4
+#define INTEN			0x20
+#define ES			0x24
+#define INTSTATUS		0x28
+#define INTCLR			0x2c
+#define FSM			0x30
+#define FSC			0x34
+#define FTM			0x38
+
+#define _FTC			0x40
+#define FTC(n)			(_FTC + (n)*0x4)
+
+#define _CS			0x100
+#define CS(n)			(_CS + (n)*0x8)
+#define CS_CNS			(1 << 21)
+
+#define _CPC			0x104
+#define CPC(n)			(_CPC + (n)*0x8)
+
+#define _SA			0x400
+#define SA(n)			(_SA + (n)*0x20)
+
+#define _DA			0x404
+#define DA(n)			(_DA + (n)*0x20)
+
+#define _CC			0x408
+#define CC(n)			(_CC + (n)*0x20)
+
+#define CC_SRCINC		(1 << 0)
+#define CC_DSTINC		(1 << 14)
+#define CC_SRCPRI		(1 << 8)
+#define CC_DSTPRI		(1 << 22)
+#define CC_SRCNS		(1 << 9)
+#define CC_DSTNS		(1 << 23)
+#define CC_SRCIA		(1 << 10)
+#define CC_DSTIA		(1 << 24)
+#define CC_SRCBRSTLEN_SHFT	4
+#define CC_DSTBRSTLEN_SHFT	18
+#define CC_SRCBRSTSIZE_SHFT	1
+#define CC_DSTBRSTSIZE_SHFT	15
+#define CC_SRCCCTRL_SHFT	11
+#define CC_SRCCCTRL_MASK	0x7
+#define CC_DSTCCTRL_SHFT	25
+#define CC_DRCCCTRL_MASK	0x7
+#define CC_SWAP_SHFT		28
+
+#define _LC0			0x40c
+#define LC0(n)			(_LC0 + (n)*0x20)
+
+#define _LC1			0x410
+#define LC1(n)			(_LC1 + (n)*0x20)
+
+#define DBGSTATUS		0xd00
+#define DBG_BUSY		(1 << 0)
+
+#define DBGCMD			0xd04
+#define DBGINST0		0xd08
+#define DBGINST1		0xd0c
+
+#define CR0			0xe00
+#define CR1			0xe04
+#define CR2			0xe08
+#define CR3			0xe0c
+#define CR4			0xe10
+#define CRD			0xe14
+
+#define PERIPH_ID		0xfe0
+#define PERIPH_REV_SHIFT	20
+#define PERIPH_REV_MASK		0xf
+#define PERIPH_REV_R0P0		0
+#define PERIPH_REV_R1P0		1
+#define PERIPH_REV_R1P1		2
+
+#define CR0_PERIPH_REQ_SET	(1 << 0)
+#define CR0_BOOT_EN_SET		(1 << 1)
+#define CR0_BOOT_MAN_NS		(1 << 2)
+#define CR0_NUM_CHANS_SHIFT	4
+#define CR0_NUM_CHANS_MASK	0x7
+#define CR0_NUM_PERIPH_SHIFT	12
+#define CR0_NUM_PERIPH_MASK	0x1f
+#define CR0_NUM_EVENTS_SHIFT	17
+#define CR0_NUM_EVENTS_MASK	0x1f
+
+#define CR1_ICACHE_LEN_SHIFT	0
+#define CR1_ICACHE_LEN_MASK	0x7
+#define CR1_NUM_ICACHELINES_SHIFT	4
+#define CR1_NUM_ICACHELINES_MASK	0xf
+
+#define CRD_DATA_WIDTH_SHIFT	0
+#define CRD_DATA_WIDTH_MASK	0x7
+#define CRD_WR_CAP_SHIFT	4
+#define CRD_WR_CAP_MASK		0x7
+#define CRD_WR_Q_DEP_SHIFT	8
+#define CRD_WR_Q_DEP_MASK	0xf
+#define CRD_RD_CAP_SHIFT	12
+#define CRD_RD_CAP_MASK		0x7
+#define CRD_RD_Q_DEP_SHIFT	16
+#define CRD_RD_Q_DEP_MASK	0xf
+#define CRD_DATA_BUFF_SHIFT	20
+#define CRD_DATA_BUFF_MASK	0x3ff
+
+#define PART			0x330
+#define DESIGNER		0x41
+#define REVISION		0x0
+#define INTEG_CFG		0x0
+#define PERIPH_ID_VAL		((PART << 0) | (DESIGNER << 12))
+
+#define PL330_STATE_STOPPED		(1 << 0)
+#define PL330_STATE_EXECUTING		(1 << 1)
+#define PL330_STATE_WFE			(1 << 2)
+#define PL330_STATE_FAULTING		(1 << 3)
+#define PL330_STATE_COMPLETING		(1 << 4)
+#define PL330_STATE_WFP			(1 << 5)
+#define PL330_STATE_KILLING		(1 << 6)
+#define PL330_STATE_FAULT_COMPLETING	(1 << 7)
+#define PL330_STATE_CACHEMISS		(1 << 8)
+#define PL330_STATE_UPDTPC		(1 << 9)
+#define PL330_STATE_ATBARRIER		(1 << 10)
+#define PL330_STATE_QUEUEBUSY		(1 << 11)
+#define PL330_STATE_INVALID		(1 << 15)
+
+#define PL330_STABLE_STATES (PL330_STATE_STOPPED | PL330_STATE_EXECUTING \
+				| PL330_STATE_WFE | PL330_STATE_FAULTING)
+
+#define CMD_DMAADDH		0x54
+#define CMD_DMAEND		0x00
+#define CMD_DMAFLUSHP		0x35
+#define CMD_DMAGO		0xa0
+#define CMD_DMALD		0x04
+#define CMD_DMALDP		0x25
+#define CMD_DMALP		0x20
+#define CMD_DMALPEND		0x28
+#define CMD_DMAKILL		0x01
+#define CMD_DMAMOV		0xbc
+#define CMD_DMANOP		0x18
+#define CMD_DMARMB		0x12
+#define CMD_DMASEV		0x34
+#define CMD_DMAST		0x08
+#define CMD_DMASTP		0x29
+#define CMD_DMASTZ		0x0c
+#define CMD_DMAWFE		0x36
+#define CMD_DMAWFP		0x30
+#define CMD_DMAWMB		0x13
+
+#define SZ_DMAADDH		3
+#define SZ_DMAEND		1
+#define SZ_DMAFLUSHP		2
+#define SZ_DMALD		1
+#define SZ_DMALDP		2
+#define SZ_DMALP		2
+#define SZ_DMALPEND		2
+#define SZ_DMAKILL		1
+#define SZ_DMAMOV		6
+#define SZ_DMANOP		1
+#define SZ_DMARMB		1
+#define SZ_DMASEV		2
+#define SZ_DMAST		1
+#define SZ_DMASTP		2
+#define SZ_DMASTZ		1
+#define SZ_DMAWFE		2
+#define SZ_DMAWFP		2
+#define SZ_DMAWMB		1
+#define SZ_DMAGO		6
+
+#define BRST_LEN(ccr)		((((ccr) >> CC_SRCBRSTLEN_SHFT) & 0xf) + 1)
+#define BRST_SIZE(ccr)		(1 << (((ccr) >> CC_SRCBRSTSIZE_SHFT) & 0x7))
+
+#define BYTE_TO_BURST(b, ccr)	((b) / BRST_SIZE(ccr) / BRST_LEN(ccr))
+#define BURST_TO_BYTE(c, ccr)	((c) * BRST_SIZE(ccr) * BRST_LEN(ccr))
+
+/*
+ * With 256 bytes, we can do more than 2.5MB and 5MB xfers per req
+ * at 1 byte/burst for P<->M and M<->M respectively.
+ * For a typical scenario, at 1 word/burst, 10MB and 20MB xfers per req
+ * should be enough for P<->M and M<->M respectively.
+ */
+#define MCODE_BUFF_PER_REQ	256
+
+/* Use this _only_ to wait on transient states */
+#define UNTIL(t, s)	while (!(_state(t) & (s))) cpu_relax();
+
+#ifdef PL330_DEBUG_MCGEN
+static unsigned cmd_line;
+#define PL330_DBGCMD_DUMP(off, x...)	do { \
+						printk("%x:", cmd_line); \
+						printk(x); \
+						cmd_line += off; \
+					} while (0)
+#define PL330_DBGMC_START(addr)		(cmd_line = addr)
+#else
+#define PL330_DBGCMD_DUMP(off, x...)	do {} while (0)
+#define PL330_DBGMC_START(addr)		do {} while (0)
+#endif
+
+/* The number of default descriptors */
+
+#define NR_DEFAULT_DESC	16
+
+/* Delay for runtime PM autosuspend, ms */
+#define PL330_AUTOSUSPEND_DELAY 20
+
+/* Populated by the PL330 core driver for DMA API driver's info */
+struct pl330_config {
+	u32	periph_id;
+#define DMAC_MODE_NS	(1 << 0)
+	unsigned int	mode;
+	unsigned int	data_bus_width:10; /* In number of bits */
+	unsigned int	data_buf_dep:11;
+	unsigned int	num_chan:4;
+	unsigned int	num_peri:6;
+	u32		peri_ns;
+	unsigned int	num_events:6;
+	u32		irq_ns;
+};
+
+/**
+ * struct pl330_reqcfg - Request Configuration.
+ * The PL330 core does not modify this and uses the last
+ * working configuration if the request doesn't provide any.
+ *
+ * The Client may want to provide this info only for the
+ * first request and a request with new settings.
+ */
+struct pl330_reqcfg {
+	/* Address Incrementing */
+	unsigned dst_inc:1;
+	unsigned src_inc:1;
+
+	/*
+	 * For now, the SRC & DST protection levels
+	 * and burst size/length are assumed same.
+	 */
+	bool nonsecure;
+	bool privileged;
+	bool insnaccess;
+	unsigned brst_len:5;
+	unsigned brst_size:3; /* in power of 2 */
+
+	enum pl330_cachectrl dcctl;
+	enum pl330_cachectrl scctl;
+	enum pl330_byteswap swap;
+	struct pl330_config *pcfg;
+};
+
+/*
+ * One cycle of DMAC operation.
+ * There may be more than one xfer in a request.
+ */
+struct pl330_xfer {
+	u32 src_addr;
+	u32 dst_addr;
+	/* Size to xfer */
+	u32 bytes;
+};
+
+/* The xfer callbacks are made with one of these arguments. */
+enum pl330_op_err {
+	/* All xfers in the request completed successfully. */
+	PL330_ERR_NONE,
+	/* The req was aborted due to a global error. */
+	PL330_ERR_ABORT,
+	/* The req failed due to a problem with the channel. */
+	PL330_ERR_FAIL,
+};
+
+enum dmamov_dst {
+	SAR = 0,
+	CCR,
+	DAR,
+};
+
+enum pl330_dst {
+	SRC = 0,
+	DST,
+};
+
+enum pl330_cond {
+	SINGLE,
+	BURST,
+	ALWAYS,
+};
+
+struct dma_pl330_desc;
+
+struct _pl330_req {
+	u32 mc_bus;
+	void *mc_cpu;
+	struct dma_pl330_desc *desc;
+};
+
+/* ToBeDone for tasklet */
+struct _pl330_tbd {
+	bool reset_dmac;
+	bool reset_mngr;
+	u8 reset_chan;
+};
+
+/* A DMAC Thread */
+struct pl330_thread {
+	u8 id;
+	int ev;
+	/* If the channel is not yet acquired by any client */
+	bool free;
+	/* Parent DMAC */
+	struct pl330_dmac *dmac;
+	/* Only two at a time */
+	struct _pl330_req req[2];
+	/* Index of the last enqueued request */
+	unsigned lstenq;
+	/* Index of the last submitted request or -1 if the DMA is stopped */
+	int req_running;
+};
+
+enum pl330_dmac_state {
+	UNINIT,
+	INIT,
+	DYING,
+};
+
+enum desc_status {
+	/* In the DMAC pool */
+	FREE,
+	/*
+	 * Allocated to some channel during prep_xxx
+	 * Also may be sitting on the work_list.
+	 */
+	PREP,
+	/*
+	 * Sitting on the work_list and already submitted
+	 * to the PL330 core. Not more than two descriptors
+	 * of a channel can be BUSY at any time.
+	 */
+	BUSY,
+	/*
+	 * Sitting on the channel work_list but xfer done
+	 * by PL330 core
+	 */
+	DONE,
+};
+
+struct dma_pl330_chan {
+	/* Schedule desc completion */
+	struct tasklet_struct task;
+
+	/* DMA-Engine Channel */
+	struct dma_chan chan;
+
+	/* List of submitted descriptors */
+	struct list_head submitted_list;
+	/* List of issued descriptors */
+	struct list_head work_list;
+	/* List of completed descriptors */
+	struct list_head completed_list;
+
+	/* Pointer to the DMAC that manages this channel,
+	 * NULL if the channel is available to be acquired.
+	 * As the parent, this DMAC also provides descriptors
+	 * to the channel.
+	 */
+	struct pl330_dmac *dmac;
+
+	/* To protect channel manipulation */
+	spinlock_t lock;
+
+	/*
+	 * Hardware channel thread of PL330 DMAC. NULL if the channel is
+	 * available.
+	 */
+	struct pl330_thread *thread;
+
+	/* For D-to-M and M-to-D channels */
+	int burst_sz; /* the peripheral fifo width */
+	int burst_len; /* the number of bursts */
+	phys_addr_t fifo_addr;
+	/* DMA-mapped view of the FIFO; may differ if an IOMMU is present */
+	dma_addr_t fifo_dma;
+	enum dma_data_direction dir;
+
+	/* for cyclic capability */
+	bool cyclic;
+
+	/* for runtime pm tracking */
+	bool active;
+};
+
+struct pl330_dmac {
+	/* DMA-Engine Device */
+	struct dma_device ddma;
+
+	/* Holds info about sg limitations */
+	struct device_dma_parameters dma_parms;
+
+	/* Pool of descriptors available for the DMAC's channels */
+	struct list_head desc_pool;
+	/* To protect desc_pool manipulation */
+	spinlock_t pool_lock;
+
+	/* Size of MicroCode buffers for each channel. */
+	unsigned mcbufsz;
+	/* ioremap'ed address of PL330 registers. */
+	void __iomem	*base;
+	/* Populated by the PL330 core driver during pl330_add */
+	struct pl330_config	pcfg;
+
+	spinlock_t		lock;
+	/* Event number -> id of the channel thread using it (-1 if free); at most 32 events/irqs */
+	int			events[32];
+	/* BUS address of MicroCode buffer */
+	dma_addr_t		mcode_bus;
+	/* CPU address of MicroCode buffer */
+	void			*mcode_cpu;
+	/* List of all Channel threads */
+	struct pl330_thread	*channels;
+	/* Pointer to the MANAGER thread */
+	struct pl330_thread	*manager;
+	/* To handle bad news in interrupt */
+	struct tasklet_struct	tasks;
+	struct _pl330_tbd	dmac_tbd;
+	/* State of DMAC operation */
+	enum pl330_dmac_state	state;
+	/* Holds list of reqs with due callbacks */
+	struct list_head        req_done;
+
+	/* Peripheral channels connected to this DMAC */
+	unsigned int num_peripherals;
+	struct dma_pl330_chan *peripherals; /* keep at end */
+	int quirks;
+};
+
+static struct pl330_of_quirks {
+	char *quirk;
+	int id;
+} of_quirks[] = {
+	{
+		.quirk = "arm,pl330-broken-no-flushp",
+		.id = PL330_QUIRK_BROKEN_NO_FLUSHP,
+	}
+};
+
+struct dma_pl330_desc {
+	/* To attach to a queue as child */
+	struct list_head node;
+
+	/* Descriptor for the DMA Engine API */
+	struct dma_async_tx_descriptor txd;
+
+	/* Xfer for PL330 core */
+	struct pl330_xfer px;
+
+	struct pl330_reqcfg rqcfg;
+
+	enum desc_status status;
+
+	int bytes_requested;
+	bool last;
+
+	/* The channel which currently holds this desc */
+	struct dma_pl330_chan *pchan;
+
+	enum dma_transfer_direction rqtype;
+	/* Index of peripheral for the xfer. */
+	unsigned peri:5;
+	/* Hook to attach to DMAC's list of reqs with due callback */
+	struct list_head rqd;
+};
+
+struct _xfer_spec {
+	u32 ccr;
+	struct dma_pl330_desc *desc;
+};
+
+static inline bool _queue_full(struct pl330_thread *thrd)
+{
+	return thrd->req[0].desc != NULL && thrd->req[1].desc != NULL;
+}
+
+static inline bool is_manager(struct pl330_thread *thrd)
+{
+	return thrd->dmac->manager == thrd;
+}
+
+/* If manager of the thread is in Non-Secure mode */
+static inline bool _manager_ns(struct pl330_thread *thrd)
+{
+	return (thrd->dmac->pcfg.mode & DMAC_MODE_NS) ? true : false;
+}
+
+static inline u32 get_revision(u32 periph_id)
+{
+	return (periph_id >> PERIPH_REV_SHIFT) & PERIPH_REV_MASK;
+}
+
+static inline u32 _emit_END(unsigned dry_run, u8 buf[])
+{
+	if (dry_run)
+		return SZ_DMAEND;
+
+	buf[0] = CMD_DMAEND;
+
+	PL330_DBGCMD_DUMP(SZ_DMAEND, "\tDMAEND\n");
+
+	return SZ_DMAEND;
+}
+
+static inline u32 _emit_FLUSHP(unsigned dry_run, u8 buf[], u8 peri)
+{
+	if (dry_run)
+		return SZ_DMAFLUSHP;
+
+	buf[0] = CMD_DMAFLUSHP;
+
+	peri &= 0x1f;
+	peri <<= 3;
+	buf[1] = peri;
+
+	PL330_DBGCMD_DUMP(SZ_DMAFLUSHP, "\tDMAFLUSHP %u\n", peri >> 3);
+
+	return SZ_DMAFLUSHP;
+}
+
+static inline u32 _emit_LD(unsigned dry_run, u8 buf[],	enum pl330_cond cond)
+{
+	if (dry_run)
+		return SZ_DMALD;
+
+	buf[0] = CMD_DMALD;
+
+	if (cond == SINGLE)
+		buf[0] |= (0 << 1) | (1 << 0);
+	else if (cond == BURST)
+		buf[0] |= (1 << 1) | (1 << 0);
+
+	PL330_DBGCMD_DUMP(SZ_DMALD, "\tDMALD%c\n",
+		cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'A'));
+
+	return SZ_DMALD;
+}
+
+static inline u32 _emit_LDP(unsigned dry_run, u8 buf[],
+		enum pl330_cond cond, u8 peri)
+{
+	if (dry_run)
+		return SZ_DMALDP;
+
+	buf[0] = CMD_DMALDP;
+
+	if (cond == BURST)
+		buf[0] |= (1 << 1);
+
+	peri &= 0x1f;
+	peri <<= 3;
+	buf[1] = peri;
+
+	PL330_DBGCMD_DUMP(SZ_DMALDP, "\tDMALDP%c %u\n",
+		cond == SINGLE ? 'S' : 'B', peri >> 3);
+
+	return SZ_DMALDP;
+}
+
+static inline u32 _emit_LP(unsigned dry_run, u8 buf[],
+		unsigned loop, u8 cnt)
+{
+	if (dry_run)
+		return SZ_DMALP;
+
+	buf[0] = CMD_DMALP;
+
+	if (loop)
+		buf[0] |= (1 << 1);
+
+	cnt--; /* DMAC increments by 1 internally */
+	buf[1] = cnt;
+
+	PL330_DBGCMD_DUMP(SZ_DMALP, "\tDMALP_%c %u\n", loop ? '1' : '0', cnt);
+
+	return SZ_DMALP;
+}
+
+struct _arg_LPEND {
+	enum pl330_cond cond;
+	bool forever;
+	unsigned loop;
+	u8 bjump;
+};
+
+static inline u32 _emit_LPEND(unsigned dry_run, u8 buf[],
+		const struct _arg_LPEND *arg)
+{
+	enum pl330_cond cond = arg->cond;
+	bool forever = arg->forever;
+	unsigned loop = arg->loop;
+	u8 bjump = arg->bjump;
+
+	if (dry_run)
+		return SZ_DMALPEND;
+
+	buf[0] = CMD_DMALPEND;
+
+	if (loop)
+		buf[0] |= (1 << 2);
+
+	if (!forever)
+		buf[0] |= (1 << 4);
+
+	if (cond == SINGLE)
+		buf[0] |= (0 << 1) | (1 << 0);
+	else if (cond == BURST)
+		buf[0] |= (1 << 1) | (1 << 0);
+
+	buf[1] = bjump;
+
+	PL330_DBGCMD_DUMP(SZ_DMALPEND, "\tDMALP%s%c_%c bjmpto_%x\n",
+			forever ? "FE" : "END",
+			cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'A'),
+			loop ? '1' : '0',
+			bjump);
+
+	return SZ_DMALPEND;
+}
+
+static inline u32 _emit_KILL(unsigned dry_run, u8 buf[])
+{
+	if (dry_run)
+		return SZ_DMAKILL;
+
+	buf[0] = CMD_DMAKILL;
+
+	return SZ_DMAKILL;
+}
+
+static inline u32 _emit_MOV(unsigned dry_run, u8 buf[],
+		enum dmamov_dst dst, u32 val)
+{
+	if (dry_run)
+		return SZ_DMAMOV;
+
+	buf[0] = CMD_DMAMOV;
+	buf[1] = dst;
+	buf[2] = val;
+	buf[3] = val >> 8;
+	buf[4] = val >> 16;
+	buf[5] = val >> 24;
+
+	PL330_DBGCMD_DUMP(SZ_DMAMOV, "\tDMAMOV %s 0x%x\n",
+		dst == SAR ? "SAR" : (dst == DAR ? "DAR" : "CCR"), val);
+
+	return SZ_DMAMOV;
+}
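+
+/*
+ * Encoding example (illustration only): _emit_MOV(0, buf, SAR, 0x40001000)
+ * fills the 6-byte DMAMOV instruction as
+ *
+ *   buf[0] = CMD_DMAMOV, buf[1] = 0 (SAR),
+ *   buf[2..5] = 0x00, 0x10, 0x00, 0x40   (the immediate, little-endian)
+ *
+ * so the channel thread loads 0x40001000 into its Source Address Register.
+ */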
+
+static inline u32 _emit_RMB(unsigned dry_run, u8 buf[])
+{
+	if (dry_run)
+		return SZ_DMARMB;
+
+	buf[0] = CMD_DMARMB;
+
+	PL330_DBGCMD_DUMP(SZ_DMARMB, "\tDMARMB\n");
+
+	return SZ_DMARMB;
+}
+
+static inline u32 _emit_SEV(unsigned dry_run, u8 buf[], u8 ev)
+{
+	if (dry_run)
+		return SZ_DMASEV;
+
+	buf[0] = CMD_DMASEV;
+
+	ev &= 0x1f;
+	ev <<= 3;
+	buf[1] = ev;
+
+	PL330_DBGCMD_DUMP(SZ_DMASEV, "\tDMASEV %u\n", ev >> 3);
+
+	return SZ_DMASEV;
+}
+
+static inline u32 _emit_ST(unsigned dry_run, u8 buf[], enum pl330_cond cond)
+{
+	if (dry_run)
+		return SZ_DMAST;
+
+	buf[0] = CMD_DMAST;
+
+	if (cond == SINGLE)
+		buf[0] |= (0 << 1) | (1 << 0);
+	else if (cond == BURST)
+		buf[0] |= (1 << 1) | (1 << 0);
+
+	PL330_DBGCMD_DUMP(SZ_DMAST, "\tDMAST%c\n",
+		cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'A'));
+
+	return SZ_DMAST;
+}
+
+static inline u32 _emit_STP(unsigned dry_run, u8 buf[],
+		enum pl330_cond cond, u8 peri)
+{
+	if (dry_run)
+		return SZ_DMASTP;
+
+	buf[0] = CMD_DMASTP;
+
+	if (cond == BURST)
+		buf[0] |= (1 << 1);
+
+	peri &= 0x1f;
+	peri <<= 3;
+	buf[1] = peri;
+
+	PL330_DBGCMD_DUMP(SZ_DMASTP, "\tDMASTP%c %u\n",
+		cond == SINGLE ? 'S' : 'B', peri >> 3);
+
+	return SZ_DMASTP;
+}
+
+static inline u32 _emit_WFP(unsigned dry_run, u8 buf[],
+		enum pl330_cond cond, u8 peri)
+{
+	if (dry_run)
+		return SZ_DMAWFP;
+
+	buf[0] = CMD_DMAWFP;
+
+	if (cond == SINGLE)
+		buf[0] |= (0 << 1) | (0 << 0);
+	else if (cond == BURST)
+		buf[0] |= (1 << 1) | (0 << 0);
+	else
+		buf[0] |= (0 << 1) | (1 << 0);
+
+	peri &= 0x1f;
+	peri <<= 3;
+	buf[1] = peri;
+
+	PL330_DBGCMD_DUMP(SZ_DMAWFP, "\tDMAWFP%c %u\n",
+		cond == SINGLE ? 'S' : (cond == BURST ? 'B' : 'P'), peri >> 3);
+
+	return SZ_DMAWFP;
+}
+
+static inline u32 _emit_WMB(unsigned dry_run, u8 buf[])
+{
+	if (dry_run)
+		return SZ_DMAWMB;
+
+	buf[0] = CMD_DMAWMB;
+
+	PL330_DBGCMD_DUMP(SZ_DMAWMB, "\tDMAWMB\n");
+
+	return SZ_DMAWMB;
+}
+
+struct _arg_GO {
+	u8 chan;
+	u32 addr;
+	unsigned ns;
+};
+
+static inline u32 _emit_GO(unsigned dry_run, u8 buf[],
+		const struct _arg_GO *arg)
+{
+	u8 chan = arg->chan;
+	u32 addr = arg->addr;
+	unsigned ns = arg->ns;
+
+	if (dry_run)
+		return SZ_DMAGO;
+
+	buf[0] = CMD_DMAGO;
+	buf[0] |= (ns << 1);
+	buf[1] = chan & 0x7;
+	buf[2] = addr;
+	buf[3] = addr >> 8;
+	buf[4] = addr >> 16;
+	buf[5] = addr >> 24;
+
+	return SZ_DMAGO;
+}
+
+#define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t)
+
+/* Returns true if it timed out waiting for the DMAC debug status to go idle */
+static bool _until_dmac_idle(struct pl330_thread *thrd)
+{
+	void __iomem *regs = thrd->dmac->base;
+	unsigned long loops = msecs_to_loops(5);
+
+	do {
+		/* Until Manager is Idle */
+		if (!(readl(regs + DBGSTATUS) & DBG_BUSY))
+			break;
+
+		cpu_relax();
+	} while (--loops);
+
+	if (!loops)
+		return true;
+
+	return false;
+}
+
+static inline void _execute_DBGINSN(struct pl330_thread *thrd,
+		u8 insn[], bool as_manager)
+{
+	void __iomem *regs = thrd->dmac->base;
+	u32 val;
+
+	val = (insn[0] << 16) | (insn[1] << 24);
+	if (!as_manager) {
+		val |= (1 << 0);
+		val |= (thrd->id << 8); /* Channel Number */
+	}
+	writel(val, regs + DBGINST0);
+
+	val = le32_to_cpu(*((__le32 *)&insn[2]));
+	writel(val, regs + DBGINST1);
+
+	/* If timed out due to halted state-machine */
+	if (_until_dmac_idle(thrd)) {
+		dev_err(thrd->dmac->ddma.dev, "DMAC halted!\n");
+		return;
+	}
+
+	/* Get going */
+	writel(0, regs + DBGCMD);
+}
+
+static inline u32 _state(struct pl330_thread *thrd)
+{
+	void __iomem *regs = thrd->dmac->base;
+	u32 val;
+
+	if (is_manager(thrd))
+		val = readl(regs + DS) & 0xf;
+	else
+		val = readl(regs + CS(thrd->id)) & 0xf;
+
+	switch (val) {
+	case DS_ST_STOP:
+		return PL330_STATE_STOPPED;
+	case DS_ST_EXEC:
+		return PL330_STATE_EXECUTING;
+	case DS_ST_CMISS:
+		return PL330_STATE_CACHEMISS;
+	case DS_ST_UPDTPC:
+		return PL330_STATE_UPDTPC;
+	case DS_ST_WFE:
+		return PL330_STATE_WFE;
+	case DS_ST_FAULT:
+		return PL330_STATE_FAULTING;
+	case DS_ST_ATBRR:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_ATBARRIER;
+	case DS_ST_QBUSY:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_QUEUEBUSY;
+	case DS_ST_WFP:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_WFP;
+	case DS_ST_KILL:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_KILLING;
+	case DS_ST_CMPLT:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_COMPLETING;
+	case DS_ST_FLTCMP:
+		if (is_manager(thrd))
+			return PL330_STATE_INVALID;
+		else
+			return PL330_STATE_FAULT_COMPLETING;
+	default:
+		return PL330_STATE_INVALID;
+	}
+}
+
+static void _stop(struct pl330_thread *thrd)
+{
+	void __iomem *regs = thrd->dmac->base;
+	u8 insn[6] = {0, 0, 0, 0, 0, 0};
+
+	if (_state(thrd) == PL330_STATE_FAULT_COMPLETING)
+		UNTIL(thrd, PL330_STATE_FAULTING | PL330_STATE_KILLING);
+
+	/* Return if nothing needs to be done */
+	if (_state(thrd) == PL330_STATE_COMPLETING
+		  || _state(thrd) == PL330_STATE_KILLING
+		  || _state(thrd) == PL330_STATE_STOPPED)
+		return;
+
+	_emit_KILL(0, insn);
+
+	/* Stop generating interrupts for SEV */
+	writel(readl(regs + INTEN) & ~(1 << thrd->ev), regs + INTEN);
+
+	_execute_DBGINSN(thrd, insn, is_manager(thrd));
+}
+
+/* Start executing the next queued req of thread 'thrd' */
+static bool _trigger(struct pl330_thread *thrd)
+{
+	void __iomem *regs = thrd->dmac->base;
+	struct _pl330_req *req;
+	struct dma_pl330_desc *desc;
+	struct _arg_GO go;
+	unsigned ns;
+	u8 insn[6] = {0, 0, 0, 0, 0, 0};
+	int idx;
+
+	/* Return if already ACTIVE */
+	if (_state(thrd) != PL330_STATE_STOPPED)
+		return true;
+
+	idx = 1 - thrd->lstenq;
+	if (thrd->req[idx].desc != NULL) {
+		req = &thrd->req[idx];
+	} else {
+		idx = thrd->lstenq;
+		if (thrd->req[idx].desc != NULL)
+			req = &thrd->req[idx];
+		else
+			req = NULL;
+	}
+
+	/* Return if no request */
+	if (!req)
+		return true;
+
+	/* Return if req is running */
+	if (idx == thrd->req_running)
+		return true;
+
+	desc = req->desc;
+
+	ns = desc->rqcfg.nonsecure ? 1 : 0;
+
+	/* See 'Abort Sources' point-4 at Page 2-25 */
+	if (_manager_ns(thrd) && !ns)
+		dev_info(thrd->dmac->ddma.dev, "%s:%d Recipe for ABORT!\n",
+			__func__, __LINE__);
+
+	go.chan = thrd->id;
+	go.addr = req->mc_bus;
+	go.ns = ns;
+	_emit_GO(0, insn, &go);
+
+	/* Set to generate interrupts for SEV */
+	writel(readl(regs + INTEN) | (1 << thrd->ev), regs + INTEN);
+
+	/* Only manager can execute GO */
+	_execute_DBGINSN(thrd, insn, true);
+
+	thrd->req_running = idx;
+
+	return true;
+}
+
+static bool _start(struct pl330_thread *thrd)
+{
+	switch (_state(thrd)) {
+	case PL330_STATE_FAULT_COMPLETING:
+		UNTIL(thrd, PL330_STATE_FAULTING | PL330_STATE_KILLING);
+
+		if (_state(thrd) == PL330_STATE_KILLING)
+			UNTIL(thrd, PL330_STATE_STOPPED)
+		/* fall through */
+
+	case PL330_STATE_FAULTING:
+		_stop(thrd);
+		/* fall through */
+
+	case PL330_STATE_KILLING:
+	case PL330_STATE_COMPLETING:
+		UNTIL(thrd, PL330_STATE_STOPPED)
+		/* fall through */
+
+	case PL330_STATE_STOPPED:
+		return _trigger(thrd);
+
+	case PL330_STATE_WFP:
+	case PL330_STATE_QUEUEBUSY:
+	case PL330_STATE_ATBARRIER:
+	case PL330_STATE_UPDTPC:
+	case PL330_STATE_CACHEMISS:
+	case PL330_STATE_EXECUTING:
+		return true;
+
+	case PL330_STATE_WFE: /* For RESUME, nothing yet */
+	default:
+		return false;
+	}
+}
+
+static inline int _ldst_memtomem(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs, int cyc)
+{
+	int off = 0;
+	struct pl330_config *pcfg = pxs->desc->rqcfg.pcfg;
+
+	/* check lock-up free version */
+	if (get_revision(pcfg->periph_id) >= PERIPH_REV_R1P0) {
+		while (cyc--) {
+			off += _emit_LD(dry_run, &buf[off], ALWAYS);
+			off += _emit_ST(dry_run, &buf[off], ALWAYS);
+		}
+	} else {
+		while (cyc--) {
+			off += _emit_LD(dry_run, &buf[off], ALWAYS);
+			off += _emit_RMB(dry_run, &buf[off]);
+			off += _emit_ST(dry_run, &buf[off], ALWAYS);
+			off += _emit_WMB(dry_run, &buf[off]);
+		}
+	}
+
+	return off;
+}
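+
+/*
+ * For illustration: with cyc == 2 on a lock-up free (>= r1p0) revision the
+ * generated fragment is simply
+ *
+ *   DMALD ; DMAST ; DMALD ; DMAST
+ *
+ * while older revisions additionally get a DMARMB after each load and a
+ * DMAWMB after each store, as emitted above.
+ */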
+
+static u32 _emit_load(unsigned int dry_run, u8 buf[],
+	enum pl330_cond cond, enum dma_transfer_direction direction,
+	u8 peri)
+{
+	int off = 0;
+
+	switch (direction) {
+	case DMA_MEM_TO_MEM:
+		/* fall through */
+	case DMA_MEM_TO_DEV:
+		off += _emit_LD(dry_run, &buf[off], cond);
+		break;
+
+	case DMA_DEV_TO_MEM:
+		if (cond == ALWAYS) {
+			off += _emit_LDP(dry_run, &buf[off], SINGLE,
+				peri);
+			off += _emit_LDP(dry_run, &buf[off], BURST,
+				peri);
+		} else {
+			off += _emit_LDP(dry_run, &buf[off], cond,
+				peri);
+		}
+		break;
+
+	default:
+		/* this code should be unreachable */
+		WARN_ON(1);
+		break;
+	}
+
+	return off;
+}
+
+static inline u32 _emit_store(unsigned int dry_run, u8 buf[],
+	enum pl330_cond cond, enum dma_transfer_direction direction,
+	u8 peri)
+{
+	int off = 0;
+
+	switch (direction) {
+	case DMA_MEM_TO_MEM:
+		/* fall through */
+	case DMA_DEV_TO_MEM:
+		off += _emit_ST(dry_run, &buf[off], cond);
+		break;
+
+	case DMA_MEM_TO_DEV:
+		if (cond == ALWAYS) {
+			off += _emit_STP(dry_run, &buf[off], SINGLE,
+				peri);
+			off += _emit_STP(dry_run, &buf[off], BURST,
+				peri);
+		} else {
+			off += _emit_STP(dry_run, &buf[off], cond,
+				peri);
+		}
+		break;
+
+	default:
+		/* this code should be unreachable */
+		WARN_ON(1);
+		break;
+	}
+
+	return off;
+}
+
+static inline int _ldst_peripheral(struct pl330_dmac *pl330,
+				 unsigned dry_run, u8 buf[],
+				 const struct _xfer_spec *pxs, int cyc,
+				 enum pl330_cond cond)
+{
+	int off = 0;
+
+	if (pl330->quirks & PL330_QUIRK_BROKEN_NO_FLUSHP)
+		cond = BURST;
+
+	/*
+	 * Do FLUSHP at the beginning to clear any stale DMA requests before
+	 * the first WFP.
+	 */
+	if (!(pl330->quirks & PL330_QUIRK_BROKEN_NO_FLUSHP))
+		off += _emit_FLUSHP(dry_run, &buf[off], pxs->desc->peri);
+	while (cyc--) {
+		off += _emit_WFP(dry_run, &buf[off], cond, pxs->desc->peri);
+		off += _emit_load(dry_run, &buf[off], cond, pxs->desc->rqtype,
+			pxs->desc->peri);
+		off += _emit_store(dry_run, &buf[off], cond, pxs->desc->rqtype,
+			pxs->desc->peri);
+	}
+
+	return off;
+}
+
+static int _bursts(struct pl330_dmac *pl330, unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs, int cyc)
+{
+	int off = 0;
+	enum pl330_cond cond = BRST_LEN(pxs->ccr) > 1 ? BURST : SINGLE;
+
+	switch (pxs->desc->rqtype) {
+	case DMA_MEM_TO_DEV:
+		/* fall through */
+	case DMA_DEV_TO_MEM:
+		off += _ldst_peripheral(pl330, dry_run, &buf[off], pxs, cyc,
+			cond);
+		break;
+
+	case DMA_MEM_TO_MEM:
+		off += _ldst_memtomem(dry_run, &buf[off], pxs, cyc);
+		break;
+
+	default:
+		/* this code should be unreachable */
+		WARN_ON(1);
+		break;
+	}
+
+	return off;
+}
+
+/*
+ * Transfer the dregs (the remainder after whole bursts) with single
+ * transfers to a peripheral, or with a reduced-size burst for mem-to-mem.
+ */
+static int _dregs(struct pl330_dmac *pl330, unsigned int dry_run, u8 buf[],
+		const struct _xfer_spec *pxs, int transfer_length)
+{
+	int off = 0;
+	int dregs_ccr;
+
+	if (transfer_length == 0)
+		return off;
+
+	switch (pxs->desc->rqtype) {
+	case DMA_MEM_TO_DEV:
+		/* fall through */
+	case DMA_DEV_TO_MEM:
+		off += _ldst_peripheral(pl330, dry_run, &buf[off], pxs,
+			transfer_length, SINGLE);
+		break;
+
+	case DMA_MEM_TO_MEM:
+		dregs_ccr = pxs->ccr;
+		dregs_ccr &= ~((0xf << CC_SRCBRSTLEN_SHFT) |
+			(0xf << CC_DSTBRSTLEN_SHFT));
+		dregs_ccr |= (((transfer_length - 1) & 0xf) <<
+			CC_SRCBRSTLEN_SHFT);
+		dregs_ccr |= (((transfer_length - 1) & 0xf) <<
+			CC_DSTBRSTLEN_SHFT);
+		off += _emit_MOV(dry_run, &buf[off], CCR, dregs_ccr);
+		off += _ldst_memtomem(dry_run, &buf[off], pxs, 1);
+		break;
+
+	default:
+		/* this code should be unreachable */
+		WARN_ON(1);
+		break;
+	}
+
+	return off;
+}
+
+/* Returns bytes consumed and updates bursts */
+static inline int _loop(struct pl330_dmac *pl330, unsigned dry_run, u8 buf[],
+		unsigned long *bursts, const struct _xfer_spec *pxs)
+{
+	int cyc, cycmax, szlp, szlpend, szbrst, off;
+	unsigned lcnt0, lcnt1, ljmp0, ljmp1;
+	struct _arg_LPEND lpend;
+
+	if (*bursts == 1)
+		return _bursts(pl330, dry_run, buf, pxs, 1);
+
+	/* Max iterations possible in DMALP is 256 */
+	if (*bursts >= 256*256) {
+		lcnt1 = 256;
+		lcnt0 = 256;
+		cyc = *bursts / lcnt1 / lcnt0;
+	} else if (*bursts > 256) {
+		lcnt1 = 256;
+		lcnt0 = *bursts / lcnt1;
+		cyc = 1;
+	} else {
+		lcnt1 = *bursts;
+		lcnt0 = 0;
+		cyc = 1;
+	}
+
+	szlp = _emit_LP(1, buf, 0, 0);
+	szbrst = _bursts(pl330, 1, buf, pxs, 1);
+
+	lpend.cond = ALWAYS;
+	lpend.forever = false;
+	lpend.loop = 0;
+	lpend.bjump = 0;
+	szlpend = _emit_LPEND(1, buf, &lpend);
+
+	if (lcnt0) {
+		szlp *= 2;
+		szlpend *= 2;
+	}
+
+	/*
+	 * Max bursts that we can unroll, limited by the size of the
+	 * backward jump that can be encoded in DMALPEND,
+	 * which is 8 bits and hence 255.
+	 */
+	cycmax = (255 - (szlp + szlpend)) / szbrst;
+
+	cyc = (cycmax < cyc) ? cycmax : cyc;
+
+	off = 0;
+
+	if (lcnt0) {
+		off += _emit_LP(dry_run, &buf[off], 0, lcnt0);
+		ljmp0 = off;
+	}
+
+	off += _emit_LP(dry_run, &buf[off], 1, lcnt1);
+	ljmp1 = off;
+
+	off += _bursts(pl330, dry_run, &buf[off], pxs, cyc);
+
+	lpend.cond = ALWAYS;
+	lpend.forever = false;
+	lpend.loop = 1;
+	lpend.bjump = off - ljmp1;
+	off += _emit_LPEND(dry_run, &buf[off], &lpend);
+
+	if (lcnt0) {
+		lpend.cond = ALWAYS;
+		lpend.forever = false;
+		lpend.loop = 0;
+		lpend.bjump = off - ljmp0;
+		off += _emit_LPEND(dry_run, &buf[off], &lpend);
+	}
+
+	*bursts = lcnt1 * cyc;
+	if (lcnt0)
+		*bursts *= lcnt0;
+
+	return off;
+}
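+
+/*
+ * Worked example (illustration only): a call with *bursts == 1000 takes the
+ * middle branch above: lcnt1 = 256, lcnt0 = 1000 / 256 = 3, cyc = 1.
+ * Ignoring the cycmax clamp, the emitted program is
+ *
+ *   DMALP_0 3          ; outer loop, 3 iterations
+ *     DMALP_1 256      ; inner loop, 256 iterations
+ *       <one burst>    ; cyc == 1 unrolled burst
+ *     DMALPEND_1
+ *   DMALPEND_0
+ *
+ * and *bursts is updated to 256 * 1 * 3 = 768; the remaining 232 bursts are
+ * picked up by the caller's loop in _setup_loops().
+ */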
+
+static inline int _setup_loops(struct pl330_dmac *pl330,
+			       unsigned dry_run, u8 buf[],
+			       const struct _xfer_spec *pxs)
+{
+	struct pl330_xfer *x = &pxs->desc->px;
+	u32 ccr = pxs->ccr;
+	unsigned long c, bursts = BYTE_TO_BURST(x->bytes, ccr);
+	int num_dregs = (x->bytes - BURST_TO_BYTE(bursts, ccr)) /
+		BRST_SIZE(ccr);
+	int off = 0;
+
+	while (bursts) {
+		c = bursts;
+		off += _loop(pl330, dry_run, &buf[off], &c, pxs);
+		bursts -= c;
+	}
+	off += _dregs(pl330, dry_run, &buf[off], pxs, num_dregs);
+
+	return off;
+}
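+
+/*
+ * Dregs arithmetic, for illustration (assuming BYTE_TO_BURST()/
+ * BURST_TO_BYTE() do the obvious bytes<->bursts conversion for the burst
+ * size and length encoded in ccr): with 8-byte beats and a burst length of
+ * 16, i.e. 128 bytes per full burst, x->bytes == 1000 gives bursts = 7
+ * (896 bytes) and num_dregs = (1000 - 896) / 8 = 13 leftover beats, which
+ * _dregs() then moves with SINGLE transfers (peripheral) or one
+ * reduced-length burst (mem-to-mem).
+ */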
+
+static inline int _setup_xfer(struct pl330_dmac *pl330,
+			      unsigned dry_run, u8 buf[],
+			      const struct _xfer_spec *pxs)
+{
+	struct pl330_xfer *x = &pxs->desc->px;
+	int off = 0;
+
+	/* DMAMOV SAR, x->src_addr */
+	off += _emit_MOV(dry_run, &buf[off], SAR, x->src_addr);
+	/* DMAMOV DAR, x->dst_addr */
+	off += _emit_MOV(dry_run, &buf[off], DAR, x->dst_addr);
+
+	/* Setup Loop(s) */
+	off += _setup_loops(pl330, dry_run, &buf[off], pxs);
+
+	return off;
+}
+
+/*
+ * A req is a sequence of one or more xfer units.
+ * Returns the number of bytes taken to set up the MC for the req.
+ */
+static int _setup_req(struct pl330_dmac *pl330, unsigned dry_run,
+		      struct pl330_thread *thrd, unsigned index,
+		      struct _xfer_spec *pxs)
+{
+	struct _pl330_req *req = &thrd->req[index];
+	u8 *buf = req->mc_cpu;
+	int off = 0;
+
+	PL330_DBGMC_START(req->mc_bus);
+
+	/* DMAMOV CCR, ccr */
+	off += _emit_MOV(dry_run, &buf[off], CCR, pxs->ccr);
+
+	off += _setup_xfer(pl330, dry_run, &buf[off], pxs);
+
+	/* DMASEV peripheral/event */
+	off += _emit_SEV(dry_run, &buf[off], thrd->ev);
+	/* DMAEND */
+	off += _emit_END(dry_run, &buf[off]);
+
+	return off;
+}
+
+static inline u32 _prepare_ccr(const struct pl330_reqcfg *rqc)
+{
+	u32 ccr = 0;
+
+	if (rqc->src_inc)
+		ccr |= CC_SRCINC;
+
+	if (rqc->dst_inc)
+		ccr |= CC_DSTINC;
+
+	/* We set same protection levels for Src and DST for now */
+	if (rqc->privileged)
+		ccr |= CC_SRCPRI | CC_DSTPRI;
+	if (rqc->nonsecure)
+		ccr |= CC_SRCNS | CC_DSTNS;
+	if (rqc->insnaccess)
+		ccr |= CC_SRCIA | CC_DSTIA;
+
+	ccr |= (((rqc->brst_len - 1) & 0xf) << CC_SRCBRSTLEN_SHFT);
+	ccr |= (((rqc->brst_len - 1) & 0xf) << CC_DSTBRSTLEN_SHFT);
+
+	ccr |= (rqc->brst_size << CC_SRCBRSTSIZE_SHFT);
+	ccr |= (rqc->brst_size << CC_DSTBRSTSIZE_SHFT);
+
+	ccr |= (rqc->scctl << CC_SRCCCTRL_SHFT);
+	ccr |= (rqc->dcctl << CC_DSTCCTRL_SHFT);
+
+	ccr |= (rqc->swap << CC_SWAP_SHFT);
+
+	return ccr;
+}
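+
+/*
+ * Example CCR composition (symbolic, field positions per the CC_* macros):
+ * a non-secure request with incrementing source and destination and 8-beat
+ * bursts of 4-byte beats (brst_len = 8, brst_size = 2) yields
+ *
+ *   ccr = CC_SRCINC | CC_DSTINC | CC_SRCNS | CC_DSTNS
+ *       | (7 << CC_SRCBRSTLEN_SHFT)  | (7 << CC_DSTBRSTLEN_SHFT)
+ *       | (2 << CC_SRCBRSTSIZE_SHFT) | (2 << CC_DSTBRSTSIZE_SHFT)
+ *       | (scctl << CC_SRCCCTRL_SHFT) | (dcctl << CC_DSTCCTRL_SHFT)
+ *       | (swap << CC_SWAP_SHFT);
+ */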
+
+/*
+ * Submit a list of xfers after which the client wants notification.
+ * Client is not notified after each xfer unit, just once after all
+ * xfer units are done or some error occurs.
+ */
+static int pl330_submit_req(struct pl330_thread *thrd,
+	struct dma_pl330_desc *desc)
+{
+	struct pl330_dmac *pl330 = thrd->dmac;
+	struct _xfer_spec xs;
+	unsigned long flags;
+	unsigned idx;
+	u32 ccr;
+	int ret = 0;
+
+	switch (desc->rqtype) {
+	case DMA_MEM_TO_DEV:
+		break;
+
+	case DMA_DEV_TO_MEM:
+		break;
+
+	case DMA_MEM_TO_MEM:
+		break;
+
+	default:
+		return -ENOTSUPP;
+	}
+
+	if (pl330->state == DYING
+		|| pl330->dmac_tbd.reset_chan & (1 << thrd->id)) {
+		dev_info(thrd->dmac->ddma.dev, "%s:%d\n",
+			__func__, __LINE__);
+		return -EAGAIN;
+	}
+
+	/* Reject a request for a non-existent peripheral */
+	if (desc->rqtype != DMA_MEM_TO_MEM &&
+	    desc->peri >= pl330->pcfg.num_peri) {
+		dev_info(thrd->dmac->ddma.dev,
+				"%s:%d Invalid peripheral(%u)!\n",
+				__func__, __LINE__, desc->peri);
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&pl330->lock, flags);
+
+	if (_queue_full(thrd)) {
+		ret = -EAGAIN;
+		goto xfer_exit;
+	}
+
+	/* Prefer Secure Channel */
+	if (!_manager_ns(thrd))
+		desc->rqcfg.nonsecure = 0;
+	else
+		desc->rqcfg.nonsecure = 1;
+
+	ccr = _prepare_ccr(&desc->rqcfg);
+
+	idx = thrd->req[0].desc == NULL ? 0 : 1;
+
+	xs.ccr = ccr;
+	xs.desc = desc;
+
+	/* First dry run to check if req is acceptable */
+	ret = _setup_req(pl330, 1, thrd, idx, &xs);
+	if (ret < 0)
+		goto xfer_exit;
+
+	if (ret > pl330->mcbufsz / 2) {
+		dev_info(pl330->ddma.dev, "%s:%d Try increasing mcbufsz (%i/%i)\n",
+				__func__, __LINE__, ret, pl330->mcbufsz / 2);
+		ret = -ENOMEM;
+		goto xfer_exit;
+	}
+
+	/* Hook the request */
+	thrd->lstenq = idx;
+	thrd->req[idx].desc = desc;
+	_setup_req(pl330, 0, thrd, idx, &xs);
+
+	ret = 0;
+
+xfer_exit:
+	spin_unlock_irqrestore(&pl330->lock, flags);
+
+	return ret;
+}
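+
+/*
+ * Note on the dry run above: _setup_req() is first called with dry_run = 1,
+ * which makes every _emit_*() helper return only its instruction size
+ * without touching the buffer.  The total is checked against mcbufsz / 2
+ * (each thread's microcode buffer is split between its two request slots,
+ * see _reset_thread()), and only if it fits is the program actually written
+ * by the second, non-dry call.
+ */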
+
+static void dma_pl330_rqcb(struct dma_pl330_desc *desc, enum pl330_op_err err)
+{
+	struct dma_pl330_chan *pch;
+	unsigned long flags;
+
+	if (!desc)
+		return;
+
+	pch = desc->pchan;
+
+	/* If desc aborted */
+	if (!pch)
+		return;
+
+	spin_lock_irqsave(&pch->lock, flags);
+
+	desc->status = DONE;
+
+	spin_unlock_irqrestore(&pch->lock, flags);
+
+	tasklet_schedule(&pch->task);
+}
+
+static void pl330_dotask(unsigned long data)
+{
+	struct pl330_dmac *pl330 = (struct pl330_dmac *) data;
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&pl330->lock, flags);
+
+	/* The DMAC itself has gone nuts */
+	if (pl330->dmac_tbd.reset_dmac) {
+		pl330->state = DYING;
+		/* Reset the manager too */
+		pl330->dmac_tbd.reset_mngr = true;
+		/* Clear the reset flag */
+		pl330->dmac_tbd.reset_dmac = false;
+	}
+
+	if (pl330->dmac_tbd.reset_mngr) {
+		_stop(pl330->manager);
+		/* Reset all channels */
+		pl330->dmac_tbd.reset_chan = (1 << pl330->pcfg.num_chan) - 1;
+		/* Clear the reset flag */
+		pl330->dmac_tbd.reset_mngr = false;
+	}
+
+	for (i = 0; i < pl330->pcfg.num_chan; i++) {
+
+		if (pl330->dmac_tbd.reset_chan & (1 << i)) {
+			struct pl330_thread *thrd = &pl330->channels[i];
+			void __iomem *regs = pl330->base;
+			enum pl330_op_err err;
+
+			_stop(thrd);
+
+			if (readl(regs + FSC) & (1 << thrd->id))
+				err = PL330_ERR_FAIL;
+			else
+				err = PL330_ERR_ABORT;
+
+			spin_unlock_irqrestore(&pl330->lock, flags);
+			dma_pl330_rqcb(thrd->req[1 - thrd->lstenq].desc, err);
+			dma_pl330_rqcb(thrd->req[thrd->lstenq].desc, err);
+			spin_lock_irqsave(&pl330->lock, flags);
+
+			thrd->req[0].desc = NULL;
+			thrd->req[1].desc = NULL;
+			thrd->req_running = -1;
+
+			/* Clear the reset flag */
+			pl330->dmac_tbd.reset_chan &= ~(1 << i);
+		}
+	}
+
+	spin_unlock_irqrestore(&pl330->lock, flags);
+
+	return;
+}
+
+/* Returns 1 if state was updated, 0 otherwise */
+static int pl330_update(struct pl330_dmac *pl330)
+{
+	struct dma_pl330_desc *descdone;
+	unsigned long flags;
+	void __iomem *regs;
+	u32 val;
+	int id, ev, ret = 0;
+
+	regs = pl330->base;
+
+	spin_lock_irqsave(&pl330->lock, flags);
+
+	val = readl(regs + FSM) & 0x1;
+	if (val)
+		pl330->dmac_tbd.reset_mngr = true;
+	else
+		pl330->dmac_tbd.reset_mngr = false;
+
+	val = readl(regs + FSC) & ((1 << pl330->pcfg.num_chan) - 1);
+	pl330->dmac_tbd.reset_chan |= val;
+	if (val) {
+		int i = 0;
+		while (i < pl330->pcfg.num_chan) {
+			if (val & (1 << i)) {
+				dev_info(pl330->ddma.dev,
+					"Reset Channel-%d\t CS-%x FTC-%x\n",
+						i, readl(regs + CS(i)),
+						readl(regs + FTC(i)));
+				_stop(&pl330->channels[i]);
+			}
+			i++;
+		}
+	}
+
+	/* Check which event happened, i.e. which thread notified */
+	val = readl(regs + ES);
+	if (pl330->pcfg.num_events < 32
+			&& val & ~((1 << pl330->pcfg.num_events) - 1)) {
+		pl330->dmac_tbd.reset_dmac = true;
+		dev_err(pl330->ddma.dev, "%s:%d Unexpected!\n", __func__,
+			__LINE__);
+		ret = 1;
+		goto updt_exit;
+	}
+
+	for (ev = 0; ev < pl330->pcfg.num_events; ev++) {
+		if (val & (1 << ev)) { /* Event occurred */
+			struct pl330_thread *thrd;
+			u32 inten = readl(regs + INTEN);
+			int active;
+
+			/* Clear the event */
+			if (inten & (1 << ev))
+				writel(1 << ev, regs + INTCLR);
+
+			ret = 1;
+
+			id = pl330->events[ev];
+
+			thrd = &pl330->channels[id];
+
+			active = thrd->req_running;
+			if (active == -1) /* Aborted */
+				continue;
+
+			/* Detach the req */
+			descdone = thrd->req[active].desc;
+			thrd->req[active].desc = NULL;
+
+			thrd->req_running = -1;
+
+			/* Get going again ASAP */
+			_start(thrd);
+
+			/* For now, just make a list of callbacks to be done */
+			list_add_tail(&descdone->rqd, &pl330->req_done);
+		}
+	}
+
+	/* Now that we are in no hurry, do the callbacks */
+	while (!list_empty(&pl330->req_done)) {
+		descdone = list_first_entry(&pl330->req_done,
+					    struct dma_pl330_desc, rqd);
+		list_del(&descdone->rqd);
+		spin_unlock_irqrestore(&pl330->lock, flags);
+		dma_pl330_rqcb(descdone, PL330_ERR_NONE);
+		spin_lock_irqsave(&pl330->lock, flags);
+	}
+
+updt_exit:
+	spin_unlock_irqrestore(&pl330->lock, flags);
+
+	if (pl330->dmac_tbd.reset_dmac
+			|| pl330->dmac_tbd.reset_mngr
+			|| pl330->dmac_tbd.reset_chan) {
+		ret = 1;
+		tasklet_schedule(&pl330->tasks);
+	}
+
+	return ret;
+}
+
+/* Reserve an event */
+static inline int _alloc_event(struct pl330_thread *thrd)
+{
+	struct pl330_dmac *pl330 = thrd->dmac;
+	int ev;
+
+	for (ev = 0; ev < pl330->pcfg.num_events; ev++)
+		if (pl330->events[ev] == -1) {
+			pl330->events[ev] = thrd->id;
+			return ev;
+		}
+
+	return -1;
+}
+
+static bool _chan_ns(const struct pl330_dmac *pl330, int i)
+{
+	return pl330->pcfg.irq_ns & (1 << i);
+}
+
+/*
+ * Upon success, returns a pointer to the allocated channel thread
+ * (the caller's "identity token"); NULL otherwise.
+ */
+static struct pl330_thread *pl330_request_channel(struct pl330_dmac *pl330)
+{
+	struct pl330_thread *thrd = NULL;
+	int chans, i;
+
+	if (pl330->state == DYING)
+		return NULL;
+
+	chans = pl330->pcfg.num_chan;
+
+	for (i = 0; i < chans; i++) {
+		thrd = &pl330->channels[i];
+		if ((thrd->free) && (!_manager_ns(thrd) ||
+					_chan_ns(pl330, i))) {
+			thrd->ev = _alloc_event(thrd);
+			if (thrd->ev >= 0) {
+				thrd->free = false;
+				thrd->lstenq = 1;
+				thrd->req[0].desc = NULL;
+				thrd->req[1].desc = NULL;
+				thrd->req_running = -1;
+				break;
+			}
+		}
+		thrd = NULL;
+	}
+
+	return thrd;
+}
+
+/* Release an event */
+static inline void _free_event(struct pl330_thread *thrd, int ev)
+{
+	struct pl330_dmac *pl330 = thrd->dmac;
+
+	/* If the event is valid and was held by the thread */
+	if (ev >= 0 && ev < pl330->pcfg.num_events
+			&& pl330->events[ev] == thrd->id)
+		pl330->events[ev] = -1;
+}
+
+static void pl330_release_channel(struct pl330_thread *thrd)
+{
+	if (!thrd || thrd->free)
+		return;
+
+	_stop(thrd);
+
+	dma_pl330_rqcb(thrd->req[1 - thrd->lstenq].desc, PL330_ERR_ABORT);
+	dma_pl330_rqcb(thrd->req[thrd->lstenq].desc, PL330_ERR_ABORT);
+
+	_free_event(thrd, thrd->ev);
+	thrd->free = true;
+}
+
+/*
+ * Initialize the structure for PL330 configuration that can be used
+ * by the client driver to make the best use of the DMAC.
+ */
+static void read_dmac_config(struct pl330_dmac *pl330)
+{
+	void __iomem *regs = pl330->base;
+	u32 val;
+
+	val = readl(regs + CRD) >> CRD_DATA_WIDTH_SHIFT;
+	val &= CRD_DATA_WIDTH_MASK;
+	pl330->pcfg.data_bus_width = 8 * (1 << val);
+
+	val = readl(regs + CRD) >> CRD_DATA_BUFF_SHIFT;
+	val &= CRD_DATA_BUFF_MASK;
+	pl330->pcfg.data_buf_dep = val + 1;
+
+	val = readl(regs + CR0) >> CR0_NUM_CHANS_SHIFT;
+	val &= CR0_NUM_CHANS_MASK;
+	val += 1;
+	pl330->pcfg.num_chan = val;
+
+	val = readl(regs + CR0);
+	if (val & CR0_PERIPH_REQ_SET) {
+		val = (val >> CR0_NUM_PERIPH_SHIFT) & CR0_NUM_PERIPH_MASK;
+		val += 1;
+		pl330->pcfg.num_peri = val;
+		pl330->pcfg.peri_ns = readl(regs + CR4);
+	} else {
+		pl330->pcfg.num_peri = 0;
+	}
+
+	val = readl(regs + CR0);
+	if (val & CR0_BOOT_MAN_NS)
+		pl330->pcfg.mode |= DMAC_MODE_NS;
+	else
+		pl330->pcfg.mode &= ~DMAC_MODE_NS;
+
+	val = readl(regs + CR0) >> CR0_NUM_EVENTS_SHIFT;
+	val &= CR0_NUM_EVENTS_MASK;
+	val += 1;
+	pl330->pcfg.num_events = val;
+
+	pl330->pcfg.irq_ns = readl(regs + CR3);
+}
+
+static inline void _reset_thread(struct pl330_thread *thrd)
+{
+	struct pl330_dmac *pl330 = thrd->dmac;
+
+	thrd->req[0].mc_cpu = pl330->mcode_cpu
+				+ (thrd->id * pl330->mcbufsz);
+	thrd->req[0].mc_bus = pl330->mcode_bus
+				+ (thrd->id * pl330->mcbufsz);
+	thrd->req[0].desc = NULL;
+
+	thrd->req[1].mc_cpu = thrd->req[0].mc_cpu
+				+ pl330->mcbufsz / 2;
+	thrd->req[1].mc_bus = thrd->req[0].mc_bus
+				+ pl330->mcbufsz / 2;
+	thrd->req[1].desc = NULL;
+
+	thrd->req_running = -1;
+}
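+
+/*
+ * Resulting microcode buffer layout, assuming mcbufsz == 512 for the sake of
+ * illustration: channel thread N uses mcode_bus + N * 512, split into two
+ * 256-byte halves -- req[0] at offset 0 and req[1] at offset 256 -- matching
+ * the two-requests-in-flight queue of struct pl330_thread.
+ */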
+
+static int dmac_alloc_threads(struct pl330_dmac *pl330)
+{
+	int chans = pl330->pcfg.num_chan;
+	struct pl330_thread *thrd;
+	int i;
+
+	/* Allocate 1 Manager and 'chans' Channel threads */
+	pl330->channels = kcalloc(1 + chans, sizeof(*thrd),
+					GFP_KERNEL);
+	if (!pl330->channels)
+		return -ENOMEM;
+
+	/* Init Channel threads */
+	for (i = 0; i < chans; i++) {
+		thrd = &pl330->channels[i];
+		thrd->id = i;
+		thrd->dmac = pl330;
+		_reset_thread(thrd);
+		thrd->free = true;
+	}
+
+	/* MANAGER is indexed at the end */
+	thrd = &pl330->channels[chans];
+	thrd->id = chans;
+	thrd->dmac = pl330;
+	thrd->free = false;
+	pl330->manager = thrd;
+
+	return 0;
+}
+
+static int dmac_alloc_resources(struct pl330_dmac *pl330)
+{
+	int chans = pl330->pcfg.num_chan;
+	int ret;
+
+	/*
+	 * Alloc MicroCode buffer for 'chans' Channel threads.
+	 * A channel's buffer offset is (Channel_Id * MCODE_BUFF_PERCHAN)
+	 */
+	pl330->mcode_cpu = dma_alloc_attrs(pl330->ddma.dev,
+				chans * pl330->mcbufsz,
+				&pl330->mcode_bus, GFP_KERNEL,
+				DMA_ATTR_PRIVILEGED);
+	if (!pl330->mcode_cpu) {
+		dev_err(pl330->ddma.dev, "%s:%d Can't allocate memory!\n",
+			__func__, __LINE__);
+		return -ENOMEM;
+	}
+
+	ret = dmac_alloc_threads(pl330);
+	if (ret) {
+		dev_err(pl330->ddma.dev, "%s:%d Can't create channels for DMAC!\n",
+			__func__, __LINE__);
+		dma_free_coherent(pl330->ddma.dev,
+				chans * pl330->mcbufsz,
+				pl330->mcode_cpu, pl330->mcode_bus);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int pl330_add(struct pl330_dmac *pl330)
+{
+	int i, ret;
+
+	/* Check if we can handle this DMAC */
+	if ((pl330->pcfg.periph_id & 0xfffff) != PERIPH_ID_VAL) {
+		dev_err(pl330->ddma.dev, "PERIPH_ID 0x%x !\n",
+			pl330->pcfg.periph_id);
+		return -EINVAL;
+	}
+
+	/* Read the configuration of the DMAC */
+	read_dmac_config(pl330);
+
+	if (pl330->pcfg.num_events == 0) {
+		dev_err(pl330->ddma.dev, "%s:%d Can't work without events!\n",
+			__func__, __LINE__);
+		return -EINVAL;
+	}
+
+	spin_lock_init(&pl330->lock);
+
+	INIT_LIST_HEAD(&pl330->req_done);
+
+	/* Use default MC buffer size if not provided */
+	if (!pl330->mcbufsz)
+		pl330->mcbufsz = MCODE_BUFF_PER_REQ * 2;
+
+	/* Mark all events as free */
+	for (i = 0; i < pl330->pcfg.num_events; i++)
+		pl330->events[i] = -1;
+
+	/* Allocate resources needed by the DMAC */
+	ret = dmac_alloc_resources(pl330);
+	if (ret) {
+		dev_err(pl330->ddma.dev, "Unable to create channels for DMAC\n");
+		return ret;
+	}
+
+	tasklet_init(&pl330->tasks, pl330_dotask, (unsigned long) pl330);
+
+	pl330->state = INIT;
+
+	return 0;
+}
+
+static int dmac_free_threads(struct pl330_dmac *pl330)
+{
+	struct pl330_thread *thrd;
+	int i;
+
+	/* Release Channel threads */
+	for (i = 0; i < pl330->pcfg.num_chan; i++) {
+		thrd = &pl330->channels[i];
+		pl330_release_channel(thrd);
+	}
+
+	/* Free memory */
+	kfree(pl330->channels);
+
+	return 0;
+}
+
+static void pl330_del(struct pl330_dmac *pl330)
+{
+	pl330->state = UNINIT;
+
+	tasklet_kill(&pl330->tasks);
+
+	/* Free DMAC resources */
+	dmac_free_threads(pl330);
+
+	dma_free_coherent(pl330->ddma.dev,
+		pl330->pcfg.num_chan * pl330->mcbufsz, pl330->mcode_cpu,
+		pl330->mcode_bus);
+}
+
+/* forward declaration */
+static struct amba_driver pl330_driver;
+
+static inline struct dma_pl330_chan *
+to_pchan(struct dma_chan *ch)
+{
+	if (!ch)
+		return NULL;
+
+	return container_of(ch, struct dma_pl330_chan, chan);
+}
+
+static inline struct dma_pl330_desc *
+to_desc(struct dma_async_tx_descriptor *tx)
+{
+	return container_of(tx, struct dma_pl330_desc, txd);
+}
+
+static inline void fill_queue(struct dma_pl330_chan *pch)
+{
+	struct dma_pl330_desc *desc;
+	int ret;
+
+	list_for_each_entry(desc, &pch->work_list, node) {
+
+		/* If already submitted */
+		if (desc->status == BUSY)
+			continue;
+
+		ret = pl330_submit_req(pch->thread, desc);
+		if (!ret) {
+			desc->status = BUSY;
+		} else if (ret == -EAGAIN) {
+			/* QFull or DMAC Dying */
+			break;
+		} else {
+			/* Unacceptable request */
+			desc->status = DONE;
+			dev_err(pch->dmac->ddma.dev, "%s:%d Bad Desc(%d)\n",
+					__func__, __LINE__, desc->txd.cookie);
+			tasklet_schedule(&pch->task);
+		}
+	}
+}
+
+static void pl330_tasklet(unsigned long data)
+{
+	struct dma_pl330_chan *pch = (struct dma_pl330_chan *)data;
+	struct dma_pl330_desc *desc, *_dt;
+	unsigned long flags;
+	bool power_down = false;
+
+	spin_lock_irqsave(&pch->lock, flags);
+
+	/* Pick up ripe tomatoes, i.e. descriptors whose xfer is done */
+	list_for_each_entry_safe(desc, _dt, &pch->work_list, node)
+		if (desc->status == DONE) {
+			if (!pch->cyclic)
+				dma_cookie_complete(&desc->txd);
+			list_move_tail(&desc->node, &pch->completed_list);
+		}
+
+	/* Try to submit a req immediately next to the last completed cookie */
+	fill_queue(pch);
+
+	if (list_empty(&pch->work_list)) {
+		spin_lock(&pch->thread->dmac->lock);
+		_stop(pch->thread);
+		spin_unlock(&pch->thread->dmac->lock);
+		power_down = true;
+		pch->active = false;
+	} else {
+		/* Make sure the PL330 Channel thread is active */
+		spin_lock(&pch->thread->dmac->lock);
+		_start(pch->thread);
+		spin_unlock(&pch->thread->dmac->lock);
+	}
+
+	while (!list_empty(&pch->completed_list)) {
+		struct dmaengine_desc_callback cb;
+
+		desc = list_first_entry(&pch->completed_list,
+					struct dma_pl330_desc, node);
+
+		dmaengine_desc_get_callback(&desc->txd, &cb);
+
+		if (pch->cyclic) {
+			desc->status = PREP;
+			list_move_tail(&desc->node, &pch->work_list);
+			if (power_down) {
+				pch->active = true;
+				spin_lock(&pch->thread->dmac->lock);
+				_start(pch->thread);
+				spin_unlock(&pch->thread->dmac->lock);
+				power_down = false;
+			}
+		} else {
+			desc->status = FREE;
+			list_move_tail(&desc->node, &pch->dmac->desc_pool);
+		}
+
+		dma_descriptor_unmap(&desc->txd);
+
+		if (dmaengine_desc_callback_valid(&cb)) {
+			spin_unlock_irqrestore(&pch->lock, flags);
+			dmaengine_desc_callback_invoke(&cb, NULL);
+			spin_lock_irqsave(&pch->lock, flags);
+		}
+	}
+	spin_unlock_irqrestore(&pch->lock, flags);
+
+	/* If work list empty, power down */
+	if (power_down) {
+		pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
+		pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
+	}
+}
+
+static struct dma_chan *of_dma_pl330_xlate(struct of_phandle_args *dma_spec,
+						struct of_dma *ofdma)
+{
+	int count = dma_spec->args_count;
+	struct pl330_dmac *pl330 = ofdma->of_dma_data;
+	unsigned int chan_id;
+
+	if (!pl330)
+		return NULL;
+
+	if (count != 1)
+		return NULL;
+
+	chan_id = dma_spec->args[0];
+	if (chan_id >= pl330->num_peripherals)
+		return NULL;
+
+	return dma_get_slave_channel(&pl330->peripherals[chan_id].chan);
+}
+
+static int pl330_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct dma_pl330_chan *pch = to_pchan(chan);
+	struct pl330_dmac *pl330 = pch->dmac;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pl330->lock, flags);
+
+	dma_cookie_init(chan);
+	pch->cyclic = false;
+
+	pch->thread = pl330_request_channel(pl330);
+	if (!pch->thread) {
+		spin_unlock_irqrestore(&pl330->lock, flags);
+		return -ENOMEM;
+	}
+
+	tasklet_init(&pch->task, pl330_tasklet, (unsigned long) pch);
+
+	spin_unlock_irqrestore(&pl330->lock, flags);
+
+	return 1;
+}
+
+/*
+ * We need the data direction between the DMAC (the dma-mapping "device") and
+ * the FIFO (the dmaengine "dev"), from the FIFO's point of view. Confusing!
+ */
+static enum dma_data_direction
+pl330_dma_slave_map_dir(enum dma_transfer_direction dir)
+{
+	switch (dir) {
+	case DMA_MEM_TO_DEV:
+		return DMA_FROM_DEVICE;
+	case DMA_DEV_TO_MEM:
+		return DMA_TO_DEVICE;
+	case DMA_DEV_TO_DEV:
+		return DMA_BIDIRECTIONAL;
+	default:
+		return DMA_NONE;
+	}
+}
+
+static void pl330_unprep_slave_fifo(struct dma_pl330_chan *pch)
+{
+	if (pch->dir != DMA_NONE)
+		dma_unmap_resource(pch->chan.device->dev, pch->fifo_dma,
+				   1 << pch->burst_sz, pch->dir, 0);
+	pch->dir = DMA_NONE;
+}
+
+static bool pl330_prep_slave_fifo(struct dma_pl330_chan *pch,
+				  enum dma_transfer_direction dir)
+{
+	struct device *dev = pch->chan.device->dev;
+	enum dma_data_direction dma_dir = pl330_dma_slave_map_dir(dir);
+
+	/* Already mapped for this config? */
+	if (pch->dir == dma_dir)
+		return true;
+
+	pl330_unprep_slave_fifo(pch);
+	pch->fifo_dma = dma_map_resource(dev, pch->fifo_addr,
+					 1 << pch->burst_sz, dma_dir, 0);
+	if (dma_mapping_error(dev, pch->fifo_dma))
+		return false;
+
+	pch->dir = dma_dir;
+	return true;
+}
+
+static int fixup_burst_len(int max_burst_len, int quirks)
+{
+	if (quirks & PL330_QUIRK_BROKEN_NO_FLUSHP)
+		return 1;
+	else if (max_burst_len > PL330_MAX_BURST)
+		return PL330_MAX_BURST;
+	else if (max_burst_len < 1)
+		return 1;
+	else
+		return max_burst_len;
+}
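+
+/*
+ * For illustration: fixup_burst_len(0, 0) clamps up to 1,
+ * fixup_burst_len(32, 0) clamps down to PL330_MAX_BURST, and any value is
+ * forced to 1 when PL330_QUIRK_BROKEN_NO_FLUSHP is set.
+ */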
+
+static int pl330_config(struct dma_chan *chan,
+			struct dma_slave_config *slave_config)
+{
+	struct dma_pl330_chan *pch = to_pchan(chan);
+
+	pl330_unprep_slave_fifo(pch);
+	if (slave_config->direction == DMA_MEM_TO_DEV) {
+		if (slave_config->dst_addr)
+			pch->fifo_addr = slave_config->dst_addr;
+		if (slave_config->dst_addr_width)
+			pch->burst_sz = __ffs(slave_config->dst_addr_width);
+		pch->burst_len = fixup_burst_len(slave_config->dst_maxburst,
+			pch->dmac->quirks);
+	} else if (slave_config->direction == DMA_DEV_TO_MEM) {
+		if (slave_config->src_addr)
+			pch->fifo_addr = slave_config->src_addr;
+		if (slave_config->src_addr_width)
+			pch->burst_sz = __ffs(slave_config->src_addr_width);
+		pch->burst_len = fixup_burst_len(slave_config->src_maxburst,
+			pch->dmac->quirks);
+	}
+
+	return 0;
+}
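+
+/*
+ * Minimal client-side sketch (hypothetical peripheral driver, illustration
+ * only) of how the fields consumed above are usually supplied through the
+ * generic dmaengine API:
+ *
+ *	struct dma_chan *chan = dma_request_chan(dev, "tx");
+ *	struct dma_slave_config cfg = {
+ *		.direction      = DMA_MEM_TO_DEV,
+ *		.dst_addr       = fifo_phys_addr,
+ *		.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+ *		.dst_maxburst   = 8,
+ *	};
+ *
+ *	dmaengine_slave_config(chan, &cfg);
+ *
+ * where 'dev' and 'fifo_phys_addr' are the client's own device and FIFO bus
+ * address.  pl330_config() then records fifo_addr, burst_sz (__ffs(4) == 2)
+ * and a quirk-adjusted burst_len for later prep_slave_sg()/prep_dma_cyclic()
+ * calls on this channel.
+ */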
+
+static int pl330_terminate_all(struct dma_chan *chan)
+{
+	struct dma_pl330_chan *pch = to_pchan(chan);
+	struct dma_pl330_desc *desc;
+	unsigned long flags;
+	struct pl330_dmac *pl330 = pch->dmac;
+	LIST_HEAD(list);
+	bool power_down = false;
+
+	pm_runtime_get_sync(pl330->ddma.dev);
+	spin_lock_irqsave(&pch->lock, flags);
+
+	spin_lock(&pl330->lock);
+	_stop(pch->thread);
+	pch->thread->req[0].desc = NULL;
+	pch->thread->req[1].desc = NULL;
+	pch->thread->req_running = -1;
+	spin_unlock(&pl330->lock);
+
+	power_down = pch->active;
+	pch->active = false;
+
+	/* Mark all desc done */
+	list_for_each_entry(desc, &pch->submitted_list, node) {
+		desc->status = FREE;
+		dma_cookie_complete(&desc->txd);
+	}
+
+	list_for_each_entry(desc, &pch->work_list , node) {
+		desc->status = FREE;
+		dma_cookie_complete(&desc->txd);
+	}
+
+	list_splice_tail_init(&pch->submitted_list, &pl330->desc_pool);
+	list_splice_tail_init(&pch->work_list, &pl330->desc_pool);
+	list_splice_tail_init(&pch->completed_list, &pl330->desc_pool);
+	spin_unlock_irqrestore(&pch->lock, flags);
+	pm_runtime_mark_last_busy(pl330->ddma.dev);
+	if (power_down)
+		pm_runtime_put_autosuspend(pl330->ddma.dev);
+	pm_runtime_put_autosuspend(pl330->ddma.dev);
+
+	return 0;
+}
+
+/*
+ * We don't support the DMA_RESUME command because of hardware
+ * limitations, so after pausing the channel we cannot restore
+ * it to the active state. We have to terminate the channel and
+ * set up the DMA transfer again. This pause feature was implemented
+ * to allow safely reading the residue before channel termination.
+ */
+static int pl330_pause(struct dma_chan *chan)
+{
+	struct dma_pl330_chan *pch = to_pchan(chan);
+	struct pl330_dmac *pl330 = pch->dmac;
+	unsigned long flags;
+
+	pm_runtime_get_sync(pl330->ddma.dev);
+	spin_lock_irqsave(&pch->lock, flags);
+
+	spin_lock(&pl330->lock);
+	_stop(pch->thread);
+	spin_unlock(&pl330->lock);
+
+	spin_unlock_irqrestore(&pch->lock, flags);
+	pm_runtime_mark_last_busy(pl330->ddma.dev);
+	pm_runtime_put_autosuspend(pl330->ddma.dev);
+
+	return 0;
+}
+
+static void pl330_free_chan_resources(struct dma_chan *chan)
+{
+	struct dma_pl330_chan *pch = to_pchan(chan);
+	struct pl330_dmac *pl330 = pch->dmac;
+	unsigned long flags;
+
+	tasklet_kill(&pch->task);
+
+	pm_runtime_get_sync(pch->dmac->ddma.dev);
+	spin_lock_irqsave(&pl330->lock, flags);
+
+	pl330_release_channel(pch->thread);
+	pch->thread = NULL;
+
+	if (pch->cyclic)
+		list_splice_tail_init(&pch->work_list, &pch->dmac->desc_pool);
+
+	spin_unlock_irqrestore(&pl330->lock, flags);
+	pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
+	pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
+	pl330_unprep_slave_fifo(pch);
+}
+
+static int pl330_get_current_xferred_count(struct dma_pl330_chan *pch,
+					   struct dma_pl330_desc *desc)
+{
+	struct pl330_thread *thrd = pch->thread;
+	struct pl330_dmac *pl330 = pch->dmac;
+	void __iomem *regs = thrd->dmac->base;
+	u32 val, addr;
+
+	pm_runtime_get_sync(pl330->ddma.dev);
+	val = addr = 0;
+	if (desc->rqcfg.src_inc) {
+		val = readl(regs + SA(thrd->id));
+		addr = desc->px.src_addr;
+	} else {
+		val = readl(regs + DA(thrd->id));
+		addr = desc->px.dst_addr;
+	}
+	pm_runtime_mark_last_busy(pch->dmac->ddma.dev);
+	pm_runtime_put_autosuspend(pl330->ddma.dev);
+
+	/* If DMAMOV hasn't finished yet, SAR/DAR can be zero */
+	if (!val)
+		return 0;
+
+	return val - addr;
+}
+
+static enum dma_status
+pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+		 struct dma_tx_state *txstate)
+{
+	enum dma_status ret;
+	unsigned long flags;
+	struct dma_pl330_desc *desc, *running = NULL, *last_enq = NULL;
+	struct dma_pl330_chan *pch = to_pchan(chan);
+	unsigned int transferred, residual = 0;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+
+	if (!txstate)
+		return ret;
+
+	if (ret == DMA_COMPLETE)
+		goto out;
+
+	spin_lock_irqsave(&pch->lock, flags);
+	spin_lock(&pch->thread->dmac->lock);
+
+	if (pch->thread->req_running != -1)
+		running = pch->thread->req[pch->thread->req_running].desc;
+
+	last_enq = pch->thread->req[pch->thread->lstenq].desc;
+
+	/* Check in pending list */
+	list_for_each_entry(desc, &pch->work_list, node) {
+		if (desc->status == DONE)
+			transferred = desc->bytes_requested;
+		else if (running && desc == running)
+			transferred =
+				pl330_get_current_xferred_count(pch, desc);
+		else if (desc->status == BUSY)
+			/*
+			 * Busy but not running means either just enqueued,
+			 * or finished and not yet marked done
+			 */
+			if (desc == last_enq)
+				transferred = 0;
+			else
+				transferred = desc->bytes_requested;
+		else
+			transferred = 0;
+		residual += desc->bytes_requested - transferred;
+		if (desc->txd.cookie == cookie) {
+			switch (desc->status) {
+			case DONE:
+				ret = DMA_COMPLETE;
+				break;
+			case PREP:
+			case BUSY:
+				ret = DMA_IN_PROGRESS;
+				break;
+			default:
+				WARN_ON(1);
+			}
+			break;
+		}
+		if (desc->last)
+			residual = 0;
+	}
+	spin_unlock(&pch->thread->dmac->lock);
+	spin_unlock_irqrestore(&pch->lock, flags);
+
+out:
+	dma_set_residue(txstate, residual);
+
+	return ret;
+}
+
+static void pl330_issue_pending(struct dma_chan *chan)
+{
+	struct dma_pl330_chan *pch = to_pchan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&pch->lock, flags);
+	if (list_empty(&pch->work_list)) {
+		/*
+		 * Warn if nothing is pending. An empty submitted_list may
+		 * break our pm_runtime usage counter, as it is updated
+		 * based on the work_list emptiness status.
+		 */
+		WARN_ON(list_empty(&pch->submitted_list));
+		pch->active = true;
+		pm_runtime_get_sync(pch->dmac->ddma.dev);
+	}
+	list_splice_tail_init(&pch->submitted_list, &pch->work_list);
+	spin_unlock_irqrestore(&pch->lock, flags);
+
+	pl330_tasklet((unsigned long)pch);
+}
+
+/*
+ * We return the last descriptor of the circular list from prep_xxx,
+ * so the argument to submit corresponds to the last descriptor
+ * of the list.
+ */
+static dma_cookie_t pl330_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct dma_pl330_desc *desc, *last = to_desc(tx);
+	struct dma_pl330_chan *pch = to_pchan(tx->chan);
+	dma_cookie_t cookie;
+	unsigned long flags;
+
+	spin_lock_irqsave(&pch->lock, flags);
+
+	/* Assign cookies to all nodes */
+	while (!list_empty(&last->node)) {
+		desc = list_entry(last->node.next, struct dma_pl330_desc, node);
+		if (pch->cyclic) {
+			desc->txd.callback = last->txd.callback;
+			desc->txd.callback_param = last->txd.callback_param;
+		}
+		desc->last = false;
+
+		dma_cookie_assign(&desc->txd);
+
+		list_move_tail(&desc->node, &pch->submitted_list);
+	}
+
+	last->last = true;
+	cookie = dma_cookie_assign(&last->txd);
+	list_add_tail(&last->node, &pch->submitted_list);
+	spin_unlock_irqrestore(&pch->lock, flags);
+
+	return cookie;
+}
+
+static inline void _init_desc(struct dma_pl330_desc *desc)
+{
+	desc->rqcfg.swap = SWAP_NO;
+	desc->rqcfg.scctl = CCTRL0;
+	desc->rqcfg.dcctl = CCTRL0;
+	desc->txd.tx_submit = pl330_tx_submit;
+
+	INIT_LIST_HEAD(&desc->node);
+}
+
+/* Returns the number of descriptors added to the DMAC pool */
+static int add_desc(struct list_head *pool, spinlock_t *lock,
+		    gfp_t flg, int count)
+{
+	struct dma_pl330_desc *desc;
+	unsigned long flags;
+	int i;
+
+	desc = kcalloc(count, sizeof(*desc), flg);
+	if (!desc)
+		return 0;
+
+	spin_lock_irqsave(lock, flags);
+
+	for (i = 0; i < count; i++) {
+		_init_desc(&desc[i]);
+		list_add_tail(&desc[i].node, pool);
+	}
+
+	spin_unlock_irqrestore(lock, flags);
+
+	return count;
+}
+
+static struct dma_pl330_desc *pluck_desc(struct list_head *pool,
+					 spinlock_t *lock)
+{
+	struct dma_pl330_desc *desc = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(lock, flags);
+
+	if (!list_empty(pool)) {
+		desc = list_entry(pool->next,
+				struct dma_pl330_desc, node);
+
+		list_del_init(&desc->node);
+
+		desc->status = PREP;
+		desc->txd.callback = NULL;
+	}
+
+	spin_unlock_irqrestore(lock, flags);
+
+	return desc;
+}
+
+static struct dma_pl330_desc *pl330_get_desc(struct dma_pl330_chan *pch)
+{
+	struct pl330_dmac *pl330 = pch->dmac;
+	u8 *peri_id = pch->chan.private;
+	struct dma_pl330_desc *desc;
+
+	/* Pluck one desc from the DMAC's pool */
+	desc = pluck_desc(&pl330->desc_pool, &pl330->pool_lock);
+
+	/* If the DMAC pool is empty, alloc new */
+	if (!desc) {
+		DEFINE_SPINLOCK(lock);
+		LIST_HEAD(pool);
+
+		if (!add_desc(&pool, &lock, GFP_ATOMIC, 1))
+			return NULL;
+
+		desc = pluck_desc(&pool, &lock);
+		WARN_ON(!desc || !list_empty(&pool));
+	}
+
+	/* Initialize the descriptor */
+	desc->pchan = pch;
+	desc->txd.cookie = 0;
+	async_tx_ack(&desc->txd);
+
+	desc->peri = peri_id ? pch->chan.chan_id : 0;
+	desc->rqcfg.pcfg = &pch->dmac->pcfg;
+
+	dma_async_tx_descriptor_init(&desc->txd, &pch->chan);
+
+	return desc;
+}
+
+static inline void fill_px(struct pl330_xfer *px,
+		dma_addr_t dst, dma_addr_t src, size_t len)
+{
+	px->bytes = len;
+	px->dst_addr = dst;
+	px->src_addr = src;
+}
+
+static struct dma_pl330_desc *
+__pl330_prep_dma_memcpy(struct dma_pl330_chan *pch, dma_addr_t dst,
+		dma_addr_t src, size_t len)
+{
+	struct dma_pl330_desc *desc = pl330_get_desc(pch);
+
+	if (!desc) {
+		dev_err(pch->dmac->ddma.dev, "%s:%d Unable to fetch desc\n",
+			__func__, __LINE__);
+		return NULL;
+	}
+
+	/*
+	 * Ideally we should look out for reqs bigger than
+	 * those that can be programmed with 256 bytes of
+	 * MC buffer, but considering a req size is seldom
+	 * going to be word-unaligned and more than 200MB,
+	 * we take it easy.
+	 * Also, should the limit be reached, we'd rather
+	 * have the platform increase the MC buffer size than
+	 * complicate this API driver.
+	 */
+	fill_px(&desc->px, dst, src, len);
+
+	return desc;
+}
+
+/* Call after fixing burst size */
+static inline int get_burst_len(struct dma_pl330_desc *desc, size_t len)
+{
+	struct dma_pl330_chan *pch = desc->pchan;
+	struct pl330_dmac *pl330 = pch->dmac;
+	int burst_len;
+
+	burst_len = pl330->pcfg.data_bus_width / 8;
+	burst_len *= pl330->pcfg.data_buf_dep / pl330->pcfg.num_chan;
+	burst_len >>= desc->rqcfg.brst_size;
+
+	/* src/dst_burst_len can't be more than 16 */
+	if (burst_len > PL330_MAX_BURST)
+		burst_len = PL330_MAX_BURST;
+
+	return burst_len;
+}
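+
+/*
+ * Arithmetic example (illustration only): with a 64-bit data bus,
+ * data_buf_dep = 16, num_chan = 8 and brst_size = 2 (4-byte beats):
+ *
+ *   burst_len = (64 / 8) * (16 / 8) >> 2 = 16 >> 2 = 4
+ *
+ * i.e. at most 4 beats per burst, presumably so a single memcpy channel does
+ * not monopolise the shared data buffer; the result is further capped at
+ * PL330_MAX_BURST.
+ */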
+
+static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic(
+		struct dma_chan *chan, dma_addr_t dma_addr, size_t len,
+		size_t period_len, enum dma_transfer_direction direction,
+		unsigned long flags)
+{
+	struct dma_pl330_desc *desc = NULL, *first = NULL;
+	struct dma_pl330_chan *pch = to_pchan(chan);
+	struct pl330_dmac *pl330 = pch->dmac;
+	unsigned int i;
+	dma_addr_t dst;
+	dma_addr_t src;
+
+	if (len % period_len != 0)
+		return NULL;
+
+	if (!is_slave_direction(direction)) {
+		dev_err(pch->dmac->ddma.dev, "%s:%d Invalid dma direction\n",
+		__func__, __LINE__);
+		return NULL;
+	}
+
+	if (!pl330_prep_slave_fifo(pch, direction))
+		return NULL;
+
+	for (i = 0; i < len / period_len; i++) {
+		desc = pl330_get_desc(pch);
+		if (!desc) {
+			dev_err(pch->dmac->ddma.dev, "%s:%d Unable to fetch desc\n",
+				__func__, __LINE__);
+
+			if (!first)
+				return NULL;
+
+			spin_lock_irqsave(&pl330->pool_lock, flags);
+
+			while (!list_empty(&first->node)) {
+				desc = list_entry(first->node.next,
+						struct dma_pl330_desc, node);
+				list_move_tail(&desc->node, &pl330->desc_pool);
+			}
+
+			list_move_tail(&first->node, &pl330->desc_pool);
+
+			spin_unlock_irqrestore(&pl330->pool_lock, flags);
+
+			return NULL;
+		}
+
+		switch (direction) {
+		case DMA_MEM_TO_DEV:
+			desc->rqcfg.src_inc = 1;
+			desc->rqcfg.dst_inc = 0;
+			src = dma_addr;
+			dst = pch->fifo_dma;
+			break;
+		case DMA_DEV_TO_MEM:
+			desc->rqcfg.src_inc = 0;
+			desc->rqcfg.dst_inc = 1;
+			src = pch->fifo_dma;
+			dst = dma_addr;
+			break;
+		default:
+			break;
+		}
+
+		desc->rqtype = direction;
+		desc->rqcfg.brst_size = pch->burst_sz;
+		desc->rqcfg.brst_len = pch->burst_len;
+		desc->bytes_requested = period_len;
+		fill_px(&desc->px, dst, src, period_len);
+
+		if (!first)
+			first = desc;
+		else
+			list_add_tail(&desc->node, &first->node);
+
+		dma_addr += period_len;
+	}
+
+	if (!desc)
+		return NULL;
+
+	pch->cyclic = true;
+	desc->txd.flags = flags;
+
+	return &desc->txd;
+}
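+
+/*
+ * For illustration: a cyclic request with len = 16 KiB and period_len =
+ * 4 KiB builds a circular chain of 4 descriptors, one per period, covering
+ * dma_addr, dma_addr + 4 KiB, ... with the caller's callback attached to
+ * every node at submit time (see pl330_tx_submit()).
+ */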
+
+static struct dma_async_tx_descriptor *
+pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst,
+		dma_addr_t src, size_t len, unsigned long flags)
+{
+	struct dma_pl330_desc *desc;
+	struct dma_pl330_chan *pch = to_pchan(chan);
+	struct pl330_dmac *pl330;
+	int burst;
+
+	if (unlikely(!pch || !len))
+		return NULL;
+
+	pl330 = pch->dmac;
+
+	desc = __pl330_prep_dma_memcpy(pch, dst, src, len);
+	if (!desc)
+		return NULL;
+
+	desc->rqcfg.src_inc = 1;
+	desc->rqcfg.dst_inc = 1;
+	desc->rqtype = DMA_MEM_TO_MEM;
+
+	/* Select max possible burst size */
+	burst = pl330->pcfg.data_bus_width / 8;
+
+	/*
+	 * Make sure we use a burst size that aligns with all the memcpy
+	 * parameters because our DMA programming algorithm doesn't cope with
+	 * transfers which straddle an entry in the DMA device's MFIFO.
+	 */
+	while ((src | dst | len) & (burst - 1))
+		burst /= 2;
+
+	desc->rqcfg.brst_size = 0;
+	while (burst != (1 << desc->rqcfg.brst_size))
+		desc->rqcfg.brst_size++;
+
+	desc->rqcfg.brst_len = get_burst_len(desc, len);
+
+	/*
+	 * If the burst size is smaller than the bus width then make sure we
+	 * only transfer one beat at a time to avoid a burst straddling an
+	 * MFIFO entry.  This override must come after get_burst_len(),
+	 * otherwise it would be immediately overwritten.
+	 */
+	if (burst * 8 < pl330->pcfg.data_bus_width)
+		desc->rqcfg.brst_len = 1;
+	desc->bytes_requested = len;
+
+	desc->txd.flags = flags;
+
+	return &desc->txd;
+}
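+
+/*
+ * Burst-size selection example (illustration only): on an 8-byte bus with
+ * src = 0x1004, dst = 0x2000 and len = 0x100, the loop above halves burst
+ * from 8 to 4, the first value for which (src | dst | len) & (burst - 1)
+ * is 0, so brst_size becomes 2 (4-byte beats) and no transfer straddles an
+ * MFIFO entry.
+ */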
+
+static void __pl330_giveback_desc(struct pl330_dmac *pl330,
+				  struct dma_pl330_desc *first)
+{
+	unsigned long flags;
+	struct dma_pl330_desc *desc;
+
+	if (!first)
+		return;
+
+	spin_lock_irqsave(&pl330->pool_lock, flags);
+
+	while (!list_empty(&first->node)) {
+		desc = list_entry(first->node.next,
+				struct dma_pl330_desc, node);
+		list_move_tail(&desc->node, &pl330->desc_pool);
+	}
+
+	list_move_tail(&first->node, &pl330->desc_pool);
+
+	spin_unlock_irqrestore(&pl330->pool_lock, flags);
+}
+
+static struct dma_async_tx_descriptor *
+pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flg, void *context)
+{
+	struct dma_pl330_desc *first, *desc = NULL;
+	struct dma_pl330_chan *pch = to_pchan(chan);
+	struct scatterlist *sg;
+	int i;
+
+	if (unlikely(!pch || !sgl || !sg_len))
+		return NULL;
+
+	if (!pl330_prep_slave_fifo(pch, direction))
+		return NULL;
+
+	first = NULL;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+
+		desc = pl330_get_desc(pch);
+		if (!desc) {
+			struct pl330_dmac *pl330 = pch->dmac;
+
+			dev_err(pch->dmac->ddma.dev,
+				"%s:%d Unable to fetch desc\n",
+				__func__, __LINE__);
+			__pl330_giveback_desc(pl330, first);
+
+			return NULL;
+		}
+
+		if (!first)
+			first = desc;
+		else
+			list_add_tail(&desc->node, &first->node);
+
+		if (direction == DMA_MEM_TO_DEV) {
+			desc->rqcfg.src_inc = 1;
+			desc->rqcfg.dst_inc = 0;
+			fill_px(&desc->px, pch->fifo_dma, sg_dma_address(sg),
+				sg_dma_len(sg));
+		} else {
+			desc->rqcfg.src_inc = 0;
+			desc->rqcfg.dst_inc = 1;
+			fill_px(&desc->px, sg_dma_address(sg), pch->fifo_dma,
+				sg_dma_len(sg));
+		}
+
+		desc->rqcfg.brst_size = pch->burst_sz;
+		desc->rqcfg.brst_len = pch->burst_len;
+		desc->rqtype = direction;
+		desc->bytes_requested = sg_dma_len(sg);
+	}
+
+	/* Return the last desc in the chain */
+	desc->txd.flags = flg;
+	return &desc->txd;
+}
+
+static irqreturn_t pl330_irq_handler(int irq, void *data)
+{
+	if (pl330_update(data))
+		return IRQ_HANDLED;
+	else
+		return IRQ_NONE;
+}
+
+#define PL330_DMA_BUSWIDTHS \
+	BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \
+	BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+	BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+	BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
+	BIT(DMA_SLAVE_BUSWIDTH_8_BYTES)
+
+/*
+ * Runtime PM callbacks are provided by the amba/bus.c driver.
+ *
+ * It is assumed here that IRQ-safe runtime PM is chosen in probe and that
+ * the amba bus driver will only disable/enable the clock in the runtime PM
+ * callbacks.
+ */
+static int __maybe_unused pl330_suspend(struct device *dev)
+{
+	struct amba_device *pcdev = to_amba_device(dev);
+
+	pm_runtime_disable(dev);
+
+	if (!pm_runtime_status_suspended(dev)) {
+		/* amba did not disable the clock */
+		amba_pclk_disable(pcdev);
+	}
+	amba_pclk_unprepare(pcdev);
+
+	return 0;
+}
+
+static int __maybe_unused pl330_resume(struct device *dev)
+{
+	struct amba_device *pcdev = to_amba_device(dev);
+	int ret;
+
+	ret = amba_pclk_prepare(pcdev);
+	if (ret)
+		return ret;
+
+	if (!pm_runtime_status_suspended(dev))
+		ret = amba_pclk_enable(pcdev);
+
+	pm_runtime_enable(dev);
+
+	return ret;
+}
+
+static SIMPLE_DEV_PM_OPS(pl330_pm, pl330_suspend, pl330_resume);
+
+static int
+pl330_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	struct pl330_config *pcfg;
+	struct pl330_dmac *pl330;
+	struct dma_pl330_chan *pch, *_p;
+	struct dma_device *pd;
+	struct resource *res;
+	int i, ret, irq;
+	int num_chan;
+	struct device_node *np = adev->dev.of_node;
+
+	ret = dma_set_mask_and_coherent(&adev->dev, DMA_BIT_MASK(32));
+	if (ret)
+		return ret;
+
+	/* Allocate a new DMAC and its Channels */
+	pl330 = devm_kzalloc(&adev->dev, sizeof(*pl330), GFP_KERNEL);
+	if (!pl330)
+		return -ENOMEM;
+
+	pd = &pl330->ddma;
+	pd->dev = &adev->dev;
+
+	pl330->mcbufsz = 0;
+
+	/* get quirk */
+	for (i = 0; i < ARRAY_SIZE(of_quirks); i++)
+		if (of_property_read_bool(np, of_quirks[i].quirk))
+			pl330->quirks |= of_quirks[i].id;
+
+	res = &adev->res;
+	pl330->base = devm_ioremap_resource(&adev->dev, res);
+	if (IS_ERR(pl330->base))
+		return PTR_ERR(pl330->base);
+
+	amba_set_drvdata(adev, pl330);
+
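+	/*
+	 * Request every interrupt the AMBA core discovered for this
+	 * device; a zero entry in adev->irq[] marks the end of the list.
+	 */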
+	for (i = 0; i < AMBA_NR_IRQS; i++) {
+		irq = adev->irq[i];
+		if (irq) {
+			ret = devm_request_irq(&adev->dev, irq,
+					       pl330_irq_handler, 0,
+					       dev_name(&adev->dev), pl330);
+			if (ret)
+				return ret;
+		} else {
+			break;
+		}
+	}
+
+	pcfg = &pl330->pcfg;
+
+	pcfg->periph_id = adev->periphid;
+	ret = pl330_add(pl330);
+	if (ret)
+		return ret;
+
+	INIT_LIST_HEAD(&pl330->desc_pool);
+	spin_lock_init(&pl330->pool_lock);
+
+	/* Create a descriptor pool of default size */
+	if (!add_desc(&pl330->desc_pool, &pl330->pool_lock,
+		      GFP_KERNEL, NR_DEFAULT_DESC))
+		dev_warn(&adev->dev, "unable to allocate desc\n");
+
+	INIT_LIST_HEAD(&pd->channels);
+
+	/* Initialize channel parameters */
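+	/*
+	 * Expose as many channels as the larger of the number of
+	 * peripheral request interfaces and physical channels, so both
+	 * slave and memcpy users can be served.
+	 */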
+	num_chan = max_t(int, pcfg->num_peri, pcfg->num_chan);
+
+	pl330->num_peripherals = num_chan;
+
+	pl330->peripherals = kcalloc(num_chan, sizeof(*pch), GFP_KERNEL);
+	if (!pl330->peripherals) {
+		ret = -ENOMEM;
+		goto probe_err2;
+	}
+
+	for (i = 0; i < num_chan; i++) {
+		pch = &pl330->peripherals[i];
+
+		pch->chan.private = adev->dev.of_node;
+		INIT_LIST_HEAD(&pch->submitted_list);
+		INIT_LIST_HEAD(&pch->work_list);
+		INIT_LIST_HEAD(&pch->completed_list);
+		spin_lock_init(&pch->lock);
+		pch->thread = NULL;
+		pch->chan.device = pd;
+		pch->dmac = pl330;
+		pch->dir = DMA_NONE;
+
+		/* Add the channel to the DMAC list */
+		list_add_tail(&pch->chan.device_node, &pd->channels);
+	}
+
+	dma_cap_set(DMA_MEMCPY, pd->cap_mask);
+	if (pcfg->num_peri) {
+		dma_cap_set(DMA_SLAVE, pd->cap_mask);
+		dma_cap_set(DMA_CYCLIC, pd->cap_mask);
+		dma_cap_set(DMA_PRIVATE, pd->cap_mask);
+	}
+
+	pd->device_alloc_chan_resources = pl330_alloc_chan_resources;
+	pd->device_free_chan_resources = pl330_free_chan_resources;
+	pd->device_prep_dma_memcpy = pl330_prep_dma_memcpy;
+	pd->device_prep_dma_cyclic = pl330_prep_dma_cyclic;
+	pd->device_tx_status = pl330_tx_status;
+	pd->device_prep_slave_sg = pl330_prep_slave_sg;
+	pd->device_config = pl330_config;
+	pd->device_pause = pl330_pause;
+	pd->device_terminate_all = pl330_terminate_all;
+	pd->device_issue_pending = pl330_issue_pending;
+	pd->src_addr_widths = PL330_DMA_BUSWIDTHS;
+	pd->dst_addr_widths = PL330_DMA_BUSWIDTHS;
+	pd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	pd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+	pd->max_burst = ((pl330->quirks & PL330_QUIRK_BROKEN_NO_FLUSHP) ?
+			 1 : PL330_MAX_BURST);
+
+	ret = dma_async_device_register(pd);
+	if (ret) {
+		dev_err(&adev->dev, "unable to register DMAC\n");
+		goto probe_err3;
+	}
+
+	if (adev->dev.of_node) {
+		ret = of_dma_controller_register(adev->dev.of_node,
+					 of_dma_pl330_xlate, pl330);
+		if (ret) {
+			dev_err(&adev->dev,
+			"unable to register DMA to the generic DT DMA helpers\n");
+		}
+	}
+
+	adev->dev.dma_parms = &pl330->dma_parms;
+
+	/*
+	 * This is the limit for transfers with a buswidth of 1, larger
+	 * buswidths will have larger limits.
+	 */
+	ret = dma_set_max_seg_size(&adev->dev, 1900800);
+	if (ret)
+		dev_err(&adev->dev, "unable to set the seg size\n");
+
+
+	dev_info(&adev->dev,
+		"Loaded driver for PL330 DMAC-%x\n", adev->periphid);
+	dev_info(&adev->dev,
+		"\tDBUFF-%ux%ubytes Num_Chans-%u Num_Peri-%u Num_Events-%u\n",
+		pcfg->data_buf_dep, pcfg->data_bus_width / 8, pcfg->num_chan,
+		pcfg->num_peri, pcfg->num_events);
+
+	pm_runtime_irq_safe(&adev->dev);
+	pm_runtime_use_autosuspend(&adev->dev);
+	pm_runtime_set_autosuspend_delay(&adev->dev, PL330_AUTOSUSPEND_DELAY);
+	pm_runtime_mark_last_busy(&adev->dev);
+	pm_runtime_put_autosuspend(&adev->dev);
+
+	return 0;
+probe_err3:
+	/* Idle the DMAC */
+	list_for_each_entry_safe(pch, _p, &pl330->ddma.channels,
+			chan.device_node) {
+
+		/* Remove the channel */
+		list_del(&pch->chan.device_node);
+
+		/* Flush the channel */
+		if (pch->thread) {
+			pl330_terminate_all(&pch->chan);
+			pl330_free_chan_resources(&pch->chan);
+		}
+	}
+probe_err2:
+	pl330_del(pl330);
+
+	return ret;
+}
+
+static int pl330_remove(struct amba_device *adev)
+{
+	struct pl330_dmac *pl330 = amba_get_drvdata(adev);
+	struct dma_pl330_chan *pch, *_p;
+	int i, irq;
+
+	pm_runtime_get_noresume(pl330->ddma.dev);
+
+	if (adev->dev.of_node)
+		of_dma_controller_free(adev->dev.of_node);
+
+	for (i = 0; i < AMBA_NR_IRQS; i++) {
+		irq = adev->irq[i];
+		if (irq)
+			devm_free_irq(&adev->dev, irq, pl330);
+	}
+
+	dma_async_device_unregister(&pl330->ddma);
+
+	/* Idle the DMAC */
+	list_for_each_entry_safe(pch, _p, &pl330->ddma.channels,
+			chan.device_node) {
+
+		/* Remove the channel */
+		list_del(&pch->chan.device_node);
+
+		/* Flush the channel */
+		if (pch->thread) {
+			pl330_terminate_all(&pch->chan);
+			pl330_free_chan_resources(&pch->chan);
+		}
+	}
+
+	pl330_del(pl330);
+
+	return 0;
+}
+
+static const struct amba_id pl330_ids[] = {
+	{
+		.id	= 0x00041330,
+		.mask	= 0x000fffff,
+	},
+	{ 0, 0 },
+};
+
+MODULE_DEVICE_TABLE(amba, pl330_ids);
+
+static struct amba_driver pl330_driver = {
+	.drv = {
+		.owner = THIS_MODULE,
+		.name = "dma-pl330",
+		.pm = &pl330_pm,
+	},
+	.id_table = pl330_ids,
+	.probe = pl330_probe,
+	.remove = pl330_remove,
+};
+
+module_amba_driver(pl330_driver);
+
+MODULE_AUTHOR("Jaswinder Singh <jassisinghbrar@gmail.com>");
+MODULE_DESCRIPTION("API Driver for PL330 DMAC");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/ppc4xx/Makefile b/drivers/dma/ppc4xx/Makefile
new file mode 100644
index 0000000..b3d259b
--- /dev/null
+++ b/drivers/dma/ppc4xx/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += adma.o
diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c
new file mode 100644
index 0000000..2561028
--- /dev/null
+++ b/drivers/dma/ppc4xx/adma.c
@@ -0,0 +1,4639 @@
+/*
+ * Copyright (C) 2006-2009 DENX Software Engineering.
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ *
+ * Further porting to arch/powerpc by
+ * 	Anatolij Gustschin <agust@denx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This driver supports the asynchronous DMA copy and RAID engines available
+ * on the AMCC PPC440SPe Processors.
+ * Based on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x)
+ * ADMA driver written by D.Williams.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/async_tx.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/proc_fs.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <asm/dcr.h>
+#include <asm/dcr-regs.h>
+#include "adma.h"
+#include "../dmaengine.h"
+
+enum ppc_adma_init_code {
+	PPC_ADMA_INIT_OK = 0,
+	PPC_ADMA_INIT_MEMRES,
+	PPC_ADMA_INIT_MEMREG,
+	PPC_ADMA_INIT_ALLOC,
+	PPC_ADMA_INIT_COHERENT,
+	PPC_ADMA_INIT_CHANNEL,
+	PPC_ADMA_INIT_IRQ1,
+	PPC_ADMA_INIT_IRQ2,
+	PPC_ADMA_INIT_REGISTER
+};
+
+static char *ppc_adma_errors[] = {
+	[PPC_ADMA_INIT_OK] = "ok",
+	[PPC_ADMA_INIT_MEMRES] = "failed to get memory resource",
+	[PPC_ADMA_INIT_MEMREG] = "failed to request memory region",
+	[PPC_ADMA_INIT_ALLOC] = "failed to allocate memory for adev "
+				"structure",
+	[PPC_ADMA_INIT_COHERENT] = "failed to allocate coherent memory for "
+				   "hardware descriptors",
+	[PPC_ADMA_INIT_CHANNEL] = "failed to allocate memory for channel",
+	[PPC_ADMA_INIT_IRQ1] = "failed to request first irq",
+	[PPC_ADMA_INIT_IRQ2] = "failed to request second irq",
+	[PPC_ADMA_INIT_REGISTER] = "failed to register dma async device",
+};
+
+static enum ppc_adma_init_code
+ppc440spe_adma_devices[PPC440SPE_ADMA_ENGINES_NUM];
+
+struct ppc_dma_chan_ref {
+	struct dma_chan *chan;
+	struct list_head node;
+};
+
+/* The list of channels exported by ppc440spe ADMA */
+struct list_head
+ppc440spe_adma_chan_list = LIST_HEAD_INIT(ppc440spe_adma_chan_list);
+
+/* This flag is set when we want to refetch the xor chain in the
+ * interrupt handler
+ */
+static u32 do_xor_refetch;
+
+/* Pointer to DMA0, DMA1 CP/CS FIFO */
+static void *ppc440spe_dma_fifo_buf;
+
+/* Pointers to last submitted to DMA0, DMA1 CDBs */
+static struct ppc440spe_adma_desc_slot *chan_last_sub[3];
+static struct ppc440spe_adma_desc_slot *chan_first_cdb[3];
+
+/* Pointer to last linked and submitted xor CB */
+static struct ppc440spe_adma_desc_slot *xor_last_linked;
+static struct ppc440spe_adma_desc_slot *xor_last_submit;
+
+/* This array is used in data-check operations for storing a pattern */
+static char ppc440spe_qword[16];
+
+static atomic_t ppc440spe_adma_err_irq_ref;
+static dcr_host_t ppc440spe_mq_dcr_host;
+static unsigned int ppc440spe_mq_dcr_len;
+
+/* RXOR operations share a common register (MQ0_CF2H) for setting up the
+ * block size of transactions, so only one RXOR transaction may be active
+ * at a time. This variable records whether RXOR is currently active
+ * (PPC440SPE_RXOR_RUN bit is set) or not (PPC440SPE_RXOR_RUN is clear).
+ */
+static unsigned long ppc440spe_rxor_state;
+
+/* These are used in enable & check routines
+ */
+static u32 ppc440spe_r6_enabled;
+static struct ppc440spe_adma_chan *ppc440spe_r6_tchan;
+static struct completion ppc440spe_r6_test_comp;
+
+static int ppc440spe_adma_dma2rxor_prep_src(
+		struct ppc440spe_adma_desc_slot *desc,
+		struct ppc440spe_rxor *cursor, int index,
+		int src_cnt, u32 addr);
+static void ppc440spe_adma_dma2rxor_set_src(
+		struct ppc440spe_adma_desc_slot *desc,
+		int index, dma_addr_t addr);
+static void ppc440spe_adma_dma2rxor_set_mult(
+		struct ppc440spe_adma_desc_slot *desc,
+		int index, u8 mult);
+
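+/*
+ * Statement-expression wrapper: the debug call 'x' is only executed when
+ * ADMA_LL_DEBUG is defined, but is still parsed and type-checked in both
+ * configurations.
+ */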
+#ifdef ADMA_LL_DEBUG
+#define ADMA_LL_DBG(x) ({ if (1) x; 0; })
+#else
+#define ADMA_LL_DBG(x) ({ if (0) x; 0; })
+#endif
+
+static void print_cb(struct ppc440spe_adma_chan *chan, void *block)
+{
+	struct dma_cdb *cdb;
+	struct xor_cb *cb;
+	int i;
+
+	switch (chan->device->id) {
+	case 0:
+	case 1:
+		cdb = block;
+
+		pr_debug("CDB at %p [%d]:\n"
+			"\t attr 0x%02x opc 0x%02x cnt 0x%08x\n"
+			"\t sg1u 0x%08x sg1l 0x%08x\n"
+			"\t sg2u 0x%08x sg2l 0x%08x\n"
+			"\t sg3u 0x%08x sg3l 0x%08x\n",
+			cdb, chan->device->id,
+			cdb->attr, cdb->opc, le32_to_cpu(cdb->cnt),
+			le32_to_cpu(cdb->sg1u), le32_to_cpu(cdb->sg1l),
+			le32_to_cpu(cdb->sg2u), le32_to_cpu(cdb->sg2l),
+			le32_to_cpu(cdb->sg3u), le32_to_cpu(cdb->sg3l)
+		);
+		break;
+	case 2:
+		cb = block;
+
+		pr_debug("CB at %p [%d]:\n"
+			"\t cbc 0x%08x cbbc 0x%08x cbs 0x%08x\n"
+			"\t cbtah 0x%08x cbtal 0x%08x\n"
+			"\t cblah 0x%08x cblal 0x%08x\n",
+			cb, chan->device->id,
+			cb->cbc, cb->cbbc, cb->cbs,
+			cb->cbtah, cb->cbtal,
+			cb->cblah, cb->cblal);
+		for (i = 0; i < 16; i++) {
+			if (i && !cb->ops[i].h && !cb->ops[i].l)
+				continue;
+			pr_debug("\t ops[%2d]: h 0x%08x l 0x%08x\n",
+				i, cb->ops[i].h, cb->ops[i].l);
+		}
+		break;
+	}
+}
+
+static void print_cb_list(struct ppc440spe_adma_chan *chan,
+			  struct ppc440spe_adma_desc_slot *iter)
+{
+	for (; iter; iter = iter->hw_next)
+		print_cb(chan, iter->hw_desc);
+}
+
+static void prep_dma_xor_dbg(int id, dma_addr_t dst, dma_addr_t *src,
+			     unsigned int src_cnt)
+{
+	int i;
+
+	pr_debug("\n%s(%d):\nsrc: ", __func__, id);
+	for (i = 0; i < src_cnt; i++)
+		pr_debug("\t0x%016llx ", src[i]);
+	pr_debug("dst:\n\t0x%016llx\n", dst);
+}
+
+static void prep_dma_pq_dbg(int id, dma_addr_t *dst, dma_addr_t *src,
+			    unsigned int src_cnt)
+{
+	int i;
+
+	pr_debug("\n%s(%d):\nsrc: ", __func__, id);
+	for (i = 0; i < src_cnt; i++)
+		pr_debug("\t0x%016llx ", src[i]);
+	pr_debug("dst: ");
+	for (i = 0; i < 2; i++)
+		pr_debug("\t0x%016llx ", dst[i]);
+}
+
+static void prep_dma_pqzero_sum_dbg(int id, dma_addr_t *src,
+				    unsigned int src_cnt,
+				    const unsigned char *scf)
+{
+	int i;
+
+	pr_debug("\n%s(%d):\nsrc(coef): ", __func__, id);
+	if (scf) {
+		for (i = 0; i < src_cnt; i++)
+			pr_debug("\t0x%016llx(0x%02x) ", src[i], scf[i]);
+	} else {
+		for (i = 0; i < src_cnt; i++)
+			pr_debug("\t0x%016llx(no) ", src[i]);
+	}
+
+	pr_debug("dst: ");
+	for (i = 0; i < 2; i++)
+		pr_debug("\t0x%016llx ", src[src_cnt + i]);
+}
+
+/******************************************************************************
+ * Command (Descriptor) Blocks low-level routines
+ ******************************************************************************/
+/**
+ * ppc440spe_desc_init_interrupt - initialize the descriptor for INTERRUPT
+ * pseudo operation
+ */
+static void ppc440spe_desc_init_interrupt(struct ppc440spe_adma_desc_slot *desc,
+					  struct ppc440spe_adma_chan *chan)
+{
+	struct xor_cb *p;
+
+	switch (chan->device->id) {
+	case PPC440SPE_XOR_ID:
+		p = desc->hw_desc;
+		memset(desc->hw_desc, 0, sizeof(struct xor_cb));
+		/* NOP with Command Block Complete Enable */
+		p->cbc = XOR_CBCR_CBCE_BIT;
+		break;
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		memset(desc->hw_desc, 0, sizeof(struct dma_cdb));
+		/* NOP with interrupt */
+		set_bit(PPC440SPE_DESC_INT, &desc->flags);
+		break;
+	default:
+		printk(KERN_ERR "Unsupported id %d in %s\n", chan->device->id,
+				__func__);
+		break;
+	}
+}
+
+/**
+ * ppc440spe_desc_init_null_xor - initialize the descriptor for NULL XOR
+ * pseudo operation
+ */
+static void ppc440spe_desc_init_null_xor(struct ppc440spe_adma_desc_slot *desc)
+{
+	memset(desc->hw_desc, 0, sizeof(struct xor_cb));
+	desc->hw_next = NULL;
+	desc->src_cnt = 0;
+	desc->dst_cnt = 1;
+}
+
+/**
+ * ppc440spe_desc_init_xor - initialize the descriptor for XOR operation
+ */
+static void ppc440spe_desc_init_xor(struct ppc440spe_adma_desc_slot *desc,
+					 int src_cnt, unsigned long flags)
+{
+	struct xor_cb *hw_desc = desc->hw_desc;
+
+	memset(desc->hw_desc, 0, sizeof(struct xor_cb));
+	desc->hw_next = NULL;
+	desc->src_cnt = src_cnt;
+	desc->dst_cnt = 1;
+
+	hw_desc->cbc = XOR_CBCR_TGT_BIT | src_cnt;
+	if (flags & DMA_PREP_INTERRUPT)
+		/* Enable interrupt on completion */
+		hw_desc->cbc |= XOR_CBCR_CBCE_BIT;
+}
+
+/**
+ * ppc440spe_desc_init_dma2pq - initialize the descriptor for PQ
+ * operation in DMA2 controller
+ */
+static void ppc440spe_desc_init_dma2pq(struct ppc440spe_adma_desc_slot *desc,
+		int dst_cnt, int src_cnt, unsigned long flags)
+{
+	struct xor_cb *hw_desc = desc->hw_desc;
+
+	memset(desc->hw_desc, 0, sizeof(struct xor_cb));
+	desc->hw_next = NULL;
+	desc->src_cnt = src_cnt;
+	desc->dst_cnt = dst_cnt;
+	memset(desc->reverse_flags, 0, sizeof(desc->reverse_flags));
+	desc->descs_per_op = 0;
+
+	hw_desc->cbc = XOR_CBCR_TGT_BIT;
+	if (flags & DMA_PREP_INTERRUPT)
+		/* Enable interrupt on completion */
+		hw_desc->cbc |= XOR_CBCR_CBCE_BIT;
+}
+
+#define DMA_CTRL_FLAGS_LAST	DMA_PREP_FENCE
+#define DMA_PREP_ZERO_P		(DMA_CTRL_FLAGS_LAST << 1)
+#define DMA_PREP_ZERO_Q		(DMA_PREP_ZERO_P << 1)
+
+/**
+ * ppc440spe_desc_init_dma01pq - initialize the descriptors for PQ operation
+ * with DMA0/1
+ */
+static void ppc440spe_desc_init_dma01pq(struct ppc440spe_adma_desc_slot *desc,
+				int dst_cnt, int src_cnt, unsigned long flags,
+				unsigned long op)
+{
+	struct dma_cdb *hw_desc;
+	struct ppc440spe_adma_desc_slot *iter;
+	u8 dopc;
+
+	/* Common initialization of a PQ descriptors chain */
+	set_bits(op, &desc->flags);
+	desc->src_cnt = src_cnt;
+	desc->dst_cnt = dst_cnt;
+
+	/* WXOR MULTICAST if both P and Q are being computed
+	 * MV_SG1_SG2 if Q only
+	 */
+	dopc = (desc->dst_cnt == DMA_DEST_MAX_NUM) ?
+		DMA_CDB_OPC_MULTICAST : DMA_CDB_OPC_MV_SG1_SG2;
+
+	list_for_each_entry(iter, &desc->group_list, chain_node) {
+		hw_desc = iter->hw_desc;
+		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+
+		if (likely(!list_is_last(&iter->chain_node,
+				&desc->group_list))) {
+			/* set 'next' pointer */
+			iter->hw_next = list_entry(iter->chain_node.next,
+				struct ppc440spe_adma_desc_slot, chain_node);
+			clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+		} else {
+			/* this is the last descriptor.
+			 * this slot will be pasted from ADMA level
+			 * each time it wants to configure parameters
+			 * of the transaction (src, dst, ...)
+			 */
+			iter->hw_next = NULL;
+			if (flags & DMA_PREP_INTERRUPT)
+				set_bit(PPC440SPE_DESC_INT, &iter->flags);
+			else
+				clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+		}
+	}
+
+	/* Set OPS depending on WXOR/RXOR type of operation */
+	if (!test_bit(PPC440SPE_DESC_RXOR, &desc->flags)) {
+		/* This is a WXOR only chain:
+		 * - first descriptors are for zeroing destinations
+		 *   if PPC440SPE_ZERO_P/Q set;
+		 * - descriptors remained are for GF-XOR operations.
+		 */
+		iter = list_first_entry(&desc->group_list,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+
+		if (test_bit(PPC440SPE_ZERO_P, &desc->flags)) {
+			hw_desc = iter->hw_desc;
+			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+			iter = list_first_entry(&iter->chain_node,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		}
+
+		if (test_bit(PPC440SPE_ZERO_Q, &desc->flags)) {
+			hw_desc = iter->hw_desc;
+			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+			iter = list_first_entry(&iter->chain_node,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		}
+
+		list_for_each_entry_from(iter, &desc->group_list, chain_node) {
+			hw_desc = iter->hw_desc;
+			hw_desc->opc = dopc;
+		}
+	} else {
+		/* This is either RXOR-only or mixed RXOR/WXOR */
+
+		/* The first 1 or 2 slots in chain are always RXOR,
+		 * if need to calculate P & Q, then there are two
+		 * RXOR slots; if only P or only Q, then there is one
+		 */
+		iter = list_first_entry(&desc->group_list,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		hw_desc = iter->hw_desc;
+		hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+
+		if (desc->dst_cnt == DMA_DEST_MAX_NUM) {
+			iter = list_first_entry(&iter->chain_node,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+			hw_desc = iter->hw_desc;
+			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+		}
+
+		/* The remaining descs (if any) are WXORs */
+		if (test_bit(PPC440SPE_DESC_WXOR, &desc->flags)) {
+			iter = list_first_entry(&iter->chain_node,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+			list_for_each_entry_from(iter, &desc->group_list,
+						chain_node) {
+				hw_desc = iter->hw_desc;
+				hw_desc->opc = dopc;
+			}
+		}
+	}
+}
+
+/**
+ * ppc440spe_desc_init_dma01pqzero_sum - initialize the descriptor
+ * for PQ_ZERO_SUM operation
+ */
+static void ppc440spe_desc_init_dma01pqzero_sum(
+				struct ppc440spe_adma_desc_slot *desc,
+				int dst_cnt, int src_cnt)
+{
+	struct dma_cdb *hw_desc;
+	struct ppc440spe_adma_desc_slot *iter;
+	int i = 0;
+	u8 dopc = (dst_cnt == 2) ? DMA_CDB_OPC_MULTICAST :
+				   DMA_CDB_OPC_MV_SG1_SG2;
+	/*
+	 * Initialize starting from the 2nd or 3rd descriptor, depending
+	 * on dst_cnt. The first one or two slots are for cloning P
+	 * and/or Q to chan->pdest and/or chan->qdest, as we have
+	 * to preserve the original P/Q.
+	 */
+	iter = list_first_entry(&desc->group_list,
+				struct ppc440spe_adma_desc_slot, chain_node);
+	iter = list_entry(iter->chain_node.next,
+			  struct ppc440spe_adma_desc_slot, chain_node);
+
+	if (dst_cnt > 1) {
+		iter = list_entry(iter->chain_node.next,
+				  struct ppc440spe_adma_desc_slot, chain_node);
+	}
+	/* initialize each source descriptor in chain */
+	list_for_each_entry_from(iter, &desc->group_list, chain_node) {
+		hw_desc = iter->hw_desc;
+		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+		iter->src_cnt = 0;
+		iter->dst_cnt = 0;
+
+		/* This is a ZERO_SUM operation:
+		 * - <src_cnt> descriptors starting from 2nd or 3rd
+		 *   descriptor are for GF-XOR operations;
+		 * - remaining <dst_cnt> descriptors are for checking the result
+		 */
+		if (i++ < src_cnt)
+			/* MV_SG1_SG2 if only Q is being verified
+			 * MULTICAST if both P and Q are being verified
+			 */
+			hw_desc->opc = dopc;
+		else
+			/* DMA_CDB_OPC_DCHECK128 operation */
+			hw_desc->opc = DMA_CDB_OPC_DCHECK128;
+
+		if (likely(!list_is_last(&iter->chain_node,
+					 &desc->group_list))) {
+			/* set 'next' pointer */
+			iter->hw_next = list_entry(iter->chain_node.next,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+		} else {
+			/* this is the last descriptor.
+			 * this slot will be pasted from ADMA level
+			 * each time it wants to configure parameters
+			 * of the transaction (src, dst, ...)
+			 */
+			iter->hw_next = NULL;
+			/* always enable interrupt generation since we get
+			 * the status of pqzero from the handler
+			 */
+			set_bit(PPC440SPE_DESC_INT, &iter->flags);
+		}
+	}
+	desc->src_cnt = src_cnt;
+	desc->dst_cnt = dst_cnt;
+}
+
+/**
+ * ppc440spe_desc_init_memcpy - initialize the descriptor for MEMCPY operation
+ */
+static void ppc440spe_desc_init_memcpy(struct ppc440spe_adma_desc_slot *desc,
+					unsigned long flags)
+{
+	struct dma_cdb *hw_desc = desc->hw_desc;
+
+	memset(desc->hw_desc, 0, sizeof(struct dma_cdb));
+	desc->hw_next = NULL;
+	desc->src_cnt = 1;
+	desc->dst_cnt = 1;
+
+	if (flags & DMA_PREP_INTERRUPT)
+		set_bit(PPC440SPE_DESC_INT, &desc->flags);
+	else
+		clear_bit(PPC440SPE_DESC_INT, &desc->flags);
+
+	hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+}
+
+/**
+ * ppc440spe_desc_set_src_addr - set source address into the descriptor
+ */
+static void ppc440spe_desc_set_src_addr(struct ppc440spe_adma_desc_slot *desc,
+					struct ppc440spe_adma_chan *chan,
+					int src_idx, dma_addr_t addrh,
+					dma_addr_t addrl)
+{
+	struct dma_cdb *dma_hw_desc;
+	struct xor_cb *xor_hw_desc;
+	phys_addr_t addr64, tmplow, tmphi;
+
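+	/*
+	 * If no explicit high word is given, addrl carries the full
+	 * 64-bit address and is split into high/low halves here;
+	 * otherwise the caller-provided addrh/addrl pair is used as-is.
+	 */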
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		if (!addrh) {
+			addr64 = addrl;
+			tmphi = (addr64 >> 32);
+			tmplow = (addr64 & 0xFFFFFFFF);
+		} else {
+			tmphi = addrh;
+			tmplow = addrl;
+		}
+		dma_hw_desc = desc->hw_desc;
+		dma_hw_desc->sg1l = cpu_to_le32((u32)tmplow);
+		dma_hw_desc->sg1u |= cpu_to_le32((u32)tmphi);
+		break;
+	case PPC440SPE_XOR_ID:
+		xor_hw_desc = desc->hw_desc;
+		xor_hw_desc->ops[src_idx].l = addrl;
+		xor_hw_desc->ops[src_idx].h |= addrh;
+		break;
+	}
+}
+
+/**
+ * ppc440spe_desc_set_src_mult - set source address mult into the descriptor
+ */
+static void ppc440spe_desc_set_src_mult(struct ppc440spe_adma_desc_slot *desc,
+			struct ppc440spe_adma_chan *chan, u32 mult_index,
+			int sg_index, unsigned char mult_value)
+{
+	struct dma_cdb *dma_hw_desc;
+	struct xor_cb *xor_hw_desc;
+	u32 *psgu;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_hw_desc = desc->hw_desc;
+
+		switch (sg_index) {
+		/* for RXOR operations set multiplier
+		 * into source cued address
+		 */
+		case DMA_CDB_SG_SRC:
+			psgu = &dma_hw_desc->sg1u;
+			break;
+		/* for WXOR operations set multiplier
+		 * into destination cued address(es)
+		 */
+		case DMA_CDB_SG_DST1:
+			psgu = &dma_hw_desc->sg2u;
+			break;
+		case DMA_CDB_SG_DST2:
+			psgu = &dma_hw_desc->sg3u;
+			break;
+		default:
+			BUG();
+		}
+
+		*psgu |= cpu_to_le32(mult_value << mult_index);
+		break;
+	case PPC440SPE_XOR_ID:
+		xor_hw_desc = desc->hw_desc;
+		break;
+	default:
+		BUG();
+	}
+}
+
+/**
+ * ppc440spe_desc_set_dest_addr - set destination address into the descriptor
+ */
+static void ppc440spe_desc_set_dest_addr(struct ppc440spe_adma_desc_slot *desc,
+				struct ppc440spe_adma_chan *chan,
+				dma_addr_t addrh, dma_addr_t addrl,
+				u32 dst_idx)
+{
+	struct dma_cdb *dma_hw_desc;
+	struct xor_cb *xor_hw_desc;
+	phys_addr_t addr64, tmphi, tmplow;
+	u32 *psgu, *psgl;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		if (!addrh) {
+			addr64 = addrl;
+			tmphi = (addr64 >> 32);
+			tmplow = (addr64 & 0xFFFFFFFF);
+		} else {
+			tmphi = addrh;
+			tmplow = addrl;
+		}
+		dma_hw_desc = desc->hw_desc;
+
+		psgu = dst_idx ? &dma_hw_desc->sg3u : &dma_hw_desc->sg2u;
+		psgl = dst_idx ? &dma_hw_desc->sg3l : &dma_hw_desc->sg2l;
+
+		*psgl = cpu_to_le32((u32)tmplow);
+		*psgu |= cpu_to_le32((u32)tmphi);
+		break;
+	case PPC440SPE_XOR_ID:
+		xor_hw_desc = desc->hw_desc;
+		xor_hw_desc->cbtal = addrl;
+		xor_hw_desc->cbtah |= addrh;
+		break;
+	}
+}
+
+/**
+ * ppc440spe_desc_set_byte_count - set number of data bytes involved
+ * into the operation
+ */
+static void ppc440spe_desc_set_byte_count(struct ppc440spe_adma_desc_slot *desc,
+				struct ppc440spe_adma_chan *chan,
+				u32 byte_count)
+{
+	struct dma_cdb *dma_hw_desc;
+	struct xor_cb *xor_hw_desc;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_hw_desc = desc->hw_desc;
+		dma_hw_desc->cnt = cpu_to_le32(byte_count);
+		break;
+	case PPC440SPE_XOR_ID:
+		xor_hw_desc = desc->hw_desc;
+		xor_hw_desc->cbbc = byte_count;
+		break;
+	}
+}
+
+/**
+ * ppc440spe_desc_set_rxor_block_size - set RXOR block size
+ */
+static inline void ppc440spe_desc_set_rxor_block_size(u32 byte_count)
+{
+	/* assume that byte_count is aligned on a 512-byte boundary;
+	 * thus it can be written directly to the register (bits 23:31
+	 * are reserved there).
+	 */
+	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CF2H, byte_count);
+}
+
+/**
+ * ppc440spe_desc_set_dcheck - set CHECK pattern
+ */
+static void ppc440spe_desc_set_dcheck(struct ppc440spe_adma_desc_slot *desc,
+				struct ppc440spe_adma_chan *chan, u8 *qword)
+{
+	struct dma_cdb *dma_hw_desc;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_hw_desc = desc->hw_desc;
+		iowrite32(qword[0], &dma_hw_desc->sg3l);
+		iowrite32(qword[4], &dma_hw_desc->sg3u);
+		iowrite32(qword[8], &dma_hw_desc->sg2l);
+		iowrite32(qword[12], &dma_hw_desc->sg2u);
+		break;
+	default:
+		BUG();
+	}
+}
+
+/**
+ * ppc440spe_xor_set_link - set link address in xor CB
+ */
+static void ppc440spe_xor_set_link(struct ppc440spe_adma_desc_slot *prev_desc,
+				struct ppc440spe_adma_desc_slot *next_desc)
+{
+	struct xor_cb *xor_hw_desc = prev_desc->hw_desc;
+
+	if (unlikely(!next_desc || !(next_desc->phys))) {
+		printk(KERN_ERR "%s: next_desc=0x%p; next_desc->phys=0x%llx\n",
+			__func__, next_desc,
+			next_desc ? next_desc->phys : 0);
+		BUG();
+	}
+
+	xor_hw_desc->cbs = 0;
+	xor_hw_desc->cblal = next_desc->phys;
+	xor_hw_desc->cblah = 0;
+	xor_hw_desc->cbc |= XOR_CBCR_LNK_BIT;
+}
+
+/**
+ * ppc440spe_desc_set_link - set the address of descriptor following this
+ * descriptor in chain
+ */
+static void ppc440spe_desc_set_link(struct ppc440spe_adma_chan *chan,
+				struct ppc440spe_adma_desc_slot *prev_desc,
+				struct ppc440spe_adma_desc_slot *next_desc)
+{
+	unsigned long flags;
+	struct ppc440spe_adma_desc_slot *tail = next_desc;
+
+	if (unlikely(!prev_desc || !next_desc ||
+		(prev_desc->hw_next && prev_desc->hw_next != next_desc))) {
+		/* If the previous 'next' pointer is being overwritten,
+		 * something is wrong, though we may refetch from append to
+		 * initiate list processing; in that case it's ok.
+		 */
+		printk(KERN_ERR "%s: prev_desc=0x%p; next_desc=0x%p; "
+			"prev->hw_next=0x%p\n", __func__, prev_desc,
+			next_desc, prev_desc ? prev_desc->hw_next : 0);
+		BUG();
+	}
+
+	local_irq_save(flags);
+
+	/* do s/w chaining both for DMA and XOR descriptors */
+	prev_desc->hw_next = next_desc;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		break;
+	case PPC440SPE_XOR_ID:
+		/* bind descriptor to the chain */
+		while (tail->hw_next)
+			tail = tail->hw_next;
+		xor_last_linked = tail;
+
+		if (prev_desc == xor_last_submit)
+			/* do not link to the last submitted CB */
+			break;
+		ppc440spe_xor_set_link(prev_desc, next_desc);
+		break;
+	}
+
+	local_irq_restore(flags);
+}
+
+/**
+ * ppc440spe_desc_get_link - get the address of the descriptor that
+ * follows this one
+ */
+static inline u32 ppc440spe_desc_get_link(struct ppc440spe_adma_desc_slot *desc,
+					struct ppc440spe_adma_chan *chan)
+{
+	if (!desc->hw_next)
+		return 0;
+
+	return desc->hw_next->phys;
+}
+
+/**
+ * ppc440spe_desc_is_aligned - check alignment
+ */
+static inline int ppc440spe_desc_is_aligned(
+	struct ppc440spe_adma_desc_slot *desc, int num_slots)
+{
+	return (desc->idx & (num_slots - 1)) ? 0 : 1;
+}
+
+/**
+ * ppc440spe_chan_xor_slot_count - get the number of slots necessary for
+ * XOR operation
+ */
+static int ppc440spe_chan_xor_slot_count(size_t len, int src_cnt,
+			int *slots_per_op)
+{
+	int slot_cnt;
+
+	/* each XOR descriptor provides up to 16 source operands */
+	slot_cnt = *slots_per_op = (src_cnt + XOR_MAX_OPS - 1)/XOR_MAX_OPS;
+
+	if (likely(len <= PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT))
+		return slot_cnt;
+
+	printk(KERN_ERR "%s: len %zu > max %d !!\n",
+		__func__, len, PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT);
+	BUG();
+	return slot_cnt;
+}
+
+/**
+ * ppc440spe_dma2_pq_slot_count - get the number of slots necessary for
+ * DMA2 PQ operation
+ */
+static int ppc440spe_dma2_pq_slot_count(dma_addr_t *srcs,
+		int src_cnt, size_t len)
+{
+	signed long long order = 0;
+	int state = 0;
+	int addr_count = 0;
+	int i;
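+	/*
+	 * Walk neighbouring source addresses with a small state machine
+	 * and count how many RXOR address operands are needed: state 0
+	 * looks for a contiguous (direct or reverse) pair, states 1/2
+	 * extend or close a region already started, and state 3 means the
+	 * sources cannot be expressed as RXOR at all.  Each XOR CB takes
+	 * up to XOR_MAX_OPS operands, hence the final division.
+	 */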
+	for (i = 1; i < src_cnt; i++) {
+		dma_addr_t cur_addr = srcs[i];
+		dma_addr_t old_addr = srcs[i-1];
+		switch (state) {
+		case 0:
+			if (cur_addr == old_addr + len) {
+				/* direct RXOR */
+				order = 1;
+				state = 1;
+				if (i == src_cnt-1)
+					addr_count++;
+			} else if (old_addr == cur_addr + len) {
+				/* reverse RXOR */
+				order = -1;
+				state = 1;
+				if (i == src_cnt-1)
+					addr_count++;
+			} else {
+				state = 3;
+			}
+			break;
+		case 1:
+			if (i == src_cnt-2 || (order == -1
+				&& cur_addr != old_addr - len)) {
+				order = 0;
+				state = 0;
+				addr_count++;
+			} else if (cur_addr == old_addr + len*order) {
+				state = 2;
+				if (i == src_cnt-1)
+					addr_count++;
+			} else if (cur_addr == old_addr + 2*len) {
+				state = 2;
+				if (i == src_cnt-1)
+					addr_count++;
+			} else if (cur_addr == old_addr + 3*len) {
+				state = 2;
+				if (i == src_cnt-1)
+					addr_count++;
+			} else {
+				order = 0;
+				state = 0;
+				addr_count++;
+			}
+			break;
+		case 2:
+			order = 0;
+			state = 0;
+			addr_count++;
+			break;
+		}
+		if (state == 3)
+			break;
+	}
+	if (src_cnt <= 1 || (state != 1 && state != 2)) {
+		pr_err("%s: src_cnt=%d, state=%d, addr_count=%d, order=%lld\n",
+			__func__, src_cnt, state, addr_count, order);
+		for (i = 0; i < src_cnt; i++)
+			pr_err("\t[%d] 0x%llx \n", i, srcs[i]);
+		BUG();
+	}
+
+	return (addr_count + XOR_MAX_OPS - 1) / XOR_MAX_OPS;
+}
+
+
+/******************************************************************************
+ * ADMA channel low-level routines
+ ******************************************************************************/
+
+static u32
+ppc440spe_chan_get_current_descriptor(struct ppc440spe_adma_chan *chan);
+static void ppc440spe_chan_append(struct ppc440spe_adma_chan *chan);
+
+/**
+ * ppc440spe_adma_device_clear_eot_status - interrupt ack to XOR or DMA engine
+ */
+static void ppc440spe_adma_device_clear_eot_status(
+					struct ppc440spe_adma_chan *chan)
+{
+	struct dma_regs *dma_reg;
+	struct xor_regs *xor_reg;
+	u8 *p = chan->device->dma_desc_pool_virt;
+	struct dma_cdb *cdb;
+	u32 rv, i;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		/* read FIFO to ack */
+		dma_reg = chan->device->dma_reg;
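+		/*
+		 * Each entry popped from the completion FIFO holds the CDB
+		 * address together with status bits; the matching virtual
+		 * CDB is found via its offset within the descriptor pool.
+		 */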
+		while ((rv = ioread32(&dma_reg->csfpl))) {
+			i = rv & DMA_CDB_ADDR_MSK;
+			cdb = (struct dma_cdb *)&p[i -
+			    (u32)chan->device->dma_desc_pool];
+
+			/* Clear opcode to ack. This is necessary for
+			 * ZeroSum operations only
+			 */
+			cdb->opc = 0;
+
+			if (test_bit(PPC440SPE_RXOR_RUN,
+			    &ppc440spe_rxor_state)) {
+				/* probably this is a completed RXOR op,
+				 * get pointer to CDB using the fact that
+				 * physical and virtual addresses of CDB
+				 * in pools have the same offsets
+				 */
+				if (le32_to_cpu(cdb->sg1u) &
+				    DMA_CUED_XOR_BASE) {
+					/* this is a RXOR */
+					clear_bit(PPC440SPE_RXOR_RUN,
+						  &ppc440spe_rxor_state);
+				}
+			}
+
+			if (rv & DMA_CDB_STATUS_MSK) {
+				/* ZeroSum check failed
+				 */
+				struct ppc440spe_adma_desc_slot *iter;
+				dma_addr_t phys = rv & ~DMA_CDB_MSK;
+
+				/*
+				 * Update the status of corresponding
+				 * descriptor.
+				 */
+				list_for_each_entry(iter, &chan->chain,
+				    chain_node) {
+					if (iter->phys == phys)
+						break;
+				}
+				/*
+				 * if we cannot find the corresponding
+				 * slot, it's a bug
+				 */
+				BUG_ON(&iter->chain_node == &chan->chain);
+
+				if (iter->xor_check_result) {
+					if (test_bit(PPC440SPE_DESC_PCHECK,
+						     &iter->flags)) {
+						*iter->xor_check_result |=
+							SUM_CHECK_P_RESULT;
+					} else
+					if (test_bit(PPC440SPE_DESC_QCHECK,
+						     &iter->flags)) {
+						*iter->xor_check_result |=
+							SUM_CHECK_Q_RESULT;
+					} else
+						BUG();
+				}
+			}
+		}
+
+		rv = ioread32(&dma_reg->dsts);
+		if (rv) {
+			pr_err("DMA%d err status: 0x%x\n",
+			       chan->device->id, rv);
+			/* write back to clear */
+			iowrite32(rv, &dma_reg->dsts);
+		}
+		break;
+	case PPC440SPE_XOR_ID:
+		/* reset status bits to ack */
+		xor_reg = chan->device->xor_reg;
+		rv = ioread32be(&xor_reg->sr);
+		iowrite32be(rv, &xor_reg->sr);
+
+		if (rv & (XOR_IE_ICBIE_BIT|XOR_IE_ICIE_BIT|XOR_IE_RPTIE_BIT)) {
+			if (rv & XOR_IE_RPTIE_BIT) {
+				/* Read PLB Timeout Error.
+				 * Try to resubmit the CB
+				 */
+				u32 val = ioread32be(&xor_reg->ccbalr);
+
+				iowrite32be(val, &xor_reg->cblalr);
+
+				val = ioread32be(&xor_reg->crsr);
+				iowrite32be(val | XOR_CRSR_XAE_BIT,
+					    &xor_reg->crsr);
+			} else
+				pr_err("XOR ERR 0x%x status\n", rv);
+			break;
+		}
+
+		/*  if the XORcore is idle, but there are unprocessed CBs
+		 * then refetch the s/w chain here
+		 */
+		if (!(ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) &&
+		    do_xor_refetch)
+			ppc440spe_chan_append(chan);
+		break;
+	}
+}
+
+/**
+ * ppc440spe_chan_is_busy - get the channel status
+ */
+static int ppc440spe_chan_is_busy(struct ppc440spe_adma_chan *chan)
+{
+	struct dma_regs *dma_reg;
+	struct xor_regs *xor_reg;
+	int busy = 0;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_reg = chan->device->dma_reg;
+		/*  if command FIFO's head and tail pointers are equal and
+		 * status tail is the same as command, then channel is free
+		 */
+		if (ioread16(&dma_reg->cpfhp) != ioread16(&dma_reg->cpftp) ||
+		    ioread16(&dma_reg->cpftp) != ioread16(&dma_reg->csftp))
+			busy = 1;
+		break;
+	case PPC440SPE_XOR_ID:
+		/* use the special status bit for the XORcore
+		 */
+		xor_reg = chan->device->xor_reg;
+		busy = (ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) ? 1 : 0;
+		break;
+	}
+
+	return busy;
+}
+
+/**
+ * ppc440spe_chan_set_first_xor_descriptor -  init XORcore chain
+ */
+static void ppc440spe_chan_set_first_xor_descriptor(
+				struct ppc440spe_adma_chan *chan,
+				struct ppc440spe_adma_desc_slot *next_desc)
+{
+	struct xor_regs *xor_reg = chan->device->xor_reg;
+
+	if (ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT)
+		printk(KERN_INFO "%s: Warn: XORcore is running "
+			"when trying to set the first CDB!\n",
+			__func__);
+
+	xor_last_submit = xor_last_linked = next_desc;
+
+	iowrite32be(XOR_CRSR_64BA_BIT, &xor_reg->crsr);
+
+	iowrite32be(next_desc->phys, &xor_reg->cblalr);
+	iowrite32be(0, &xor_reg->cblahr);
+	iowrite32be(ioread32be(&xor_reg->cbcr) | XOR_CBCR_LNK_BIT,
+		    &xor_reg->cbcr);
+
+	chan->hw_chain_inited = 1;
+}
+
+/**
+ * ppc440spe_dma_put_desc - put DMA0,1 descriptor to FIFO.
+ * called with irqs disabled
+ */
+static void ppc440spe_dma_put_desc(struct ppc440spe_adma_chan *chan,
+		struct ppc440spe_adma_desc_slot *desc)
+{
+	u32 pcdb;
+	struct dma_regs *dma_reg = chan->device->dma_reg;
+
+	pcdb = desc->phys;
+	if (!test_bit(PPC440SPE_DESC_INT, &desc->flags))
+		pcdb |= DMA_CDB_NO_INT;
+
+	chan_last_sub[chan->device->id] = desc;
+
+	ADMA_LL_DBG(print_cb(chan, desc->hw_desc));
+
+	iowrite32(pcdb, &dma_reg->cpfpl);
+}
+
+/**
+ * ppc440spe_chan_append - update the h/w chain in the channel
+ */
+static void ppc440spe_chan_append(struct ppc440spe_adma_chan *chan)
+{
+	struct xor_regs *xor_reg;
+	struct ppc440spe_adma_desc_slot *iter;
+	struct xor_cb *xcb;
+	u32 cur_desc;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		cur_desc = ppc440spe_chan_get_current_descriptor(chan);
+
+		if (likely(cur_desc)) {
+			iter = chan_last_sub[chan->device->id];
+			BUG_ON(!iter);
+		} else {
+			/* first peer */
+			iter = chan_first_cdb[chan->device->id];
+			BUG_ON(!iter);
+			ppc440spe_dma_put_desc(chan, iter);
+			chan->hw_chain_inited = 1;
+		}
+
+		/* is there something new to append */
+		if (!iter->hw_next)
+			break;
+
+		/* flush descriptors from the s/w queue to fifo */
+		list_for_each_entry_continue(iter, &chan->chain, chain_node) {
+			ppc440spe_dma_put_desc(chan, iter);
+			if (!iter->hw_next)
+				break;
+		}
+		break;
+	case PPC440SPE_XOR_ID:
+		/* update h/w links and refetch */
+		if (!xor_last_submit->hw_next)
+			break;
+
+		xor_reg = chan->device->xor_reg;
+		/* the last linked CDB has to generate an interrupt so that
+		 * we are able to append the next lists to h/w regardless of
+		 * the XOR engine state at the moment these lists are
+		 * appended
+		 */
+		xcb = xor_last_linked->hw_desc;
+		xcb->cbc |= XOR_CBCR_CBCE_BIT;
+
+		if (!(ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT)) {
+			/* XORcore is idle. Refetch now */
+			do_xor_refetch = 0;
+			ppc440spe_xor_set_link(xor_last_submit,
+				xor_last_submit->hw_next);
+
+			ADMA_LL_DBG(print_cb_list(chan,
+				xor_last_submit->hw_next));
+
+			xor_last_submit = xor_last_linked;
+			iowrite32be(ioread32be(&xor_reg->crsr) |
+				    XOR_CRSR_RCBE_BIT | XOR_CRSR_64BA_BIT,
+				    &xor_reg->crsr);
+		} else {
+			/* XORcore is running. Refetch later in the handler */
+			do_xor_refetch = 1;
+		}
+
+		break;
+	}
+
+	local_irq_restore(flags);
+}
+
+/**
+ * ppc440spe_chan_get_current_descriptor - get the currently executed descriptor
+ */
+static u32
+ppc440spe_chan_get_current_descriptor(struct ppc440spe_adma_chan *chan)
+{
+	struct dma_regs *dma_reg;
+	struct xor_regs *xor_reg;
+
+	if (unlikely(!chan->hw_chain_inited))
+		/* h/w descriptor chain is not initialized yet */
+		return 0;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_reg = chan->device->dma_reg;
+		return ioread32(&dma_reg->acpl) & (~DMA_CDB_MSK);
+	case PPC440SPE_XOR_ID:
+		xor_reg = chan->device->xor_reg;
+		return ioread32be(&xor_reg->ccbalr);
+	}
+	return 0;
+}
+
+/**
+ * ppc440spe_chan_run - enable the channel
+ */
+static void ppc440spe_chan_run(struct ppc440spe_adma_chan *chan)
+{
+	struct xor_regs *xor_reg;
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		/* DMAs are always enabled, do nothing */
+		break;
+	case PPC440SPE_XOR_ID:
+		/* drain write buffer */
+		xor_reg = chan->device->xor_reg;
+
+		/* fetch descriptor pointed to in <link> */
+		iowrite32be(XOR_CRSR_64BA_BIT | XOR_CRSR_XAE_BIT,
+			    &xor_reg->crsr);
+		break;
+	}
+}
+
+/******************************************************************************
+ * ADMA device level
+ ******************************************************************************/
+
+static void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan);
+static int ppc440spe_adma_alloc_chan_resources(struct dma_chan *chan);
+
+static dma_cookie_t
+ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx);
+
+static void ppc440spe_adma_set_dest(struct ppc440spe_adma_desc_slot *tx,
+				    dma_addr_t addr, int index);
+static void
+ppc440spe_adma_memcpy_xor_set_src(struct ppc440spe_adma_desc_slot *tx,
+				  dma_addr_t addr, int index);
+
+static void
+ppc440spe_adma_pq_set_dest(struct ppc440spe_adma_desc_slot *tx,
+			   dma_addr_t *paddr, unsigned long flags);
+static void
+ppc440spe_adma_pq_set_src(struct ppc440spe_adma_desc_slot *tx,
+			  dma_addr_t addr, int index);
+static void
+ppc440spe_adma_pq_set_src_mult(struct ppc440spe_adma_desc_slot *tx,
+			       unsigned char mult, int index, int dst_pos);
+static void
+ppc440spe_adma_pqzero_sum_set_dest(struct ppc440spe_adma_desc_slot *tx,
+				   dma_addr_t paddr, dma_addr_t qaddr);
+
+static struct page *ppc440spe_rxor_srcs[32];
+
+/**
+ * ppc440spe_can_rxor - check if the operands may be processed with RXOR
+ */
+static int ppc440spe_can_rxor(struct page **srcs, int src_cnt, size_t len)
+{
+	int i, order = 0, state = 0;
+	int idx = 0;
+
+	if (unlikely(!(src_cnt > 1)))
+		return 0;
+
+	BUG_ON(src_cnt > ARRAY_SIZE(ppc440spe_rxor_srcs));
+
+	/* Skip holes in the source list before checking */
+	for (i = 0; i < src_cnt; i++) {
+		if (!srcs[i])
+			continue;
+		ppc440spe_rxor_srcs[idx++] = srcs[i];
+	}
+	src_cnt = idx;
+
+	for (i = 1; i < src_cnt; i++) {
+		char *cur_addr = page_address(ppc440spe_rxor_srcs[i]);
+		char *old_addr = page_address(ppc440spe_rxor_srcs[i - 1]);
+
+		switch (state) {
+		case 0:
+			if (cur_addr == old_addr + len) {
+				/* direct RXOR */
+				order = 1;
+				state = 1;
+			} else if (old_addr == cur_addr + len) {
+				/* reverse RXOR */
+				order = -1;
+				state = 1;
+			} else
+				goto out;
+			break;
+		case 1:
+			if ((i == src_cnt - 2) ||
+			    (order == -1 && cur_addr != old_addr - len)) {
+				order = 0;
+				state = 0;
+			} else if ((cur_addr == old_addr + len * order) ||
+				   (cur_addr == old_addr + 2 * len) ||
+				   (cur_addr == old_addr + 3 * len)) {
+				state = 2;
+			} else {
+				order = 0;
+				state = 0;
+			}
+			break;
+		case 2:
+			order = 0;
+			state = 0;
+			break;
+		}
+	}
+
+out:
+	if (state == 1 || state == 2)
+		return 1;
+
+	return 0;
+}
+
+/**
+ * ppc440spe_adma_estimate - estimate the efficiency of processing
+ *	the operation given on this channel. It's assumed that 'chan' is
+ *	capable of processing a 'cap' type of operation.
+ * @chan: channel to use
+ * @cap: type of transaction
+ * @dst_lst: array of destination pointers
+ * @dst_cnt: number of destination operands
+ * @src_lst: array of source pointers
+ * @src_cnt: number of source operands
+ * @src_sz: size of each source operand
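+ *
+ * Return: a rank; the higher the value, the better suited this channel
+ *	is for the operation. -1 means the channel cannot process it.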
+ */
+static int ppc440spe_adma_estimate(struct dma_chan *chan,
+	enum dma_transaction_type cap, struct page **dst_lst, int dst_cnt,
+	struct page **src_lst, int src_cnt, size_t src_sz)
+{
+	int ef = 1;
+
+	if (cap == DMA_PQ || cap == DMA_PQ_VAL) {
+		/* If RAID-6 capabilities were not activated don't try
+		 * to use them
+		 */
+		if (unlikely(!ppc440spe_r6_enabled))
+			return -1;
+	}
+	/*  In the current implementation of the ppc440spe ADMA driver it
+	 * only makes sense to single out the pq case, because it may be
+	 * processed either:
+	 * (1) using the Biskup method on DMA2; or
+	 * (2) on DMA0/1.
+	 *  Thus we favour (1) if the sources are suitable; otherwise let
+	 * it be processed on one of the DMA0/1 engines.
+	 *  In the sum_product case, where the destination is also a
+	 * source, process it on DMA0/1 only.
+	 */
+	if (cap == DMA_PQ && chan->chan_id == PPC440SPE_XOR_ID) {
+
+		if (dst_cnt == 1 && src_cnt == 2 && dst_lst[0] == src_lst[1])
+			ef = 0; /* sum_product case, process on DMA0/1 */
+		else if (ppc440spe_can_rxor(src_lst, src_cnt, src_sz))
+			ef = 3; /* override (DMA0/1 + idle) */
+		else
+			ef = 0; /* can't process on DMA2 if !rxor */
+	}
+
+	/* channel idleness increases the priority */
+	if (likely(ef) &&
+	    !ppc440spe_chan_is_busy(to_ppc440spe_adma_chan(chan)))
+		ef++;
+
+	return ef;
+}
+
+struct dma_chan *
+ppc440spe_async_tx_find_best_channel(enum dma_transaction_type cap,
+	struct page **dst_lst, int dst_cnt, struct page **src_lst,
+	int src_cnt, size_t src_sz)
+{
+	struct dma_chan *best_chan = NULL;
+	struct ppc_dma_chan_ref *ref;
+	int best_rank = -1;
+
+	if (unlikely(!src_sz))
+		return NULL;
+	if (src_sz > PAGE_SIZE) {
+		/*
+		 * should a user of the api ever pass > PAGE_SIZE requests
+		 * we sort out cases where temporary page-sized buffers
+		 * are used.
+		 */
+		switch (cap) {
+		case DMA_PQ:
+			if (src_cnt == 1 && dst_lst[1] == src_lst[0])
+				return NULL;
+			if (src_cnt == 2 && dst_lst[1] == src_lst[1])
+				return NULL;
+			break;
+		case DMA_PQ_VAL:
+		case DMA_XOR_VAL:
+			return NULL;
+		default:
+			break;
+		}
+	}
+
+	list_for_each_entry(ref, &ppc440spe_adma_chan_list, node) {
+		if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
+			int rank;
+
+			rank = ppc440spe_adma_estimate(ref->chan, cap, dst_lst,
+					dst_cnt, src_lst, src_cnt, src_sz);
+			if (rank > best_rank) {
+				best_rank = rank;
+				best_chan = ref->chan;
+			}
+		}
+	}
+
+	return best_chan;
+}
+EXPORT_SYMBOL_GPL(ppc440spe_async_tx_find_best_channel);
+
+/**
+ * ppc440spe_get_group_entry - get group entry with index idx
+ * @tdesc: is the last allocated slot in the group.
+ */
+static struct ppc440spe_adma_desc_slot *
+ppc440spe_get_group_entry(struct ppc440spe_adma_desc_slot *tdesc, u32 entry_idx)
+{
+	struct ppc440spe_adma_desc_slot *iter = tdesc->group_head;
+	int i = 0;
+
+	if (entry_idx < 0 || entry_idx >= (tdesc->src_cnt + tdesc->dst_cnt)) {
+		printk("%s: entry_idx %d, src_cnt %d, dst_cnt %d\n",
+			__func__, entry_idx, tdesc->src_cnt, tdesc->dst_cnt);
+		BUG();
+	}
+
+	list_for_each_entry(iter, &tdesc->group_list, chain_node) {
+		if (i++ == entry_idx)
+			break;
+	}
+	return iter;
+}
+
+/**
+ * ppc440spe_adma_free_slots - flags descriptor slots for reuse
+ * @slot: Slot to free
+ * Caller must hold &ppc440spe_chan->lock while calling this function
+ */
+static void ppc440spe_adma_free_slots(struct ppc440spe_adma_desc_slot *slot,
+				      struct ppc440spe_adma_chan *chan)
+{
+	int stride = slot->slots_per_op;
+
+	while (stride--) {
+		slot->slots_per_op = 0;
+		slot = list_entry(slot->slot_node.next,
+				struct ppc440spe_adma_desc_slot,
+				slot_node);
+	}
+}
+
+/**
+ * ppc440spe_adma_run_tx_complete_actions - call functions to be called
+ * upon completion
+ */
+static dma_cookie_t ppc440spe_adma_run_tx_complete_actions(
+		struct ppc440spe_adma_desc_slot *desc,
+		struct ppc440spe_adma_chan *chan,
+		dma_cookie_t cookie)
+{
+	BUG_ON(desc->async_tx.cookie < 0);
+	if (desc->async_tx.cookie > 0) {
+		cookie = desc->async_tx.cookie;
+		desc->async_tx.cookie = 0;
+
+		dma_descriptor_unmap(&desc->async_tx);
+		/* call the callback (must not sleep or submit new
+		 * operations to this channel)
+		 */
+		dmaengine_desc_get_callback_invoke(&desc->async_tx, NULL);
+	}
+
+	/* run dependent operations */
+	dma_run_dependencies(&desc->async_tx);
+
+	return cookie;
+}
+
+/**
+ * ppc440spe_adma_clean_slot - clean up CDB slot (if ack is set)
+ */
+static int ppc440spe_adma_clean_slot(struct ppc440spe_adma_desc_slot *desc,
+		struct ppc440spe_adma_chan *chan)
+{
+	/* the client is allowed to attach dependent operations
+	 * until 'ack' is set
+	 */
+	if (!async_tx_test_ack(&desc->async_tx))
+		return 0;
+
+	/* leave the last descriptor in the chain
+	 * so we can append to it
+	 */
+	if (list_is_last(&desc->chain_node, &chan->chain) ||
+	    desc->phys == ppc440spe_chan_get_current_descriptor(chan))
+		return 1;
+
+	if (chan->device->id != PPC440SPE_XOR_ID) {
+		/* our DMA interrupt handler clears opc field of
+		 * each processed descriptor. For all types of
+		 * operations except for ZeroSum we do not actually
+		 * need ack from the interrupt handler. ZeroSum is a
+		 * special case since the result of this operation
+		 * is available from the handler only, so if we see
+		 * such type of descriptor (which is unprocessed yet)
+		 * then leave it in chain.
+		 */
+		struct dma_cdb *cdb = desc->hw_desc;
+		if (cdb->opc == DMA_CDB_OPC_DCHECK128)
+			return 1;
+	}
+
+	dev_dbg(chan->device->common.dev, "\tfree slot %llx: %d stride: %d\n",
+		desc->phys, desc->idx, desc->slots_per_op);
+
+	list_del(&desc->chain_node);
+	ppc440spe_adma_free_slots(desc, chan);
+	return 0;
+}
+
+/**
+ * __ppc440spe_adma_slot_cleanup - this is the common clean-up routine
+ *	which runs through the channel CDBs list until it reaches the
+ *	descriptor currently being processed. When the routine determines
+ *	that all CDBs of a group are completed, the corresponding callbacks
+ *	(if any) are called and the slots are freed.
+ */
+static void __ppc440spe_adma_slot_cleanup(struct ppc440spe_adma_chan *chan)
+{
+	struct ppc440spe_adma_desc_slot *iter, *_iter, *group_start = NULL;
+	dma_cookie_t cookie = 0;
+	u32 current_desc = ppc440spe_chan_get_current_descriptor(chan);
+	int busy = ppc440spe_chan_is_busy(chan);
+	int seen_current = 0, slot_cnt = 0, slots_per_op = 0;
+
+	dev_dbg(chan->device->common.dev, "ppc440spe adma%d: %s\n",
+		chan->device->id, __func__);
+
+	if (!current_desc) {
+		/*  There were no transactions yet, so
+		 * nothing to clean
+		 */
+		return;
+	}
+
+	/* free completed slots from the chain starting with
+	 * the oldest descriptor
+	 */
+	list_for_each_entry_safe(iter, _iter, &chan->chain,
+					chain_node) {
+		dev_dbg(chan->device->common.dev, "\tcookie: %d slot: %d "
+		    "busy: %d this_desc: %#llx next_desc: %#x "
+		    "cur: %#x ack: %d\n",
+		    iter->async_tx.cookie, iter->idx, busy, iter->phys,
+		    ppc440spe_desc_get_link(iter, chan), current_desc,
+		    async_tx_test_ack(&iter->async_tx));
+		prefetch(_iter);
+		prefetch(&_iter->async_tx);
+
+		/* do not advance past the current descriptor loaded into the
+		 * hardware channel; subsequent descriptors are either in
+		 * process or have not been submitted
+		 */
+		if (seen_current)
+			break;
+
+		/* stop the search if we reach the current descriptor and the
+		 * channel is busy, or if it appears that the current descriptor
+		 * needs to be re-read (i.e. has been appended to)
+		 */
+		if (iter->phys == current_desc) {
+			BUG_ON(seen_current++);
+			if (busy || ppc440spe_desc_get_link(iter, chan)) {
+				/* not all descriptors of the group have
+				 * been completed; exit.
+				 */
+				break;
+			}
+		}
+
+		/* detect the start of a group transaction */
+		if (!slot_cnt && !slots_per_op) {
+			slot_cnt = iter->slot_cnt;
+			slots_per_op = iter->slots_per_op;
+			if (slot_cnt <= slots_per_op) {
+				slot_cnt = 0;
+				slots_per_op = 0;
+			}
+		}
+
+		if (slot_cnt) {
+			if (!group_start)
+				group_start = iter;
+			slot_cnt -= slots_per_op;
+		}
+
+		/* all the members of a group are complete */
+		if (slots_per_op != 0 && slot_cnt == 0) {
+			struct ppc440spe_adma_desc_slot *grp_iter, *_grp_iter;
+			int end_of_chain = 0;
+
+			/* clean up the group */
+			slot_cnt = group_start->slot_cnt;
+			grp_iter = group_start;
+			list_for_each_entry_safe_from(grp_iter, _grp_iter,
+				&chan->chain, chain_node) {
+
+				cookie = ppc440spe_adma_run_tx_complete_actions(
+					grp_iter, chan, cookie);
+
+				slot_cnt -= slots_per_op;
+				end_of_chain = ppc440spe_adma_clean_slot(
+				    grp_iter, chan);
+				if (end_of_chain && slot_cnt) {
+					/* Should wait for ZeroSum completion */
+					if (cookie > 0)
+						chan->common.completed_cookie = cookie;
+					return;
+				}
+
+				if (slot_cnt == 0 || end_of_chain)
+					break;
+			}
+
+			/* the group should be complete at this point */
+			BUG_ON(slot_cnt);
+
+			slots_per_op = 0;
+			group_start = NULL;
+			if (end_of_chain)
+				break;
+			else
+				continue;
+		} else if (slots_per_op) /* wait for group completion */
+			continue;
+
+		cookie = ppc440spe_adma_run_tx_complete_actions(iter, chan,
+		    cookie);
+
+		if (ppc440spe_adma_clean_slot(iter, chan))
+			break;
+	}
+
+	BUG_ON(!seen_current);
+
+	if (cookie > 0) {
+		chan->common.completed_cookie = cookie;
+		pr_debug("\tcompleted cookie %d\n", cookie);
+	}
+
+}
+
+/**
+ * ppc440spe_adma_tasklet - clean up watch-dog initiator
+ */
+static void ppc440spe_adma_tasklet(unsigned long data)
+{
+	struct ppc440spe_adma_chan *chan = (struct ppc440spe_adma_chan *) data;
+
+	spin_lock_nested(&chan->lock, SINGLE_DEPTH_NESTING);
+	__ppc440spe_adma_slot_cleanup(chan);
+	spin_unlock(&chan->lock);
+}
+
+/**
+ * ppc440spe_adma_slot_cleanup - clean up scheduled initiator
+ */
+static void ppc440spe_adma_slot_cleanup(struct ppc440spe_adma_chan *chan)
+{
+	spin_lock_bh(&chan->lock);
+	__ppc440spe_adma_slot_cleanup(chan);
+	spin_unlock_bh(&chan->lock);
+}
+
+/**
+ * ppc440spe_adma_alloc_slots - allocate free slots (if any)
+ */
+static struct ppc440spe_adma_desc_slot *ppc440spe_adma_alloc_slots(
+		struct ppc440spe_adma_chan *chan, int num_slots,
+		int slots_per_op)
+{
+	struct ppc440spe_adma_desc_slot *iter = NULL, *_iter;
+	struct ppc440spe_adma_desc_slot *alloc_start = NULL;
+	struct list_head chain = LIST_HEAD_INIT(chain);
+	int slots_found, retry = 0;
+
+
+	BUG_ON(!num_slots || !slots_per_op);
+	/* start the search from the last allocated descriptor;
+	 * if a contiguous allocation cannot be found, start searching
+	 * from the beginning of the list
+	 */
+retry:
+	slots_found = 0;
+	if (retry == 0)
+		iter = chan->last_used;
+	else
+		iter = list_entry(&chan->all_slots,
+				  struct ppc440spe_adma_desc_slot,
+				  slot_node);
+	list_for_each_entry_safe_continue(iter, _iter, &chan->all_slots,
+	    slot_node) {
+		prefetch(_iter);
+		prefetch(&_iter->async_tx);
+		if (iter->slots_per_op) {
+			slots_found = 0;
+			continue;
+		}
+
+		/* start the allocation if the slot is correctly aligned */
+		if (!slots_found++)
+			alloc_start = iter;
+
+		if (slots_found == num_slots) {
+			struct ppc440spe_adma_desc_slot *alloc_tail = NULL;
+			struct ppc440spe_adma_desc_slot *last_used = NULL;
+
+			iter = alloc_start;
+			while (num_slots) {
+				int i;
+				/* pre-ack all but the last descriptor */
+				if (num_slots != slots_per_op)
+					async_tx_ack(&iter->async_tx);
+
+				list_add_tail(&iter->chain_node, &chain);
+				alloc_tail = iter;
+				iter->async_tx.cookie = 0;
+				iter->hw_next = NULL;
+				iter->flags = 0;
+				iter->slot_cnt = num_slots;
+				iter->xor_check_result = NULL;
+				for (i = 0; i < slots_per_op; i++) {
+					iter->slots_per_op = slots_per_op - i;
+					last_used = iter;
+					iter = list_entry(iter->slot_node.next,
+						struct ppc440spe_adma_desc_slot,
+						slot_node);
+				}
+				num_slots -= slots_per_op;
+			}
+			alloc_tail->group_head = alloc_start;
+			alloc_tail->async_tx.cookie = -EBUSY;
+			list_splice(&chain, &alloc_tail->group_list);
+			chan->last_used = last_used;
+			return alloc_tail;
+		}
+	}
+	if (!retry++)
+		goto retry;
+
+	/* try to free some slots if the allocation fails */
+	tasklet_schedule(&chan->irq_tasklet);
+	return NULL;
+}
+
+/**
+ * ppc440spe_adma_alloc_chan_resources -  allocate pools for CDB slots
+ */
+static int ppc440spe_adma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+	struct ppc440spe_adma_desc_slot *slot = NULL;
+	char *hw_desc;
+	int i, db_sz;
+	int init;
+
+	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+	init = ppc440spe_chan->slots_allocated ? 0 : 1;
+	chan->chan_id = ppc440spe_chan->device->id;
+
+	/* Allocate descriptor slots */
+	i = ppc440spe_chan->slots_allocated;
+	if (ppc440spe_chan->device->id != PPC440SPE_XOR_ID)
+		db_sz = sizeof(struct dma_cdb);
+	else
+		db_sz = sizeof(struct xor_cb);
+
+	for (; i < (ppc440spe_chan->device->pool_size / db_sz); i++) {
+		slot = kzalloc(sizeof(struct ppc440spe_adma_desc_slot),
+			       GFP_KERNEL);
+		if (!slot) {
+			printk(KERN_INFO "SPE ADMA Channel only initialized"
+				" %d descriptor slots\n", i--);
+			break;
+		}
+
+		hw_desc = (char *) ppc440spe_chan->device->dma_desc_pool_virt;
+		slot->hw_desc = (void *) &hw_desc[i * db_sz];
+		dma_async_tx_descriptor_init(&slot->async_tx, chan);
+		slot->async_tx.tx_submit = ppc440spe_adma_tx_submit;
+		INIT_LIST_HEAD(&slot->chain_node);
+		INIT_LIST_HEAD(&slot->slot_node);
+		INIT_LIST_HEAD(&slot->group_list);
+		slot->phys = ppc440spe_chan->device->dma_desc_pool + i * db_sz;
+		slot->idx = i;
+
+		spin_lock_bh(&ppc440spe_chan->lock);
+		ppc440spe_chan->slots_allocated++;
+		list_add_tail(&slot->slot_node, &ppc440spe_chan->all_slots);
+		spin_unlock_bh(&ppc440spe_chan->lock);
+	}
+
+	if (i && !ppc440spe_chan->last_used) {
+		ppc440spe_chan->last_used =
+			list_entry(ppc440spe_chan->all_slots.next,
+				struct ppc440spe_adma_desc_slot,
+				slot_node);
+	}
+
+	dev_dbg(ppc440spe_chan->device->common.dev,
+		"ppc440spe adma%d: allocated %d descriptor slots\n",
+		ppc440spe_chan->device->id, i);
+
+	/* initialize the channel and the chain with a null operation */
+	if (init) {
+		switch (ppc440spe_chan->device->id) {
+		case PPC440SPE_DMA0_ID:
+		case PPC440SPE_DMA1_ID:
+			ppc440spe_chan->hw_chain_inited = 0;
+			/* Use WXOR for self-testing */
+			if (!ppc440spe_r6_tchan)
+				ppc440spe_r6_tchan = ppc440spe_chan;
+			break;
+		case PPC440SPE_XOR_ID:
+			ppc440spe_chan_start_null_xor(ppc440spe_chan);
+			break;
+		default:
+			BUG();
+		}
+		ppc440spe_chan->needs_unmap = 1;
+	}
+
+	return (i > 0) ? i : -ENOMEM;
+}
+
+/**
+ * ppc440spe_rxor_set_region - set the RXOR region mask in a XOR CB operand
+ */
+static void ppc440spe_rxor_set_region(struct ppc440spe_adma_desc_slot *desc,
+	u8 xor_arg_no, u32 mask)
+{
+	struct xor_cb *xcb = desc->hw_desc;
+
+	xcb->ops[xor_arg_no].h |= mask;
+}
+
+/**
+ * ppc440spe_rxor_set_src - set the source address in a XOR CB operand
+ */
+static void ppc440spe_rxor_set_src(struct ppc440spe_adma_desc_slot *desc,
+	u8 xor_arg_no, dma_addr_t addr)
+{
+	struct xor_cb *xcb = desc->hw_desc;
+
+	xcb->ops[xor_arg_no].h |= DMA_CUED_XOR_BASE;
+	xcb->ops[xor_arg_no].l = addr;
+}
+
+/**
+ * ppc440spe_rxor_set_mult - set the multiplication coefficient in a XOR CB operand
+ */
+static void ppc440spe_rxor_set_mult(struct ppc440spe_adma_desc_slot *desc,
+	u8 xor_arg_no, u8 idx, u8 mult)
+{
+	struct xor_cb *xcb = desc->hw_desc;
+
+	xcb->ops[xor_arg_no].h |= mult << (DMA_CUED_MULT1_OFF + idx * 8);
+}
+
+/**
+ * ppc440spe_adma_check_threshold - append CDBs to the h/w chain if the
+ *	threshold has been reached
+ */
+static void ppc440spe_adma_check_threshold(struct ppc440spe_adma_chan *chan)
+{
+	dev_dbg(chan->device->common.dev, "ppc440spe adma%d: pending: %d\n",
+		chan->device->id, chan->pending);
+
+	if (chan->pending >= PPC440SPE_ADMA_THRESHOLD) {
+		chan->pending = 0;
+		ppc440spe_chan_append(chan);
+	}
+}
+
+/**
+ * ppc440spe_adma_tx_submit - submit new descriptor group to the channel
+ *	(the descriptors are not necessarily appended to the h/w chains
+ *	right now)
+ */
+static dma_cookie_t ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct ppc440spe_adma_desc_slot *sw_desc;
+	struct ppc440spe_adma_chan *chan = to_ppc440spe_adma_chan(tx->chan);
+	struct ppc440spe_adma_desc_slot *group_start, *old_chain_tail;
+	int slot_cnt;
+	int slots_per_op;
+	dma_cookie_t cookie;
+
+	sw_desc = tx_to_ppc440spe_adma_slot(tx);
+
+	group_start = sw_desc->group_head;
+	slot_cnt = group_start->slot_cnt;
+	slots_per_op = group_start->slots_per_op;
+
+	spin_lock_bh(&chan->lock);
+	cookie = dma_cookie_assign(tx);
+
+	if (unlikely(list_empty(&chan->chain))) {
+		/* first peer */
+		list_splice_init(&sw_desc->group_list, &chan->chain);
+		chan_first_cdb[chan->device->id] = group_start;
+	} else {
+		/* not the first peer; bind the CDBs to the chain */
+		old_chain_tail = list_entry(chan->chain.prev,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		list_splice_init(&sw_desc->group_list,
+		    &old_chain_tail->chain_node);
+		/* fix up the hardware chain */
+		ppc440spe_desc_set_link(chan, old_chain_tail, group_start);
+	}
+
+	/* increment the pending count by the number of operations */
+	chan->pending += slot_cnt / slots_per_op;
+	ppc440spe_adma_check_threshold(chan);
+	spin_unlock_bh(&chan->lock);
+
+	dev_dbg(chan->device->common.dev,
+		"ppc440spe adma%d: %s cookie: %d slot: %d tx %p\n",
+		chan->device->id, __func__,
+		sw_desc->async_tx.cookie, sw_desc->idx, sw_desc);
+
+	return cookie;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_interrupt - prepare CDB for a pseudo DMA operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_interrupt(
+		struct dma_chan *chan, unsigned long flags)
+{
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+	struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
+	int slot_cnt, slots_per_op;
+
+	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+	dev_dbg(ppc440spe_chan->device->common.dev,
+		"ppc440spe adma%d: %s\n", ppc440spe_chan->device->id,
+		__func__);
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+	slot_cnt = slots_per_op = 1;
+	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
+			slots_per_op);
+	if (sw_desc) {
+		group_start = sw_desc->group_head;
+		ppc440spe_desc_init_interrupt(group_start, ppc440spe_chan);
+		group_start->unmap_len = 0;
+		sw_desc->async_tx.flags = flags;
+	}
+	spin_unlock_bh(&ppc440spe_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_memcpy - prepare CDB for a MEMCPY operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memcpy(
+		struct dma_chan *chan, dma_addr_t dma_dest,
+		dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+	struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
+	int slot_cnt, slots_per_op;
+
+	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+	if (unlikely(!len))
+		return NULL;
+
+	BUG_ON(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT);
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+
+	dev_dbg(ppc440spe_chan->device->common.dev,
+		"ppc440spe adma%d: %s len: %u int_en %d\n",
+		ppc440spe_chan->device->id, __func__, len,
+		flags & DMA_PREP_INTERRUPT ? 1 : 0);
+	slot_cnt = slots_per_op = 1;
+	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
+		slots_per_op);
+	if (sw_desc) {
+		group_start = sw_desc->group_head;
+		ppc440spe_desc_init_memcpy(group_start, flags);
+		ppc440spe_adma_set_dest(group_start, dma_dest, 0);
+		ppc440spe_adma_memcpy_xor_set_src(group_start, dma_src, 0);
+		ppc440spe_desc_set_byte_count(group_start, ppc440spe_chan, len);
+		sw_desc->unmap_len = len;
+		sw_desc->async_tx.flags = flags;
+	}
+	spin_unlock_bh(&ppc440spe_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_xor - prepare CDB for a XOR operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor(
+		struct dma_chan *chan, dma_addr_t dma_dest,
+		dma_addr_t *dma_src, u32 src_cnt, size_t len,
+		unsigned long flags)
+{
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+	struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
+	int slot_cnt, slots_per_op;
+
+	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+	ADMA_LL_DBG(prep_dma_xor_dbg(ppc440spe_chan->device->id,
+				     dma_dest, dma_src, src_cnt));
+	if (unlikely(!len))
+		return NULL;
+	BUG_ON(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT);
+
+	dev_dbg(ppc440spe_chan->device->common.dev,
+		"ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n",
+		ppc440spe_chan->device->id, __func__, src_cnt, len,
+		flags & DMA_PREP_INTERRUPT ? 1 : 0);
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+	slot_cnt = ppc440spe_chan_xor_slot_count(len, src_cnt, &slots_per_op);
+	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
+			slots_per_op);
+	if (sw_desc) {
+		group_start = sw_desc->group_head;
+		ppc440spe_desc_init_xor(group_start, src_cnt, flags);
+		ppc440spe_adma_set_dest(group_start, dma_dest, 0);
+		while (src_cnt--)
+			ppc440spe_adma_memcpy_xor_set_src(group_start,
+				dma_src[src_cnt], src_cnt);
+		ppc440spe_desc_set_byte_count(group_start, ppc440spe_chan, len);
+		sw_desc->unmap_len = len;
+		sw_desc->async_tx.flags = flags;
+	}
+	spin_unlock_bh(&ppc440spe_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static inline void
+ppc440spe_desc_set_xor_src_cnt(struct ppc440spe_adma_desc_slot *desc,
+				int src_cnt);
+static void ppc440spe_init_rxor_cursor(struct ppc440spe_rxor *cursor);
+
+/**
+ * ppc440spe_adma_init_dma2rxor_slot - initialize the CDB(s) of a DMA2 RXOR descriptor
+ */
+static void ppc440spe_adma_init_dma2rxor_slot(
+		struct ppc440spe_adma_desc_slot *desc,
+		dma_addr_t *src, int src_cnt)
+{
+	int i;
+
+	/* initialize CDB */
+	for (i = 0; i < src_cnt; i++) {
+		ppc440spe_adma_dma2rxor_prep_src(desc, &desc->rxor_cursor, i,
+						 desc->src_cnt, (u32)src[i]);
+	}
+}
+
+/**
+ * ppc440spe_dma01_prep_mult - prepare a multiply CDB chain
+ * for a Q operation where the destination is also the source
+ */
+static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_mult(
+		struct ppc440spe_adma_chan *ppc440spe_chan,
+		dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt,
+		const unsigned char *scf, size_t len, unsigned long flags)
+{
+	struct ppc440spe_adma_desc_slot *sw_desc = NULL;
+	unsigned long op = 0;
+	int slot_cnt;
+
+	set_bit(PPC440SPE_DESC_WXOR, &op);
+	slot_cnt = 2;
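+	/* Two chained CDBs: a MULTICAST CDB that zeroes the destination and
+	 * copies the source data to the scratch q page, followed by a
+	 * MV_SG1_SG2 CDB that multiplies the q page contents by scf[0] and
+	 * stores the result in the real destination.
+	 */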
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+
+	/* use WXOR, each descriptor occupies one slot */
+	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
+	if (sw_desc) {
+		struct ppc440spe_adma_chan *chan;
+		struct ppc440spe_adma_desc_slot *iter;
+		struct dma_cdb *hw_desc;
+
+		chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+		set_bits(op, &sw_desc->flags);
+		sw_desc->src_cnt = src_cnt;
+		sw_desc->dst_cnt = dst_cnt;
+		/* First descriptor, zero data in the destination and copy it
+		 * to q page using MULTICAST transfer.
+		 */
+		iter = list_first_entry(&sw_desc->group_list,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+		/* set 'next' pointer */
+		iter->hw_next = list_entry(iter->chain_node.next,
+					   struct ppc440spe_adma_desc_slot,
+					   chain_node);
+		clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+		hw_desc = iter->hw_desc;
+		hw_desc->opc = DMA_CDB_OPC_MULTICAST;
+
+		ppc440spe_desc_set_dest_addr(iter, chan,
+					     DMA_CUED_XOR_BASE, dst[0], 0);
+		ppc440spe_desc_set_dest_addr(iter, chan, 0, dst[1], 1);
+		ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+					    src[0]);
+		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+		iter->unmap_len = len;
+
+		/*
+		 * Second descriptor, multiply data from the q page
+		 * and store the result in real destination.
+		 */
+		iter = list_first_entry(&iter->chain_node,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+		iter->hw_next = NULL;
+		if (flags & DMA_PREP_INTERRUPT)
+			set_bit(PPC440SPE_DESC_INT, &iter->flags);
+		else
+			clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+
+		hw_desc = iter->hw_desc;
+		hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+		ppc440spe_desc_set_src_addr(iter, chan, 0,
+					    DMA_CUED_XOR_HB, dst[1]);
+		ppc440spe_desc_set_dest_addr(iter, chan,
+					     DMA_CUED_XOR_BASE, dst[0], 0);
+
+		ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
+					    DMA_CDB_SG_DST1, scf[0]);
+		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+		iter->unmap_len = len;
+		sw_desc->async_tx.flags = flags;
+	}
+
+	spin_unlock_bh(&ppc440spe_chan->lock);
+
+	return sw_desc;
+}
+
+/**
+ * ppc440spe_dma01_prep_sum_product - prepare a CDB chain for a
+ * Dx = A*(P+Pxy) + B*(Q+Qxy) operation where the destination is also
+ * the source.
+ */
+static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_sum_product(
+		struct ppc440spe_adma_chan *ppc440spe_chan,
+		dma_addr_t *dst, dma_addr_t *src, int src_cnt,
+		const unsigned char *scf, size_t len, unsigned long flags)
+{
+	struct ppc440spe_adma_desc_slot *sw_desc = NULL;
+	unsigned long op = 0;
+	int slot_cnt;
+
+	set_bit(PPC440SPE_DESC_WXOR, &op);
+	slot_cnt = 3;
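+	/* Three chained CDBs: multicast src[1] into the q page while zeroing
+	 * the destination, multiply the q page contents by scf[1] into the
+	 * destination, then multiply src[0] by scf[0] and XOR it into the
+	 * destination.
+	 */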
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+
+	/* WXOR, each descriptor occupies one slot */
+	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
+	if (sw_desc) {
+		struct ppc440spe_adma_chan *chan;
+		struct ppc440spe_adma_desc_slot *iter;
+		struct dma_cdb *hw_desc;
+
+		chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+		set_bits(op, &sw_desc->flags);
+		sw_desc->src_cnt = src_cnt;
+		sw_desc->dst_cnt = 1;
+		/* 1st descriptor, src[1] data to q page and zero destination */
+		iter = list_first_entry(&sw_desc->group_list,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+		iter->hw_next = list_entry(iter->chain_node.next,
+					   struct ppc440spe_adma_desc_slot,
+					   chain_node);
+		clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+		hw_desc = iter->hw_desc;
+		hw_desc->opc = DMA_CDB_OPC_MULTICAST;
+
+		ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
+					     *dst, 0);
+		ppc440spe_desc_set_dest_addr(iter, chan, 0,
+					     ppc440spe_chan->qdest, 1);
+		ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+					    src[1]);
+		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+		iter->unmap_len = len;
+
+		/* 2nd descriptor, multiply src[1] data and store the
+		 * result in destination */
+		iter = list_first_entry(&iter->chain_node,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+		/* set 'next' pointer */
+		iter->hw_next = list_entry(iter->chain_node.next,
+					   struct ppc440spe_adma_desc_slot,
+					   chain_node);
+		if (flags & DMA_PREP_INTERRUPT)
+			set_bit(PPC440SPE_DESC_INT, &iter->flags);
+		else
+			clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+
+		hw_desc = iter->hw_desc;
+		hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+		ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+					    ppc440spe_chan->qdest);
+		ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
+					     *dst, 0);
+		ppc440spe_desc_set_src_mult(iter, chan,	DMA_CUED_MULT1_OFF,
+					    DMA_CDB_SG_DST1, scf[1]);
+		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+		iter->unmap_len = len;
+
+		/*
+		 * 3rd descriptor, multiply src[0] data and xor it
+		 * with destination
+		 */
+		iter = list_first_entry(&iter->chain_node,
+					struct ppc440spe_adma_desc_slot,
+					chain_node);
+		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+		iter->hw_next = NULL;
+		if (flags & DMA_PREP_INTERRUPT)
+			set_bit(PPC440SPE_DESC_INT, &iter->flags);
+		else
+			clear_bit(PPC440SPE_DESC_INT, &iter->flags);
+
+		hw_desc = iter->hw_desc;
+		hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+		ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB,
+					    src[0]);
+		ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE,
+					     *dst, 0);
+		ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
+					    DMA_CDB_SG_DST1, scf[0]);
+		ppc440spe_desc_set_byte_count(iter, ppc440spe_chan, len);
+		iter->unmap_len = len;
+		sw_desc->async_tx.flags = flags;
+	}
+
+	spin_unlock_bh(&ppc440spe_chan->lock);
+
+	return sw_desc;
+}
+
+static struct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_pq(
+		struct ppc440spe_adma_chan *ppc440spe_chan,
+		dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt,
+		const unsigned char *scf, size_t len, unsigned long flags)
+{
+	int slot_cnt;
+	struct ppc440spe_adma_desc_slot *sw_desc = NULL, *iter;
+	unsigned long op = 0;
+	unsigned char mult = 1;
+
+	pr_debug("%s: dst_cnt %d, src_cnt %d, len %zu\n",
+		 __func__, dst_cnt, src_cnt, len);
+	/* select WXOR/RXOR operations depending on the
+	 * source addresses of the operands and the number
+	 * of destinations (RXOR supports only Q-parity calculations)
+	 */
+	set_bit(PPC440SPE_DESC_WXOR, &op);
+	if (!test_and_set_bit(PPC440SPE_RXOR_RUN, &ppc440spe_rxor_state)) {
+		/* no RXOR operation is currently active;
+		 * do RXOR if:
+		 * - there is more than one source,
+		 * - len is aligned on a 512-byte boundary,
+		 * - the source addresses fit one of the 4 possible regions.
+		 */
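+		/* E.g. with contiguously laid out sources, src[1] == src[0] + len
+		 * gives RXOR R1 R2; src[2] at src[1] + len, + 2*len or + 3*len
+		 * extends it to R1-2-3, R1-2-4 or R1-2-5 respectively.
+		 */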
+		if (src_cnt > 1 &&
+		    !(len & MQ0_CF2H_RXOR_BS_MASK) &&
+		    (src[0] + len) == src[1]) {
+			/* may do RXOR R1 R2 */
+			set_bit(PPC440SPE_DESC_RXOR, &op);
+			if (src_cnt != 2) {
+				/* may try to enhance region of RXOR */
+				if ((src[1] + len) == src[2]) {
+					/* do RXOR R1 R2 R3 */
+					set_bit(PPC440SPE_DESC_RXOR123,
+						&op);
+				} else if ((src[1] + len * 2) == src[2]) {
+					/* do RXOR R1 R2 R4 */
+					set_bit(PPC440SPE_DESC_RXOR124, &op);
+				} else if ((src[1] + len * 3) == src[2]) {
+					/* do RXOR R1 R2 R5 */
+					set_bit(PPC440SPE_DESC_RXOR125,
+						&op);
+				} else {
+					/* do RXOR R1 R2 */
+					set_bit(PPC440SPE_DESC_RXOR12,
+						&op);
+				}
+			} else {
+				/* do RXOR R1 R2 */
+				set_bit(PPC440SPE_DESC_RXOR12, &op);
+			}
+		}
+
+		if (!test_bit(PPC440SPE_DESC_RXOR, &op)) {
+			/* can not do this operation with RXOR */
+			clear_bit(PPC440SPE_RXOR_RUN,
+				&ppc440spe_rxor_state);
+		} else {
+			/* can do; set block size right now */
+			ppc440spe_desc_set_rxor_block_size(len);
+		}
+	}
+
+	/* Number of necessary slots depends on operation type selected */
+	if (!test_bit(PPC440SPE_DESC_RXOR, &op)) {
+		/* This is a WXOR-only chain. Need descriptors for each
+		 * source to GF-XOR them with WXOR, and descriptors
+		 * for each destination to zero them with WXOR
+		 */
+		slot_cnt = src_cnt;
+
+		if (flags & DMA_PREP_ZERO_P) {
+			slot_cnt++;
+			set_bit(PPC440SPE_ZERO_P, &op);
+		}
+		if (flags & DMA_PREP_ZERO_Q) {
+			slot_cnt++;
+			set_bit(PPC440SPE_ZERO_Q, &op);
+		}
+	} else {
+		/* Need 1 or 2 descriptors for the RXOR operation, and
+		 * (src_cnt - (2 or 3)) descriptors for WXOR of the
+		 * remaining sources (if any)
+		 */
+		slot_cnt = dst_cnt;
+
+		if (flags & DMA_PREP_ZERO_P)
+			set_bit(PPC440SPE_ZERO_P, &op);
+		if (flags & DMA_PREP_ZERO_Q)
+			set_bit(PPC440SPE_ZERO_Q, &op);
+
+		if (test_bit(PPC440SPE_DESC_RXOR12, &op))
+			slot_cnt += src_cnt - 2;
+		else
+			slot_cnt += src_cnt - 3;
+
+		/* Thus we have either an RXOR-only chain or
+		 * a mixed RXOR/WXOR chain
+		 */
+		if (slot_cnt == dst_cnt)
+			/* RXOR only chain */
+			clear_bit(PPC440SPE_DESC_WXOR, &op);
+	}
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+	/* for both RXOR/WXOR each descriptor occupies one slot */
+	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
+	if (sw_desc) {
+		ppc440spe_desc_init_dma01pq(sw_desc, dst_cnt, src_cnt,
+				flags, op);
+
+		/* setup dst/src/mult */
+		pr_debug("%s: set dst descriptor 0, 1: 0x%016llx, 0x%016llx\n",
+			 __func__, dst[0], dst[1]);
+		ppc440spe_adma_pq_set_dest(sw_desc, dst, flags);
+		while (src_cnt--) {
+			ppc440spe_adma_pq_set_src(sw_desc, src[src_cnt],
+						  src_cnt);
+
+			/* NOTE: "Multi = 0 is equivalent to = 1" as stated
+			 * in 440SPSPe_RAID6_Addendum_UM_1_17.pdf does not
+			 * hold for RXOR with DMA0/1! Instead, multi=0 leads
+			 * to zeroing the source data after RXOR.
+			 * So for the P case set mult=1 explicitly.
+			 */
+			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+				mult = scf[src_cnt];
+			ppc440spe_adma_pq_set_src_mult(sw_desc,
+				mult, src_cnt,  dst_cnt - 1);
+		}
+
+		/* Set up the byte count for each slot just allocated */
+		sw_desc->async_tx.flags = flags;
+		list_for_each_entry(iter, &sw_desc->group_list,
+				chain_node) {
+			ppc440spe_desc_set_byte_count(iter,
+				ppc440spe_chan, len);
+			iter->unmap_len = len;
+		}
+	}
+	spin_unlock_bh(&ppc440spe_chan->lock);
+
+	return sw_desc;
+}
+
+static struct ppc440spe_adma_desc_slot *ppc440spe_dma2_prep_pq(
+		struct ppc440spe_adma_chan *ppc440spe_chan,
+		dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt,
+		const unsigned char *scf, size_t len, unsigned long flags)
+{
+	int slot_cnt, descs_per_op;
+	struct ppc440spe_adma_desc_slot *sw_desc = NULL, *iter;
+	unsigned long op = 0;
+	unsigned char mult = 1;
+
+	BUG_ON(!dst_cnt);
+	/*pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n",
+		 __func__, dst_cnt, src_cnt, len);*/
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+	descs_per_op = ppc440spe_dma2_pq_slot_count(src, src_cnt, len);
+	if (descs_per_op < 0) {
+		spin_unlock_bh(&ppc440spe_chan->lock);
+		return NULL;
+	}
+
+	/* depending on number of sources we have 1 or 2 RXOR chains */
+	slot_cnt = descs_per_op * dst_cnt;
+
+	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1);
+	if (sw_desc) {
+		op = slot_cnt;
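+		/* 'op' is reused as a countdown so that only the last
+		 * descriptor of the group is initialized with the caller's
+		 * flags (see the --op ternary below).
+		 */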
+		sw_desc->async_tx.flags = flags;
+		list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+			ppc440spe_desc_init_dma2pq(iter, dst_cnt, src_cnt,
+				--op ? 0 : flags);
+			ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
+				len);
+			iter->unmap_len = len;
+
+			ppc440spe_init_rxor_cursor(&(iter->rxor_cursor));
+			iter->rxor_cursor.len = len;
+			iter->descs_per_op = descs_per_op;
+		}
+		op = 0;
+		list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+			op++;
+			if (op % descs_per_op == 0)
+				ppc440spe_adma_init_dma2rxor_slot(iter, src,
+								  src_cnt);
+			if (likely(!list_is_last(&iter->chain_node,
+						 &sw_desc->group_list))) {
+				/* set 'next' pointer */
+				iter->hw_next =
+					list_entry(iter->chain_node.next,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+				ppc440spe_xor_set_link(iter, iter->hw_next);
+			} else {
+				/* this is the last descriptor. */
+				iter->hw_next = NULL;
+			}
+		}
+
+		/* fixup head descriptor */
+		sw_desc->dst_cnt = dst_cnt;
+		if (flags & DMA_PREP_ZERO_P)
+			set_bit(PPC440SPE_ZERO_P, &sw_desc->flags);
+		if (flags & DMA_PREP_ZERO_Q)
+			set_bit(PPC440SPE_ZERO_Q, &sw_desc->flags);
+
+		/* setup dst/src/mult */
+		ppc440spe_adma_pq_set_dest(sw_desc, dst, flags);
+
+		while (src_cnt--) {
+			/* handle descriptors (if dst_cnt == 2) inside
+			 * the ppc440spe_adma_pq_set_srcxxx() functions
+			 */
+			ppc440spe_adma_pq_set_src(sw_desc, src[src_cnt],
+						  src_cnt);
+			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+				mult = scf[src_cnt];
+			ppc440spe_adma_pq_set_src_mult(sw_desc,
+					mult, src_cnt, dst_cnt - 1);
+		}
+	}
+	spin_unlock_bh(&ppc440spe_chan->lock);
+	ppc440spe_desc_set_rxor_block_size(len);
+	return sw_desc;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_pq - prepare CDB (group) for a GF-XOR operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pq(
+		struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+		unsigned int src_cnt, const unsigned char *scf,
+		size_t len, unsigned long flags)
+{
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+	struct ppc440spe_adma_desc_slot *sw_desc = NULL;
+	int dst_cnt = 0;
+
+	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+	ADMA_LL_DBG(prep_dma_pq_dbg(ppc440spe_chan->device->id,
+				    dst, src, src_cnt));
+	BUG_ON(!len);
+	BUG_ON(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT);
+	BUG_ON(!src_cnt);
+
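+	/* Handle the special cases where the destination is also one of the
+	 * sources (in-place Q updates, e.g. from the RAID-6 recovery path);
+	 * these are built with dedicated multicast/multiply chains instead
+	 * of the regular WXOR/RXOR chains.
+	 */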
+	if (src_cnt == 1 && dst[1] == src[0]) {
+		dma_addr_t dest[2];
+
+		/* dst[1] is real destination (Q) */
+		dest[0] = dst[1];
+		/* this is the page to multicast source data to */
+		dest[1] = ppc440spe_chan->qdest;
+		sw_desc = ppc440spe_dma01_prep_mult(ppc440spe_chan,
+				dest, 2, src, src_cnt, scf, len, flags);
+		return sw_desc ? &sw_desc->async_tx : NULL;
+	}
+
+	if (src_cnt == 2 && dst[1] == src[1]) {
+		sw_desc = ppc440spe_dma01_prep_sum_product(ppc440spe_chan,
+					&dst[1], src, 2, scf, len, flags);
+		return sw_desc ? &sw_desc->async_tx : NULL;
+	}
+
+	if (!(flags & DMA_PREP_PQ_DISABLE_P)) {
+		BUG_ON(!dst[0]);
+		dst_cnt++;
+		flags |= DMA_PREP_ZERO_P;
+	}
+
+	if (!(flags & DMA_PREP_PQ_DISABLE_Q)) {
+		BUG_ON(!dst[1]);
+		dst_cnt++;
+		flags |= DMA_PREP_ZERO_Q;
+	}
+
+	BUG_ON(!dst_cnt);
+
+	dev_dbg(ppc440spe_chan->device->common.dev,
+		"ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n",
+		ppc440spe_chan->device->id, __func__, src_cnt, len,
+		flags & DMA_PREP_INTERRUPT ? 1 : 0);
+
+	switch (ppc440spe_chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		sw_desc = ppc440spe_dma01_prep_pq(ppc440spe_chan,
+				dst, dst_cnt, src, src_cnt, scf,
+				len, flags);
+		break;
+
+	case PPC440SPE_XOR_ID:
+		sw_desc = ppc440spe_dma2_prep_pq(ppc440spe_chan,
+				dst, dst_cnt, src, src_cnt, scf,
+				len, flags);
+		break;
+	}
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_pqzero_sum - prepare CDB group for
+ * a PQ_ZERO_SUM operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pqzero_sum(
+		struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+		unsigned int src_cnt, const unsigned char *scf, size_t len,
+		enum sum_check_flags *pqres, unsigned long flags)
+{
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+	struct ppc440spe_adma_desc_slot *sw_desc, *iter;
+	dma_addr_t pdest, qdest;
+	int slot_cnt, slots_per_op, idst, dst_cnt;
+
+	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+
+	if (flags & DMA_PREP_PQ_DISABLE_P)
+		pdest = 0;
+	else
+		pdest = pq[0];
+
+	if (flags & DMA_PREP_PQ_DISABLE_Q)
+		qdest = 0;
+	else
+		qdest = pq[1];
+
+	ADMA_LL_DBG(prep_dma_pqzero_sum_dbg(ppc440spe_chan->device->id,
+					    src, src_cnt, scf));
+
+	/* Always use WXOR for P/Q calculations (two destinations).
+	 * Need 1 or 2 extra slots to verify results are zero.
+	 */
+	idst = dst_cnt = (pdest && qdest) ? 2 : 1;
+
+	/* One additional slot per destination to clone P/Q
+	 * before calculation (we have to preserve destinations).
+	 */
+	slot_cnt = src_cnt + dst_cnt * 2;
+	slots_per_op = 1;
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+	sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt,
+					     slots_per_op);
+	if (sw_desc) {
+		ppc440spe_desc_init_dma01pqzero_sum(sw_desc, dst_cnt, src_cnt);
+
+		/* Setup byte count for each slot just allocated */
+		sw_desc->async_tx.flags = flags;
+		list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+			ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
+						      len);
+			iter->unmap_len = len;
+		}
+
+		if (pdest) {
+			struct dma_cdb *hw_desc;
+			struct ppc440spe_adma_chan *chan;
+
+			iter = sw_desc->group_head;
+			chan = to_ppc440spe_adma_chan(iter->async_tx.chan);
+			memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+			iter->hw_next = list_entry(iter->chain_node.next,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+			hw_desc = iter->hw_desc;
+			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+			iter->src_cnt = 0;
+			iter->dst_cnt = 0;
+			ppc440spe_desc_set_dest_addr(iter, chan, 0,
+						     ppc440spe_chan->pdest, 0);
+			ppc440spe_desc_set_src_addr(iter, chan, 0, 0, pdest);
+			ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
+						      len);
+			iter->unmap_len = 0;
+			/* override pdest to preserve original P */
+			pdest = ppc440spe_chan->pdest;
+		}
+		if (qdest) {
+			struct dma_cdb *hw_desc;
+			struct ppc440spe_adma_chan *chan;
+
+			iter = list_first_entry(&sw_desc->group_list,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+			chan = to_ppc440spe_adma_chan(iter->async_tx.chan);
+
+			if (pdest) {
+				iter = list_entry(iter->chain_node.next,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+			}
+
+			memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
+			iter->hw_next = list_entry(iter->chain_node.next,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+			hw_desc = iter->hw_desc;
+			hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
+			iter->src_cnt = 0;
+			iter->dst_cnt = 0;
+			ppc440spe_desc_set_dest_addr(iter, chan, 0,
+						     ppc440spe_chan->qdest, 0);
+			ppc440spe_desc_set_src_addr(iter, chan, 0, 0, qdest);
+			ppc440spe_desc_set_byte_count(iter, ppc440spe_chan,
+						      len);
+			iter->unmap_len = 0;
+			/* override qdest to preserve original Q */
+			qdest = ppc440spe_chan->qdest;
+		}
+
+		/* Setup destinations for P/Q ops */
+		ppc440spe_adma_pqzero_sum_set_dest(sw_desc, pdest, qdest);
+
+		/* Setup zero QWORDs into DCHECK CDBs */
+		idst = dst_cnt;
+		list_for_each_entry_reverse(iter, &sw_desc->group_list,
+					    chain_node) {
+			/*
+			 * The last CDB corresponds to the Q-parity check,
+			 * the one before it corresponds to the P-parity
+			 * check
+			 */
+			if (idst == DMA_DEST_MAX_NUM) {
+				if (idst == dst_cnt) {
+					set_bit(PPC440SPE_DESC_QCHECK,
+						&iter->flags);
+				} else {
+					set_bit(PPC440SPE_DESC_PCHECK,
+						&iter->flags);
+				}
+			} else {
+				if (qdest) {
+					set_bit(PPC440SPE_DESC_QCHECK,
+						&iter->flags);
+				} else {
+					set_bit(PPC440SPE_DESC_PCHECK,
+						&iter->flags);
+				}
+			}
+			iter->xor_check_result = pqres;
+
+			/*
+			 * set it to zero; if the check fails the result
+			 * will be updated
+			 */
+			*iter->xor_check_result = 0;
+			ppc440spe_desc_set_dcheck(iter, ppc440spe_chan,
+				ppc440spe_qword);
+
+			if (!(--dst_cnt))
+				break;
+		}
+
+		/* Setup sources and mults for P/Q ops */
+		list_for_each_entry_continue_reverse(iter, &sw_desc->group_list,
+						     chain_node) {
+			struct ppc440spe_adma_chan *chan;
+			u32 mult_dst;
+
+			chan = to_ppc440spe_adma_chan(iter->async_tx.chan);
+			ppc440spe_desc_set_src_addr(iter, chan, 0,
+						    DMA_CUED_XOR_HB,
+						    src[src_cnt - 1]);
+			if (qdest) {
+				mult_dst = (dst_cnt - 1) ? DMA_CDB_SG_DST2 :
+							   DMA_CDB_SG_DST1;
+				ppc440spe_desc_set_src_mult(iter, chan,
+							    DMA_CUED_MULT1_OFF,
+							    mult_dst,
+							    scf[src_cnt - 1]);
+			}
+			if (!(--src_cnt))
+				break;
+		}
+	}
+	spin_unlock_bh(&ppc440spe_chan->lock);
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+/**
+ * ppc440spe_adma_prep_dma_xor_zero_sum - prepare CDB group for
+ * XOR ZERO_SUM operation
+ */
+static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor_zero_sum(
+		struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt,
+		size_t len, enum sum_check_flags *result, unsigned long flags)
+{
+	struct dma_async_tx_descriptor *tx;
+	dma_addr_t pq[2];
+
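+	/* An XOR check is implemented as a P-only PQ zero-sum: src[0] serves
+	 * as the P parity to validate against the remaining sources.
+	 */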
+	/* validate P, disable Q */
+	pq[0] = src[0];
+	pq[1] = 0;
+	flags |= DMA_PREP_PQ_DISABLE_Q;
+
+	tx = ppc440spe_adma_prep_dma_pqzero_sum(chan, pq, &src[1],
+						src_cnt - 1, 0, len,
+						result, flags);
+	return tx;
+}
+
+/**
+ * ppc440spe_adma_set_dest - set destination address into descriptor
+ */
+static void ppc440spe_adma_set_dest(struct ppc440spe_adma_desc_slot *sw_desc,
+		dma_addr_t addr, int index)
+{
+	struct ppc440spe_adma_chan *chan;
+
+	BUG_ON(index >= sw_desc->dst_cnt);
+
+	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		/* to do: support transfers lengths >
+		 * PPC440SPE_ADMA_DMA/XOR_MAX_BYTE_COUNT
+		 */
+		ppc440spe_desc_set_dest_addr(sw_desc->group_head,
+			chan, 0, addr, index);
+		break;
+	case PPC440SPE_XOR_ID:
+		sw_desc = ppc440spe_get_group_entry(sw_desc, index);
+		ppc440spe_desc_set_dest_addr(sw_desc,
+			chan, 0, addr, index);
+		break;
+	}
+}
+
+static void ppc440spe_adma_pq_zero_op(struct ppc440spe_adma_desc_slot *iter,
+		struct ppc440spe_adma_chan *chan, dma_addr_t addr)
+{
+	/* To clear the destinations update the descriptor
+	 * (P or Q, depending on the index) as follows:
+	 * addr is the destination (0 corresponds to SG2):
+	 */
+	ppc440spe_desc_set_dest_addr(iter, chan, DMA_CUED_XOR_BASE, addr, 0);
+
+	/* ... and the addr is source: */
+	ppc440spe_desc_set_src_addr(iter, chan, 0, DMA_CUED_XOR_HB, addr);
+
+	/* addr is always SG2 then the mult is always DST1 */
+	ppc440spe_desc_set_src_mult(iter, chan, DMA_CUED_MULT1_OFF,
+				    DMA_CDB_SG_DST1, 1);
+}
+
+/**
+ * ppc440spe_adma_pq_set_dest - set destination address into descriptor
+ * for the PQXOR operation
+ */
+static void ppc440spe_adma_pq_set_dest(struct ppc440spe_adma_desc_slot *sw_desc,
+		dma_addr_t *addrs, unsigned long flags)
+{
+	struct ppc440spe_adma_desc_slot *iter;
+	struct ppc440spe_adma_chan *chan;
+	dma_addr_t paddr, qaddr;
+	dma_addr_t addr = 0, ppath, qpath;
+	int index = 0, i;
+
+	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+	if (flags & DMA_PREP_PQ_DISABLE_P)
+		paddr = 0;
+	else
+		paddr = addrs[0];
+
+	if (flags & DMA_PREP_PQ_DISABLE_Q)
+		qaddr = 0;
+	else
+		qaddr = addrs[1];
+
+	if (!paddr || !qaddr)
+		addr = paddr ? paddr : qaddr;
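+	/* a non-zero 'addr' means only one of P/Q is enabled, so each slot
+	 * gets a single destination; otherwise both paddr and qaddr are set.
+	 */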
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		/* walk through the WXOR source list and set P/Q-destinations
+		 * for each slot:
+		 */
+		if (!test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) {
+			/* This is a WXOR-only chain; it may have 1 or 2 zeroing descs */
+			if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
+				index++;
+			if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
+				index++;
+
+			iter = ppc440spe_get_group_entry(sw_desc, index);
+			if (addr) {
+				/* one destination */
+				list_for_each_entry_from(iter,
+					&sw_desc->group_list, chain_node)
+					ppc440spe_desc_set_dest_addr(iter, chan,
+						DMA_CUED_XOR_BASE, addr, 0);
+			} else {
+				/* two destinations */
+				list_for_each_entry_from(iter,
+					&sw_desc->group_list, chain_node) {
+					ppc440spe_desc_set_dest_addr(iter, chan,
+						DMA_CUED_XOR_BASE, paddr, 0);
+					ppc440spe_desc_set_dest_addr(iter, chan,
+						DMA_CUED_XOR_BASE, qaddr, 1);
+				}
+			}
+
+			if (index) {
+				/* To clear the destinations update the
+				 * descriptor (1st, 2nd, or both, depending
+				 * on the flags)
+				 */
+				index = 0;
+				if (test_bit(PPC440SPE_ZERO_P,
+						&sw_desc->flags)) {
+					iter = ppc440spe_get_group_entry(
+							sw_desc, index++);
+					ppc440spe_adma_pq_zero_op(iter, chan,
+							paddr);
+				}
+
+				if (test_bit(PPC440SPE_ZERO_Q,
+						&sw_desc->flags)) {
+					iter = ppc440spe_get_group_entry(
+							sw_desc, index++);
+					ppc440spe_adma_pq_zero_op(iter, chan,
+							qaddr);
+				}
+
+				return;
+			}
+		} else {
+			/* This is an RXOR-only or a mixed RXOR/WXOR chain */
+
+			/* If we want to include the destination in the
+			 * calculations, then make the dest addresses cued
+			 * with mult=1 (XOR).
+			 */
+			ppath = test_bit(PPC440SPE_ZERO_P, &sw_desc->flags) ?
+					DMA_CUED_XOR_HB :
+					DMA_CUED_XOR_BASE |
+						(1 << DMA_CUED_MULT1_OFF);
+			qpath = test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags) ?
+					DMA_CUED_XOR_HB :
+					DMA_CUED_XOR_BASE |
+						(1 << DMA_CUED_MULT1_OFF);
+
+			/* Setup destination(s) in RXOR slot(s) */
+			iter = ppc440spe_get_group_entry(sw_desc, index++);
+			ppc440spe_desc_set_dest_addr(iter, chan,
+						paddr ? ppath : qpath,
+						paddr ? paddr : qaddr, 0);
+			if (!addr) {
+				/* two destinations */
+				iter = ppc440spe_get_group_entry(sw_desc,
+								 index++);
+				ppc440spe_desc_set_dest_addr(iter, chan,
+						qpath, qaddr, 0);
+			}
+
+			if (test_bit(PPC440SPE_DESC_WXOR, &sw_desc->flags)) {
+				/* Setup destination(s) in remaining WXOR
+				 * slots
+				 */
+				iter = ppc440spe_get_group_entry(sw_desc,
+								 index);
+				if (addr) {
+					/* one destination */
+					list_for_each_entry_from(iter,
+					    &sw_desc->group_list,
+					    chain_node)
+						ppc440spe_desc_set_dest_addr(
+							iter, chan,
+							DMA_CUED_XOR_BASE,
+							addr, 0);
+
+				} else {
+					/* two destinations */
+					list_for_each_entry_from(iter,
+					    &sw_desc->group_list,
+					    chain_node) {
+						ppc440spe_desc_set_dest_addr(
+							iter, chan,
+							DMA_CUED_XOR_BASE,
+							paddr, 0);
+						ppc440spe_desc_set_dest_addr(
+							iter, chan,
+							DMA_CUED_XOR_BASE,
+							qaddr, 1);
+					}
+				}
+			}
+
+		}
+		break;
+
+	case PPC440SPE_XOR_ID:
+		/* DMA2 descriptors have only 1 destination, so there are
+		 * two chains - one for each dest.
+		 * If we want to include the destination in the calculations,
+		 * then make the dest addresses cued with mult=1 (XOR).
+		 */
+		ppath = test_bit(PPC440SPE_ZERO_P, &sw_desc->flags) ?
+				DMA_CUED_XOR_HB :
+				DMA_CUED_XOR_BASE |
+					(1 << DMA_CUED_MULT1_OFF);
+
+		qpath = test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags) ?
+				DMA_CUED_XOR_HB :
+				DMA_CUED_XOR_BASE |
+					(1 << DMA_CUED_MULT1_OFF);
+
+		iter = ppc440spe_get_group_entry(sw_desc, 0);
+		for (i = 0; i < sw_desc->descs_per_op; i++) {
+			ppc440spe_desc_set_dest_addr(iter, chan,
+				paddr ? ppath : qpath,
+				paddr ? paddr : qaddr, 0);
+			iter = list_entry(iter->chain_node.next,
+					  struct ppc440spe_adma_desc_slot,
+					  chain_node);
+		}
+
+		if (!addr) {
+			/* Two destinations; setup Q here */
+			iter = ppc440spe_get_group_entry(sw_desc,
+				sw_desc->descs_per_op);
+			for (i = 0; i < sw_desc->descs_per_op; i++) {
+				ppc440spe_desc_set_dest_addr(iter,
+					chan, qpath, qaddr, 0);
+				iter = list_entry(iter->chain_node.next,
+						struct ppc440spe_adma_desc_slot,
+						chain_node);
+			}
+		}
+
+		break;
+	}
+}
+
+/**
+ * ppc440spe_adma_pqzero_sum_set_dest - set destination address into descriptor
+ * for the PQ_ZERO_SUM operation
+ */
+static void ppc440spe_adma_pqzero_sum_set_dest(
+		struct ppc440spe_adma_desc_slot *sw_desc,
+		dma_addr_t paddr, dma_addr_t qaddr)
+{
+	struct ppc440spe_adma_desc_slot *iter, *end;
+	struct ppc440spe_adma_chan *chan;
+	dma_addr_t addr = 0;
+	int idx;
+
+	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+	/* walk through the WXOR source list and set P/Q-destinations
+	 * for each slot
+	 */
+	idx = (paddr && qaddr) ? 2 : 1;
+	/* set end */
+	list_for_each_entry_reverse(end, &sw_desc->group_list,
+				    chain_node) {
+		if (!(--idx))
+			break;
+	}
+	/* set start */
+	idx = (paddr && qaddr) ? 2 : 1;
+	iter = ppc440spe_get_group_entry(sw_desc, idx);
+
+	if (paddr && qaddr) {
+		/* two destinations */
+		list_for_each_entry_from(iter, &sw_desc->group_list,
+					 chain_node) {
+			if (unlikely(iter == end))
+				break;
+			ppc440spe_desc_set_dest_addr(iter, chan,
+						DMA_CUED_XOR_BASE, paddr, 0);
+			ppc440spe_desc_set_dest_addr(iter, chan,
+						DMA_CUED_XOR_BASE, qaddr, 1);
+		}
+	} else {
+		/* one destination */
+		addr = paddr ? paddr : qaddr;
+		list_for_each_entry_from(iter, &sw_desc->group_list,
+					 chain_node) {
+			if (unlikely(iter == end))
+				break;
+			ppc440spe_desc_set_dest_addr(iter, chan,
+						DMA_CUED_XOR_BASE, addr, 0);
+		}
+	}
+
+	/* The remaining descriptors are DATACHECK. They do not need a
+	 * destination; the addresses are actually used there as sources
+	 * for the check operation. So set addr as a source.
+	 */
+	ppc440spe_desc_set_src_addr(end, chan, 0, 0, addr ? addr : paddr);
+
+	if (!addr) {
+		end = list_entry(end->chain_node.next,
+				 struct ppc440spe_adma_desc_slot, chain_node);
+		ppc440spe_desc_set_src_addr(end, chan, 0, 0, qaddr);
+	}
+}
+
+/**
+ * ppc440spe_desc_set_xor_src_cnt - set source count into descriptor
+ */
+static inline void ppc440spe_desc_set_xor_src_cnt(
+			struct ppc440spe_adma_desc_slot *desc,
+			int src_cnt)
+{
+	struct xor_cb *hw_desc = desc->hw_desc;
+
+	hw_desc->cbc &= ~XOR_CDCR_OAC_MSK;
+	hw_desc->cbc |= src_cnt;
+}
+
+/**
+ * ppc440spe_adma_pq_set_src - set source address into descriptor
+ */
+static void ppc440spe_adma_pq_set_src(struct ppc440spe_adma_desc_slot *sw_desc,
+		dma_addr_t addr, int index)
+{
+	struct ppc440spe_adma_chan *chan;
+	dma_addr_t haddr = 0;
+	struct ppc440spe_adma_desc_slot *iter = NULL;
+
+	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		/* DMA0,1 may do: WXOR, RXOR, RXOR+WXORs chain
+		 */
+		if (test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) {
+			/* RXOR-only or RXOR/WXOR operation */
+			int iskip = test_bit(PPC440SPE_DESC_RXOR12,
+				&sw_desc->flags) ?  2 : 3;
+
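+			/* the RXOR CDB consumes the first 2 (RXOR12) or 3
+			 * (RXOR123/124/125) sources; later indexes map to
+			 * the WXOR slots that follow the destinations.
+			 */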
+			if (index == 0) {
+				/* 1st slot (RXOR) */
+				/* setup sources region (R1-2-3, R1-2-4,
+				 * or R1-2-5)
+				 */
+				if (test_bit(PPC440SPE_DESC_RXOR12,
+						&sw_desc->flags))
+					haddr = DMA_RXOR12 <<
+						DMA_CUED_REGION_OFF;
+				else if (test_bit(PPC440SPE_DESC_RXOR123,
+				    &sw_desc->flags))
+					haddr = DMA_RXOR123 <<
+						DMA_CUED_REGION_OFF;
+				else if (test_bit(PPC440SPE_DESC_RXOR124,
+				    &sw_desc->flags))
+					haddr = DMA_RXOR124 <<
+						DMA_CUED_REGION_OFF;
+				else if (test_bit(PPC440SPE_DESC_RXOR125,
+				    &sw_desc->flags))
+					haddr = DMA_RXOR125 <<
+						DMA_CUED_REGION_OFF;
+				else
+					BUG();
+				haddr |= DMA_CUED_XOR_BASE;
+				iter = ppc440spe_get_group_entry(sw_desc, 0);
+			} else if (index < iskip) {
+				/* 1st slot (RXOR):
+				 * its source address has already been set at
+				 * index 0, so there is nothing to do for the
+				 * remaining indexes within the first <iskip>
+				 */
+				iter = NULL;
+			} else {
+				/* 2nd/3rd and subsequent slots (WXOR);
+				 * skip the first slot, which holds the RXOR
+				 */
+				haddr = DMA_CUED_XOR_HB;
+				iter = ppc440spe_get_group_entry(sw_desc,
+				    index - iskip + sw_desc->dst_cnt);
+			}
+		} else {
+			int znum = 0;
+
+			/* WXOR-only operation; skip the first slots, which
+			 * zero the destinations
+			 */
+			if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
+				znum++;
+			if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
+				znum++;
+
+			haddr = DMA_CUED_XOR_HB;
+			iter = ppc440spe_get_group_entry(sw_desc,
+					index + znum);
+		}
+
+		if (likely(iter)) {
+			ppc440spe_desc_set_src_addr(iter, chan, 0, haddr, addr);
+
+			if (!index &&
+			    test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags) &&
+			    sw_desc->dst_cnt == 2) {
+				/* if we have two destinations for RXOR, then
+				 * set up the source in the second descriptor
+				 * too
+				 */
+				iter = ppc440spe_get_group_entry(sw_desc, 1);
+				ppc440spe_desc_set_src_addr(iter, chan, 0,
+					haddr, addr);
+			}
+		}
+		break;
+
+	case PPC440SPE_XOR_ID:
+		/* DMA2 may do Biskup */
+		iter = sw_desc->group_head;
+		if (iter->dst_cnt == 2) {
+			/* both P & Q calculations required; set P src here */
+			ppc440spe_adma_dma2rxor_set_src(iter, index, addr);
+
+			/* this is for Q */
+			iter = ppc440spe_get_group_entry(sw_desc,
+				sw_desc->descs_per_op);
+		}
+		ppc440spe_adma_dma2rxor_set_src(iter, index, addr);
+		break;
+	}
+}
+
+/**
+ * ppc440spe_adma_memcpy_xor_set_src - set source address into descriptor
+ */
+static void ppc440spe_adma_memcpy_xor_set_src(
+		struct ppc440spe_adma_desc_slot *sw_desc,
+		dma_addr_t addr, int index)
+{
+	struct ppc440spe_adma_chan *chan;
+
+	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+	sw_desc = sw_desc->group_head;
+
+	if (likely(sw_desc))
+		ppc440spe_desc_set_src_addr(sw_desc, chan, index, 0, addr);
+}
+
+/**
+ * ppc440spe_adma_dma2rxor_inc_addr - account for a consumed RXOR operand address
+ */
+static void ppc440spe_adma_dma2rxor_inc_addr(
+		struct ppc440spe_adma_desc_slot *desc,
+		struct ppc440spe_rxor *cursor, int index, int src_cnt)
+{
+	cursor->addr_count++;
+	if (index == src_cnt - 1) {
+		ppc440spe_desc_set_xor_src_cnt(desc, cursor->addr_count);
+	} else if (cursor->addr_count == XOR_MAX_OPS) {
+		ppc440spe_desc_set_xor_src_cnt(desc, cursor->addr_count);
+		cursor->addr_count = 0;
+		cursor->desc_count++;
+	}
+}
+
+/**
+ * ppc440spe_adma_dma2rxor_prep_src - setup RXOR types in DMA2 CDB
+ */
+static int ppc440spe_adma_dma2rxor_prep_src(
+		struct ppc440spe_adma_desc_slot *hdesc,
+		struct ppc440spe_rxor *cursor, int index,
+		int src_cnt, u32 addr)
+{
+	int rval = 0;
+	u32 sign;
+	struct ppc440spe_adma_desc_slot *desc = hdesc;
+	int i;
+
+	for (i = 0; i < cursor->desc_count; i++) {
+		desc = list_entry(hdesc->chain_node.next,
+				  struct ppc440spe_adma_desc_slot,
+				  chain_node);
+	}
+
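+	/* cursor->state: 2 - the next address starts a new region,
+	 * 0 - the first operand of a candidate pair has been recorded,
+	 * 1 - a pair was recognized and may still be extended to a
+	 * three-operand region (R1-2-3/R1-2-4/R1-2-5).
+	 */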
+	switch (cursor->state) {
+	case 0:
+		if (addr == cursor->addrl + cursor->len) {
+			/* direct RXOR */
+			cursor->state = 1;
+			cursor->xor_count++;
+			if (index == src_cnt-1) {
+				ppc440spe_rxor_set_region(desc,
+					cursor->addr_count,
+					DMA_RXOR12 << DMA_CUED_REGION_OFF);
+				ppc440spe_adma_dma2rxor_inc_addr(
+					desc, cursor, index, src_cnt);
+			}
+		} else if (cursor->addrl == addr + cursor->len) {
+			/* reverse RXOR */
+			cursor->state = 1;
+			cursor->xor_count++;
+			set_bit(cursor->addr_count, &desc->reverse_flags[0]);
+			if (index == src_cnt-1) {
+				ppc440spe_rxor_set_region(desc,
+					cursor->addr_count,
+					DMA_RXOR12 << DMA_CUED_REGION_OFF);
+				ppc440spe_adma_dma2rxor_inc_addr(
+					desc, cursor, index, src_cnt);
+			}
+		} else {
+			printk(KERN_ERR "Cannot build "
+				"DMA2 RXOR command block.\n");
+			BUG();
+		}
+		break;
+	case 1:
+		sign = test_bit(cursor->addr_count,
+				desc->reverse_flags)
+			? -1 : 1;
+		if (index == src_cnt-2 || (sign == -1
+			&& addr != cursor->addrl - 2*cursor->len)) {
+			cursor->state = 0;
+			cursor->xor_count = 1;
+			cursor->addrl = addr;
+			ppc440spe_rxor_set_region(desc,
+				cursor->addr_count,
+				DMA_RXOR12 << DMA_CUED_REGION_OFF);
+			ppc440spe_adma_dma2rxor_inc_addr(
+				desc, cursor, index, src_cnt);
+		} else if (addr == cursor->addrl + 2*sign*cursor->len) {
+			cursor->state = 2;
+			cursor->xor_count = 0;
+			ppc440spe_rxor_set_region(desc,
+				cursor->addr_count,
+				DMA_RXOR123 << DMA_CUED_REGION_OFF);
+			if (index == src_cnt-1) {
+				ppc440spe_adma_dma2rxor_inc_addr(
+					desc, cursor, index, src_cnt);
+			}
+		} else if (addr == cursor->addrl + 3*cursor->len) {
+			cursor->state = 2;
+			cursor->xor_count = 0;
+			ppc440spe_rxor_set_region(desc,
+				cursor->addr_count,
+				DMA_RXOR124 << DMA_CUED_REGION_OFF);
+			if (index == src_cnt-1) {
+				ppc440spe_adma_dma2rxor_inc_addr(
+					desc, cursor, index, src_cnt);
+			}
+		} else if (addr == cursor->addrl + 4*cursor->len) {
+			cursor->state = 2;
+			cursor->xor_count = 0;
+			ppc440spe_rxor_set_region(desc,
+				cursor->addr_count,
+				DMA_RXOR125 << DMA_CUED_REGION_OFF);
+			if (index == src_cnt-1) {
+				ppc440spe_adma_dma2rxor_inc_addr(
+					desc, cursor, index, src_cnt);
+			}
+		} else {
+			cursor->state = 0;
+			cursor->xor_count = 1;
+			cursor->addrl = addr;
+			ppc440spe_rxor_set_region(desc,
+				cursor->addr_count,
+				DMA_RXOR12 << DMA_CUED_REGION_OFF);
+			ppc440spe_adma_dma2rxor_inc_addr(
+				desc, cursor, index, src_cnt);
+		}
+		break;
+	case 2:
+		cursor->state = 0;
+		cursor->addrl = addr;
+		cursor->xor_count++;
+		if (index) {
+			ppc440spe_adma_dma2rxor_inc_addr(
+				desc, cursor, index, src_cnt);
+		}
+		break;
+	}
+
+	return rval;
+}
+
+/**
+ * ppc440spe_adma_dma2rxor_set_src - set RXOR source address; it's assumed that
+ *	ppc440spe_adma_dma2rxor_prep_src() has already been done prior to this call
+ */
+static void ppc440spe_adma_dma2rxor_set_src(
+		struct ppc440spe_adma_desc_slot *desc,
+		int index, dma_addr_t addr)
+{
+	struct xor_cb *xcb = desc->hw_desc;
+	int k = 0, op = 0, lop = 0;
+
+	/* get the RXOR operand which corresponds to index addr */
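+	/* each RXOR12 operand covers two source indexes and the three-operand
+	 * regions cover three; roll over to the next chained CDB every
+	 * XOR_MAX_OPS operands.
+	 */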
+	while (op <= index) {
+		lop = op;
+		if (k == XOR_MAX_OPS) {
+			k = 0;
+			desc = list_entry(desc->chain_node.next,
+				struct ppc440spe_adma_desc_slot, chain_node);
+			xcb = desc->hw_desc;
+
+		}
+		if ((xcb->ops[k++].h & (DMA_RXOR12 << DMA_CUED_REGION_OFF)) ==
+		    (DMA_RXOR12 << DMA_CUED_REGION_OFF))
+			op += 2;
+		else
+			op += 3;
+	}
+
+	BUG_ON(k < 1);
+
+	if (test_bit(k-1, desc->reverse_flags)) {
+		/* reverse operand order; put last op in RXOR group */
+		if (index == op - 1)
+			ppc440spe_rxor_set_src(desc, k - 1, addr);
+	} else {
+		/* direct operand order; put first op in RXOR group */
+		if (index == lop)
+			ppc440spe_rxor_set_src(desc, k - 1, addr);
+	}
+}
+
+/**
+ * ppc440spe_adma_dma2rxor_set_mult - set RXOR multipliers; it's assumed that
+ *	ppc440spe_adma_dma2rxor_prep_src() has already been done prior to this call
+ */
+static void ppc440spe_adma_dma2rxor_set_mult(
+		struct ppc440spe_adma_desc_slot *desc,
+		int index, u8 mult)
+{
+	struct xor_cb *xcb = desc->hw_desc;
+	int k = 0, op = 0, lop = 0;
+
+	/* get the RXOR operand which corresponds to index mult */
+	while (op <= index) {
+		lop = op;
+		if (k == XOR_MAX_OPS) {
+			k = 0;
+			desc = list_entry(desc->chain_node.next,
+					  struct ppc440spe_adma_desc_slot,
+					  chain_node);
+			xcb = desc->hw_desc;
+
+		}
+		if ((xcb->ops[k++].h & (DMA_RXOR12 << DMA_CUED_REGION_OFF)) ==
+		    (DMA_RXOR12 << DMA_CUED_REGION_OFF))
+			op += 2;
+		else
+			op += 3;
+	}
+
+	BUG_ON(k < 1);
+	if (test_bit(k-1, desc->reverse_flags)) {
+		/* reverse order */
+		ppc440spe_rxor_set_mult(desc, k - 1, op - index - 1, mult);
+	} else {
+		/* direct order */
+		ppc440spe_rxor_set_mult(desc, k - 1, index - lop, mult);
+	}
+}
+
+/**
+ * ppc440spe_init_rxor_cursor - reset the RXOR parsing cursor
+ */
+static void ppc440spe_init_rxor_cursor(struct ppc440spe_rxor *cursor)
+{
+	memset(cursor, 0, sizeof(struct ppc440spe_rxor));
+	cursor->state = 2;
+}
+
+/**
+ * ppc440spe_adma_pq_set_src_mult - set multiplication coefficient into
+ * descriptor for the PQXOR operation
+ */
+static void ppc440spe_adma_pq_set_src_mult(
+		struct ppc440spe_adma_desc_slot *sw_desc,
+		unsigned char mult, int index, int dst_pos)
+{
+	struct ppc440spe_adma_chan *chan;
+	u32 mult_idx, mult_dst;
+	struct ppc440spe_adma_desc_slot *iter = NULL, *iter1 = NULL;
+
+	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);
+
+	switch (chan->device->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		if (test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) {
+			int region = test_bit(PPC440SPE_DESC_RXOR12,
+					&sw_desc->flags) ? 2 : 3;
+
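+			/* the first 'region' sources have their multipliers
+			 * stored in the RXOR CDB itself; the rest use the
+			 * WXOR CDBs that follow.
+			 */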
+			if (index < region) {
+				/* RXOR multipliers */
+				iter = ppc440spe_get_group_entry(sw_desc,
+					sw_desc->dst_cnt - 1);
+				if (sw_desc->dst_cnt == 2)
+					iter1 = ppc440spe_get_group_entry(
+							sw_desc, 0);
+
+				mult_idx = DMA_CUED_MULT1_OFF + (index << 3);
+				mult_dst = DMA_CDB_SG_SRC;
+			} else {
+				/* WXOR multiplier */
+				iter = ppc440spe_get_group_entry(sw_desc,
+							index - region +
+							sw_desc->dst_cnt);
+				mult_idx = DMA_CUED_MULT1_OFF;
+				mult_dst = dst_pos ? DMA_CDB_SG_DST2 :
+						     DMA_CDB_SG_DST1;
+			}
+		} else {
+			int znum = 0;
+
+			/* WXOR-only;
+			 * skip the first slots, which zero the destinations
+			 * (if zeroing takes place)
+			 */
+			if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
+				znum++;
+			if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
+				znum++;
+
+			iter = ppc440spe_get_group_entry(sw_desc, index + znum);
+			mult_idx = DMA_CUED_MULT1_OFF;
+			mult_dst = dst_pos ? DMA_CDB_SG_DST2 : DMA_CDB_SG_DST1;
+		}
+
+		if (likely(iter)) {
+			ppc440spe_desc_set_src_mult(iter, chan,
+				mult_idx, mult_dst, mult);
+
+			if (unlikely(iter1)) {
+				/* if we have two destinations for RXOR, then
+				 * we've just set the Q mult. Set up P now.
+				 */
+				ppc440spe_desc_set_src_mult(iter1, chan,
+					mult_idx, mult_dst, 1);
+			}
+
+		}
+		break;
+
+	case PPC440SPE_XOR_ID:
+		iter = sw_desc->group_head;
+		if (sw_desc->dst_cnt == 2) {
+			/* both P & Q calculations required; set P mult here */
+			ppc440spe_adma_dma2rxor_set_mult(iter, index, 1);
+
+			/* and then set Q mult */
+			iter = ppc440spe_get_group_entry(sw_desc,
+			       sw_desc->descs_per_op);
+		}
+		ppc440spe_adma_dma2rxor_set_mult(iter, index, mult);
+		break;
+	}
+}
+
+/**
+ * ppc440spe_adma_free_chan_resources - free the resources allocated
+ */
+static void ppc440spe_adma_free_chan_resources(struct dma_chan *chan)
+{
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+	struct ppc440spe_adma_desc_slot *iter, *_iter;
+	int in_use_descs = 0;
+
+	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+	ppc440spe_adma_slot_cleanup(ppc440spe_chan);
+
+	spin_lock_bh(&ppc440spe_chan->lock);
+	list_for_each_entry_safe(iter, _iter, &ppc440spe_chan->chain,
+					chain_node) {
+		in_use_descs++;
+		list_del(&iter->chain_node);
+	}
+	list_for_each_entry_safe_reverse(iter, _iter,
+			&ppc440spe_chan->all_slots, slot_node) {
+		list_del(&iter->slot_node);
+		kfree(iter);
+		ppc440spe_chan->slots_allocated--;
+	}
+	ppc440spe_chan->last_used = NULL;
+
+	dev_dbg(ppc440spe_chan->device->common.dev,
+		"ppc440spe adma%d %s slots_allocated %d\n",
+		ppc440spe_chan->device->id,
+		__func__, ppc440spe_chan->slots_allocated);
+	spin_unlock_bh(&ppc440spe_chan->lock);
+
+	/* one is OK since we left it there on purpose */
+	if (in_use_descs > 1)
+		printk(KERN_ERR "SPE: Freeing %d in use descriptors!\n",
+			in_use_descs - 1);
+}
+
+/**
+ * ppc440spe_adma_tx_status - poll the status of an ADMA transaction
+ * @chan: ADMA channel handle
+ * @cookie: ADMA transaction identifier
+ * @txstate: a holder for the current state of the channel
+ */
+static enum dma_status ppc440spe_adma_tx_status(struct dma_chan *chan,
+			dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+	enum dma_status ret;
+
+	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	ppc440spe_adma_slot_cleanup(ppc440spe_chan);
+
+	return dma_cookie_status(chan, cookie, txstate);
+}
+
+/**
+ * ppc440spe_adma_eot_handler - end of transfer interrupt handler
+ */
+static irqreturn_t ppc440spe_adma_eot_handler(int irq, void *data)
+{
+	struct ppc440spe_adma_chan *chan = data;
+
+	dev_dbg(chan->device->common.dev,
+		"ppc440spe adma%d: %s\n", chan->device->id, __func__);
+
+	tasklet_schedule(&chan->irq_tasklet);
+	ppc440spe_adma_device_clear_eot_status(chan);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * ppc440spe_adma_err_handler - DMA error interrupt handler;
+ *	do the same things as an EOT handler
+ */
+static irqreturn_t ppc440spe_adma_err_handler(int irq, void *data)
+{
+	struct ppc440spe_adma_chan *chan = data;
+
+	dev_dbg(chan->device->common.dev,
+		"ppc440spe adma%d: %s\n", chan->device->id, __func__);
+
+	tasklet_schedule(&chan->irq_tasklet);
+	ppc440spe_adma_device_clear_eot_status(chan);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * ppc440spe_test_callback - called when the test operation has completed
+ */
+static void ppc440spe_test_callback(void *unused)
+{
+	complete(&ppc440spe_r6_test_comp);
+}
+
+/**
+ * ppc440spe_adma_issue_pending - flush all pending descriptors to h/w
+ */
+static void ppc440spe_adma_issue_pending(struct dma_chan *chan)
+{
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+
+	ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+	dev_dbg(ppc440spe_chan->device->common.dev,
+		"ppc440spe adma%d: %s %d\n", ppc440spe_chan->device->id,
+		__func__, ppc440spe_chan->pending);
+
+	if (ppc440spe_chan->pending) {
+		ppc440spe_chan->pending = 0;
+		ppc440spe_chan_append(ppc440spe_chan);
+	}
+}
+
+/**
+ * ppc440spe_chan_start_null_xor - initiate the first XOR operation (DMA engines
+ *	use FIFOs, as opposed to the chains used by the XOR engine, so this
+ *	is an XOR-specific operation)
+ */
+static void ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan)
+{
+	struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
+	dma_cookie_t cookie;
+	int slot_cnt, slots_per_op;
+
+	dev_dbg(chan->device->common.dev,
+		"ppc440spe adma%d: %s\n", chan->device->id, __func__);
+
+	spin_lock_bh(&chan->lock);
+	slot_cnt = ppc440spe_chan_xor_slot_count(0, 2, &slots_per_op);
+	sw_desc = ppc440spe_adma_alloc_slots(chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		group_start = sw_desc->group_head;
+		list_splice_init(&sw_desc->group_list, &chan->chain);
+		async_tx_ack(&sw_desc->async_tx);
+		ppc440spe_desc_init_null_xor(group_start);
+
+		cookie = dma_cookie_assign(&sw_desc->async_tx);
+
+		/* initialize the completed cookie to be less than
+		 * the most recently used cookie
+		 */
+		chan->common.completed_cookie = cookie - 1;
+
+		/* channel should not be busy */
+		BUG_ON(ppc440spe_chan_is_busy(chan));
+
+		/* set the descriptor address */
+		ppc440spe_chan_set_first_xor_descriptor(chan, sw_desc);
+
+		/* run the descriptor */
+		ppc440spe_chan_run(chan);
+	} else
+		printk(KERN_ERR "ppc440spe adma%d"
+			" failed to allocate null descriptor\n",
+			chan->device->id);
+	spin_unlock_bh(&chan->lock);
+}
+
+/**
+ * ppc440spe_test_raid6 - test whether the RAID-6 capabilities were enabled successfully.
+ *	For this we just perform one WXOR operation with the same source
+ *	and destination addresses, the GF-multiplier is 1; so if RAID-6
+ *	capabilities are enabled then we'll get src/dst filled with zero.
+ */
+static int ppc440spe_test_raid6(struct ppc440spe_adma_chan *chan)
+{
+	struct ppc440spe_adma_desc_slot *sw_desc, *iter;
+	struct page *pg;
+	char *a;
+	dma_addr_t dma_addr, addrs[2];
+	unsigned long op = 0;
+	int rval = 0;
+
+	set_bit(PPC440SPE_DESC_WXOR, &op);
+
+	pg = alloc_page(GFP_KERNEL);
+	if (!pg)
+		return -ENOMEM;
+
+	spin_lock_bh(&chan->lock);
+	sw_desc = ppc440spe_adma_alloc_slots(chan, 1, 1);
+	if (sw_desc) {
+		/* 1 src, 1 dst, int_ena, WXOR */
+		ppc440spe_desc_init_dma01pq(sw_desc, 1, 1, 1, op);
+		list_for_each_entry(iter, &sw_desc->group_list, chain_node) {
+			ppc440spe_desc_set_byte_count(iter, chan, PAGE_SIZE);
+			iter->unmap_len = PAGE_SIZE;
+		}
+	} else {
+		rval = -EFAULT;
+		spin_unlock_bh(&chan->lock);
+		goto exit;
+	}
+	spin_unlock_bh(&chan->lock);
+
+	/* Fill the test page with ones */
+	memset(page_address(pg), 0xFF, PAGE_SIZE);
+	dma_addr = dma_map_page(chan->device->dev, pg, 0,
+				PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+	/* Setup addresses */
+	ppc440spe_adma_pq_set_src(sw_desc, dma_addr, 0);
+	ppc440spe_adma_pq_set_src_mult(sw_desc, 1, 0, 0);
+	addrs[0] = dma_addr;
+	addrs[1] = 0;
+	ppc440spe_adma_pq_set_dest(sw_desc, addrs, DMA_PREP_PQ_DISABLE_Q);
+
+	async_tx_ack(&sw_desc->async_tx);
+	sw_desc->async_tx.callback = ppc440spe_test_callback;
+	sw_desc->async_tx.callback_param = NULL;
+
+	init_completion(&ppc440spe_r6_test_comp);
+
+	ppc440spe_adma_tx_submit(&sw_desc->async_tx);
+	ppc440spe_adma_issue_pending(&chan->common);
+
+	wait_for_completion(&ppc440spe_r6_test_comp);
+
+	/* Now check if the test page is zeroed */
+	a = page_address(pg);
+	if ((*(u32 *)a) == 0 && memcmp(a, a+4, PAGE_SIZE-4) == 0) {
+		/* page is zero - RAID-6 enabled */
+		rval = 0;
+	} else {
+		/* RAID-6 was not enabled */
+		rval = -EINVAL;
+	}
+exit:
+	__free_page(pg);
+	return rval;
+}
+
+static void ppc440spe_adma_init_capabilities(struct ppc440spe_adma_device *adev)
+{
+	switch (adev->id) {
+	case PPC440SPE_DMA0_ID:
+	case PPC440SPE_DMA1_ID:
+		dma_cap_set(DMA_MEMCPY, adev->common.cap_mask);
+		dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask);
+		dma_cap_set(DMA_PQ, adev->common.cap_mask);
+		dma_cap_set(DMA_PQ_VAL, adev->common.cap_mask);
+		dma_cap_set(DMA_XOR_VAL, adev->common.cap_mask);
+		break;
+	case PPC440SPE_XOR_ID:
+		dma_cap_set(DMA_XOR, adev->common.cap_mask);
+		dma_cap_set(DMA_PQ, adev->common.cap_mask);
+		dma_cap_set(DMA_INTERRUPT, adev->common.cap_mask);
+		break;
+	}
+
+	/* Set base routines */
+	adev->common.device_alloc_chan_resources =
+				ppc440spe_adma_alloc_chan_resources;
+	adev->common.device_free_chan_resources =
+				ppc440spe_adma_free_chan_resources;
+	adev->common.device_tx_status = ppc440spe_adma_tx_status;
+	adev->common.device_issue_pending = ppc440spe_adma_issue_pending;
+
+	/* Set prep routines based on capability */
+	if (dma_has_cap(DMA_MEMCPY, adev->common.cap_mask)) {
+		adev->common.device_prep_dma_memcpy =
+			ppc440spe_adma_prep_dma_memcpy;
+	}
+	if (dma_has_cap(DMA_XOR, adev->common.cap_mask)) {
+		adev->common.max_xor = XOR_MAX_OPS;
+		adev->common.device_prep_dma_xor =
+			ppc440spe_adma_prep_dma_xor;
+	}
+	if (dma_has_cap(DMA_PQ, adev->common.cap_mask)) {
+		switch (adev->id) {
+		case PPC440SPE_DMA0_ID:
+			dma_set_maxpq(&adev->common,
+				DMA0_FIFO_SIZE / sizeof(struct dma_cdb), 0);
+			break;
+		case PPC440SPE_DMA1_ID:
+			dma_set_maxpq(&adev->common,
+				DMA1_FIFO_SIZE / sizeof(struct dma_cdb), 0);
+			break;
+		case PPC440SPE_XOR_ID:
+			adev->common.max_pq = XOR_MAX_OPS * 3;
+			break;
+		}
+		adev->common.device_prep_dma_pq =
+			ppc440spe_adma_prep_dma_pq;
+	}
+	if (dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask)) {
+		switch (adev->id) {
+		case PPC440SPE_DMA0_ID:
+			adev->common.max_pq = DMA0_FIFO_SIZE /
+						sizeof(struct dma_cdb);
+			break;
+		case PPC440SPE_DMA1_ID:
+			adev->common.max_pq = DMA1_FIFO_SIZE /
+						sizeof(struct dma_cdb);
+			break;
+		}
+		adev->common.device_prep_dma_pq_val =
+			ppc440spe_adma_prep_dma_pqzero_sum;
+	}
+	if (dma_has_cap(DMA_XOR_VAL, adev->common.cap_mask)) {
+		switch (adev->id) {
+		case PPC440SPE_DMA0_ID:
+			adev->common.max_xor = DMA0_FIFO_SIZE /
+						sizeof(struct dma_cdb);
+			break;
+		case PPC440SPE_DMA1_ID:
+			adev->common.max_xor = DMA1_FIFO_SIZE /
+						sizeof(struct dma_cdb);
+			break;
+		}
+		adev->common.device_prep_dma_xor_val =
+			ppc440spe_adma_prep_dma_xor_zero_sum;
+	}
+	if (dma_has_cap(DMA_INTERRUPT, adev->common.cap_mask)) {
+		adev->common.device_prep_dma_interrupt =
+			ppc440spe_adma_prep_dma_interrupt;
+	}
+	pr_info("%s: AMCC(R) PPC440SP(E) ADMA Engine: "
+	  "( %s%s%s%s%s%s)\n",
+	  dev_name(adev->dev),
+	  dma_has_cap(DMA_PQ, adev->common.cap_mask) ? "pq " : "",
+	  dma_has_cap(DMA_PQ_VAL, adev->common.cap_mask) ? "pq_val " : "",
+	  dma_has_cap(DMA_XOR, adev->common.cap_mask) ? "xor " : "",
+	  dma_has_cap(DMA_XOR_VAL, adev->common.cap_mask) ? "xor_val " : "",
+	  dma_has_cap(DMA_MEMCPY, adev->common.cap_mask) ? "memcpy " : "",
+	  dma_has_cap(DMA_INTERRUPT, adev->common.cap_mask) ? "intr " : "");
+}
+
+static int ppc440spe_adma_setup_irqs(struct ppc440spe_adma_device *adev,
+				     struct ppc440spe_adma_chan *chan,
+				     int *initcode)
+{
+	struct platform_device *ofdev;
+	struct device_node *np;
+	int ret;
+
+	ofdev = container_of(adev->dev, struct platform_device, dev);
+	np = ofdev->dev.of_node;
+	if (adev->id != PPC440SPE_XOR_ID) {
+		adev->err_irq = irq_of_parse_and_map(np, 1);
+		if (!adev->err_irq) {
+			dev_warn(adev->dev, "no err irq resource?\n");
+			*initcode = PPC_ADMA_INIT_IRQ2;
+			adev->err_irq = -ENXIO;
+		} else
+			atomic_inc(&ppc440spe_adma_err_irq_ref);
+	} else {
+		adev->err_irq = -ENXIO;
+	}
+
+	adev->irq = irq_of_parse_and_map(np, 0);
+	if (!adev->irq) {
+		dev_err(adev->dev, "no irq resource\n");
+		*initcode = PPC_ADMA_INIT_IRQ1;
+		ret = -ENXIO;
+		goto err_irq_map;
+	}
+	dev_dbg(adev->dev, "irq %d, err irq %d\n",
+		adev->irq, adev->err_irq);
+
+	ret = request_irq(adev->irq, ppc440spe_adma_eot_handler,
+			  0, dev_driver_string(adev->dev), chan);
+	if (ret) {
+		dev_err(adev->dev, "can't request irq %d\n",
+			adev->irq);
+		*initcode = PPC_ADMA_INIT_IRQ1;
+		ret = -EIO;
+		goto err_req1;
+	}
+
+	/* only the DMA engines have a separate error IRQ,
+	 * so it's OK if err_irq < 0 in the XOR engine case.
+	 */
+	if (adev->err_irq > 0) {
+		/* both DMA engines share common error IRQ */
+		ret = request_irq(adev->err_irq,
+				  ppc440spe_adma_err_handler,
+				  IRQF_SHARED,
+				  dev_driver_string(adev->dev),
+				  chan);
+		if (ret) {
+			dev_err(adev->dev, "can't request irq %d\n",
+				adev->err_irq);
+			*initcode = PPC_ADMA_INIT_IRQ2;
+			ret = -EIO;
+			goto err_req2;
+		}
+	}
+
+	if (adev->id == PPC440SPE_XOR_ID) {
+		/* enable XOR engine interrupts */
+		iowrite32be(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT |
+			    XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT,
+			    &adev->xor_reg->ier);
+	} else {
+		u32 mask, enable;
+
+		np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe");
+		if (!np) {
+			pr_err("%s: can't find I2O device tree node\n",
+				__func__);
+			ret = -ENODEV;
+			goto err_req2;
+		}
+		adev->i2o_reg = of_iomap(np, 0);
+		if (!adev->i2o_reg) {
+			pr_err("%s: failed to map I2O registers\n", __func__);
+			of_node_put(np);
+			ret = -EINVAL;
+			goto err_req2;
+		}
+		of_node_put(np);
+		/* Unmask 'CS FIFO Attention' interrupts and
+		 * enable generating interrupts on errors
+		 */
+		enable = (adev->id == PPC440SPE_DMA0_ID) ?
+			 ~(I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) :
+			 ~(I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM);
+		mask = ioread32(&adev->i2o_reg->iopim) & enable;
+		iowrite32(mask, &adev->i2o_reg->iopim);
+	}
+	return 0;
+
+err_req2:
+	free_irq(adev->irq, chan);
+err_req1:
+	irq_dispose_mapping(adev->irq);
+err_irq_map:
+	if (adev->err_irq > 0) {
+		if (atomic_dec_and_test(&ppc440spe_adma_err_irq_ref))
+			irq_dispose_mapping(adev->err_irq);
+	}
+	return ret;
+}
+
+static void ppc440spe_adma_release_irqs(struct ppc440spe_adma_device *adev,
+					struct ppc440spe_adma_chan *chan)
+{
+	u32 mask, disable;
+
+	if (adev->id == PPC440SPE_XOR_ID) {
+		/* disable XOR engine interrupts */
+		mask = ioread32be(&adev->xor_reg->ier);
+		mask &= ~(XOR_IE_CBCIE_BIT | XOR_IE_ICBIE_BIT |
+			  XOR_IE_ICIE_BIT | XOR_IE_RPTIE_BIT);
+		iowrite32be(mask, &adev->xor_reg->ier);
+	} else {
+		/* disable DMAx engine interrupts */
+		disable = (adev->id == PPC440SPE_DMA0_ID) ?
+			  (I2O_IOPIM_P0SNE | I2O_IOPIM_P0EM) :
+			  (I2O_IOPIM_P1SNE | I2O_IOPIM_P1EM);
+		mask = ioread32(&adev->i2o_reg->iopim) | disable;
+		iowrite32(mask, &adev->i2o_reg->iopim);
+	}
+	free_irq(adev->irq, chan);
+	irq_dispose_mapping(adev->irq);
+	if (adev->err_irq > 0) {
+		free_irq(adev->err_irq, chan);
+		if (atomic_dec_and_test(&ppc440spe_adma_err_irq_ref)) {
+			irq_dispose_mapping(adev->err_irq);
+			iounmap(adev->i2o_reg);
+		}
+	}
+}
+
+/**
+ * ppc440spe_adma_probe - probe the asynch device
+ */
+static int ppc440spe_adma_probe(struct platform_device *ofdev)
+{
+	struct device_node *np = ofdev->dev.of_node;
+	struct resource res;
+	struct ppc440spe_adma_device *adev;
+	struct ppc440spe_adma_chan *chan;
+	struct ppc_dma_chan_ref *ref, *_ref;
+	int ret = 0, initcode = PPC_ADMA_INIT_OK;
+	const u32 *idx;
+	int len;
+	void *regs;
+	u32 id, pool_size;
+
+	if (of_device_is_compatible(np, "amcc,xor-accelerator")) {
+		id = PPC440SPE_XOR_ID;
+		/* The XOR engine does not use FIFOs but a linked list of
+		 * descriptors, so the pool size to allocate does not depend
+		 * on the engine configuration.
+		 */
+		pool_size = PAGE_SIZE << 1;
+	} else {
+		/* it is DMA0 or DMA1 */
+		idx = of_get_property(np, "cell-index", &len);
+		if (!idx || (len != sizeof(u32))) {
+			dev_err(&ofdev->dev, "Device node %pOF has missing "
+				"or invalid cell-index property\n",
+				np);
+			return -EINVAL;
+		}
+		id = *idx;
+		/* The DMA0/1 engines use a FIFO to maintain CDBs, so the
+		 * pool has to be sized according to the FIFO depth: for
+		 * every CDB pointer the FIFO can hold we provide one CDB
+		 * in the pool.
+		 * That is
+		 *   CDB size = 32B;
+		 *   CDBs number = (DMA0_FIFO_SIZE >> 3);
+		 *   Pool size = CDBs number * CDB size =
+		 *      = (DMA0_FIFO_SIZE >> 3) << 5 = DMA0_FIFO_SIZE << 2.
+		 */
+		pool_size = (id == PPC440SPE_DMA0_ID) ?
+			    DMA0_FIFO_SIZE : DMA1_FIFO_SIZE;
+		pool_size <<= 2;
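+		/* e.g. with DMA0_FIFO_SIZE = 0x1000 the FIFO holds
+		 * 0x1000 / 8 = 512 CDB pointers, so the pool is
+		 * 512 * 32B = 16KB (= DMA0_FIFO_SIZE << 2).
+		 */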
+	}
+
+	if (of_address_to_resource(np, 0, &res)) {
+		dev_err(&ofdev->dev, "failed to get memory resource\n");
+		initcode = PPC_ADMA_INIT_MEMRES;
+		ret = -ENODEV;
+		goto out;
+	}
+
+	if (!request_mem_region(res.start, resource_size(&res),
+				dev_driver_string(&ofdev->dev))) {
+		dev_err(&ofdev->dev, "failed to request memory region %pR\n",
+			&res);
+		initcode = PPC_ADMA_INIT_MEMREG;
+		ret = -EBUSY;
+		goto out;
+	}
+
+	/* create a device */
+	adev = kzalloc(sizeof(*adev), GFP_KERNEL);
+	if (!adev) {
+		initcode = PPC_ADMA_INIT_ALLOC;
+		ret = -ENOMEM;
+		goto err_adev_alloc;
+	}
+
+	adev->id = id;
+	adev->pool_size = pool_size;
+	/* allocate coherent memory for hardware descriptors */
+	adev->dma_desc_pool_virt = dma_alloc_coherent(&ofdev->dev,
+					adev->pool_size, &adev->dma_desc_pool,
+					GFP_KERNEL);
+	if (adev->dma_desc_pool_virt == NULL) {
+		dev_err(&ofdev->dev, "failed to allocate %zu bytes of coherent "
+			"memory for hardware descriptors\n",
+			adev->pool_size);
+		initcode = PPC_ADMA_INIT_COHERENT;
+		ret = -ENOMEM;
+		goto err_dma_alloc;
+	}
+	dev_dbg(&ofdev->dev, "allocated descriptor pool virt 0x%p phys 0x%llx\n",
+		adev->dma_desc_pool_virt, (u64)adev->dma_desc_pool);
+
+	regs = ioremap(res.start, resource_size(&res));
+	if (!regs) {
+		dev_err(&ofdev->dev, "failed to ioremap regs!\n");
+		ret = -ENOMEM;
+		goto err_regs_alloc;
+	}
+
+	if (adev->id == PPC440SPE_XOR_ID) {
+		adev->xor_reg = regs;
+		/* Reset XOR */
+		iowrite32be(XOR_CRSR_XASR_BIT, &adev->xor_reg->crsr);
+		iowrite32be(XOR_CRSR_64BA_BIT, &adev->xor_reg->crrr);
+	} else {
+		size_t fifo_size = (adev->id == PPC440SPE_DMA0_ID) ?
+				   DMA0_FIFO_SIZE : DMA1_FIFO_SIZE;
+		adev->dma_reg = regs;
+		/* DMAx_FIFO_SIZE is defined in bytes, while <fsiz> is
+		 * defined in number of CDB pointers (8 bytes each).
+		 * DMA FIFO Length = CSlength + CPlength, where
+		 * CSlength = CPlength = (fsiz + 1) * 8.
+		 */
+		iowrite32(DMA_FIFO_ENABLE | ((fifo_size >> 3) - 2),
+			  &adev->dma_reg->fsiz);
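+		/* e.g. with a 0x1000-byte FIFO: fsiz = 0x1000/8 - 2 = 510,
+		 * so CSlength = CPlength = (510 + 1) * 8 = 4088 bytes each,
+		 * which stays within the FIFO buffer allocated in
+		 * ppc440spe_configure_raid_devices().
+		 */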
+		/* Configure DMA engine */
+		iowrite32(DMA_CFG_DXEPR_HP | DMA_CFG_DFMPP_HP | DMA_CFG_FALGN,
+			  &adev->dma_reg->cfg);
+		/* Clear Status */
+		iowrite32(~0, &adev->dma_reg->dsts);
+	}
+
+	adev->dev = &ofdev->dev;
+	adev->common.dev = &ofdev->dev;
+	INIT_LIST_HEAD(&adev->common.channels);
+	platform_set_drvdata(ofdev, adev);
+
+	/* create a channel */
+	chan = kzalloc(sizeof(*chan), GFP_KERNEL);
+	if (!chan) {
+		initcode = PPC_ADMA_INIT_CHANNEL;
+		ret = -ENOMEM;
+		goto err_chan_alloc;
+	}
+
+	spin_lock_init(&chan->lock);
+	INIT_LIST_HEAD(&chan->chain);
+	INIT_LIST_HEAD(&chan->all_slots);
+	chan->device = adev;
+	chan->common.device = &adev->common;
+	dma_cookie_init(&chan->common);
+	list_add_tail(&chan->common.device_node, &adev->common.channels);
+	tasklet_init(&chan->irq_tasklet, ppc440spe_adma_tasklet,
+		     (unsigned long)chan);
+
+	/* allocate and map helper pages for async validation or
+	 * async_mult/async_sum_product operations on DMA0/1.
+	 */
+	if (adev->id != PPC440SPE_XOR_ID) {
+		chan->pdest_page = alloc_page(GFP_KERNEL);
+		chan->qdest_page = alloc_page(GFP_KERNEL);
+		if (!chan->pdest_page ||
+		    !chan->qdest_page) {
+			if (chan->pdest_page)
+				__free_page(chan->pdest_page);
+			if (chan->qdest_page)
+				__free_page(chan->qdest_page);
+			ret = -ENOMEM;
+			goto err_page_alloc;
+		}
+		chan->pdest = dma_map_page(&ofdev->dev, chan->pdest_page, 0,
+					   PAGE_SIZE, DMA_BIDIRECTIONAL);
+		chan->qdest = dma_map_page(&ofdev->dev, chan->qdest_page, 0,
+					   PAGE_SIZE, DMA_BIDIRECTIONAL);
+	}
+
+	ref = kmalloc(sizeof(*ref), GFP_KERNEL);
+	if (ref) {
+		ref->chan = &chan->common;
+		INIT_LIST_HEAD(&ref->node);
+		list_add_tail(&ref->node, &ppc440spe_adma_chan_list);
+	} else {
+		dev_err(&ofdev->dev, "failed to allocate channel reference!\n");
+		ret = -ENOMEM;
+		goto err_ref_alloc;
+	}
+
+	ret = ppc440spe_adma_setup_irqs(adev, chan, &initcode);
+	if (ret)
+		goto err_irq;
+
+	ppc440spe_adma_init_capabilities(adev);
+
+	ret = dma_async_device_register(&adev->common);
+	if (ret) {
+		initcode = PPC_ADMA_INIT_REGISTER;
+		dev_err(&ofdev->dev, "failed to register dma device\n");
+		goto err_dev_reg;
+	}
+
+	goto out;
+
+err_dev_reg:
+	ppc440spe_adma_release_irqs(adev, chan);
+err_irq:
+	list_for_each_entry_safe(ref, _ref, &ppc440spe_adma_chan_list, node) {
+		if (chan == to_ppc440spe_adma_chan(ref->chan)) {
+			list_del(&ref->node);
+			kfree(ref);
+		}
+	}
+err_ref_alloc:
+	if (adev->id != PPC440SPE_XOR_ID) {
+		dma_unmap_page(&ofdev->dev, chan->pdest,
+			       PAGE_SIZE, DMA_BIDIRECTIONAL);
+		dma_unmap_page(&ofdev->dev, chan->qdest,
+			       PAGE_SIZE, DMA_BIDIRECTIONAL);
+		__free_page(chan->pdest_page);
+		__free_page(chan->qdest_page);
+	}
+err_page_alloc:
+	kfree(chan);
+err_chan_alloc:
+	if (adev->id == PPC440SPE_XOR_ID)
+		iounmap(adev->xor_reg);
+	else
+		iounmap(adev->dma_reg);
+err_regs_alloc:
+	dma_free_coherent(adev->dev, adev->pool_size,
+			  adev->dma_desc_pool_virt,
+			  adev->dma_desc_pool);
+err_dma_alloc:
+	kfree(adev);
+err_adev_alloc:
+	release_mem_region(res.start, resource_size(&res));
+out:
+	if (id < PPC440SPE_ADMA_ENGINES_NUM)
+		ppc440spe_adma_devices[id] = initcode;
+
+	return ret;
+}
+
+/**
+ * ppc440spe_adma_remove - remove the asynch device
+ */
+static int ppc440spe_adma_remove(struct platform_device *ofdev)
+{
+	struct ppc440spe_adma_device *adev = platform_get_drvdata(ofdev);
+	struct device_node *np = ofdev->dev.of_node;
+	struct resource res;
+	struct dma_chan *chan, *_chan;
+	struct ppc_dma_chan_ref *ref, *_ref;
+	struct ppc440spe_adma_chan *ppc440spe_chan;
+
+	if (adev->id < PPC440SPE_ADMA_ENGINES_NUM)
+		ppc440spe_adma_devices[adev->id] = -1;
+
+	dma_async_device_unregister(&adev->common);
+
+	list_for_each_entry_safe(chan, _chan, &adev->common.channels,
+				 device_node) {
+		ppc440spe_chan = to_ppc440spe_adma_chan(chan);
+		ppc440spe_adma_release_irqs(adev, ppc440spe_chan);
+		tasklet_kill(&ppc440spe_chan->irq_tasklet);
+		if (adev->id != PPC440SPE_XOR_ID) {
+			dma_unmap_page(&ofdev->dev, ppc440spe_chan->pdest,
+					PAGE_SIZE, DMA_BIDIRECTIONAL);
+			dma_unmap_page(&ofdev->dev, ppc440spe_chan->qdest,
+					PAGE_SIZE, DMA_BIDIRECTIONAL);
+			__free_page(ppc440spe_chan->pdest_page);
+			__free_page(ppc440spe_chan->qdest_page);
+		}
+		list_for_each_entry_safe(ref, _ref, &ppc440spe_adma_chan_list,
+					 node) {
+			if (ppc440spe_chan ==
+			    to_ppc440spe_adma_chan(ref->chan)) {
+				list_del(&ref->node);
+				kfree(ref);
+			}
+		}
+		list_del(&chan->device_node);
+		kfree(ppc440spe_chan);
+	}
+
+	dma_free_coherent(adev->dev, adev->pool_size,
+			  adev->dma_desc_pool_virt, adev->dma_desc_pool);
+	if (adev->id == PPC440SPE_XOR_ID)
+		iounmap(adev->xor_reg);
+	else
+		iounmap(adev->dma_reg);
+	of_address_to_resource(np, 0, &res);
+	release_mem_region(res.start, resource_size(&res));
+	kfree(adev);
+	return 0;
+}
+
+/*
+ * /sys driver interface to enable h/w RAID-6 capabilities
+ * The files are created in e.g. the /sys/devices/plb.0/400100100.dma0/driver/
+ * directory and are named "devices", "enable" and "poly":
+ * "devices" shows the available engines.
+ * "enable" is used to enable the RAID-6 capabilities or to check
+ * whether they have been activated.
+ * "poly" allows setting/checking the polynomial in use (for PPC440SPe only).
+ */
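+/*
+ * Example usage (the attribute files also appear under
+ * /sys/bus/platform/drivers/PPC440SP(E)-ADMA/):
+ *   # cat .../devices           - list the probed engines and their state
+ *   # echo <key> > .../enable   - write the key to activate RAID-6
+ *   # cat .../enable            - check whether RAID-6 is enabled
+ *   # cat .../poly              - show the GF polynomial in use
+ */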
+
+static ssize_t devices_show(struct device_driver *dev, char *buf)
+{
+	ssize_t size = 0;
+	int i;
+
+	for (i = 0; i < PPC440SPE_ADMA_ENGINES_NUM; i++) {
+		if (ppc440spe_adma_devices[i] == -1)
+			continue;
+		size += snprintf(buf + size, PAGE_SIZE - size,
+				 "PPC440SP(E)-ADMA.%d: %s\n", i,
+				 ppc_adma_errors[ppc440spe_adma_devices[i]]);
+	}
+	return size;
+}
+static DRIVER_ATTR_RO(devices);
+
+static ssize_t enable_show(struct device_driver *dev, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE,
+			"PPC440SP(e) RAID-6 capabilities are %sABLED.\n",
+			ppc440spe_r6_enabled ? "EN" : "DIS");
+}
+
+static ssize_t enable_store(struct device_driver *dev, const char *buf,
+			    size_t count)
+{
+	unsigned long val;
+
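+	/* length sanity check: at most 11 chars, presumably "0x" + 8 hex digits + '\n' */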
+	if (!count || count > 11)
+		return -EINVAL;
+
+	if (!ppc440spe_r6_tchan)
+		return -EFAULT;
+
+	/* Write a key */
+	sscanf(buf, "%lx", &val);
+	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_XORBA, val);
+	isync();
+
+	/* Verify whether it really works now */
+	if (ppc440spe_test_raid6(ppc440spe_r6_tchan) == 0) {
+		pr_info("PPC440SP(e) RAID-6 has been activated "
+			"successfully\n");
+		ppc440spe_r6_enabled = 1;
+	} else {
+		pr_info("PPC440SP(e) RAID-6 hasn't been activated!"
+			" Error key ?\n");
+		ppc440spe_r6_enabled = 0;
+	}
+	return count;
+}
+static DRIVER_ATTR_RW(enable);
+
+static ssize_t poly_show(struct device_driver *dev, char *buf)
+{
+	ssize_t size = 0;
+	u32 reg;
+
+#ifdef CONFIG_440SP
+	/* 440SP has fixed polynomial */
+	reg = 0x4d;
+#else
+	reg = dcr_read(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL);
+	reg >>= MQ0_CFBHL_POLY;
+	reg &= 0xFF;
+#endif
+
+	size = snprintf(buf, PAGE_SIZE, "PPC440SP(e) RAID-6 driver "
+			"uses 0x1%02x polynomial.\n", reg);
+	return size;
+}
+
+static ssize_t poly_store(struct device_driver *dev, const char *buf,
+			  size_t count)
+{
+	unsigned long reg, val;
+
+#ifdef CONFIG_440SP
+	/* 440SP uses default 0x14D polynomial only */
+	return -EINVAL;
+#endif
+
+	if (!count || count > 6)
+		return -EINVAL;
+
+	/* e.g., 0x14D or 0x11D */
+	sscanf(buf, "%lx", &val);
+
+	if (val & ~0x1FF)
+		return -EINVAL;
+
+	val &= 0xFF;
+	reg = dcr_read(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL);
+	reg &= ~(0xFF << MQ0_CFBHL_POLY);
+	reg |= val << MQ0_CFBHL_POLY;
+	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL, reg);
+
+	return count;
+}
+static DRIVER_ATTR_RW(poly);
+
+/*
+ * Common initialisation for RAID engines; allocate memory for
+ * DMAx FIFOs, perform configuration common for all DMA engines.
+ * Further DMA engine specific configuration is done at probe time.
+ */
+static int ppc440spe_configure_raid_devices(void)
+{
+	struct device_node *np;
+	struct resource i2o_res;
+	struct i2o_regs __iomem *i2o_reg;
+	dcr_host_t i2o_dcr_host;
+	unsigned int dcr_base, dcr_len;
+	int i, ret;
+
+	np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe");
+	if (!np) {
+		pr_err("%s: can't find I2O device tree node\n",
+			__func__);
+		return -ENODEV;
+	}
+
+	if (of_address_to_resource(np, 0, &i2o_res)) {
+		of_node_put(np);
+		return -EINVAL;
+	}
+
+	i2o_reg = of_iomap(np, 0);
+	if (!i2o_reg) {
+		pr_err("%s: failed to map I2O registers\n", __func__);
+		of_node_put(np);
+		return -EINVAL;
+	}
+
+	/* Get I2O DCRs base */
+	dcr_base = dcr_resource_start(np, 0);
+	dcr_len = dcr_resource_len(np, 0);
+	if (!dcr_base && !dcr_len) {
+		pr_err("%pOF: can't get DCR registers base/len!\n", np);
+		of_node_put(np);
+		iounmap(i2o_reg);
+		return -ENODEV;
+	}
+
+	i2o_dcr_host = dcr_map(np, dcr_base, dcr_len);
+	if (!DCR_MAP_OK(i2o_dcr_host)) {
+		pr_err("%pOF: failed to map DCRs!\n", np);
+		of_node_put(np);
+		iounmap(i2o_reg);
+		return -ENODEV;
+	}
+	of_node_put(np);
+
+	/* Provide memory regions for the DMA FIFOs: I2O, DMA0 and DMA1 share
+	 * the base address of the FIFO memory space.
+	 * We actually need twice as much physical memory as programmed in the
+	 * <fsiz> register (because there are two FIFOs per DMA: CP and CS).
+	 */
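+	/* With the default 0x1000-byte FIFO sizes this is a 16KB buffer:
+	 * (0x1000 + 0x1000) * 2, covering the CP and CS FIFOs of both
+	 * DMA0 and DMA1.
+	 */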
+	ppc440spe_dma_fifo_buf = kmalloc((DMA0_FIFO_SIZE + DMA1_FIFO_SIZE) << 1,
+					 GFP_KERNEL);
+	if (!ppc440spe_dma_fifo_buf) {
+		pr_err("%s: DMA FIFO buffer allocation failed.\n", __func__);
+		iounmap(i2o_reg);
+		dcr_unmap(i2o_dcr_host, dcr_len);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Configure h/w
+	 */
+	/* Reset I2O/DMA */
+	mtdcri(SDR0, DCRN_SDR0_SRST, DCRN_SDR0_SRST_I2ODMA);
+	mtdcri(SDR0, DCRN_SDR0_SRST, 0);
+
+	/* Setup the base address of mmaped registers */
+	dcr_write(i2o_dcr_host, DCRN_I2O0_IBAH, (u32)(i2o_res.start >> 32));
+	dcr_write(i2o_dcr_host, DCRN_I2O0_IBAL, (u32)(i2o_res.start) |
+						I2O_REG_ENABLE);
+	dcr_unmap(i2o_dcr_host, dcr_len);
+
+	/* Setup FIFO memory space base address */
+	iowrite32(0, &i2o_reg->ifbah);
+	iowrite32(((u32)__pa(ppc440spe_dma_fifo_buf)), &i2o_reg->ifbal);
+
+	/* set a zero FIFO size for I2O, so the whole
+	 * ppc440spe_dma_fifo_buf is used by the DMAs.
+	 * The DMAx FIFOs will be configured at probe time.
+	 */
+	iowrite32(0, &i2o_reg->ifsiz);
+	iounmap(i2o_reg);
+
+	/* To prepare the WXOR/RXOR functionality we need access to the
+	 * Memory Queue Module DCRs (it will eventually be enabled
+	 * via the /sys interface of the ppc440spe ADMA driver).
+	 */
+	np = of_find_compatible_node(NULL, NULL, "ibm,mq-440spe");
+	if (!np) {
+		pr_err("%s: can't find MQ device tree node\n",
+			__func__);
+		ret = -ENODEV;
+		goto out_free;
+	}
+
+	/* Get MQ DCRs base */
+	dcr_base = dcr_resource_start(np, 0);
+	dcr_len = dcr_resource_len(np, 0);
+	if (!dcr_base && !dcr_len) {
+		pr_err("%pOF: can't get DCR registers base/len!\n", np);
+		ret = -ENODEV;
+		goto out_mq;
+	}
+
+	ppc440spe_mq_dcr_host = dcr_map(np, dcr_base, dcr_len);
+	if (!DCR_MAP_OK(ppc440spe_mq_dcr_host)) {
+		pr_err("%pOF: failed to map DCRs!\n", np);
+		ret = -ENODEV;
+		goto out_mq;
+	}
+	of_node_put(np);
+	ppc440spe_mq_dcr_len = dcr_len;
+
+	/* Set HB alias */
+	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_BAUH, DMA_CUED_XOR_HB);
+
+	/* Set:
+	 * - LL transaction passing limit to 1;
+	 * - Memory controller cycle limit to 1;
+	 * - Galois Polynomial to 0x14d (default)
+	 */
+	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL,
+		  (1 << MQ0_CFBHL_TPLM) | (1 << MQ0_CFBHL_HBCL) |
+		  (PPC440SPE_DEFAULT_POLY << MQ0_CFBHL_POLY));
+
+	atomic_set(&ppc440spe_adma_err_irq_ref, 0);
+	for (i = 0; i < PPC440SPE_ADMA_ENGINES_NUM; i++)
+		ppc440spe_adma_devices[i] = -1;
+
+	return 0;
+
+out_mq:
+	of_node_put(np);
+out_free:
+	kfree(ppc440spe_dma_fifo_buf);
+	return ret;
+}
+
+static const struct of_device_id ppc440spe_adma_of_match[] = {
+	{ .compatible	= "ibm,dma-440spe", },
+	{ .compatible	= "amcc,xor-accelerator", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, ppc440spe_adma_of_match);
+
+static struct platform_driver ppc440spe_adma_driver = {
+	.probe = ppc440spe_adma_probe,
+	.remove = ppc440spe_adma_remove,
+	.driver = {
+		.name = "PPC440SP(E)-ADMA",
+		.of_match_table = ppc440spe_adma_of_match,
+	},
+};
+
+static __init int ppc440spe_adma_init(void)
+{
+	int ret;
+
+	ret = ppc440spe_configure_raid_devices();
+	if (ret)
+		return ret;
+
+	ret = platform_driver_register(&ppc440spe_adma_driver);
+	if (ret) {
+		pr_err("%s: failed to register platform driver\n",
+			__func__);
+		goto out_reg;
+	}
+
+	/* Initialization status */
+	ret = driver_create_file(&ppc440spe_adma_driver.driver,
+				 &driver_attr_devices);
+	if (ret)
+		goto out_dev;
+
+	/* RAID-6 h/w enable entry */
+	ret = driver_create_file(&ppc440spe_adma_driver.driver,
+				 &driver_attr_enable);
+	if (ret)
+		goto out_en;
+
+	/* GF polynomial to use */
+	ret = driver_create_file(&ppc440spe_adma_driver.driver,
+				 &driver_attr_poly);
+	if (!ret)
+		return ret;
+
+	driver_remove_file(&ppc440spe_adma_driver.driver,
+			   &driver_attr_enable);
+out_en:
+	driver_remove_file(&ppc440spe_adma_driver.driver,
+			   &driver_attr_devices);
+out_dev:
+	/* User will not be able to enable h/w RAID-6 */
+	pr_err("%s: failed to create RAID-6 driver interface\n",
+		__func__);
+	platform_driver_unregister(&ppc440spe_adma_driver);
+out_reg:
+	dcr_unmap(ppc440spe_mq_dcr_host, ppc440spe_mq_dcr_len);
+	kfree(ppc440spe_dma_fifo_buf);
+	return ret;
+}
+
+static void __exit ppc440spe_adma_exit(void)
+{
+	driver_remove_file(&ppc440spe_adma_driver.driver,
+			   &driver_attr_poly);
+	driver_remove_file(&ppc440spe_adma_driver.driver,
+			   &driver_attr_enable);
+	driver_remove_file(&ppc440spe_adma_driver.driver,
+			   &driver_attr_devices);
+	platform_driver_unregister(&ppc440spe_adma_driver);
+	dcr_unmap(ppc440spe_mq_dcr_host, ppc440spe_mq_dcr_len);
+	kfree(ppc440spe_dma_fifo_buf);
+}
+
+arch_initcall(ppc440spe_adma_init);
+module_exit(ppc440spe_adma_exit);
+
+MODULE_AUTHOR("Yuri Tikhonov <yur@emcraft.com>");
+MODULE_DESCRIPTION("PPC440SPE ADMA Engine Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/ppc4xx/adma.h b/drivers/dma/ppc4xx/adma.h
new file mode 100644
index 0000000..26b7a5e
--- /dev/null
+++ b/drivers/dma/ppc4xx/adma.h
@@ -0,0 +1,193 @@
+/*
+ * 2006-2009 (C) DENX Software Engineering.
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of
+ * any kind, whether express or implied.
+ */
+
+#ifndef _PPC440SPE_ADMA_H
+#define _PPC440SPE_ADMA_H
+
+#include <linux/types.h>
+#include "dma.h"
+#include "xor.h"
+
+#define to_ppc440spe_adma_chan(chan) \
+		container_of(chan, struct ppc440spe_adma_chan, common)
+#define to_ppc440spe_adma_device(dev) \
+		container_of(dev, struct ppc440spe_adma_device, common)
+#define tx_to_ppc440spe_adma_slot(tx) \
+		container_of(tx, struct ppc440spe_adma_desc_slot, async_tx)
+
+/* Default polynomial (the only one available on the 440SP) */
+#define PPC440SPE_DEFAULT_POLY	0x4d
+
+#define PPC440SPE_ADMA_ENGINES_NUM	(XOR_ENGINES_NUM + DMA_ENGINES_NUM)
+
+#define PPC440SPE_ADMA_WATCHDOG_MSEC	3
+#define PPC440SPE_ADMA_THRESHOLD	1
+
+#define PPC440SPE_DMA0_ID	0
+#define PPC440SPE_DMA1_ID	1
+#define PPC440SPE_XOR_ID	2
+
+#define PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT	0xFFFFFFUL
+/* this is the XOR_CBBCR width */
+#define PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT	(1 << 31)
+#define PPC440SPE_ADMA_ZERO_SUM_MAX_BYTE_COUNT PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT
+
+#define PPC440SPE_RXOR_RUN	0
+
+#define MQ0_CF2H_RXOR_BS_MASK	0x1FF
+
+#undef ADMA_LL_DEBUG
+
+/**
+ * struct ppc440spe_adma_device - internal representation of an ADMA device
+ * @dev: device
+ * @dma_reg: base for DMAx register access
+ * @xor_reg: base for XOR register access
+ * @i2o_reg: base for I2O register access
+ * @id: HW ADMA Device selector
+ * @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
+ * @dma_desc_pool: base of DMA descriptor region (DMA address)
+ * @pool_size: size of the pool
+ * @irq: DMAx or XOR irq number
+ * @err_irq: DMAx error irq number
+ * @common: embedded struct dma_device
+ */
+struct ppc440spe_adma_device {
+	struct device *dev;
+	struct dma_regs __iomem *dma_reg;
+	struct xor_regs __iomem *xor_reg;
+	struct i2o_regs __iomem *i2o_reg;
+	int id;
+	void *dma_desc_pool_virt;
+	dma_addr_t dma_desc_pool;
+	size_t pool_size;
+	int irq;
+	int err_irq;
+	struct dma_device common;
+};
+
+/**
+ * struct ppc440spe_adma_chan - internal representation of an ADMA channel
+ * @lock: serializes enqueue/dequeue operations to the slot pool
+ * @device: parent device
+ * @chain: device chain view of the descriptors
+ * @common: common dmaengine channel object members
+ * @all_slots: complete domain of slots usable by the channel
+ * @pending: allows batching of hardware operations
+ * @slots_allocated: records the actual size of the descriptor slot pool
+ * @hw_chain_inited: h/w descriptor chain initialization flag
+ * @irq_tasklet: bottom half where ppc440spe_adma_slot_cleanup runs
+ * @needs_unmap: if buffers should not be unmapped upon final processing
+ * @pdest_page: P destination page for async validate operation
+ * @qdest_page: Q destination page for async validate operation
+ * @pdest: P dma addr for async validate operation
+ * @qdest: Q dma addr for async validate operation
+ */
+struct ppc440spe_adma_chan {
+	spinlock_t lock;
+	struct ppc440spe_adma_device *device;
+	struct list_head chain;
+	struct dma_chan common;
+	struct list_head all_slots;
+	struct ppc440spe_adma_desc_slot *last_used;
+	int pending;
+	int slots_allocated;
+	int hw_chain_inited;
+	struct tasklet_struct irq_tasklet;
+	u8 needs_unmap;
+	struct page *pdest_page;
+	struct page *qdest_page;
+	dma_addr_t pdest;
+	dma_addr_t qdest;
+};
+
+struct ppc440spe_rxor {
+	u32 addrl;
+	u32 addrh;
+	int len;
+	int xor_count;
+	int addr_count;
+	int desc_count;
+	int state;
+};
+
+/**
+ * struct ppc440spe_adma_desc_slot - PPC440SPE-ADMA software descriptor
+ * @phys: hardware address of the hardware descriptor chain
+ * @group_head: first operation in a transaction
+ * @hw_next: pointer to the next descriptor in chain
+ * @async_tx: support for the async_tx api
+ * @slot_node: node on the ppc440spe_adma_chan.all_slots list
+ * @chain_node: node on the ppc440spe_adma_chan.chain list
+ * @group_list: list of slots that make up a multi-descriptor transaction
+ *              for example transfer lengths larger than the supported hw max
+ * @unmap_len: transaction bytecount
+ * @hw_desc: virtual address of the hardware descriptor chain
+ * @stride: currently chained or not
+ * @idx: pool index
+ * @slot_cnt: total slots used in a transaction (group of operations)
+ * @src_cnt: number of sources set in this descriptor
+ * @dst_cnt: number of destinations set in the descriptor
+ * @slots_per_op: number of slots per operation
+ * @descs_per_op: number of slots per P/Q operation; see the comment
+ *                for the ppc440spe_prep_dma_pqxor function
+ * @flags: desc state/type
+ * @reverse_flags: 1 if a corresponding rxor address uses reversed address order
+ * @xor_check_result: result of the zero-sum check
+ * @crc32_result: result of the CRC32 calculation
+ */
+struct ppc440spe_adma_desc_slot {
+	dma_addr_t phys;
+	struct ppc440spe_adma_desc_slot *group_head;
+	struct ppc440spe_adma_desc_slot *hw_next;
+	struct dma_async_tx_descriptor async_tx;
+	struct list_head slot_node;
+	struct list_head chain_node; /* node in channel ops list */
+	struct list_head group_list; /* list */
+	unsigned int unmap_len;
+	void *hw_desc;
+	u16 stride;
+	u16 idx;
+	u16 slot_cnt;
+	u8 src_cnt;
+	u8 dst_cnt;
+	u8 slots_per_op;
+	u8 descs_per_op;
+	unsigned long flags;
+	unsigned long reverse_flags[8];
+
+#define PPC440SPE_DESC_INT	0	/* generate interrupt on complete */
+#define PPC440SPE_ZERO_P	1	/* clear P destination */
+#define PPC440SPE_ZERO_Q	2	/* clear Q destination */
+#define PPC440SPE_COHERENT	3	/* src/dst are coherent */
+
+#define PPC440SPE_DESC_WXOR	4	/* WXORs are in chain */
+#define PPC440SPE_DESC_RXOR	5	/* RXOR is in chain */
+
+#define PPC440SPE_DESC_RXOR123	8	/* CDB for RXOR123 operation */
+#define PPC440SPE_DESC_RXOR124	9	/* CDB for RXOR124 operation */
+#define PPC440SPE_DESC_RXOR125	10	/* CDB for RXOR125 operation */
+#define PPC440SPE_DESC_RXOR12	11	/* CDB for RXOR12 operation */
+#define PPC440SPE_DESC_RXOR_REV	12	/* CDB has srcs in reversed order */
+
+#define PPC440SPE_DESC_PCHECK	13
+#define PPC440SPE_DESC_QCHECK	14
+
+#define PPC440SPE_DESC_RXOR_MSK	0x3
+
+	struct ppc440spe_rxor rxor_cursor;
+
+	union {
+		u32 *xor_check_result;
+		u32 *crc32_result;
+	};
+};
+
+#endif /* _PPC440SPE_ADMA_H */
diff --git a/drivers/dma/ppc4xx/dma.h b/drivers/dma/ppc4xx/dma.h
new file mode 100644
index 0000000..bcde2df
--- /dev/null
+++ b/drivers/dma/ppc4xx/dma.h
@@ -0,0 +1,223 @@
+/*
+ * 440SPe's DMA engines support header file
+ *
+ * 2006-2009 (C) DENX Software Engineering.
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. The program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef	_PPC440SPE_DMA_H
+#define _PPC440SPE_DMA_H
+
+#include <linux/types.h>
+
+/* Number of elements in the array of static CDBs */
+#define	MAX_STAT_DMA_CDBS	16
+/* Number of DMA engines available on the controller */
+#define DMA_ENGINES_NUM		2
+
+/* Maximum h/w supported number of destinations */
+#define DMA_DEST_MAX_NUM	2
+
+/* FIFO's params */
+#define DMA0_FIFO_SIZE		0x1000
+#define DMA1_FIFO_SIZE		0x1000
+#define DMA_FIFO_ENABLE		(1<<12)
+
+/* DMA Configuration Register. Data Transfer Engine PLB Priority: */
+#define DMA_CFG_DXEPR_LP	(0<<26)
+#define DMA_CFG_DXEPR_HP	(3<<26)
+#define DMA_CFG_DXEPR_HHP	(2<<26)
+#define DMA_CFG_DXEPR_HHHP	(1<<26)
+
+/* DMA Configuration Register. DMA FIFO Manager PLB Priority: */
+#define DMA_CFG_DFMPP_LP	(0<<23)
+#define DMA_CFG_DFMPP_HP	(3<<23)
+#define DMA_CFG_DFMPP_HHP	(2<<23)
+#define DMA_CFG_DFMPP_HHHP	(1<<23)
+
+/* DMA Configuration Register. Force 64-byte Alignment */
+#define DMA_CFG_FALGN		(1 << 19)
+
+/*UIC0:*/
+#define D0CPF_INT		(1<<12)
+#define D0CSF_INT		(1<<11)
+#define D1CPF_INT		(1<<10)
+#define D1CSF_INT		(1<<9)
+/*UIC1:*/
+#define DMAE_INT		(1<<9)
+
+/* I2O IOP Interrupt Mask Register */
+#define I2O_IOPIM_P0SNE		(1<<3)
+#define I2O_IOPIM_P0EM		(1<<5)
+#define I2O_IOPIM_P1SNE		(1<<6)
+#define I2O_IOPIM_P1EM		(1<<8)
+
+/* DMA CDB fields */
+#define DMA_CDB_MSK		(0xF)
+#define DMA_CDB_64B_ADDR	(1<<2)
+#define DMA_CDB_NO_INT		(1<<3)
+#define DMA_CDB_STATUS_MSK	(0x3)
+#define DMA_CDB_ADDR_MSK	(0xFFFFFFF0)
+
+/* DMA CDB OpCodes */
+#define DMA_CDB_OPC_NO_OP	(0x00)
+#define DMA_CDB_OPC_MV_SG1_SG2	(0x01)
+#define DMA_CDB_OPC_MULTICAST	(0x05)
+#define DMA_CDB_OPC_DFILL128	(0x24)
+#define DMA_CDB_OPC_DCHECK128	(0x23)
+
+#define DMA_CUED_XOR_BASE	(0x10000000)
+#define DMA_CUED_XOR_HB		(0x00000008)
+
+#ifdef CONFIG_440SP
+#define DMA_CUED_MULT1_OFF	0
+#define DMA_CUED_MULT2_OFF	8
+#define DMA_CUED_MULT3_OFF	16
+#define DMA_CUED_REGION_OFF	24
+#define DMA_CUED_XOR_WIN_MSK	(0xFC000000)
+#else
+#define DMA_CUED_MULT1_OFF	2
+#define DMA_CUED_MULT2_OFF	10
+#define DMA_CUED_MULT3_OFF	18
+#define DMA_CUED_REGION_OFF	26
+#define DMA_CUED_XOR_WIN_MSK	(0xF0000000)
+#endif
+
+#define DMA_CUED_REGION_MSK	0x3
+#define DMA_RXOR123		0x0
+#define DMA_RXOR124		0x1
+#define DMA_RXOR125		0x2
+#define DMA_RXOR12		0x3
+
+/* S/G addresses */
+#define DMA_CDB_SG_SRC		1
+#define DMA_CDB_SG_DST1		2
+#define DMA_CDB_SG_DST2		3
+
+/*
+ * DMAx engines Command Descriptor Block Type
+ */
+struct dma_cdb {
+	/*
+	 * Basic CDB structure (Table 20-17, p.499, 440spe_um_1_22.pdf)
+	 */
+	u8	pad0[2];        /* reserved */
+	u8	attr;		/* attributes */
+	u8	opc;		/* opcode */
+	u32	sg1u;		/* upper SG1 address */
+	u32	sg1l;		/* lower SG1 address */
+	u32	cnt;		/* SG count, 3B used */
+	u32	sg2u;		/* upper SG2 address */
+	u32	sg2l;		/* lower SG2 address */
+	u32	sg3u;		/* upper SG3 address */
+	u32	sg3l;		/* lower SG3 address */
+};
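+/*
+ * For example, a plain memory copy can be described by a single CDB with
+ * opc = DMA_CDB_OPC_MV_SG1_SG2, the source address in sg1u/sg1l, the
+ * destination address in sg2u/sg2l and the byte count in the low 3 bytes
+ * of <cnt>.
+ */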
+
+/*
+ * DMAx hardware registers (p.515 in 440SPe UM 1.22)
+ */
+struct dma_regs {
+	u32	cpfpl;
+	u32	cpfph;
+	u32	csfpl;
+	u32	csfph;
+	u32	dsts;
+	u32	cfg;
+	u8	pad0[0x8];
+	u16	cpfhp;
+	u16	cpftp;
+	u16	csfhp;
+	u16	csftp;
+	u8	pad1[0x8];
+	u32	acpl;
+	u32	acph;
+	u32	s1bpl;
+	u32	s1bph;
+	u32	s2bpl;
+	u32	s2bph;
+	u32	s3bpl;
+	u32	s3bph;
+	u8	pad2[0x10];
+	u32	earl;
+	u32	earh;
+	u8	pad3[0x8];
+	u32	seat;
+	u32	sead;
+	u32	op;
+	u32	fsiz;
+};
+
+/*
+ * I2O hardware registers (p.528 in 440SPe UM 1.22)
+ */
+struct i2o_regs {
+	u32	ists;
+	u32	iseat;
+	u32	isead;
+	u8	pad0[0x14];
+	u32	idbel;
+	u8	pad1[0xc];
+	u32	ihis;
+	u32	ihim;
+	u8	pad2[0x8];
+	u32	ihiq;
+	u32	ihoq;
+	u8	pad3[0x8];
+	u32	iopis;
+	u32	iopim;
+	u32	iopiq;
+	u8	iopoq;
+	u8	pad4[3];
+	u16	iiflh;
+	u16	iiflt;
+	u16	iiplh;
+	u16	iiplt;
+	u16	ioflh;
+	u16	ioflt;
+	u16	ioplh;
+	u16	ioplt;
+	u32	iidc;
+	u32	ictl;
+	u32	ifcpp;
+	u8	pad5[0x4];
+	u16	mfac0;
+	u16	mfac1;
+	u16	mfac2;
+	u16	mfac3;
+	u16	mfac4;
+	u16	mfac5;
+	u16	mfac6;
+	u16	mfac7;
+	u16	ifcfh;
+	u16	ifcht;
+	u8	pad6[0x4];
+	u32	iifmc;
+	u32	iodb;
+	u32	iodbc;
+	u32	ifbal;
+	u32	ifbah;
+	u32	ifsiz;
+	u32	ispd0;
+	u32	ispd1;
+	u32	ispd2;
+	u32	ispd3;
+	u32	ihipl;
+	u32	ihiph;
+	u32	ihopl;
+	u32	ihoph;
+	u32	iiipl;
+	u32	iiiph;
+	u32	iiopl;
+	u32	iioph;
+	u32	ifcpl;
+	u32	ifcph;
+	u8	pad7[0x8];
+	u32	iopt;
+};
+
+#endif /* _PPC440SPE_DMA_H */
diff --git a/drivers/dma/ppc4xx/xor.h b/drivers/dma/ppc4xx/xor.h
new file mode 100644
index 0000000..daed738
--- /dev/null
+++ b/drivers/dma/ppc4xx/xor.h
@@ -0,0 +1,110 @@
+/*
+ * 440SPe's XOR engines support header file
+ *
+ * 2006-2009 (C) DENX Software Engineering.
+ *
+ * Author: Yuri Tikhonov <yur@emcraft.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. The program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef _PPC440SPE_XOR_H
+#define _PPC440SPE_XOR_H
+
+#include <linux/types.h>
+
+/* Number of XOR engines available on the controller */
+#define XOR_ENGINES_NUM		1
+
+/* Number of operands supported in the h/w */
+#define XOR_MAX_OPS		16
+
+/*
+ * XOR Command Block Control Register bits
+ */
+#define XOR_CBCR_LNK_BIT        (1<<31) /* link present */
+#define XOR_CBCR_TGT_BIT        (1<<30) /* target present */
+#define XOR_CBCR_CBCE_BIT       (1<<29) /* command block complete enable */
+#define XOR_CBCR_RNZE_BIT       (1<<28) /* result not zero enable */
+#define XOR_CBCR_XNOR_BIT       (1<<15) /* XOR/XNOR */
+#define XOR_CDCR_OAC_MSK        (0x7F)  /* operand address count */
+
+/*
+ * XORCore Status Register bits
+ */
+#define XOR_SR_XCP_BIT		(1<<31)	/* core processing */
+#define XOR_SR_ICB_BIT		(1<<17)	/* invalid CB */
+#define XOR_SR_IC_BIT		(1<<16)	/* invalid command */
+#define XOR_SR_IPE_BIT		(1<<15)	/* internal parity error */
+#define XOR_SR_RNZ_BIT		(1<<2)	/* result not Zero */
+#define XOR_SR_CBC_BIT		(1<<1)	/* CB complete */
+#define XOR_SR_CBLC_BIT		(1<<0)	/* CB list complete */
+
+/*
+ * XORCore Control Set and Reset Register bits
+ */
+#define XOR_CRSR_XASR_BIT	(1<<31)	/* soft reset */
+#define XOR_CRSR_XAE_BIT	(1<<30)	/* enable */
+#define XOR_CRSR_RCBE_BIT	(1<<29)	/* refetch CB enable */
+#define XOR_CRSR_PAUS_BIT	(1<<28)	/* pause */
+#define XOR_CRSR_64BA_BIT	(1<<27) /* 64/32 CB format */
+#define XOR_CRSR_CLP_BIT	(1<<25)	/* continue list processing */
+
+/*
+ * XORCore Interrupt Enable Register
+ */
+#define XOR_IE_ICBIE_BIT	(1<<17)	/* Invalid Command Block IRQ Enable */
+#define XOR_IE_ICIE_BIT		(1<<16)	/* Invalid Command IRQ Enable */
+#define XOR_IE_RPTIE_BIT	(1<<14)	/* Read PLB Timeout Error IRQ Enable */
+#define XOR_IE_CBCIE_BIT	(1<<1)	/* CB complete interrupt enable */
+#define XOR_IE_CBLCI_BIT	(1<<0)	/* CB list complete interrupt enable */
+
+/*
+ * XOR Accelerator engine Command Block Type
+ */
+struct xor_cb {
+	/*
+	 * Basic 64-bit format XOR CB (Table 19-1, p.463, 440spe_um_1_22.pdf)
+	 */
+	u32	cbc;		/* control */
+	u32	cbbc;		/* byte count */
+	u32	cbs;		/* status */
+	u8	pad0[4];	/* reserved */
+	u32	cbtah;		/* target address high */
+	u32	cbtal;		/* target address low */
+	u32	cblah;		/* link address high */
+	u32	cblal;		/* link address low */
+	struct {
+		u32 h;
+		u32 l;
+	} __attribute__ ((packed)) ops[16];
+} __attribute__ ((packed));
+
+/*
+ * XOR hardware registers Table 19-3, UM 1.22
+ */
+struct xor_regs {
+	u32	op_ar[16][2];	/* operand address[0]-high,[1]-low registers */
+	u8	pad0[352];	/* reserved */
+	u32	cbcr;		/* CB control register */
+	u32	cbbcr;		/* CB byte count register */
+	u32	cbsr;		/* CB status register */
+	u8	pad1[4];	/* reserved */
+	u32	cbtahr;		/* operand target address high register */
+	u32	cbtalr;		/* operand target address low register */
+	u32	cblahr;		/* CB link address high register */
+	u32	cblalr;		/* CB link address low register */
+	u32	crsr;		/* control set register */
+	u32	crrr;		/* control reset register */
+	u32	ccbahr;		/* current CB address high register */
+	u32	ccbalr;		/* current CB address low register */
+	u32	plbr;		/* PLB configuration register */
+	u32	ier;		/* interrupt enable register */
+	u32	pecr;		/* parity error count register */
+	u32	sr;		/* status register */
+	u32	revidr;		/* revision ID register */
+};
+
+#endif /* _PPC440SPE_XOR_H */
diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c
new file mode 100644
index 0000000..b31c28b
--- /dev/null
+++ b/drivers/dma/pxa_dma.c
@@ -0,0 +1,1523 @@
+/*
+ * Copyright 2015 Robert Jarzmik <robert.jarzmik@free.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/device.h>
+#include <linux/platform_data/mmp_dma.h>
+#include <linux/dmapool.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/of.h>
+#include <linux/wait.h>
+#include <linux/dma/pxa-dma.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+#define DCSR(n)		(0x0000 + ((n) << 2))
+#define DALGN(n)	0x00a0
+#define DINT		0x00f0
+#define DDADR(n)	(0x0200 + ((n) << 4))
+#define DSADR(n)	(0x0204 + ((n) << 4))
+#define DTADR(n)	(0x0208 + ((n) << 4))
+#define DCMD(n)		(0x020c + ((n) << 4))
+
+#define PXA_DCSR_RUN		BIT(31)	/* Run Bit (read / write) */
+#define PXA_DCSR_NODESC		BIT(30)	/* No-Descriptor Fetch (read / write) */
+#define PXA_DCSR_STOPIRQEN	BIT(29)	/* Stop Interrupt Enable (R/W) */
+#define PXA_DCSR_REQPEND	BIT(8)	/* Request Pending (read-only) */
+#define PXA_DCSR_STOPSTATE	BIT(3)	/* Stop State (read-only) */
+#define PXA_DCSR_ENDINTR	BIT(2)	/* End Interrupt (read / write) */
+#define PXA_DCSR_STARTINTR	BIT(1)	/* Start Interrupt (read / write) */
+#define PXA_DCSR_BUSERR		BIT(0)	/* Bus Error Interrupt (read / write) */
+
+#define PXA_DCSR_EORIRQEN	BIT(28)	/* End of Receive IRQ Enable (R/W) */
+#define PXA_DCSR_EORJMPEN	BIT(27)	/* Jump to next descriptor on EOR */
+#define PXA_DCSR_EORSTOPEN	BIT(26)	/* STOP on an EOR */
+#define PXA_DCSR_SETCMPST	BIT(25)	/* Set Descriptor Compare Status */
+#define PXA_DCSR_CLRCMPST	BIT(24)	/* Clear Descriptor Compare Status */
+#define PXA_DCSR_CMPST		BIT(10)	/* The Descriptor Compare Status */
+#define PXA_DCSR_EORINTR	BIT(9)	/* The end of Receive */
+
+#define DRCMR_MAPVLD	BIT(7)	/* Map Valid (read / write) */
+#define DRCMR_CHLNUM	0x1f	/* mask for Channel Number (read / write) */
+
+#define DDADR_DESCADDR	0xfffffff0	/* Address of next descriptor (mask) */
+#define DDADR_STOP	BIT(0)	/* Stop (read / write) */
+
+#define PXA_DCMD_INCSRCADDR	BIT(31)	/* Source Address Increment Setting. */
+#define PXA_DCMD_INCTRGADDR	BIT(30)	/* Target Address Increment Setting. */
+#define PXA_DCMD_FLOWSRC	BIT(29)	/* Flow Control by the source. */
+#define PXA_DCMD_FLOWTRG	BIT(28)	/* Flow Control by the target. */
+#define PXA_DCMD_STARTIRQEN	BIT(22)	/* Start Interrupt Enable */
+#define PXA_DCMD_ENDIRQEN	BIT(21)	/* End Interrupt Enable */
+#define PXA_DCMD_ENDIAN		BIT(18)	/* Device Endian-ness. */
+#define PXA_DCMD_BURST8		(1 << 16)	/* 8 byte burst */
+#define PXA_DCMD_BURST16	(2 << 16)	/* 16 byte burst */
+#define PXA_DCMD_BURST32	(3 << 16)	/* 32 byte burst */
+#define PXA_DCMD_WIDTH1		(1 << 14)	/* 1 byte width */
+#define PXA_DCMD_WIDTH2		(2 << 14)	/* 2 byte width (HalfWord) */
+#define PXA_DCMD_WIDTH4		(3 << 14)	/* 4 byte width (Word) */
+#define PXA_DCMD_LENGTH		0x01fff		/* length mask (max = 8K - 1) */
+
+#define PDMA_ALIGNMENT		3
+#define PDMA_MAX_DESC_BYTES	(PXA_DCMD_LENGTH & ~((1 << PDMA_ALIGNMENT) - 1))
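+/* with the values above this evaluates to 0x1ff8, i.e. 8184 bytes per descriptor */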
+
+struct pxad_desc_hw {
+	u32 ddadr;	/* Points to the next descriptor + flags */
+	u32 dsadr;	/* DSADR value for the current transfer */
+	u32 dtadr;	/* DTADR value for the current transfer */
+	u32 dcmd;	/* DCMD value for the current transfer */
+} __aligned(16);
+
+struct pxad_desc_sw {
+	struct virt_dma_desc	vd;		/* Virtual descriptor */
+	int			nb_desc;	/* Number of hw. descriptors */
+	size_t			len;		/* Number of bytes xfered */
+	dma_addr_t		first;		/* First descriptor's addr */
+
+	/* At least one descriptor has a src/dst address that is not a multiple of 8 */
+	bool			misaligned;
+	bool			cyclic;
+	struct dma_pool		*desc_pool;	/* Channel's used allocator */
+
+	struct pxad_desc_hw	*hw_desc[];	/* DMA coherent descriptors */
+};
+
+struct pxad_phy {
+	int			idx;
+	void __iomem		*base;
+	struct pxad_chan	*vchan;
+};
+
+struct pxad_chan {
+	struct virt_dma_chan	vc;		/* Virtual channel */
+	u32			drcmr;		/* Requestor of the channel */
+	enum pxad_chan_prio	prio;		/* Required priority of phy */
+	/*
+	 * At least one desc_sw in the submitted or issued transfers on this
+	 * channel has an address for which addr % 8 != 0. This requires the
+	 * DALGN bit to be set on the phy.
+	 */
+	bool			misaligned;
+	struct dma_slave_config	cfg;		/* Runtime config */
+
+	/* protected by vc->lock */
+	struct pxad_phy		*phy;
+	struct dma_pool		*desc_pool;	/* Descriptors pool */
+	dma_cookie_t		bus_error;
+
+	wait_queue_head_t	wq_state;
+};
+
+struct pxad_device {
+	struct dma_device		slave;
+	int				nr_chans;
+	int				nr_requestors;
+	void __iomem			*base;
+	struct pxad_phy			*phys;
+	spinlock_t			phy_lock;	/* Phy association */
+#ifdef CONFIG_DEBUG_FS
+	struct dentry			*dbgfs_root;
+	struct dentry			*dbgfs_state;
+	struct dentry			**dbgfs_chan;
+#endif
+};
+
+#define tx_to_pxad_desc(tx)					\
+	container_of(tx, struct pxad_desc_sw, async_tx)
+#define to_pxad_chan(dchan)					\
+	container_of(dchan, struct pxad_chan, vc.chan)
+#define to_pxad_dev(dmadev)					\
+	container_of(dmadev, struct pxad_device, slave)
+#define to_pxad_sw_desc(_vd)				\
+	container_of((_vd), struct pxad_desc_sw, vd)
+
+#define _phy_readl_relaxed(phy, _reg)					\
+	readl_relaxed((phy)->base + _reg((phy)->idx))
+#define phy_readl_relaxed(phy, _reg)					\
+	({								\
+		u32 _v;							\
+		_v = readl_relaxed((phy)->base + _reg((phy)->idx));	\
+		dev_vdbg(&phy->vchan->vc.chan.dev->device,		\
+			 "%s(): readl(%s): 0x%08x\n", __func__, #_reg,	\
+			  _v);						\
+		_v;							\
+	})
+#define phy_writel(phy, val, _reg)					\
+	do {								\
+		writel((val), (phy)->base + _reg((phy)->idx));		\
+		dev_vdbg(&phy->vchan->vc.chan.dev->device,		\
+			 "%s(): writel(0x%08x, %s)\n",			\
+			 __func__, (u32)(val), #_reg);			\
+	} while (0)
+#define phy_writel_relaxed(phy, val, _reg)				\
+	do {								\
+		writel_relaxed((val), (phy)->base + _reg((phy)->idx));	\
+		dev_vdbg(&phy->vchan->vc.chan.dev->device,		\
+			 "%s(): writel_relaxed(0x%08x, %s)\n",		\
+			 __func__, (u32)(val), #_reg);			\
+	} while (0)
+
+static unsigned int pxad_drcmr(unsigned int line)
+{
+	if (line < 64)
+		return 0x100 + line * 4;
+	return 0x1000 + line * 4;
+}
+
+bool pxad_filter_fn(struct dma_chan *chan, void *param);
+
+/*
+ * Debug fs
+ */
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/seq_file.h>
+
+static int dbg_show_requester_chan(struct seq_file *s, void *p)
+{
+	struct pxad_phy *phy = s->private;
+	int i;
+	u32 drcmr;
+
+	seq_printf(s, "DMA channel %d requester :\n", phy->idx);
+	for (i = 0; i < 70; i++) {
+		drcmr = readl_relaxed(phy->base + pxad_drcmr(i));
+		if ((drcmr & DRCMR_CHLNUM) == phy->idx)
+			seq_printf(s, "\tRequester %d (MAPVLD=%d)\n", i,
+				   !!(drcmr & DRCMR_MAPVLD));
+	}
+	return 0;
+}
+
+static inline int dbg_burst_from_dcmd(u32 dcmd)
+{
+	int burst = (dcmd >> 16) & 0x3;
+
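+	/* burst field: 0 = no burst, 1 = 8 bytes, 2 = 16 bytes, 3 = 32 bytes */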
+	return burst ? 4 << burst : 0;
+}
+
+static int is_phys_valid(unsigned long addr)
+{
+	return pfn_valid(__phys_to_pfn(addr));
+}
+
+#define PXA_DCSR_STR(flag) (dcsr & PXA_DCSR_##flag ? #flag" " : "")
+#define PXA_DCMD_STR(flag) (dcmd & PXA_DCMD_##flag ? #flag" " : "")
+
+static int dbg_show_descriptors(struct seq_file *s, void *p)
+{
+	struct pxad_phy *phy = s->private;
+	int i, max_show = 20, burst, width;
+	u32 dcmd;
+	unsigned long phys_desc, ddadr;
+	struct pxad_desc_hw *desc;
+
+	phys_desc = ddadr = _phy_readl_relaxed(phy, DDADR);
+
+	seq_printf(s, "DMA channel %d descriptors :\n", phy->idx);
+	seq_printf(s, "[%03d] First descriptor unknown\n", 0);
+	for (i = 1; i < max_show && is_phys_valid(phys_desc); i++) {
+		desc = phys_to_virt(phys_desc);
+		dcmd = desc->dcmd;
+		burst = dbg_burst_from_dcmd(dcmd);
+		width = (1 << ((dcmd >> 14) & 0x3)) >> 1;
+
+		seq_printf(s, "[%03d] Desc at %08lx(virt %p)\n",
+			   i, phys_desc, desc);
+		seq_printf(s, "\tDDADR = %08x\n", desc->ddadr);
+		seq_printf(s, "\tDSADR = %08x\n", desc->dsadr);
+		seq_printf(s, "\tDTADR = %08x\n", desc->dtadr);
+		seq_printf(s, "\tDCMD  = %08x (%s%s%s%s%s%s%sburst=%d width=%d len=%d)\n",
+			   dcmd,
+			   PXA_DCMD_STR(INCSRCADDR), PXA_DCMD_STR(INCTRGADDR),
+			   PXA_DCMD_STR(FLOWSRC), PXA_DCMD_STR(FLOWTRG),
+			   PXA_DCMD_STR(STARTIRQEN), PXA_DCMD_STR(ENDIRQEN),
+			   PXA_DCMD_STR(ENDIAN), burst, width,
+			   dcmd & PXA_DCMD_LENGTH);
+		phys_desc = desc->ddadr;
+	}
+	if (i == max_show)
+		seq_printf(s, "[%03d] Desc at %08lx ... max display reached\n",
+			   i, phys_desc);
+	else
+		seq_printf(s, "[%03d] Desc at %08lx is %s\n",
+			   i, phys_desc, phys_desc == DDADR_STOP ?
+			   "DDADR_STOP" : "invalid");
+
+	return 0;
+}
+
+static int dbg_show_chan_state(struct seq_file *s, void *p)
+{
+	struct pxad_phy *phy = s->private;
+	u32 dcsr, dcmd;
+	int burst, width;
+	static const char * const str_prio[] = {
+		"high", "normal", "low", "invalid"
+	};
+
+	dcsr = _phy_readl_relaxed(phy, DCSR);
+	dcmd = _phy_readl_relaxed(phy, DCMD);
+	burst = dbg_burst_from_dcmd(dcmd);
+	width = (1 << ((dcmd >> 14) & 0x3)) >> 1;
+
+	seq_printf(s, "DMA channel %d\n", phy->idx);
+	seq_printf(s, "\tPriority : %s\n",
+			  str_prio[(phy->idx & 0xf) / 4]);
+	seq_printf(s, "\tUnaligned transfer bit: %s\n",
+			  _phy_readl_relaxed(phy, DALGN) & BIT(phy->idx) ?
+			  "yes" : "no");
+	seq_printf(s, "\tDCSR  = %08x (%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
+		   dcsr, PXA_DCSR_STR(RUN), PXA_DCSR_STR(NODESC),
+		   PXA_DCSR_STR(STOPIRQEN), PXA_DCSR_STR(EORIRQEN),
+		   PXA_DCSR_STR(EORJMPEN), PXA_DCSR_STR(EORSTOPEN),
+		   PXA_DCSR_STR(SETCMPST), PXA_DCSR_STR(CLRCMPST),
+		   PXA_DCSR_STR(CMPST), PXA_DCSR_STR(EORINTR),
+		   PXA_DCSR_STR(REQPEND), PXA_DCSR_STR(STOPSTATE),
+		   PXA_DCSR_STR(ENDINTR), PXA_DCSR_STR(STARTINTR),
+		   PXA_DCSR_STR(BUSERR));
+
+	seq_printf(s, "\tDCMD  = %08x (%s%s%s%s%s%s%sburst=%d width=%d len=%d)\n",
+		   dcmd,
+		   PXA_DCMD_STR(INCSRCADDR), PXA_DCMD_STR(INCTRGADDR),
+		   PXA_DCMD_STR(FLOWSRC), PXA_DCMD_STR(FLOWTRG),
+		   PXA_DCMD_STR(STARTIRQEN), PXA_DCMD_STR(ENDIRQEN),
+		   PXA_DCMD_STR(ENDIAN), burst, width, dcmd & PXA_DCMD_LENGTH);
+	seq_printf(s, "\tDSADR = %08x\n", _phy_readl_relaxed(phy, DSADR));
+	seq_printf(s, "\tDTADR = %08x\n", _phy_readl_relaxed(phy, DTADR));
+	seq_printf(s, "\tDDADR = %08x\n", _phy_readl_relaxed(phy, DDADR));
+
+	return 0;
+}
+
+static int dbg_show_state(struct seq_file *s, void *p)
+{
+	struct pxad_device *pdev = s->private;
+
+	/* basic device status */
+	seq_puts(s, "DMA engine status\n");
+	seq_printf(s, "\tChannel number: %d\n", pdev->nr_chans);
+
+	return 0;
+}
+
+#define DBGFS_FUNC_DECL(name) \
+static int dbg_open_##name(struct inode *inode, struct file *file) \
+{ \
+	return single_open(file, dbg_show_##name, inode->i_private); \
+} \
+static const struct file_operations dbg_fops_##name = { \
+	.open		= dbg_open_##name, \
+	.llseek		= seq_lseek, \
+	.read		= seq_read, \
+	.release	= single_release, \
+}
+
+DBGFS_FUNC_DECL(state);
+DBGFS_FUNC_DECL(chan_state);
+DBGFS_FUNC_DECL(descriptors);
+DBGFS_FUNC_DECL(requester_chan);
+
+static struct dentry *pxad_dbg_alloc_chan(struct pxad_device *pdev,
+					     int ch, struct dentry *chandir)
+{
+	char chan_name[11];
+	struct dentry *chan, *chan_state = NULL, *chan_descr = NULL;
+	struct dentry *chan_reqs = NULL;
+	void *dt;
+
+	scnprintf(chan_name, sizeof(chan_name), "%d", ch);
+	chan = debugfs_create_dir(chan_name, chandir);
+	dt = (void *)&pdev->phys[ch];
+
+	if (chan)
+		chan_state = debugfs_create_file("state", 0400, chan, dt,
+						 &dbg_fops_chan_state);
+	if (chan_state)
+		chan_descr = debugfs_create_file("descriptors", 0400, chan, dt,
+						 &dbg_fops_descriptors);
+	if (chan_descr)
+		chan_reqs = debugfs_create_file("requesters", 0400, chan, dt,
+						&dbg_fops_requester_chan);
+	if (!chan_reqs)
+		goto err_state;
+
+	return chan;
+
+err_state:
+	debugfs_remove_recursive(chan);
+	return NULL;
+}
+
+static void pxad_init_debugfs(struct pxad_device *pdev)
+{
+	int i;
+	struct dentry *chandir;
+
+	pdev->dbgfs_root = debugfs_create_dir(dev_name(pdev->slave.dev), NULL);
+	if (IS_ERR(pdev->dbgfs_root) || !pdev->dbgfs_root)
+		goto err_root;
+
+	pdev->dbgfs_state = debugfs_create_file("state", 0400, pdev->dbgfs_root,
+						pdev, &dbg_fops_state);
+	if (!pdev->dbgfs_state)
+		goto err_state;
+
+	pdev->dbgfs_chan =
+		kmalloc_array(pdev->nr_chans, sizeof(*pdev->dbgfs_chan),
+			      GFP_KERNEL);
+	if (!pdev->dbgfs_chan)
+		goto err_alloc;
+
+	chandir = debugfs_create_dir("channels", pdev->dbgfs_root);
+	if (!chandir)
+		goto err_chandir;
+
+	for (i = 0; i < pdev->nr_chans; i++) {
+		pdev->dbgfs_chan[i] = pxad_dbg_alloc_chan(pdev, i, chandir);
+		if (!pdev->dbgfs_chan[i])
+			goto err_chans;
+	}
+
+	return;
+err_chans:
+err_chandir:
+	kfree(pdev->dbgfs_chan);
+err_alloc:
+err_state:
+	debugfs_remove_recursive(pdev->dbgfs_root);
+err_root:
+	pr_err("pxad: debugfs is not available\n");
+}
+
+static void pxad_cleanup_debugfs(struct pxad_device *pdev)
+{
+	debugfs_remove_recursive(pdev->dbgfs_root);
+}
+#else
+static inline void pxad_init_debugfs(struct pxad_device *pdev) {}
+static inline void pxad_cleanup_debugfs(struct pxad_device *pdev) {}
+#endif
+
+static struct pxad_phy *lookup_phy(struct pxad_chan *pchan)
+{
+	int prio, i;
+	struct pxad_device *pdev = to_pxad_dev(pchan->vc.chan.device);
+	struct pxad_phy *phy, *found = NULL;
+	unsigned long flags;
+
+	/*
+	 * dma channel priorities
+	 * ch 0 - 3,  16 - 19  <--> (0)
+	 * ch 4 - 7,  20 - 23  <--> (1)
+	 * ch 8 - 11, 24 - 27  <--> (2)
+	 * ch 12 - 15, 28 - 31  <--> (3)
+	 */
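+	/* e.g. channel 21: (21 & 0xf) >> 2 = 1, so it serves priority 1 */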
+
+	spin_lock_irqsave(&pdev->phy_lock, flags);
+	for (prio = pchan->prio; prio >= PXAD_PRIO_HIGHEST; prio--) {
+		for (i = 0; i < pdev->nr_chans; i++) {
+			if (prio != (i & 0xf) >> 2)
+				continue;
+			phy = &pdev->phys[i];
+			if (!phy->vchan) {
+				phy->vchan = pchan;
+				found = phy;
+				goto out_unlock;
+			}
+		}
+	}
+
+out_unlock:
+	spin_unlock_irqrestore(&pdev->phy_lock, flags);
+	dev_dbg(&pchan->vc.chan.dev->device,
+		"%s(): phy=%p(%d)\n", __func__, found,
+		found ? found->idx : -1);
+
+	return found;
+}
+
+static void pxad_free_phy(struct pxad_chan *chan)
+{
+	struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device);
+	unsigned long flags;
+	u32 reg;
+
+	dev_dbg(&chan->vc.chan.dev->device,
+		"%s(): freeing\n", __func__);
+	if (!chan->phy)
+		return;
+
+	/* clear the channel mapping in DRCMR */
+	if (chan->drcmr <= pdev->nr_requestors) {
+		reg = pxad_drcmr(chan->drcmr);
+		writel_relaxed(0, chan->phy->base + reg);
+	}
+
+	spin_lock_irqsave(&pdev->phy_lock, flags);
+	chan->phy->vchan = NULL;
+	chan->phy = NULL;
+	spin_unlock_irqrestore(&pdev->phy_lock, flags);
+}
+
+static bool is_chan_running(struct pxad_chan *chan)
+{
+	u32 dcsr;
+	struct pxad_phy *phy = chan->phy;
+
+	if (!phy)
+		return false;
+	dcsr = phy_readl_relaxed(phy, DCSR);
+	return dcsr & PXA_DCSR_RUN;
+}
+
+static bool is_running_chan_misaligned(struct pxad_chan *chan)
+{
+	u32 dalgn;
+
+	BUG_ON(!chan->phy);
+	dalgn = phy_readl_relaxed(chan->phy, DALGN);
+	return dalgn & (BIT(chan->phy->idx));
+}
+
+static void phy_enable(struct pxad_phy *phy, bool misaligned)
+{
+	struct pxad_device *pdev;
+	u32 reg, dalgn;
+
+	if (!phy->vchan)
+		return;
+
+	dev_dbg(&phy->vchan->vc.chan.dev->device,
+		"%s(); phy=%p(%d) misaligned=%d\n", __func__,
+		phy, phy->idx, misaligned);
+
+	pdev = to_pxad_dev(phy->vchan->vc.chan.device);
+	if (phy->vchan->drcmr <= pdev->nr_requestors) {
+		reg = pxad_drcmr(phy->vchan->drcmr);
+		writel_relaxed(DRCMR_MAPVLD | phy->idx, phy->base + reg);
+	}
+
+	dalgn = phy_readl_relaxed(phy, DALGN);
+	if (misaligned)
+		dalgn |= BIT(phy->idx);
+	else
+		dalgn &= ~BIT(phy->idx);
+	phy_writel_relaxed(phy, dalgn, DALGN);
+
+	phy_writel(phy, PXA_DCSR_STOPIRQEN | PXA_DCSR_ENDINTR |
+		   PXA_DCSR_BUSERR | PXA_DCSR_RUN, DCSR);
+}
+
+static void phy_disable(struct pxad_phy *phy)
+{
+	u32 dcsr;
+
+	if (!phy)
+		return;
+
+	dcsr = phy_readl_relaxed(phy, DCSR);
+	dev_dbg(&phy->vchan->vc.chan.dev->device,
+		"%s(): phy=%p(%d)\n", __func__, phy, phy->idx);
+	phy_writel(phy, dcsr & ~PXA_DCSR_RUN & ~PXA_DCSR_STOPIRQEN, DCSR);
+}
+
+static void pxad_launch_chan(struct pxad_chan *chan,
+				 struct pxad_desc_sw *desc)
+{
+	dev_dbg(&chan->vc.chan.dev->device,
+		"%s(): desc=%p\n", __func__, desc);
+	if (!chan->phy) {
+		chan->phy = lookup_phy(chan);
+		if (!chan->phy) {
+			dev_dbg(&chan->vc.chan.dev->device,
+				"%s(): no free dma channel\n", __func__);
+			return;
+		}
+	}
+	chan->bus_error = 0;
+
+	/*
+	 * Program the descriptor's address into the DMA controller,
+	 * then start the DMA transaction
+	 */
+	phy_writel(chan->phy, desc->first, DDADR);
+	phy_enable(chan->phy, chan->misaligned);
+	wake_up(&chan->wq_state);
+}
+
+static void set_updater_desc(struct pxad_desc_sw *sw_desc,
+			     unsigned long flags)
+{
+	struct pxad_desc_hw *updater =
+		sw_desc->hw_desc[sw_desc->nb_desc - 1];
+	dma_addr_t dma = sw_desc->hw_desc[sw_desc->nb_desc - 2]->ddadr;
+
+	updater->ddadr = DDADR_STOP;
+	updater->dsadr = dma;
+	updater->dtadr = dma + 8;
+	updater->dcmd = PXA_DCMD_WIDTH4 | PXA_DCMD_BURST32 |
+		(PXA_DCMD_LENGTH & sizeof(u32));
+	if (flags & DMA_PREP_INTERRUPT)
+		updater->dcmd |= PXA_DCMD_ENDIRQEN;
+	if (sw_desc->cyclic)
+		sw_desc->hw_desc[sw_desc->nb_desc - 2]->ddadr = sw_desc->first;
+}
+
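+/*
+ * Completion detection relies on the trailing "updater" descriptor set up in
+ * set_updater_desc() above: it copies its own ddadr word (DDADR_STOP) over
+ * its dtadr field.  As long as dtadr still equals dsadr + 8 the hardware has
+ * not executed the updater yet, so the transfer is still in flight.
+ */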
+static bool is_desc_completed(struct virt_dma_desc *vd)
+{
+	struct pxad_desc_sw *sw_desc = to_pxad_sw_desc(vd);
+	struct pxad_desc_hw *updater =
+		sw_desc->hw_desc[sw_desc->nb_desc - 1];
+
+	return updater->dtadr != (updater->dsadr + 8);
+}
+
+static void pxad_desc_chain(struct virt_dma_desc *vd1,
+				struct virt_dma_desc *vd2)
+{
+	struct pxad_desc_sw *desc1 = to_pxad_sw_desc(vd1);
+	struct pxad_desc_sw *desc2 = to_pxad_sw_desc(vd2);
+	dma_addr_t dma_to_chain;
+
+	dma_to_chain = desc2->first;
+	desc1->hw_desc[desc1->nb_desc - 1]->ddadr = dma_to_chain;
+}
+
+static bool pxad_try_hotchain(struct virt_dma_chan *vc,
+				  struct virt_dma_desc *vd)
+{
+	struct virt_dma_desc *vd_last_issued = NULL;
+	struct pxad_chan *chan = to_pxad_chan(&vc->chan);
+
+	/*
+	 * Attempt to hot chain the tx if the phy is still running. This is
+	 * considered successful only if either the channel is still running
+	 * after the chaining, or if the chained transfer is completed after
+	 * having been hot chained.
+	 * A change of alignment forbids hot chaining.
+	 */
+	if (is_chan_running(chan)) {
+		BUG_ON(list_empty(&vc->desc_issued));
+
+		if (!is_running_chan_misaligned(chan) &&
+		    to_pxad_sw_desc(vd)->misaligned)
+			return false;
+
+		vd_last_issued = list_entry(vc->desc_issued.prev,
+					    struct virt_dma_desc, node);
+		pxad_desc_chain(vd_last_issued, vd);
+		if (is_chan_running(chan) || is_desc_completed(vd))
+			return true;
+	}
+
+	return false;
+}
+
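+/*
+ * Returns the DCSR bits to be handled, using PXA_DCSR_RUN as a sentinel:
+ * when this channel's DINT bit is not set the interrupt is not ours, and
+ * PXA_DCSR_RUN is returned so that the caller reports IRQ_NONE.
+ */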
+static unsigned int clear_chan_irq(struct pxad_phy *phy)
+{
+	u32 dcsr;
+	u32 dint = readl(phy->base + DINT);
+
+	if (!(dint & BIT(phy->idx)))
+		return PXA_DCSR_RUN;
+
+	/* clear irq */
+	dcsr = phy_readl_relaxed(phy, DCSR);
+	phy_writel(phy, dcsr, DCSR);
+	if ((dcsr & PXA_DCSR_BUSERR) && (phy->vchan))
+		dev_warn(&phy->vchan->vc.chan.dev->device,
+			 "%s(chan=%p): PXA_DCSR_BUSERR\n",
+			 __func__, phy->vchan);
+
+	return dcsr & ~PXA_DCSR_RUN;
+}
+
+static irqreturn_t pxad_chan_handler(int irq, void *dev_id)
+{
+	struct pxad_phy *phy = dev_id;
+	struct pxad_chan *chan = phy->vchan;
+	struct virt_dma_desc *vd, *tmp;
+	unsigned int dcsr;
+	unsigned long flags;
+	bool vd_completed;
+	dma_cookie_t last_started = 0;
+
+	BUG_ON(!chan);
+
+	dcsr = clear_chan_irq(phy);
+	if (dcsr & PXA_DCSR_RUN)
+		return IRQ_NONE;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+	list_for_each_entry_safe(vd, tmp, &chan->vc.desc_issued, node) {
+		vd_completed = is_desc_completed(vd);
+		dev_dbg(&chan->vc.chan.dev->device,
+			"%s(): checking txd %p[%x]: completed=%d dcsr=0x%x\n",
+			__func__, vd, vd->tx.cookie, vd_completed,
+			dcsr);
+		last_started = vd->tx.cookie;
+		if (to_pxad_sw_desc(vd)->cyclic) {
+			vchan_cyclic_callback(vd);
+			break;
+		}
+		if (vd_completed) {
+			list_del(&vd->node);
+			vchan_cookie_complete(vd);
+		} else {
+			break;
+		}
+	}
+
+	if (dcsr & PXA_DCSR_BUSERR) {
+		chan->bus_error = last_started;
+		phy_disable(phy);
+	}
+
+	if (!chan->bus_error && dcsr & PXA_DCSR_STOPSTATE) {
+		dev_dbg(&chan->vc.chan.dev->device,
+		"%s(): channel stopped, submitted_empty=%d issued_empty=%d",
+			__func__,
+			list_empty(&chan->vc.desc_submitted),
+			list_empty(&chan->vc.desc_issued));
+		phy_writel_relaxed(phy, dcsr & ~PXA_DCSR_STOPIRQEN, DCSR);
+
+		if (list_empty(&chan->vc.desc_issued)) {
+			chan->misaligned =
+				!list_empty(&chan->vc.desc_submitted);
+		} else {
+			vd = list_first_entry(&chan->vc.desc_issued,
+					      struct virt_dma_desc, node);
+			pxad_launch_chan(chan, to_pxad_sw_desc(vd));
+		}
+	}
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+	wake_up(&chan->wq_state);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t pxad_int_handler(int irq, void *dev_id)
+{
+	struct pxad_device *pdev = dev_id;
+	struct pxad_phy *phy;
+	u32 dint = readl(pdev->base + DINT);
+	int i, ret = IRQ_NONE;
+
+	while (dint) {
+		i = __ffs(dint);
+		dint &= (dint - 1);
+		phy = &pdev->phys[i];
+		if (pxad_chan_handler(irq, phy) == IRQ_HANDLED)
+			ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
+
+static int pxad_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct pxad_chan *chan = to_pxad_chan(dchan);
+	struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device);
+
+	if (chan->desc_pool)
+		return 1;
+
+	chan->desc_pool = dma_pool_create(dma_chan_name(dchan),
+					  pdev->slave.dev,
+					  sizeof(struct pxad_desc_hw),
+					  __alignof__(struct pxad_desc_hw),
+					  0);
+	if (!chan->desc_pool) {
+		dev_err(&chan->vc.chan.dev->device,
+			"%s(): unable to allocate descriptor pool\n",
+			__func__);
+		return -ENOMEM;
+	}
+
+	return 1;
+}
+
+static void pxad_free_chan_resources(struct dma_chan *dchan)
+{
+	struct pxad_chan *chan = to_pxad_chan(dchan);
+
+	vchan_free_chan_resources(&chan->vc);
+	dma_pool_destroy(chan->desc_pool);
+	chan->desc_pool = NULL;
+
+	chan->drcmr = U32_MAX;
+	chan->prio = PXAD_PRIO_LOWEST;
+}
+
+static void pxad_free_desc(struct virt_dma_desc *vd)
+{
+	int i;
+	dma_addr_t dma;
+	struct pxad_desc_sw *sw_desc = to_pxad_sw_desc(vd);
+
+	BUG_ON(sw_desc->nb_desc == 0);
+	for (i = sw_desc->nb_desc - 1; i >= 0; i--) {
+		if (i > 0)
+			dma = sw_desc->hw_desc[i - 1]->ddadr;
+		else
+			dma = sw_desc->first;
+		dma_pool_free(sw_desc->desc_pool,
+			      sw_desc->hw_desc[i], dma);
+	}
+	sw_desc->nb_desc = 0;
+	kfree(sw_desc);
+}
+
+static struct pxad_desc_sw *
+pxad_alloc_desc(struct pxad_chan *chan, unsigned int nb_hw_desc)
+{
+	struct pxad_desc_sw *sw_desc;
+	dma_addr_t dma;
+	int i;
+
+	sw_desc = kzalloc(sizeof(*sw_desc) +
+			  nb_hw_desc * sizeof(struct pxad_desc_hw *),
+			  GFP_NOWAIT);
+	if (!sw_desc)
+		return NULL;
+	sw_desc->desc_pool = chan->desc_pool;
+
+	for (i = 0; i < nb_hw_desc; i++) {
+		sw_desc->hw_desc[i] = dma_pool_alloc(sw_desc->desc_pool,
+						     GFP_NOWAIT, &dma);
+		if (!sw_desc->hw_desc[i]) {
+			dev_err(&chan->vc.chan.dev->device,
+				"%s(): Couldn't allocate the %dth hw_desc from dma_pool %p\n",
+				__func__, i, sw_desc->desc_pool);
+			goto err;
+		}
+
+		if (i == 0)
+			sw_desc->first = dma;
+		else
+			sw_desc->hw_desc[i - 1]->ddadr = dma;
+		sw_desc->nb_desc++;
+	}
+
+	return sw_desc;
+err:
+	pxad_free_desc(&sw_desc->vd);
+	return NULL;
+}
+
+static dma_cookie_t pxad_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct virt_dma_chan *vc = to_virt_chan(tx->chan);
+	struct pxad_chan *chan = to_pxad_chan(&vc->chan);
+	struct virt_dma_desc *vd_chained = NULL,
+		*vd = container_of(tx, struct virt_dma_desc, tx);
+	dma_cookie_t cookie;
+	unsigned long flags;
+
+	set_updater_desc(to_pxad_sw_desc(vd), tx->flags);
+
+	spin_lock_irqsave(&vc->lock, flags);
+	cookie = dma_cookie_assign(tx);
+
+	if (list_empty(&vc->desc_submitted) && pxad_try_hotchain(vc, vd)) {
+		list_move_tail(&vd->node, &vc->desc_issued);
+		dev_dbg(&chan->vc.chan.dev->device,
+			"%s(): txd %p[%x]: submitted (hot linked)\n",
+			__func__, vd, cookie);
+		goto out;
+	}
+
+	/*
+	 * Fallback to placing the tx in the submitted queue
+	 */
+	if (!list_empty(&vc->desc_submitted)) {
+		vd_chained = list_entry(vc->desc_submitted.prev,
+					struct virt_dma_desc, node);
+		/*
+		 * Only chain the descriptors if no new misalignment is
+		 * introduced. If a new misalignment is chained, let the channel
+		 * stop, and be relaunched in misalign mode from the irq
+		 * handler.
+		 */
+		if (chan->misaligned || !to_pxad_sw_desc(vd)->misaligned)
+			pxad_desc_chain(vd_chained, vd);
+		else
+			vd_chained = NULL;
+	}
+	dev_dbg(&chan->vc.chan.dev->device,
+		"%s(): txd %p[%x]: submitted (%s linked)\n",
+		__func__, vd, cookie, vd_chained ? "cold" : "not");
+	list_move_tail(&vd->node, &vc->desc_submitted);
+	chan->misaligned |= to_pxad_sw_desc(vd)->misaligned;
+
+out:
+	spin_unlock_irqrestore(&vc->lock, flags);
+	return cookie;
+}
+
+static void pxad_issue_pending(struct dma_chan *dchan)
+{
+	struct pxad_chan *chan = to_pxad_chan(dchan);
+	struct virt_dma_desc *vd_first;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+	if (list_empty(&chan->vc.desc_submitted))
+		goto out;
+
+	vd_first = list_first_entry(&chan->vc.desc_submitted,
+				    struct virt_dma_desc, node);
+	dev_dbg(&chan->vc.chan.dev->device,
+		"%s(): txd %p[%x]", __func__, vd_first, vd_first->tx.cookie);
+
+	vchan_issue_pending(&chan->vc);
+	if (!pxad_try_hotchain(&chan->vc, vd_first))
+		pxad_launch_chan(chan, to_pxad_sw_desc(vd_first));
+out:
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+static inline struct dma_async_tx_descriptor *
+pxad_tx_prep(struct virt_dma_chan *vc, struct virt_dma_desc *vd,
+		 unsigned long tx_flags)
+{
+	struct dma_async_tx_descriptor *tx;
+	struct pxad_chan *chan = container_of(vc, struct pxad_chan, vc);
+
+	INIT_LIST_HEAD(&vd->node);
+	tx = vchan_tx_prep(vc, vd, tx_flags);
+	tx->tx_submit = pxad_tx_submit;
+	dev_dbg(&chan->vc.chan.dev->device,
+		"%s(): vc=%p txd=%p[%x] flags=0x%lx\n", __func__,
+		vc, vd, vd->tx.cookie,
+		tx_flags);
+
+	return tx;
+}
+
+static void pxad_get_config(struct pxad_chan *chan,
+			    enum dma_transfer_direction dir,
+			    u32 *dcmd, u32 *dev_src, u32 *dev_dst)
+{
+	u32 maxburst = 0, dev_addr = 0;
+	enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED;
+	struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device);
+
+	*dcmd = 0;
+	if (dir == DMA_DEV_TO_MEM) {
+		maxburst = chan->cfg.src_maxburst;
+		width = chan->cfg.src_addr_width;
+		dev_addr = chan->cfg.src_addr;
+		*dev_src = dev_addr;
+		*dcmd |= PXA_DCMD_INCTRGADDR;
+		if (chan->drcmr <= pdev->nr_requestors)
+			*dcmd |= PXA_DCMD_FLOWSRC;
+	}
+	if (dir == DMA_MEM_TO_DEV) {
+		maxburst = chan->cfg.dst_maxburst;
+		width = chan->cfg.dst_addr_width;
+		dev_addr = chan->cfg.dst_addr;
+		*dev_dst = dev_addr;
+		*dcmd |= PXA_DCMD_INCSRCADDR;
+		if (chan->drcmr <= pdev->nr_requestors)
+			*dcmd |= PXA_DCMD_FLOWTRG;
+	}
+	if (dir == DMA_MEM_TO_MEM)
+		*dcmd |= PXA_DCMD_BURST32 | PXA_DCMD_INCTRGADDR |
+			PXA_DCMD_INCSRCADDR;
+
+	dev_dbg(&chan->vc.chan.dev->device,
+		"%s(): dev_addr=0x%x maxburst=%d width=%d  dir=%d\n",
+		__func__, dev_addr, maxburst, width, dir);
+
+	if (width == DMA_SLAVE_BUSWIDTH_1_BYTE)
+		*dcmd |= PXA_DCMD_WIDTH1;
+	else if (width == DMA_SLAVE_BUSWIDTH_2_BYTES)
+		*dcmd |= PXA_DCMD_WIDTH2;
+	else if (width == DMA_SLAVE_BUSWIDTH_4_BYTES)
+		*dcmd |= PXA_DCMD_WIDTH4;
+
+	if (maxburst == 8)
+		*dcmd |= PXA_DCMD_BURST8;
+	else if (maxburst == 16)
+		*dcmd |= PXA_DCMD_BURST16;
+	else if (maxburst == 32)
+		*dcmd |= PXA_DCMD_BURST32;
+
+	/* FIXME: drivers should be ported over to use the filter
+	 * function. Once that's done, the following two lines can
+	 * be removed.
+	 */
+	if (chan->cfg.slave_id)
+		chan->drcmr = chan->cfg.slave_id;
+}
+
+static struct dma_async_tx_descriptor *
+pxad_prep_memcpy(struct dma_chan *dchan,
+		 dma_addr_t dma_dst, dma_addr_t dma_src,
+		 size_t len, unsigned long flags)
+{
+	struct pxad_chan *chan = to_pxad_chan(dchan);
+	struct pxad_desc_sw *sw_desc;
+	struct pxad_desc_hw *hw_desc;
+	u32 dcmd;
+	unsigned int i, nb_desc = 0;
+	size_t copy;
+
+	if (!dchan || !len)
+		return NULL;
+
+	dev_dbg(&chan->vc.chan.dev->device,
+		"%s(): dma_dst=0x%lx dma_src=0x%lx len=%zu flags=%lx\n",
+		__func__, (unsigned long)dma_dst, (unsigned long)dma_src,
+		len, flags);
+	pxad_get_config(chan, DMA_MEM_TO_MEM, &dcmd, NULL, NULL);
+
+	nb_desc = DIV_ROUND_UP(len, PDMA_MAX_DESC_BYTES);
+	sw_desc = pxad_alloc_desc(chan, nb_desc + 1);
+	if (!sw_desc)
+		return NULL;
+	sw_desc->len = len;
+
+	if (!IS_ALIGNED(dma_src, 1 << PDMA_ALIGNMENT) ||
+	    !IS_ALIGNED(dma_dst, 1 << PDMA_ALIGNMENT))
+		sw_desc->misaligned = true;
+
+	i = 0;
+	do {
+		hw_desc = sw_desc->hw_desc[i++];
+		copy = min_t(size_t, len, PDMA_MAX_DESC_BYTES);
+		hw_desc->dcmd = dcmd | (PXA_DCMD_LENGTH & copy);
+		hw_desc->dsadr = dma_src;
+		hw_desc->dtadr = dma_dst;
+		len -= copy;
+		dma_src += copy;
+		dma_dst += copy;
+	} while (len);
+	set_updater_desc(sw_desc, flags);
+
+	return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+pxad_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
+		   unsigned int sg_len, enum dma_transfer_direction dir,
+		   unsigned long flags, void *context)
+{
+	struct pxad_chan *chan = to_pxad_chan(dchan);
+	struct pxad_desc_sw *sw_desc;
+	size_t len, avail;
+	struct scatterlist *sg;
+	dma_addr_t dma;
+	u32 dcmd, dsadr = 0, dtadr = 0;
+	unsigned int nb_desc = 0, i, j = 0;
+
+	if ((sgl == NULL) || (sg_len == 0))
+		return NULL;
+
+	pxad_get_config(chan, dir, &dcmd, &dsadr, &dtadr);
+	dev_dbg(&chan->vc.chan.dev->device,
+		"%s(): dir=%d flags=%lx\n", __func__, dir, flags);
+
+	for_each_sg(sgl, sg, sg_len, i)
+		nb_desc += DIV_ROUND_UP(sg_dma_len(sg), PDMA_MAX_DESC_BYTES);
+	sw_desc = pxad_alloc_desc(chan, nb_desc + 1);
+	if (!sw_desc)
+		return NULL;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		dma = sg_dma_address(sg);
+		avail = sg_dma_len(sg);
+		sw_desc->len += avail;
+
+		do {
+			len = min_t(size_t, avail, PDMA_MAX_DESC_BYTES);
+			if (dma & 0x7)
+				sw_desc->misaligned = true;
+
+			sw_desc->hw_desc[j]->dcmd =
+				dcmd | (PXA_DCMD_LENGTH & len);
+			sw_desc->hw_desc[j]->dsadr = dsadr ? dsadr : dma;
+			sw_desc->hw_desc[j++]->dtadr = dtadr ? dtadr : dma;
+
+			dma += len;
+			avail -= len;
+		} while (avail);
+	}
+	set_updater_desc(sw_desc, flags);
+
+	return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+pxad_prep_dma_cyclic(struct dma_chan *dchan,
+		     dma_addr_t buf_addr, size_t len, size_t period_len,
+		     enum dma_transfer_direction dir, unsigned long flags)
+{
+	struct pxad_chan *chan = to_pxad_chan(dchan);
+	struct pxad_desc_sw *sw_desc;
+	struct pxad_desc_hw **phw_desc;
+	dma_addr_t dma;
+	u32 dcmd, dsadr = 0, dtadr = 0;
+	unsigned int nb_desc = 0;
+
+	if (!dchan || !len || !period_len)
+		return NULL;
+	if ((dir != DMA_DEV_TO_MEM) && (dir != DMA_MEM_TO_DEV)) {
+		dev_err(&chan->vc.chan.dev->device,
+			"Unsupported direction for cyclic DMA\n");
+		return NULL;
+	}
+	/* the buffer length must be a multiple of period_len */
+	if (len % period_len != 0 || period_len > PDMA_MAX_DESC_BYTES ||
+	    !IS_ALIGNED(period_len, 1 << PDMA_ALIGNMENT))
+		return NULL;
+
+	pxad_get_config(chan, dir, &dcmd, &dsadr, &dtadr);
+	dcmd |= PXA_DCMD_ENDIRQEN | (PXA_DCMD_LENGTH & period_len);
+	dev_dbg(&chan->vc.chan.dev->device,
+		"%s(): buf_addr=0x%lx len=%zu period=%zu dir=%d flags=%lx\n",
+		__func__, (unsigned long)buf_addr, len, period_len, dir, flags);
+
+	nb_desc = DIV_ROUND_UP(period_len, PDMA_MAX_DESC_BYTES);
+	nb_desc *= DIV_ROUND_UP(len, period_len);
+	sw_desc = pxad_alloc_desc(chan, nb_desc + 1);
+	if (!sw_desc)
+		return NULL;
+	sw_desc->cyclic = true;
+	sw_desc->len = len;
+
+	phw_desc = sw_desc->hw_desc;
+	dma = buf_addr;
+	do {
+		phw_desc[0]->dsadr = dsadr ? dsadr : dma;
+		phw_desc[0]->dtadr = dtadr ? dtadr : dma;
+		phw_desc[0]->dcmd = dcmd;
+		phw_desc++;
+		dma += period_len;
+		len -= period_len;
+	} while (len);
+	set_updater_desc(sw_desc, flags);
+
+	return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags);
+}
+
+static int pxad_config(struct dma_chan *dchan,
+		       struct dma_slave_config *cfg)
+{
+	struct pxad_chan *chan = to_pxad_chan(dchan);
+
+	if (!dchan)
+		return -EINVAL;
+
+	chan->cfg = *cfg;
+	return 0;
+}
+
+static int pxad_terminate_all(struct dma_chan *dchan)
+{
+	struct pxad_chan *chan = to_pxad_chan(dchan);
+	struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device);
+	struct virt_dma_desc *vd = NULL;
+	unsigned long flags;
+	struct pxad_phy *phy;
+	LIST_HEAD(head);
+
+	dev_dbg(&chan->vc.chan.dev->device,
+		"%s(): vchan %p: terminate all\n", __func__, &chan->vc);
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+	vchan_get_all_descriptors(&chan->vc, &head);
+
+	list_for_each_entry(vd, &head, node) {
+		dev_dbg(&chan->vc.chan.dev->device,
+			"%s(): cancelling txd %p[%x] (completed=%d)", __func__,
+			vd, vd->tx.cookie, is_desc_completed(vd));
+	}
+
+	phy = chan->phy;
+	if (phy) {
+		phy_disable(chan->phy);
+		pxad_free_phy(chan);
+		chan->phy = NULL;
+		spin_lock(&pdev->phy_lock);
+		phy->vchan = NULL;
+		spin_unlock(&pdev->phy_lock);
+	}
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+	vchan_dma_desc_free_list(&chan->vc, &head);
+
+	return 0;
+}
+
+static unsigned int pxad_residue(struct pxad_chan *chan,
+				 dma_cookie_t cookie)
+{
+	struct virt_dma_desc *vd = NULL;
+	struct pxad_desc_sw *sw_desc = NULL;
+	struct pxad_desc_hw *hw_desc = NULL;
+	u32 curr, start, len, end, residue = 0;
+	unsigned long flags;
+	bool passed = false;
+	int i;
+
+	/*
+	 * If the channel does not have a phy pointer anymore, it has already
+	 * been completed. Therefore, its residue is 0.
+	 */
+	if (!chan->phy)
+		return 0;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+
+	vd = vchan_find_desc(&chan->vc, cookie);
+	if (!vd)
+		goto out;
+
+	sw_desc = to_pxad_sw_desc(vd);
+	if (sw_desc->hw_desc[0]->dcmd & PXA_DCMD_INCSRCADDR)
+		curr = phy_readl_relaxed(chan->phy, DSADR);
+	else
+		curr = phy_readl_relaxed(chan->phy, DTADR);
+
+	/*
+	 * curr must actually be read before checking descriptor
+	 * completion, so that a curr value lying inside the status
+	 * updater descriptor guarantees the test below returns true,
+	 * and the barrier prevents reordering of the curr load and
+	 * the test.
+	 */
+	rmb();
+	if (is_desc_completed(vd))
+		goto out;
+
+	for (i = 0; i < sw_desc->nb_desc - 1; i++) {
+		hw_desc = sw_desc->hw_desc[i];
+		if (sw_desc->hw_desc[0]->dcmd & PXA_DCMD_INCSRCADDR)
+			start = hw_desc->dsadr;
+		else
+			start = hw_desc->dtadr;
+		len = hw_desc->dcmd & PXA_DCMD_LENGTH;
+		end = start + len;
+
+		/*
+		 * 'passed' is latched once we find the descriptor whose
+		 * boundaries contain the curr pointer. All descriptors
+		 * that occur in the list _after_ that partially handled
+		 * descriptor are still to be processed and are hence
+		 * added to the residual bytes counter.
+		 */
+
+		if (passed) {
+			residue += len;
+		} else if (curr >= start && curr <= end) {
+			residue += end - curr;
+			passed = true;
+		}
+	}
+	if (!passed)
+		residue = sw_desc->len;
+
+out:
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+	dev_dbg(&chan->vc.chan.dev->device,
+		"%s(): txd %p[%x] sw_desc=%p: %d\n",
+		__func__, vd, cookie, sw_desc, residue);
+	return residue;
+}
+
+static enum dma_status pxad_tx_status(struct dma_chan *dchan,
+				      dma_cookie_t cookie,
+				      struct dma_tx_state *txstate)
+{
+	struct pxad_chan *chan = to_pxad_chan(dchan);
+	enum dma_status ret;
+
+	if (cookie == chan->bus_error)
+		return DMA_ERROR;
+
+	ret = dma_cookie_status(dchan, cookie, txstate);
+	if (likely(txstate && (ret != DMA_ERROR)))
+		dma_set_residue(txstate, pxad_residue(chan, cookie));
+
+	return ret;
+}
+
+static void pxad_synchronize(struct dma_chan *dchan)
+{
+	struct pxad_chan *chan = to_pxad_chan(dchan);
+
+	wait_event(chan->wq_state, !is_chan_running(chan));
+	vchan_synchronize(&chan->vc);
+}
+
+static void pxad_free_channels(struct dma_device *dmadev)
+{
+	struct pxad_chan *c, *cn;
+
+	list_for_each_entry_safe(c, cn, &dmadev->channels,
+				 vc.chan.device_node) {
+		list_del(&c->vc.chan.device_node);
+		tasklet_kill(&c->vc.task);
+	}
+}
+
+static int pxad_remove(struct platform_device *op)
+{
+	struct pxad_device *pdev = platform_get_drvdata(op);
+
+	pxad_cleanup_debugfs(pdev);
+	pxad_free_channels(&pdev->slave);
+	dma_async_device_unregister(&pdev->slave);
+	return 0;
+}
+
+static int pxad_init_phys(struct platform_device *op,
+			  struct pxad_device *pdev,
+			  unsigned int nb_phy_chans)
+{
+	int irq0, irq, nr_irq = 0, i, ret = 0;
+	struct pxad_phy *phy;
+
+	irq0 = platform_get_irq(op, 0);
+	if (irq0 < 0)
+		return irq0;
+
+	pdev->phys = devm_kcalloc(&op->dev, nb_phy_chans,
+				  sizeof(pdev->phys[0]), GFP_KERNEL);
+	if (!pdev->phys)
+		return -ENOMEM;
+
+	for (i = 0; i < nb_phy_chans; i++)
+		if (platform_get_irq(op, i) > 0)
+			nr_irq++;
+
+	for (i = 0; i < nb_phy_chans; i++) {
+		phy = &pdev->phys[i];
+		phy->base = pdev->base;
+		phy->idx = i;
+		irq = platform_get_irq(op, i);
+		if ((nr_irq > 1) && (irq > 0))
+			ret = devm_request_irq(&op->dev, irq,
+					       pxad_chan_handler,
+					       IRQF_SHARED, "pxa-dma", phy);
+		if ((nr_irq == 1) && (i == 0))
+			ret = devm_request_irq(&op->dev, irq0,
+					       pxad_int_handler,
+					       IRQF_SHARED, "pxa-dma", pdev);
+		if (ret) {
+			dev_err(pdev->slave.dev,
+				"%s(): can't request irq %d:%d\n", __func__,
+				irq, ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static const struct of_device_id pxad_dt_ids[] = {
+	{ .compatible = "marvell,pdma-1.0", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, pxad_dt_ids);
+
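+/*
+ * Illustrative device tree usage (binding "marvell,pdma-1.0"); the two cells
+ * are interpreted below as the DRCMR requestor line and the channel priority,
+ * the actual numbers being board specific:
+ *
+ *	dmas = <&pdma 75 1>;
+ *	dma-names = "rx";
+ */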
+static struct dma_chan *pxad_dma_xlate(struct of_phandle_args *dma_spec,
+					   struct of_dma *ofdma)
+{
+	struct pxad_device *d = ofdma->of_dma_data;
+	struct dma_chan *chan;
+
+	chan = dma_get_any_slave_channel(&d->slave);
+	if (!chan)
+		return NULL;
+
+	to_pxad_chan(chan)->drcmr = dma_spec->args[0];
+	to_pxad_chan(chan)->prio = dma_spec->args[1];
+
+	return chan;
+}
+
+static int pxad_init_dmadev(struct platform_device *op,
+			    struct pxad_device *pdev,
+			    unsigned int nr_phy_chans,
+			    unsigned int nr_requestors)
+{
+	int ret;
+	unsigned int i;
+	struct pxad_chan *c;
+
+	pdev->nr_chans = nr_phy_chans;
+	pdev->nr_requestors = nr_requestors;
+	INIT_LIST_HEAD(&pdev->slave.channels);
+	pdev->slave.device_alloc_chan_resources = pxad_alloc_chan_resources;
+	pdev->slave.device_free_chan_resources = pxad_free_chan_resources;
+	pdev->slave.device_tx_status = pxad_tx_status;
+	pdev->slave.device_issue_pending = pxad_issue_pending;
+	pdev->slave.device_config = pxad_config;
+	pdev->slave.device_synchronize = pxad_synchronize;
+	pdev->slave.device_terminate_all = pxad_terminate_all;
+
+	if (op->dev.coherent_dma_mask)
+		dma_set_mask(&op->dev, op->dev.coherent_dma_mask);
+	else
+		dma_set_mask(&op->dev, DMA_BIT_MASK(32));
+
+	ret = pxad_init_phys(op, pdev, nr_phy_chans);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < nr_phy_chans; i++) {
+		c = devm_kzalloc(&op->dev, sizeof(*c), GFP_KERNEL);
+		if (!c)
+			return -ENOMEM;
+
+		c->drcmr = U32_MAX;
+		c->prio = PXAD_PRIO_LOWEST;
+		c->vc.desc_free = pxad_free_desc;
+		vchan_init(&c->vc, &pdev->slave);
+		init_waitqueue_head(&c->wq_state);
+	}
+
+	return dma_async_device_register(&pdev->slave);
+}
+
+static int pxad_probe(struct platform_device *op)
+{
+	struct pxad_device *pdev;
+	const struct of_device_id *of_id;
+	const struct dma_slave_map *slave_map = NULL;
+	struct mmp_dma_platdata *pdata = dev_get_platdata(&op->dev);
+	struct resource *iores;
+	int ret, dma_channels = 0, nb_requestors = 0, slave_map_cnt = 0;
+	const enum dma_slave_buswidth widths =
+		DMA_SLAVE_BUSWIDTH_1_BYTE   | DMA_SLAVE_BUSWIDTH_2_BYTES |
+		DMA_SLAVE_BUSWIDTH_4_BYTES;
+
+	pdev = devm_kzalloc(&op->dev, sizeof(*pdev), GFP_KERNEL);
+	if (!pdev)
+		return -ENOMEM;
+
+	spin_lock_init(&pdev->phy_lock);
+
+	iores = platform_get_resource(op, IORESOURCE_MEM, 0);
+	pdev->base = devm_ioremap_resource(&op->dev, iores);
+	if (IS_ERR(pdev->base))
+		return PTR_ERR(pdev->base);
+
+	of_id = of_match_device(pxad_dt_ids, &op->dev);
+	if (of_id) {
+		of_property_read_u32(op->dev.of_node, "#dma-channels",
+				     &dma_channels);
+		ret = of_property_read_u32(op->dev.of_node, "#dma-requests",
+					   &nb_requestors);
+		if (ret) {
+			dev_warn(&op->dev,
+				 "#dma-requests set to default 32 as missing in OF: %d",
+				 ret);
+			nb_requestors = 32;
+		}
+	} else if (pdata && pdata->dma_channels) {
+		dma_channels = pdata->dma_channels;
+		nb_requestors = pdata->nb_requestors;
+		slave_map = pdata->slave_map;
+		slave_map_cnt = pdata->slave_map_cnt;
+	} else {
+		dma_channels = 32;	/* default to 32 channels */
+	}
+
+	dma_cap_set(DMA_SLAVE, pdev->slave.cap_mask);
+	dma_cap_set(DMA_MEMCPY, pdev->slave.cap_mask);
+	dma_cap_set(DMA_CYCLIC, pdev->slave.cap_mask);
+	dma_cap_set(DMA_PRIVATE, pdev->slave.cap_mask);
+	pdev->slave.device_prep_dma_memcpy = pxad_prep_memcpy;
+	pdev->slave.device_prep_slave_sg = pxad_prep_slave_sg;
+	pdev->slave.device_prep_dma_cyclic = pxad_prep_dma_cyclic;
+	pdev->slave.filter.map = slave_map;
+	pdev->slave.filter.mapcnt = slave_map_cnt;
+	pdev->slave.filter.fn = pxad_filter_fn;
+
+	pdev->slave.copy_align = PDMA_ALIGNMENT;
+	pdev->slave.src_addr_widths = widths;
+	pdev->slave.dst_addr_widths = widths;
+	pdev->slave.directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
+	pdev->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+	pdev->slave.descriptor_reuse = true;
+
+	pdev->slave.dev = &op->dev;
+	ret = pxad_init_dmadev(op, pdev, dma_channels, nb_requestors);
+	if (ret) {
+		dev_err(pdev->slave.dev, "unable to register\n");
+		return ret;
+	}
+
+	if (op->dev.of_node) {
+		/* Device-tree DMA controller registration */
+		ret = of_dma_controller_register(op->dev.of_node,
+						 pxad_dma_xlate, pdev);
+		if (ret < 0) {
+			dev_err(pdev->slave.dev,
+				"of_dma_controller_register failed\n");
+			return ret;
+		}
+	}
+
+	platform_set_drvdata(op, pdev);
+	pxad_init_debugfs(pdev);
+	dev_info(pdev->slave.dev, "initialized %d channels on %d requestors\n",
+		 dma_channels, nb_requestors);
+	return 0;
+}
+
+static const struct platform_device_id pxad_id_table[] = {
+	{ "pxa-dma", },
+	{ },
+};
+
+static struct platform_driver pxad_driver = {
+	.driver		= {
+		.name	= "pxa-dma",
+		.of_match_table = pxad_dt_ids,
+	},
+	.id_table	= pxad_id_table,
+	.probe		= pxad_probe,
+	.remove		= pxad_remove,
+};
+
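+/*
+ * Illustrative (non-DT) usage of the filter function from a client driver,
+ * assuming the struct pxad_param declaration from include/linux/dma/pxa-dma.h;
+ * the requestor number below is made up:
+ *
+ *	struct pxad_param param = { .prio = PXAD_PRIO_LOWEST, .drcmr = 75 };
+ *	dma_cap_mask_t mask;
+ *
+ *	dma_cap_zero(mask);
+ *	dma_cap_set(DMA_SLAVE, mask);
+ *	chan = dma_request_channel(mask, pxad_filter_fn, &param);
+ */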
+bool pxad_filter_fn(struct dma_chan *chan, void *param)
+{
+	struct pxad_chan *c = to_pxad_chan(chan);
+	struct pxad_param *p = param;
+
+	if (chan->device->dev->driver != &pxad_driver.driver)
+		return false;
+
+	c->drcmr = p->drcmr;
+	c->prio = p->prio;
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(pxad_filter_fn);
+
+module_platform_driver(pxad_driver);
+
+MODULE_DESCRIPTION("Marvell PXA Peripheral DMA Driver");
+MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/qcom/Kconfig b/drivers/dma/qcom/Kconfig
new file mode 100644
index 0000000..a7761c4
--- /dev/null
+++ b/drivers/dma/qcom/Kconfig
@@ -0,0 +1,29 @@
+config QCOM_BAM_DMA
+	tristate "QCOM BAM DMA support"
+	depends on ARCH_QCOM || (COMPILE_TEST && OF && ARM)
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	---help---
+	  Enable support for the QCOM BAM DMA controller.  This controller
+	  provides DMA capabilities for a variety of on-chip devices.
+
+config QCOM_HIDMA_MGMT
+	tristate "Qualcomm Technologies HIDMA Management support"
+	select DMA_ENGINE
+	help
+	  Enable support for the Qualcomm Technologies HIDMA Management.
+	  Each DMA device requires one management interface driver
+	  for basic initialization before QCOM_HIDMA channel driver can
+	  start managing the channels. In a virtualized environment,
+	  the guest OS would run QCOM_HIDMA channel driver and the
+	  host would run the QCOM_HIDMA_MGMT management driver.
+
+config QCOM_HIDMA
+	tristate "Qualcomm Technologies HIDMA Channel support"
+	select DMA_ENGINE
+	help
+	  Enable support for the Qualcomm Technologies HIDMA controller.
+	  The HIDMA controller supports optimized buffer copies
+	  (user to kernel, kernel to kernel, etc.).  It only supports
+	  the memcpy interface. The core is not intended for general
+	  purpose slave DMA.
diff --git a/drivers/dma/qcom/Makefile b/drivers/dma/qcom/Makefile
new file mode 100644
index 0000000..1ae92da
--- /dev/null
+++ b/drivers/dma/qcom/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_QCOM_BAM_DMA) += bam_dma.o
+obj-$(CONFIG_QCOM_HIDMA_MGMT) += hdma_mgmt.o
+hdma_mgmt-objs	 := hidma_mgmt.o hidma_mgmt_sys.o
+obj-$(CONFIG_QCOM_HIDMA) +=  hdma.o
+hdma-objs        := hidma_ll.o hidma.o hidma_dbg.o
diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c
new file mode 100644
index 0000000..1617715
--- /dev/null
+++ b/drivers/dma/qcom/bam_dma.c
@@ -0,0 +1,1477 @@
+/*
+ * Copyright (c) 2013-2014, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+/*
+ * QCOM BAM DMA engine driver
+ *
+ * QCOM BAM DMA blocks are distributed amongst a number of the on-chip
+ * peripherals on the MSM 8x74.  The configuration of the channels is dependent
+ * on the way they are hard wired to that specific peripheral.  The peripheral
+ * device tree entries specify the configuration of each channel.
+ *
+ * The DMA controller requires the use of external memory for storage of the
+ * hardware descriptors for each channel.  The descriptor FIFO is accessed as a
+ * circular buffer and operations are managed according to the offset within the
+ * FIFO.  After pipe/channel reset, all of the pipe registers and internal state
+ * are back to defaults.
+ *
+ * During DMA operations, we write descriptors to the FIFO, being careful to
+ * handle wrapping and then write the last FIFO offset to that channel's
+ * P_EVNT_REG register to kick off the transaction.  The P_SW_OFSTS register
+ * indicates the current FIFO offset that is being processed, so there is some
+ * indication of where the hardware is currently working.
+ */
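+
+/*
+ * Typical slave usage from a client driver, for reference only (standard
+ * dmaengine API, not specific to this controller):
+ *
+ *	chan = dma_request_chan(dev, "rx");
+ *	dmaengine_slave_config(chan, &cfg);
+ *	txd = dmaengine_prep_slave_sg(chan, sgl, nents, DMA_DEV_TO_MEM,
+ *				      DMA_PREP_INTERRUPT);
+ *	cookie = dmaengine_submit(txd);
+ *	dma_async_issue_pending(chan);
+ */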
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_dma.h>
+#include <linux/circ_buf.h>
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/pm_runtime.h>
+
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+struct bam_desc_hw {
+	__le32 addr;		/* Buffer physical address */
+	__le16 size;		/* Buffer size in bytes */
+	__le16 flags;
+};
+
+#define BAM_DMA_AUTOSUSPEND_DELAY 100
+
+#define DESC_FLAG_INT BIT(15)
+#define DESC_FLAG_EOT BIT(14)
+#define DESC_FLAG_EOB BIT(13)
+#define DESC_FLAG_NWD BIT(12)
+#define DESC_FLAG_CMD BIT(11)
+
+struct bam_async_desc {
+	struct virt_dma_desc vd;
+
+	u32 num_desc;
+	u32 xfer_len;
+
+	/* transaction flags, EOT|EOB|NWD */
+	u16 flags;
+
+	struct bam_desc_hw *curr_desc;
+
+	/* list node for the desc in the bam_chan list of descriptors */
+	struct list_head desc_node;
+	enum dma_transfer_direction dir;
+	size_t length;
+	struct bam_desc_hw desc[0];
+};
+
+enum bam_reg {
+	BAM_CTRL,
+	BAM_REVISION,
+	BAM_NUM_PIPES,
+	BAM_DESC_CNT_TRSHLD,
+	BAM_IRQ_SRCS,
+	BAM_IRQ_SRCS_MSK,
+	BAM_IRQ_SRCS_UNMASKED,
+	BAM_IRQ_STTS,
+	BAM_IRQ_CLR,
+	BAM_IRQ_EN,
+	BAM_CNFG_BITS,
+	BAM_IRQ_SRCS_EE,
+	BAM_IRQ_SRCS_MSK_EE,
+	BAM_P_CTRL,
+	BAM_P_RST,
+	BAM_P_HALT,
+	BAM_P_IRQ_STTS,
+	BAM_P_IRQ_CLR,
+	BAM_P_IRQ_EN,
+	BAM_P_EVNT_DEST_ADDR,
+	BAM_P_EVNT_REG,
+	BAM_P_SW_OFSTS,
+	BAM_P_DATA_FIFO_ADDR,
+	BAM_P_DESC_FIFO_ADDR,
+	BAM_P_EVNT_GEN_TRSHLD,
+	BAM_P_FIFO_SIZES,
+};
+
+struct reg_offset_data {
+	u32 base_offset;
+	unsigned int pipe_mult, evnt_mult, ee_mult;
+};
+
+static const struct reg_offset_data bam_v1_3_reg_info[] = {
+	[BAM_CTRL]		= { 0x0F80, 0x00, 0x00, 0x00 },
+	[BAM_REVISION]		= { 0x0F84, 0x00, 0x00, 0x00 },
+	[BAM_NUM_PIPES]		= { 0x0FBC, 0x00, 0x00, 0x00 },
+	[BAM_DESC_CNT_TRSHLD]	= { 0x0F88, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS]		= { 0x0F8C, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_MSK]	= { 0x0F90, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_UNMASKED]	= { 0x0FB0, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_STTS]		= { 0x0F94, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_CLR]		= { 0x0F98, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_EN]		= { 0x0F9C, 0x00, 0x00, 0x00 },
+	[BAM_CNFG_BITS]		= { 0x0FFC, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_EE]	= { 0x1800, 0x00, 0x00, 0x80 },
+	[BAM_IRQ_SRCS_MSK_EE]	= { 0x1804, 0x00, 0x00, 0x80 },
+	[BAM_P_CTRL]		= { 0x0000, 0x80, 0x00, 0x00 },
+	[BAM_P_RST]		= { 0x0004, 0x80, 0x00, 0x00 },
+	[BAM_P_HALT]		= { 0x0008, 0x80, 0x00, 0x00 },
+	[BAM_P_IRQ_STTS]	= { 0x0010, 0x80, 0x00, 0x00 },
+	[BAM_P_IRQ_CLR]		= { 0x0014, 0x80, 0x00, 0x00 },
+	[BAM_P_IRQ_EN]		= { 0x0018, 0x80, 0x00, 0x00 },
+	[BAM_P_EVNT_DEST_ADDR]	= { 0x102C, 0x00, 0x40, 0x00 },
+	[BAM_P_EVNT_REG]	= { 0x1018, 0x00, 0x40, 0x00 },
+	[BAM_P_SW_OFSTS]	= { 0x1000, 0x00, 0x40, 0x00 },
+	[BAM_P_DATA_FIFO_ADDR]	= { 0x1024, 0x00, 0x40, 0x00 },
+	[BAM_P_DESC_FIFO_ADDR]	= { 0x101C, 0x00, 0x40, 0x00 },
+	[BAM_P_EVNT_GEN_TRSHLD]	= { 0x1028, 0x00, 0x40, 0x00 },
+	[BAM_P_FIFO_SIZES]	= { 0x1020, 0x00, 0x40, 0x00 },
+};
+
+static const struct reg_offset_data bam_v1_4_reg_info[] = {
+	[BAM_CTRL]		= { 0x0000, 0x00, 0x00, 0x00 },
+	[BAM_REVISION]		= { 0x0004, 0x00, 0x00, 0x00 },
+	[BAM_NUM_PIPES]		= { 0x003C, 0x00, 0x00, 0x00 },
+	[BAM_DESC_CNT_TRSHLD]	= { 0x0008, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS]		= { 0x000C, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_MSK]	= { 0x0010, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_UNMASKED]	= { 0x0030, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_STTS]		= { 0x0014, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_CLR]		= { 0x0018, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_EN]		= { 0x001C, 0x00, 0x00, 0x00 },
+	[BAM_CNFG_BITS]		= { 0x007C, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_EE]	= { 0x0800, 0x00, 0x00, 0x80 },
+	[BAM_IRQ_SRCS_MSK_EE]	= { 0x0804, 0x00, 0x00, 0x80 },
+	[BAM_P_CTRL]		= { 0x1000, 0x1000, 0x00, 0x00 },
+	[BAM_P_RST]		= { 0x1004, 0x1000, 0x00, 0x00 },
+	[BAM_P_HALT]		= { 0x1008, 0x1000, 0x00, 0x00 },
+	[BAM_P_IRQ_STTS]	= { 0x1010, 0x1000, 0x00, 0x00 },
+	[BAM_P_IRQ_CLR]		= { 0x1014, 0x1000, 0x00, 0x00 },
+	[BAM_P_IRQ_EN]		= { 0x1018, 0x1000, 0x00, 0x00 },
+	[BAM_P_EVNT_DEST_ADDR]	= { 0x182C, 0x00, 0x1000, 0x00 },
+	[BAM_P_EVNT_REG]	= { 0x1818, 0x00, 0x1000, 0x00 },
+	[BAM_P_SW_OFSTS]	= { 0x1800, 0x00, 0x1000, 0x00 },
+	[BAM_P_DATA_FIFO_ADDR]	= { 0x1824, 0x00, 0x1000, 0x00 },
+	[BAM_P_DESC_FIFO_ADDR]	= { 0x181C, 0x00, 0x1000, 0x00 },
+	[BAM_P_EVNT_GEN_TRSHLD]	= { 0x1828, 0x00, 0x1000, 0x00 },
+	[BAM_P_FIFO_SIZES]	= { 0x1820, 0x00, 0x1000, 0x00 },
+};
+
+static const struct reg_offset_data bam_v1_7_reg_info[] = {
+	[BAM_CTRL]		= { 0x00000, 0x00, 0x00, 0x00 },
+	[BAM_REVISION]		= { 0x01000, 0x00, 0x00, 0x00 },
+	[BAM_NUM_PIPES]		= { 0x01008, 0x00, 0x00, 0x00 },
+	[BAM_DESC_CNT_TRSHLD]	= { 0x00008, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS]		= { 0x03010, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_MSK]	= { 0x03014, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_UNMASKED]	= { 0x03018, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_STTS]		= { 0x00014, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_CLR]		= { 0x00018, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_EN]		= { 0x0001C, 0x00, 0x00, 0x00 },
+	[BAM_CNFG_BITS]		= { 0x0007C, 0x00, 0x00, 0x00 },
+	[BAM_IRQ_SRCS_EE]	= { 0x03000, 0x00, 0x00, 0x1000 },
+	[BAM_IRQ_SRCS_MSK_EE]	= { 0x03004, 0x00, 0x00, 0x1000 },
+	[BAM_P_CTRL]		= { 0x13000, 0x1000, 0x00, 0x00 },
+	[BAM_P_RST]		= { 0x13004, 0x1000, 0x00, 0x00 },
+	[BAM_P_HALT]		= { 0x13008, 0x1000, 0x00, 0x00 },
+	[BAM_P_IRQ_STTS]	= { 0x13010, 0x1000, 0x00, 0x00 },
+	[BAM_P_IRQ_CLR]		= { 0x13014, 0x1000, 0x00, 0x00 },
+	[BAM_P_IRQ_EN]		= { 0x13018, 0x1000, 0x00, 0x00 },
+	[BAM_P_EVNT_DEST_ADDR]	= { 0x1382C, 0x00, 0x1000, 0x00 },
+	[BAM_P_EVNT_REG]	= { 0x13818, 0x00, 0x1000, 0x00 },
+	[BAM_P_SW_OFSTS]	= { 0x13800, 0x00, 0x1000, 0x00 },
+	[BAM_P_DATA_FIFO_ADDR]	= { 0x13824, 0x00, 0x1000, 0x00 },
+	[BAM_P_DESC_FIFO_ADDR]	= { 0x1381C, 0x00, 0x1000, 0x00 },
+	[BAM_P_EVNT_GEN_TRSHLD]	= { 0x13828, 0x00, 0x1000, 0x00 },
+	[BAM_P_FIFO_SIZES]	= { 0x13820, 0x00, 0x1000, 0x00 },
+};
+
+/* BAM CTRL */
+#define BAM_SW_RST			BIT(0)
+#define BAM_EN				BIT(1)
+#define BAM_EN_ACCUM			BIT(4)
+#define BAM_TESTBUS_SEL_SHIFT		5
+#define BAM_TESTBUS_SEL_MASK		0x3F
+#define BAM_DESC_CACHE_SEL_SHIFT	13
+#define BAM_DESC_CACHE_SEL_MASK		0x3
+#define BAM_CACHED_DESC_STORE		BIT(15)
+#define IBC_DISABLE			BIT(16)
+
+/* BAM REVISION */
+#define REVISION_SHIFT		0
+#define REVISION_MASK		0xFF
+#define NUM_EES_SHIFT		8
+#define NUM_EES_MASK		0xF
+#define CE_BUFFER_SIZE		BIT(13)
+#define AXI_ACTIVE		BIT(14)
+#define USE_VMIDMT		BIT(15)
+#define SECURED			BIT(16)
+#define BAM_HAS_NO_BYPASS	BIT(17)
+#define HIGH_FREQUENCY_BAM	BIT(18)
+#define INACTIV_TMRS_EXST	BIT(19)
+#define NUM_INACTIV_TMRS	BIT(20)
+#define DESC_CACHE_DEPTH_SHIFT	21
+#define DESC_CACHE_DEPTH_1	(0 << DESC_CACHE_DEPTH_SHIFT)
+#define DESC_CACHE_DEPTH_2	(1 << DESC_CACHE_DEPTH_SHIFT)
+#define DESC_CACHE_DEPTH_3	(2 << DESC_CACHE_DEPTH_SHIFT)
+#define DESC_CACHE_DEPTH_4	(3 << DESC_CACHE_DEPTH_SHIFT)
+#define CMD_DESC_EN		BIT(23)
+#define INACTIV_TMR_BASE_SHIFT	24
+#define INACTIV_TMR_BASE_MASK	0xFF
+
+/* BAM NUM PIPES */
+#define BAM_NUM_PIPES_SHIFT		0
+#define BAM_NUM_PIPES_MASK		0xFF
+#define PERIPH_NON_PIPE_GRP_SHIFT	16
+#define PERIPH_NON_PIP_GRP_MASK		0xFF
+#define BAM_NON_PIPE_GRP_SHIFT		24
+#define BAM_NON_PIPE_GRP_MASK		0xFF
+
+/* BAM CNFG BITS */
+#define BAM_PIPE_CNFG		BIT(2)
+#define BAM_FULL_PIPE		BIT(11)
+#define BAM_NO_EXT_P_RST	BIT(12)
+#define BAM_IBC_DISABLE		BIT(13)
+#define BAM_SB_CLK_REQ		BIT(14)
+#define BAM_PSM_CSW_REQ		BIT(15)
+#define BAM_PSM_P_RES		BIT(16)
+#define BAM_AU_P_RES		BIT(17)
+#define BAM_SI_P_RES		BIT(18)
+#define BAM_WB_P_RES		BIT(19)
+#define BAM_WB_BLK_CSW		BIT(20)
+#define BAM_WB_CSW_ACK_IDL	BIT(21)
+#define BAM_WB_RETR_SVPNT	BIT(22)
+#define BAM_WB_DSC_AVL_P_RST	BIT(23)
+#define BAM_REG_P_EN		BIT(24)
+#define BAM_PSM_P_HD_DATA	BIT(25)
+#define BAM_AU_ACCUMED		BIT(26)
+#define BAM_CMD_ENABLE		BIT(27)
+
+#define BAM_CNFG_BITS_DEFAULT	(BAM_PIPE_CNFG |	\
+				 BAM_NO_EXT_P_RST |	\
+				 BAM_IBC_DISABLE |	\
+				 BAM_SB_CLK_REQ |	\
+				 BAM_PSM_CSW_REQ |	\
+				 BAM_PSM_P_RES |	\
+				 BAM_AU_P_RES |		\
+				 BAM_SI_P_RES |		\
+				 BAM_WB_P_RES |		\
+				 BAM_WB_BLK_CSW |	\
+				 BAM_WB_CSW_ACK_IDL |	\
+				 BAM_WB_RETR_SVPNT |	\
+				 BAM_WB_DSC_AVL_P_RST |	\
+				 BAM_REG_P_EN |		\
+				 BAM_PSM_P_HD_DATA |	\
+				 BAM_AU_ACCUMED |	\
+				 BAM_CMD_ENABLE)
+
+/* PIPE CTRL */
+#define P_EN			BIT(1)
+#define P_DIRECTION		BIT(3)
+#define P_SYS_STRM		BIT(4)
+#define P_SYS_MODE		BIT(5)
+#define P_AUTO_EOB		BIT(6)
+#define P_AUTO_EOB_SEL_SHIFT	7
+#define P_AUTO_EOB_SEL_512	(0 << P_AUTO_EOB_SEL_SHIFT)
+#define P_AUTO_EOB_SEL_256	(1 << P_AUTO_EOB_SEL_SHIFT)
+#define P_AUTO_EOB_SEL_128	(2 << P_AUTO_EOB_SEL_SHIFT)
+#define P_AUTO_EOB_SEL_64	(3 << P_AUTO_EOB_SEL_SHIFT)
+#define P_PREFETCH_LIMIT_SHIFT	9
+#define P_PREFETCH_LIMIT_32	(0 << P_PREFETCH_LIMIT_SHIFT)
+#define P_PREFETCH_LIMIT_16	(1 << P_PREFETCH_LIMIT_SHIFT)
+#define P_PREFETCH_LIMIT_4	(2 << P_PREFETCH_LIMIT_SHIFT)
+#define P_WRITE_NWD		BIT(11)
+#define P_LOCK_GROUP_SHIFT	16
+#define P_LOCK_GROUP_MASK	0x1F
+
+/* BAM_DESC_CNT_TRSHLD */
+#define CNT_TRSHLD		0xffff
+#define DEFAULT_CNT_THRSHLD	0x4
+
+/* BAM_IRQ_SRCS */
+#define BAM_IRQ			BIT(31)
+#define P_IRQ			0x7fffffff
+
+/* BAM_IRQ_SRCS_MSK */
+#define BAM_IRQ_MSK		BAM_IRQ
+#define P_IRQ_MSK		P_IRQ
+
+/* BAM_IRQ_STTS */
+#define BAM_TIMER_IRQ		BIT(4)
+#define BAM_EMPTY_IRQ		BIT(3)
+#define BAM_ERROR_IRQ		BIT(2)
+#define BAM_HRESP_ERR_IRQ	BIT(1)
+
+/* BAM_IRQ_CLR */
+#define BAM_TIMER_CLR		BIT(4)
+#define BAM_EMPTY_CLR		BIT(3)
+#define BAM_ERROR_CLR		BIT(2)
+#define BAM_HRESP_ERR_CLR	BIT(1)
+
+/* BAM_IRQ_EN */
+#define BAM_TIMER_EN		BIT(4)
+#define BAM_EMPTY_EN		BIT(3)
+#define BAM_ERROR_EN		BIT(2)
+#define BAM_HRESP_ERR_EN	BIT(1)
+
+/* BAM_P_IRQ_EN */
+#define P_PRCSD_DESC_EN		BIT(0)
+#define P_TIMER_EN		BIT(1)
+#define P_WAKE_EN		BIT(2)
+#define P_OUT_OF_DESC_EN	BIT(3)
+#define P_ERR_EN		BIT(4)
+#define P_TRNSFR_END_EN		BIT(5)
+#define P_DEFAULT_IRQS_EN	(P_PRCSD_DESC_EN | P_ERR_EN | P_TRNSFR_END_EN)
+
+/* BAM_P_SW_OFSTS */
+#define P_SW_OFSTS_MASK		0xffff
+
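+/*
+ * The descriptor ring is managed with the circ_buf helpers, which always
+ * keep one slot free so that a full ring can be distinguished from an empty
+ * one; hence MAX_DESCRIPTORS is one less than the number of bam_desc_hw
+ * entries that fit in the FIFO.
+ */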
+#define BAM_DESC_FIFO_SIZE	SZ_32K
+#define MAX_DESCRIPTORS (BAM_DESC_FIFO_SIZE / sizeof(struct bam_desc_hw) - 1)
+#define BAM_FIFO_SIZE	(SZ_32K - 8)
+#define IS_BUSY(chan)	(CIRC_SPACE((chan)->tail, (chan)->head,\
+			 MAX_DESCRIPTORS + 1) == 0)
+
+struct bam_chan {
+	struct virt_dma_chan vc;
+
+	struct bam_device *bdev;
+
+	/* configuration from device tree */
+	u32 id;
+
+	/* runtime configuration */
+	struct dma_slave_config slave;
+
+	/* fifo storage */
+	struct bam_desc_hw *fifo_virt;
+	dma_addr_t fifo_phys;
+
+	/* fifo markers */
+	unsigned short head;		/* start of active descriptor entries */
+	unsigned short tail;		/* end of active descriptor entries */
+
+	unsigned int initialized;	/* is the channel hw initialized? */
+	unsigned int paused;		/* is the channel paused? */
+	unsigned int reconfigure;	/* new slave config? */
+	/* list of descriptors currently processed */
+	struct list_head desc_list;
+
+	struct list_head node;
+};
+
+static inline struct bam_chan *to_bam_chan(struct dma_chan *common)
+{
+	return container_of(common, struct bam_chan, vc.chan);
+}
+
+struct bam_device {
+	void __iomem *regs;
+	struct device *dev;
+	struct dma_device common;
+	struct device_dma_parameters dma_parms;
+	struct bam_chan *channels;
+	u32 num_channels;
+	u32 num_ees;
+
+	/* execution environment ID, from DT */
+	u32 ee;
+	bool controlled_remotely;
+
+	const struct reg_offset_data *layout;
+
+	struct clk *bamclk;
+	int irq;
+
+	/* dma start transaction tasklet */
+	struct tasklet_struct task;
+};
+
+/**
+ * bam_addr - returns BAM register address
+ * @bdev: bam device
+ * @pipe: pipe instance (ignored when register doesn't have multiple instances)
+ * @reg:  register enum
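+ *
+ * For example, with the bam_v1_4_reg_info layout above, BAM_P_CTRL for
+ * pipe 3 resolves to regs + 0x1000 + 0x1000 * 3, and BAM_IRQ_SRCS_MSK_EE
+ * resolves to regs + 0x0804 + 0x80 * ee.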
+ */
+static inline void __iomem *bam_addr(struct bam_device *bdev, u32 pipe,
+		enum bam_reg reg)
+{
+	const struct reg_offset_data r = bdev->layout[reg];
+
+	return bdev->regs + r.base_offset +
+		r.pipe_mult * pipe +
+		r.evnt_mult * pipe +
+		r.ee_mult * bdev->ee;
+}
+
+/**
+ * bam_reset_channel - Reset individual BAM DMA channel
+ * @bchan: bam channel
+ *
+ * This function resets a specific BAM channel
+ */
+static void bam_reset_channel(struct bam_chan *bchan)
+{
+	struct bam_device *bdev = bchan->bdev;
+
+	lockdep_assert_held(&bchan->vc.lock);
+
+	/* reset channel */
+	writel_relaxed(1, bam_addr(bdev, bchan->id, BAM_P_RST));
+	writel_relaxed(0, bam_addr(bdev, bchan->id, BAM_P_RST));
+
+	/* don't allow cpu to reorder BAM register accesses done after this */
+	wmb();
+
+	/* make sure hw is initialized when channel is used the first time  */
+	bchan->initialized = 0;
+}
+
+/**
+ * bam_chan_init_hw - Initialize channel hardware
+ * @bchan: bam channel
+ * @dir: DMA transfer direction
+ *
+ * This function resets and initializes the BAM channel
+ */
+static void bam_chan_init_hw(struct bam_chan *bchan,
+	enum dma_transfer_direction dir)
+{
+	struct bam_device *bdev = bchan->bdev;
+	u32 val;
+
+	/* Reset the channel to clear internal state of the FIFO */
+	bam_reset_channel(bchan);
+
+	/*
+	 * write out an 8-byte aligned address.  We have enough space for this
+	 * because we allocated 1 more descriptor (8 bytes) than we can use
+	 */
+	writel_relaxed(ALIGN(bchan->fifo_phys, sizeof(struct bam_desc_hw)),
+			bam_addr(bdev, bchan->id, BAM_P_DESC_FIFO_ADDR));
+	writel_relaxed(BAM_FIFO_SIZE,
+			bam_addr(bdev, bchan->id, BAM_P_FIFO_SIZES));
+
+	/* enable the per pipe interrupts, enable EOT, ERR, and INT irqs */
+	writel_relaxed(P_DEFAULT_IRQS_EN,
+			bam_addr(bdev, bchan->id, BAM_P_IRQ_EN));
+
+	/* unmask the specific pipe and EE combo */
+	val = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
+	val |= BIT(bchan->id);
+	writel_relaxed(val, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
+
+	/* don't allow cpu to reorder the channel enable done below */
+	wmb();
+
+	/* set fixed direction and mode, then enable channel */
+	val = P_EN | P_SYS_MODE;
+	if (dir == DMA_DEV_TO_MEM)
+		val |= P_DIRECTION;
+
+	writel_relaxed(val, bam_addr(bdev, bchan->id, BAM_P_CTRL));
+
+	bchan->initialized = 1;
+
+	/* init FIFO pointers */
+	bchan->head = 0;
+	bchan->tail = 0;
+}
+
+/**
+ * bam_alloc_chan - Allocate channel resources for DMA channel.
+ * @chan: specified channel
+ *
+ * This function allocates the FIFO descriptor memory
+ */
+static int bam_alloc_chan(struct dma_chan *chan)
+{
+	struct bam_chan *bchan = to_bam_chan(chan);
+	struct bam_device *bdev = bchan->bdev;
+
+	if (bchan->fifo_virt)
+		return 0;
+
+	/* allocate FIFO descriptor space, but only if necessary */
+	bchan->fifo_virt = dma_alloc_wc(bdev->dev, BAM_DESC_FIFO_SIZE,
+					&bchan->fifo_phys, GFP_KERNEL);
+
+	if (!bchan->fifo_virt) {
+		dev_err(bdev->dev, "Failed to allocate desc fifo\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int bam_pm_runtime_get_sync(struct device *dev)
+{
+	if (pm_runtime_enabled(dev))
+		return pm_runtime_get_sync(dev);
+
+	return 0;
+}
+
+/**
+ * bam_free_chan - Frees dma resources associated with specific channel
+ * @chan: specified channel
+ *
+ * Free the allocated fifo descriptor memory and channel resources
+ *
+ */
+static void bam_free_chan(struct dma_chan *chan)
+{
+	struct bam_chan *bchan = to_bam_chan(chan);
+	struct bam_device *bdev = bchan->bdev;
+	u32 val;
+	unsigned long flags;
+	int ret;
+
+	ret = bam_pm_runtime_get_sync(bdev->dev);
+	if (ret < 0)
+		return;
+
+	vchan_free_chan_resources(to_virt_chan(chan));
+
+	if (!list_empty(&bchan->desc_list)) {
+		dev_err(bchan->bdev->dev, "Cannot free busy channel\n");
+		goto err;
+	}
+
+	spin_lock_irqsave(&bchan->vc.lock, flags);
+	bam_reset_channel(bchan);
+	spin_unlock_irqrestore(&bchan->vc.lock, flags);
+
+	dma_free_wc(bdev->dev, BAM_DESC_FIFO_SIZE, bchan->fifo_virt,
+		    bchan->fifo_phys);
+	bchan->fifo_virt = NULL;
+
+	/* mask irq for pipe/channel */
+	val = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
+	val &= ~BIT(bchan->id);
+	writel_relaxed(val, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
+
+	/* disable irq */
+	writel_relaxed(0, bam_addr(bdev, bchan->id, BAM_P_IRQ_EN));
+
+err:
+	pm_runtime_mark_last_busy(bdev->dev);
+	pm_runtime_put_autosuspend(bdev->dev);
+}
+
+/**
+ * bam_slave_config - set slave configuration for channel
+ * @chan: dma channel
+ * @cfg: slave configuration
+ *
+ * Sets slave configuration for channel
+ *
+ */
+static int bam_slave_config(struct dma_chan *chan,
+			    struct dma_slave_config *cfg)
+{
+	struct bam_chan *bchan = to_bam_chan(chan);
+	unsigned long flag;
+
+	spin_lock_irqsave(&bchan->vc.lock, flag);
+	memcpy(&bchan->slave, cfg, sizeof(*cfg));
+	bchan->reconfigure = 1;
+	spin_unlock_irqrestore(&bchan->vc.lock, flag);
+
+	return 0;
+}
+
+/**
+ * bam_prep_slave_sg - Prep slave sg transaction
+ *
+ * @chan: dma channel
+ * @sgl: scatter gather list
+ * @sg_len: length of sg
+ * @direction: DMA transfer direction
+ * @flags: DMA flags
+ * @context: transfer context (unused)
+ */
+static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan,
+	struct scatterlist *sgl, unsigned int sg_len,
+	enum dma_transfer_direction direction, unsigned long flags,
+	void *context)
+{
+	struct bam_chan *bchan = to_bam_chan(chan);
+	struct bam_device *bdev = bchan->bdev;
+	struct bam_async_desc *async_desc;
+	struct scatterlist *sg;
+	u32 i;
+	struct bam_desc_hw *desc;
+	unsigned int num_alloc = 0;
+
+	if (!is_slave_direction(direction)) {
+		dev_err(bdev->dev, "invalid dma direction\n");
+		return NULL;
+	}
+
+	/* calculate number of required entries */
+	for_each_sg(sgl, sg, sg_len, i)
+		num_alloc += DIV_ROUND_UP(sg_dma_len(sg), BAM_FIFO_SIZE);
+
+	/* allocate enough room to accommodate the number of entries */
+	async_desc = kzalloc(sizeof(*async_desc) +
+			(num_alloc * sizeof(struct bam_desc_hw)), GFP_NOWAIT);
+
+	if (!async_desc)
+		goto err_out;
+
+	if (flags & DMA_PREP_FENCE)
+		async_desc->flags |= DESC_FLAG_NWD;
+
+	if (flags & DMA_PREP_INTERRUPT)
+		async_desc->flags |= DESC_FLAG_EOT;
+
+	async_desc->num_desc = num_alloc;
+	async_desc->curr_desc = async_desc->desc;
+	async_desc->dir = direction;
+
+	/* fill in temporary descriptors */
+	desc = async_desc->desc;
+	for_each_sg(sgl, sg, sg_len, i) {
+		unsigned int remainder = sg_dma_len(sg);
+		unsigned int curr_offset = 0;
+
+		do {
+			if (flags & DMA_PREP_CMD)
+				desc->flags |= cpu_to_le16(DESC_FLAG_CMD);
+
+			desc->addr = cpu_to_le32(sg_dma_address(sg) +
+						 curr_offset);
+
+			if (remainder > BAM_FIFO_SIZE) {
+				desc->size = cpu_to_le16(BAM_FIFO_SIZE);
+				remainder -= BAM_FIFO_SIZE;
+				curr_offset += BAM_FIFO_SIZE;
+			} else {
+				desc->size = cpu_to_le16(remainder);
+				remainder = 0;
+			}
+
+			async_desc->length += le16_to_cpu(desc->size);
+			desc++;
+		} while (remainder > 0);
+	}
+
+	return vchan_tx_prep(&bchan->vc, &async_desc->vd, flags);
+
+err_out:
+	kfree(async_desc);
+	return NULL;
+}
+
+/**
+ * bam_dma_terminate_all - terminate all transactions on a channel
+ * @chan: bam dma channel
+ *
+ * Dequeues and frees all transactions
+ * No callbacks are done
+ *
+ */
+static int bam_dma_terminate_all(struct dma_chan *chan)
+{
+	struct bam_chan *bchan = to_bam_chan(chan);
+	struct bam_async_desc *async_desc, *tmp;
+	unsigned long flag;
+	LIST_HEAD(head);
+
+	/* remove all transactions, including active transaction */
+	spin_lock_irqsave(&bchan->vc.lock, flag);
+	list_for_each_entry_safe(async_desc, tmp,
+				 &bchan->desc_list, desc_node) {
+		list_add(&async_desc->vd.node, &bchan->vc.desc_issued);
+		list_del(&async_desc->desc_node);
+	}
+
+	vchan_get_all_descriptors(&bchan->vc, &head);
+	spin_unlock_irqrestore(&bchan->vc.lock, flag);
+
+	vchan_dma_desc_free_list(&bchan->vc, &head);
+
+	return 0;
+}
+
+/**
+ * bam_pause - Pause DMA channel
+ * @chan: dma channel
+ *
+ */
+static int bam_pause(struct dma_chan *chan)
+{
+	struct bam_chan *bchan = to_bam_chan(chan);
+	struct bam_device *bdev = bchan->bdev;
+	unsigned long flag;
+	int ret;
+
+	ret = bam_pm_runtime_get_sync(bdev->dev);
+	if (ret < 0)
+		return ret;
+
+	spin_lock_irqsave(&bchan->vc.lock, flag);
+	writel_relaxed(1, bam_addr(bdev, bchan->id, BAM_P_HALT));
+	bchan->paused = 1;
+	spin_unlock_irqrestore(&bchan->vc.lock, flag);
+	pm_runtime_mark_last_busy(bdev->dev);
+	pm_runtime_put_autosuspend(bdev->dev);
+
+	return 0;
+}
+
+/**
+ * bam_resume - Resume DMA channel operations
+ * @chan: dma channel
+ *
+ */
+static int bam_resume(struct dma_chan *chan)
+{
+	struct bam_chan *bchan = to_bam_chan(chan);
+	struct bam_device *bdev = bchan->bdev;
+	unsigned long flag;
+	int ret;
+
+	ret = bam_pm_runtime_get_sync(bdev->dev);
+	if (ret < 0)
+		return ret;
+
+	spin_lock_irqsave(&bchan->vc.lock, flag);
+	writel_relaxed(0, bam_addr(bdev, bchan->id, BAM_P_HALT));
+	bchan->paused = 0;
+	spin_unlock_irqrestore(&bchan->vc.lock, flag);
+	pm_runtime_mark_last_busy(bdev->dev);
+	pm_runtime_put_autosuspend(bdev->dev);
+
+	return 0;
+}
+
+/**
+ * process_channel_irqs - processes the channel interrupts
+ * @bdev: bam controller
+ *
+ * This function processes the channel interrupts
+ *
+ */
+static u32 process_channel_irqs(struct bam_device *bdev)
+{
+	u32 i, srcs, pipe_stts, offset, avail;
+	unsigned long flags;
+	struct bam_async_desc *async_desc, *tmp;
+
+	srcs = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_SRCS_EE));
+
+	/* return early if no pipe/channel interrupts are present */
+	if (!(srcs & P_IRQ))
+		return srcs;
+
+	for (i = 0; i < bdev->num_channels; i++) {
+		struct bam_chan *bchan = &bdev->channels[i];
+
+		if (!(srcs & BIT(i)))
+			continue;
+
+		/* clear pipe irq */
+		pipe_stts = readl_relaxed(bam_addr(bdev, i, BAM_P_IRQ_STTS));
+
+		writel_relaxed(pipe_stts, bam_addr(bdev, i, BAM_P_IRQ_CLR));
+
+		spin_lock_irqsave(&bchan->vc.lock, flags);
+
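+		/*
+		 * P_SW_OFSTS holds the byte offset of the descriptor the
+		 * hardware is currently processing; convert it into a
+		 * descriptor index so it can be compared with our head.
+		 */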
+		offset = readl_relaxed(bam_addr(bdev, i, BAM_P_SW_OFSTS)) &
+				       P_SW_OFSTS_MASK;
+		offset /= sizeof(struct bam_desc_hw);
+
+		/* Number of processed descriptors available to reap */
+		avail = CIRC_CNT(offset, bchan->head, MAX_DESCRIPTORS + 1);
+
+		list_for_each_entry_safe(async_desc, tmp,
+					 &bchan->desc_list, desc_node) {
+			/* Not enough data to read */
+			if (avail < async_desc->xfer_len)
+				break;
+
+			/* manage FIFO */
+			bchan->head += async_desc->xfer_len;
+			bchan->head %= MAX_DESCRIPTORS;
+
+			async_desc->num_desc -= async_desc->xfer_len;
+			async_desc->curr_desc += async_desc->xfer_len;
+			avail -= async_desc->xfer_len;
+
+			/*
+			 * if complete, process cookie. Otherwise
+			 * push back to front of desc_issued so that
+			 * it gets restarted by the tasklet
+			 */
+			if (!async_desc->num_desc) {
+				vchan_cookie_complete(&async_desc->vd);
+			} else {
+				list_add(&async_desc->vd.node,
+					 &bchan->vc.desc_issued);
+			}
+			list_del(&async_desc->desc_node);
+		}
+
+		spin_unlock_irqrestore(&bchan->vc.lock, flags);
+	}
+
+	return srcs;
+}
+
+/**
+ * bam_dma_irq - irq handler for bam controller
+ * @irq: IRQ of interrupt
+ * @data: callback data
+ *
+ * IRQ handler for the bam controller
+ */
+static irqreturn_t bam_dma_irq(int irq, void *data)
+{
+	struct bam_device *bdev = data;
+	u32 clr_mask = 0, srcs = 0;
+	int ret;
+
+	srcs |= process_channel_irqs(bdev);
+
+	/* kick off tasklet to start next dma transfer */
+	if (srcs & P_IRQ)
+		tasklet_schedule(&bdev->task);
+
+	ret = bam_pm_runtime_get_sync(bdev->dev);
+	if (ret < 0)
+		return IRQ_NONE;
+
+	if (srcs & BAM_IRQ) {
+		clr_mask = readl_relaxed(bam_addr(bdev, 0, BAM_IRQ_STTS));
+
+		/*
+		 * don't allow reorder of the various accesses to the BAM
+		 * registers
+		 */
+		mb();
+
+		writel_relaxed(clr_mask, bam_addr(bdev, 0, BAM_IRQ_CLR));
+	}
+
+	pm_runtime_mark_last_busy(bdev->dev);
+	pm_runtime_put_autosuspend(bdev->dev);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * bam_tx_status - returns status of transaction
+ * @chan: dma channel
+ * @cookie: transaction cookie
+ * @txstate: DMA transaction state
+ *
+ * Return status of dma transaction
+ */
+static enum dma_status bam_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+		struct dma_tx_state *txstate)
+{
+	struct bam_chan *bchan = to_bam_chan(chan);
+	struct bam_async_desc *async_desc;
+	struct virt_dma_desc *vd;
+	int ret;
+	size_t residue = 0;
+	unsigned int i;
+	unsigned long flags;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	if (!txstate)
+		return bchan->paused ? DMA_PAUSED : ret;
+
+	spin_lock_irqsave(&bchan->vc.lock, flags);
+	vd = vchan_find_desc(&bchan->vc, cookie);
+	if (vd) {
+		residue = container_of(vd, struct bam_async_desc, vd)->length;
+	} else {
+		list_for_each_entry(async_desc, &bchan->desc_list, desc_node) {
+			if (async_desc->vd.tx.cookie != cookie)
+				continue;
+
+			for (i = 0; i < async_desc->num_desc; i++)
+				residue += le16_to_cpu(
+						async_desc->curr_desc[i].size);
+		}
+	}
+
+	spin_unlock_irqrestore(&bchan->vc.lock, flags);
+
+	dma_set_residue(txstate, residue);
+
+	if (ret == DMA_IN_PROGRESS && bchan->paused)
+		ret = DMA_PAUSED;
+
+	return ret;
+}
+
+/**
+ * bam_apply_new_config
+ * @bchan: bam dma channel
+ * @dir: DMA direction
+ */
+static void bam_apply_new_config(struct bam_chan *bchan,
+	enum dma_transfer_direction dir)
+{
+	struct bam_device *bdev = bchan->bdev;
+	u32 maxburst;
+
+	if (!bdev->controlled_remotely) {
+		if (dir == DMA_DEV_TO_MEM)
+			maxburst = bchan->slave.src_maxburst;
+		else
+			maxburst = bchan->slave.dst_maxburst;
+
+		writel_relaxed(maxburst,
+			       bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD));
+	}
+
+	bchan->reconfigure = 0;
+}
+
+/**
+ * bam_start_dma - start next transaction
+ * @bchan: bam dma channel
+ */
+static void bam_start_dma(struct bam_chan *bchan)
+{
+	struct virt_dma_desc *vd = vchan_next_desc(&bchan->vc);
+	struct bam_device *bdev = bchan->bdev;
+	struct bam_async_desc *async_desc = NULL;
+	struct bam_desc_hw *desc;
+	struct bam_desc_hw *fifo = PTR_ALIGN(bchan->fifo_virt,
+					sizeof(struct bam_desc_hw));
+	int ret;
+	unsigned int avail;
+	struct dmaengine_desc_callback cb;
+
+	lockdep_assert_held(&bchan->vc.lock);
+
+	if (!vd)
+		return;
+
+	ret = bam_pm_runtime_get_sync(bdev->dev);
+	if (ret < 0)
+		return;
+
+	while (vd && !IS_BUSY(bchan)) {
+		list_del(&vd->node);
+
+		async_desc = container_of(vd, struct bam_async_desc, vd);
+
+		/* on first use, initialize the channel hardware */
+		if (!bchan->initialized)
+			bam_chan_init_hw(bchan, async_desc->dir);
+
+		/* apply new slave config changes, if necessary */
+		if (bchan->reconfigure)
+			bam_apply_new_config(bchan, async_desc->dir);
+
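+		/* limit this transfer to the space available in the hw FIFO */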
+		desc = async_desc->curr_desc;
+		avail = CIRC_SPACE(bchan->tail, bchan->head,
+				   MAX_DESCRIPTORS + 1);
+
+		if (async_desc->num_desc > avail)
+			async_desc->xfer_len = avail;
+		else
+			async_desc->xfer_len = async_desc->num_desc;
+
+		/* set any special flags on the last descriptor */
+		if (async_desc->num_desc == async_desc->xfer_len)
+			desc[async_desc->xfer_len - 1].flags |=
+						cpu_to_le16(async_desc->flags);
+
+		vd = vchan_next_desc(&bchan->vc);
+
+		dmaengine_desc_get_callback(&async_desc->vd.tx, &cb);
+
+		/*
+		 * An interrupt is generated at this desc if
+		 *  - the FIFO is full,
+		 *  - there are no more descriptors to add, or
+		 *  - a callback completion was requested for this desc.
+		 *    In that case, BAM delivers the completion callback
+		 *    for this desc and continues processing the next one.
+		 */
+		if (((avail <= async_desc->xfer_len) || !vd ||
+		     dmaengine_desc_callback_valid(&cb)) &&
+		    !(async_desc->flags & DESC_FLAG_EOT))
+			desc[async_desc->xfer_len - 1].flags |=
+				cpu_to_le16(DESC_FLAG_INT);
+
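+		/*
+		 * Copy the descriptors into the hardware FIFO, wrapping
+		 * around at the end of the ring if necessary.
+		 */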
+		if (bchan->tail + async_desc->xfer_len > MAX_DESCRIPTORS) {
+			u32 partial = MAX_DESCRIPTORS - bchan->tail;
+
+			memcpy(&fifo[bchan->tail], desc,
+			       partial * sizeof(struct bam_desc_hw));
+			memcpy(fifo, &desc[partial],
+			       (async_desc->xfer_len - partial) *
+				sizeof(struct bam_desc_hw));
+		} else {
+			memcpy(&fifo[bchan->tail], desc,
+			       async_desc->xfer_len *
+			       sizeof(struct bam_desc_hw));
+		}
+
+		bchan->tail += async_desc->xfer_len;
+		bchan->tail %= MAX_DESCRIPTORS;
+		list_add_tail(&async_desc->desc_node, &bchan->desc_list);
+	}
+
+	/* ensure descriptor writes and dma start not reordered */
+	wmb();
+	writel_relaxed(bchan->tail * sizeof(struct bam_desc_hw),
+			bam_addr(bdev, bchan->id, BAM_P_EVNT_REG));
+
+	pm_runtime_mark_last_busy(bdev->dev);
+	pm_runtime_put_autosuspend(bdev->dev);
+}
+
+/**
+ * dma_tasklet - DMA IRQ tasklet
+ * @data: tasklet argument (bam controller structure)
+ *
+ * Sets up next DMA operation and then processes all completed transactions
+ */
+static void dma_tasklet(unsigned long data)
+{
+	struct bam_device *bdev = (struct bam_device *)data;
+	struct bam_chan *bchan;
+	unsigned long flags;
+	unsigned int i;
+
+	/* go through the channels and kick off transactions */
+	for (i = 0; i < bdev->num_channels; i++) {
+		bchan = &bdev->channels[i];
+		spin_lock_irqsave(&bchan->vc.lock, flags);
+
+		if (!list_empty(&bchan->vc.desc_issued) && !IS_BUSY(bchan))
+			bam_start_dma(bchan);
+		spin_unlock_irqrestore(&bchan->vc.lock, flags);
+	}
+}
+
+/**
+ * bam_issue_pending - starts pending transactions
+ * @chan: dma channel
+ *
+ * Starts any pending transactions directly if the channel is not busy
+ */
+static void bam_issue_pending(struct dma_chan *chan)
+{
+	struct bam_chan *bchan = to_bam_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&bchan->vc.lock, flags);
+
+	/* if work pending and idle, start a transaction */
+	if (vchan_issue_pending(&bchan->vc) && !IS_BUSY(bchan))
+		bam_start_dma(bchan);
+
+	spin_unlock_irqrestore(&bchan->vc.lock, flags);
+}
+
+/**
+ * bam_dma_free_desc - free descriptor memory
+ * @vd: virtual descriptor
+ */
+static void bam_dma_free_desc(struct virt_dma_desc *vd)
+{
+	struct bam_async_desc *async_desc = container_of(vd,
+			struct bam_async_desc, vd);
+
+	kfree(async_desc);
+}
+
+static struct dma_chan *bam_dma_xlate(struct of_phandle_args *dma_spec,
+		struct of_dma *of)
+{
+	struct bam_device *bdev = container_of(of->of_dma_data,
+					struct bam_device, common);
+	unsigned int request;
+
+	if (dma_spec->args_count != 1)
+		return NULL;
+
+	request = dma_spec->args[0];
+	if (request >= bdev->num_channels)
+		return NULL;
+
+	return dma_get_slave_channel(&(bdev->channels[request].vc.chan));
+}
+
+/**
+ * bam_init - initialization helper for global bam registers
+ * @bdev: bam device
+ */
+static int bam_init(struct bam_device *bdev)
+{
+	u32 val;
+
+	/* read revision and configuration information */
+	if (!bdev->num_ees) {
+		val = readl_relaxed(bam_addr(bdev, 0, BAM_REVISION));
+		bdev->num_ees = (val >> NUM_EES_SHIFT) & NUM_EES_MASK;
+	}
+
+	/* check that configured EE is within range */
+	if (bdev->ee >= bdev->num_ees)
+		return -EINVAL;
+
+	if (!bdev->num_channels) {
+		val = readl_relaxed(bam_addr(bdev, 0, BAM_NUM_PIPES));
+		bdev->num_channels = val & BAM_NUM_PIPES_MASK;
+	}
+
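+	/* a remotely controlled BAM is configured by its remote owner */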
+	if (bdev->controlled_remotely)
+		return 0;
+
+	/* s/w reset bam; after reset all pipes are disabled and idle */
+	val = readl_relaxed(bam_addr(bdev, 0, BAM_CTRL));
+	val |= BAM_SW_RST;
+	writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL));
+	val &= ~BAM_SW_RST;
+	writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL));
+
+	/* make sure previous stores are visible before enabling BAM */
+	wmb();
+
+	/* enable bam */
+	val |= BAM_EN;
+	writel_relaxed(val, bam_addr(bdev, 0, BAM_CTRL));
+
+	/* set descriptor threshold, start with 4 bytes */
+	writel_relaxed(DEFAULT_CNT_THRSHLD,
+			bam_addr(bdev, 0, BAM_DESC_CNT_TRSHLD));
+
+	/* Enable default set of h/w workarounds, i.e. all except BAM_FULL_PIPE */
+	writel_relaxed(BAM_CNFG_BITS_DEFAULT, bam_addr(bdev, 0, BAM_CNFG_BITS));
+
+	/* enable irqs for errors */
+	writel_relaxed(BAM_ERROR_EN | BAM_HRESP_ERR_EN,
+			bam_addr(bdev, 0, BAM_IRQ_EN));
+
+	/* unmask global bam interrupt */
+	writel_relaxed(BAM_IRQ_MSK, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
+
+	return 0;
+}
+
+static void bam_channel_init(struct bam_device *bdev, struct bam_chan *bchan,
+	u32 index)
+{
+	bchan->id = index;
+	bchan->bdev = bdev;
+
+	vchan_init(&bchan->vc, &bdev->common);
+	bchan->vc.desc_free = bam_dma_free_desc;
+	INIT_LIST_HEAD(&bchan->desc_list);
+}
+
+static const struct of_device_id bam_of_match[] = {
+	{ .compatible = "qcom,bam-v1.3.0", .data = &bam_v1_3_reg_info },
+	{ .compatible = "qcom,bam-v1.4.0", .data = &bam_v1_4_reg_info },
+	{ .compatible = "qcom,bam-v1.7.0", .data = &bam_v1_7_reg_info },
+	{}
+};
+
+MODULE_DEVICE_TABLE(of, bam_of_match);
+
+static int bam_dma_probe(struct platform_device *pdev)
+{
+	struct bam_device *bdev;
+	const struct of_device_id *match;
+	struct resource *iores;
+	int ret, i;
+
+	bdev = devm_kzalloc(&pdev->dev, sizeof(*bdev), GFP_KERNEL);
+	if (!bdev)
+		return -ENOMEM;
+
+	bdev->dev = &pdev->dev;
+
+	match = of_match_node(bam_of_match, pdev->dev.of_node);
+	if (!match) {
+		dev_err(&pdev->dev, "Unsupported BAM module\n");
+		return -ENODEV;
+	}
+
+	bdev->layout = match->data;
+
+	iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	bdev->regs = devm_ioremap_resource(&pdev->dev, iores);
+	if (IS_ERR(bdev->regs))
+		return PTR_ERR(bdev->regs);
+
+	bdev->irq = platform_get_irq(pdev, 0);
+	if (bdev->irq < 0)
+		return bdev->irq;
+
+	ret = of_property_read_u32(pdev->dev.of_node, "qcom,ee", &bdev->ee);
+	if (ret) {
+		dev_err(bdev->dev, "Execution environment unspecified\n");
+		return ret;
+	}
+
+	bdev->controlled_remotely = of_property_read_bool(pdev->dev.of_node,
+						"qcom,controlled-remotely");
+
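+	/* remotely controlled BAMs describe their configuration in DT */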
+	if (bdev->controlled_remotely) {
+		ret = of_property_read_u32(pdev->dev.of_node, "num-channels",
+					   &bdev->num_channels);
+		if (ret)
+			dev_err(bdev->dev, "num-channels unspecified in dt\n");
+
+		ret = of_property_read_u32(pdev->dev.of_node, "qcom,num-ees",
+					   &bdev->num_ees);
+		if (ret)
+			dev_err(bdev->dev, "num-ees unspecified in dt\n");
+	}
+
+	bdev->bamclk = devm_clk_get(bdev->dev, "bam_clk");
+	if (IS_ERR(bdev->bamclk)) {
+		if (!bdev->controlled_remotely)
+			return PTR_ERR(bdev->bamclk);
+
+		bdev->bamclk = NULL;
+	}
+
+	ret = clk_prepare_enable(bdev->bamclk);
+	if (ret) {
+		dev_err(bdev->dev, "failed to prepare/enable clock\n");
+		return ret;
+	}
+
+	ret = bam_init(bdev);
+	if (ret)
+		goto err_disable_clk;
+
+	tasklet_init(&bdev->task, dma_tasklet, (unsigned long)bdev);
+
+	bdev->channels = devm_kcalloc(bdev->dev, bdev->num_channels,
+				sizeof(*bdev->channels), GFP_KERNEL);
+
+	if (!bdev->channels) {
+		ret = -ENOMEM;
+		goto err_tasklet_kill;
+	}
+
+	/* allocate and initialize channels */
+	INIT_LIST_HEAD(&bdev->common.channels);
+
+	for (i = 0; i < bdev->num_channels; i++)
+		bam_channel_init(bdev, &bdev->channels[i], i);
+
+	ret = devm_request_irq(bdev->dev, bdev->irq, bam_dma_irq,
+			IRQF_TRIGGER_HIGH, "bam_dma", bdev);
+	if (ret)
+		goto err_bam_channel_exit;
+
+	/* set max dma segment size */
+	bdev->common.dev = bdev->dev;
+	bdev->common.dev->dma_parms = &bdev->dma_parms;
+	ret = dma_set_max_seg_size(bdev->common.dev, BAM_FIFO_SIZE);
+	if (ret) {
+		dev_err(bdev->dev, "cannot set maximum segment size\n");
+		goto err_bam_channel_exit;
+	}
+
+	platform_set_drvdata(pdev, bdev);
+
+	/* set capabilities */
+	dma_cap_zero(bdev->common.cap_mask);
+	dma_cap_set(DMA_SLAVE, bdev->common.cap_mask);
+
+	/* initialize dmaengine apis */
+	bdev->common.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	bdev->common.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+	bdev->common.src_addr_widths = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	bdev->common.dst_addr_widths = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	bdev->common.device_alloc_chan_resources = bam_alloc_chan;
+	bdev->common.device_free_chan_resources = bam_free_chan;
+	bdev->common.device_prep_slave_sg = bam_prep_slave_sg;
+	bdev->common.device_config = bam_slave_config;
+	bdev->common.device_pause = bam_pause;
+	bdev->common.device_resume = bam_resume;
+	bdev->common.device_terminate_all = bam_dma_terminate_all;
+	bdev->common.device_issue_pending = bam_issue_pending;
+	bdev->common.device_tx_status = bam_tx_status;
+	bdev->common.dev = bdev->dev;
+
+	ret = dma_async_device_register(&bdev->common);
+	if (ret) {
+		dev_err(bdev->dev, "failed to register dma async device\n");
+		goto err_bam_channel_exit;
+	}
+
+	ret = of_dma_controller_register(pdev->dev.of_node, bam_dma_xlate,
+					&bdev->common);
+	if (ret)
+		goto err_unregister_dma;
+
+	if (bdev->controlled_remotely) {
+		pm_runtime_disable(&pdev->dev);
+		return 0;
+	}
+
+	pm_runtime_irq_safe(&pdev->dev);
+	pm_runtime_set_autosuspend_delay(&pdev->dev, BAM_DMA_AUTOSUSPEND_DELAY);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_mark_last_busy(&pdev->dev);
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+
+	return 0;
+
+err_unregister_dma:
+	dma_async_device_unregister(&bdev->common);
+err_bam_channel_exit:
+	for (i = 0; i < bdev->num_channels; i++)
+		tasklet_kill(&bdev->channels[i].vc.task);
+err_tasklet_kill:
+	tasklet_kill(&bdev->task);
+err_disable_clk:
+	clk_disable_unprepare(bdev->bamclk);
+
+	return ret;
+}
+
+static int bam_dma_remove(struct platform_device *pdev)
+{
+	struct bam_device *bdev = platform_get_drvdata(pdev);
+	u32 i;
+
+	pm_runtime_force_suspend(&pdev->dev);
+
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&bdev->common);
+
+	/* mask all interrupts for this execution environment */
+	writel_relaxed(0, bam_addr(bdev, 0, BAM_IRQ_SRCS_MSK_EE));
+
+	devm_free_irq(bdev->dev, bdev->irq, bdev);
+
+	for (i = 0; i < bdev->num_channels; i++) {
+		bam_dma_terminate_all(&bdev->channels[i].vc.chan);
+		tasklet_kill(&bdev->channels[i].vc.task);
+
+		if (!bdev->channels[i].fifo_virt)
+			continue;
+
+		dma_free_wc(bdev->dev, BAM_DESC_FIFO_SIZE,
+			    bdev->channels[i].fifo_virt,
+			    bdev->channels[i].fifo_phys);
+	}
+
+	tasklet_kill(&bdev->task);
+
+	clk_disable_unprepare(bdev->bamclk);
+
+	return 0;
+}
+
+static int __maybe_unused bam_dma_runtime_suspend(struct device *dev)
+{
+	struct bam_device *bdev = dev_get_drvdata(dev);
+
+	clk_disable(bdev->bamclk);
+
+	return 0;
+}
+
+static int __maybe_unused bam_dma_runtime_resume(struct device *dev)
+{
+	struct bam_device *bdev = dev_get_drvdata(dev);
+	int ret;
+
+	ret = clk_enable(bdev->bamclk);
+	if (ret < 0) {
+		dev_err(dev, "clk_enable failed: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int __maybe_unused bam_dma_suspend(struct device *dev)
+{
+	struct bam_device *bdev = dev_get_drvdata(dev);
+
+	if (!bdev->controlled_remotely)
+		pm_runtime_force_suspend(dev);
+
+	clk_unprepare(bdev->bamclk);
+
+	return 0;
+}
+
+static int __maybe_unused bam_dma_resume(struct device *dev)
+{
+	struct bam_device *bdev = dev_get_drvdata(dev);
+	int ret;
+
+	ret = clk_prepare(bdev->bamclk);
+	if (ret)
+		return ret;
+
+	if (!bdev->controlled_remotely)
+		pm_runtime_force_resume(dev);
+
+	return 0;
+}
+
+static const struct dev_pm_ops bam_dma_pm_ops = {
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(bam_dma_suspend, bam_dma_resume)
+	SET_RUNTIME_PM_OPS(bam_dma_runtime_suspend, bam_dma_runtime_resume,
+				NULL)
+};
+
+static struct platform_driver bam_dma_driver = {
+	.probe = bam_dma_probe,
+	.remove = bam_dma_remove,
+	.driver = {
+		.name = "bam-dma-engine",
+		.pm = &bam_dma_pm_ops,
+		.of_match_table = bam_of_match,
+	},
+};
+
+module_platform_driver(bam_dma_driver);
+
+MODULE_AUTHOR("Andy Gross <agross@codeaurora.org>");
+MODULE_DESCRIPTION("QCOM BAM DMA engine driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/qcom/hidma.c b/drivers/dma/qcom/hidma.c
new file mode 100644
index 0000000..43d4b00
--- /dev/null
+++ b/drivers/dma/qcom/hidma.c
@@ -0,0 +1,977 @@
+/*
+ * Qualcomm Technologies HIDMA DMA engine interface
+ *
+ * Copyright (c) 2015-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Copyright (C) Freescale Semiconductor, Inc. 2007, 2008.
+ * Copyright (C) Semihalf 2009
+ * Copyright (C) Ilya Yanok, Emcraft Systems 2010
+ * Copyright (C) Alexander Popov, Promcontroller 2014
+ *
+ * Written by Piotr Ziecik <kosmo@semihalf.com>. Hardware description
+ * (defines, structures and comments) was taken from MPC5121 DMA driver
+ * written by Hongjun Chen <hong-jun.chen@freescale.com>.
+ *
+ * Approved as OSADL project by a majority of OSADL members and funded
+ * by OSADL membership fees in 2009;  for details see www.osadl.org.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/* Linux Foundation elects GPLv2 license only. */
+
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/of_dma.h>
+#include <linux/of_device.h>
+#include <linux/property.h>
+#include <linux/delay.h>
+#include <linux/acpi.h>
+#include <linux/irq.h>
+#include <linux/atomic.h>
+#include <linux/pm_runtime.h>
+#include <linux/msi.h>
+
+#include "../dmaengine.h"
+#include "hidma.h"
+
+/*
+ * Default idle time is 2 seconds. This parameter can be overridden
+ * at runtime by writing to
+ * /sys/bus/platform/devices/QCOM8061:<xy>/power/autosuspend_delay_ms
+ */
+#define HIDMA_AUTOSUSPEND_TIMEOUT		2000
+#define HIDMA_ERR_INFO_SW			0xFF
+#define HIDMA_ERR_CODE_UNEXPECTED_TERMINATE	0x0
+#define HIDMA_NR_DEFAULT_DESC			10
+#define HIDMA_MSI_INTS				11
+
+static inline struct hidma_dev *to_hidma_dev(struct dma_device *dmadev)
+{
+	return container_of(dmadev, struct hidma_dev, ddev);
+}
+
+static inline
+struct hidma_dev *to_hidma_dev_from_lldev(struct hidma_lldev **_lldevp)
+{
+	return container_of(_lldevp, struct hidma_dev, lldev);
+}
+
+static inline struct hidma_chan *to_hidma_chan(struct dma_chan *dmach)
+{
+	return container_of(dmach, struct hidma_chan, chan);
+}
+
+static inline
+struct hidma_desc *to_hidma_desc(struct dma_async_tx_descriptor *t)
+{
+	return container_of(t, struct hidma_desc, desc);
+}
+
+static void hidma_free(struct hidma_dev *dmadev)
+{
+	INIT_LIST_HEAD(&dmadev->ddev.channels);
+}
+
+static unsigned int nr_desc_prm;
+module_param(nr_desc_prm, uint, 0644);
+MODULE_PARM_DESC(nr_desc_prm, "number of descriptors (default: 0)");
+
+enum hidma_cap {
+	HIDMA_MSI_CAP = 1,
+	HIDMA_IDENTITY_CAP,
+};
+
+/* process completed descriptors */
+static void hidma_process_completed(struct hidma_chan *mchan)
+{
+	struct dma_device *ddev = mchan->chan.device;
+	struct hidma_dev *mdma = to_hidma_dev(ddev);
+	struct dma_async_tx_descriptor *desc;
+	dma_cookie_t last_cookie;
+	struct hidma_desc *mdesc;
+	struct hidma_desc *next;
+	unsigned long irqflags;
+	struct list_head list;
+
+	INIT_LIST_HEAD(&list);
+
+	/* Get all completed descriptors */
+	spin_lock_irqsave(&mchan->lock, irqflags);
+	list_splice_tail_init(&mchan->completed, &list);
+	spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+	/* Execute callbacks and run dependencies */
+	list_for_each_entry_safe(mdesc, next, &list, node) {
+		enum dma_status llstat;
+		struct dmaengine_desc_callback cb;
+		struct dmaengine_result result;
+
+		desc = &mdesc->desc;
+		last_cookie = desc->cookie;
+
+		spin_lock_irqsave(&mchan->lock, irqflags);
+		dma_cookie_complete(desc);
+		spin_unlock_irqrestore(&mchan->lock, irqflags);
+
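+		/* query the hardware completion status for this descriptor */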
+		llstat = hidma_ll_status(mdma->lldev, mdesc->tre_ch);
+		dmaengine_desc_get_callback(desc, &cb);
+
+		dma_run_dependencies(desc);
+
+		spin_lock_irqsave(&mchan->lock, irqflags);
+		list_move(&mdesc->node, &mchan->free);
+
+		if (llstat == DMA_COMPLETE) {
+			mchan->last_success = last_cookie;
+			result.result = DMA_TRANS_NOERROR;
+		} else
+			result.result = DMA_TRANS_ABORTED;
+
+		spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+		dmaengine_desc_callback_invoke(&cb, &result);
+	}
+}
+
+/*
+ * Called once for each submitted descriptor.
+ * PM is locked once for each descriptor that is currently
+ * in execution.
+ */
+static void hidma_callback(void *data)
+{
+	struct hidma_desc *mdesc = data;
+	struct hidma_chan *mchan = to_hidma_chan(mdesc->desc.chan);
+	struct dma_device *ddev = mchan->chan.device;
+	struct hidma_dev *dmadev = to_hidma_dev(ddev);
+	unsigned long irqflags;
+	bool queued = false;
+
+	spin_lock_irqsave(&mchan->lock, irqflags);
+	if (mdesc->node.next) {
+		/* Delete from the active list, add to completed list */
+		list_move_tail(&mdesc->node, &mchan->completed);
+		queued = true;
+
+		/* calculate the next running descriptor */
+		mchan->running = list_first_entry(&mchan->active,
+						  struct hidma_desc, node);
+	}
+	spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+	hidma_process_completed(mchan);
+
+	if (queued) {
+		pm_runtime_mark_last_busy(dmadev->ddev.dev);
+		pm_runtime_put_autosuspend(dmadev->ddev.dev);
+	}
+}
+
+static int hidma_chan_init(struct hidma_dev *dmadev, u32 dma_sig)
+{
+	struct hidma_chan *mchan;
+	struct dma_device *ddev;
+
+	mchan = devm_kzalloc(dmadev->ddev.dev, sizeof(*mchan), GFP_KERNEL);
+	if (!mchan)
+		return -ENOMEM;
+
+	ddev = &dmadev->ddev;
+	mchan->dma_sig = dma_sig;
+	mchan->dmadev = dmadev;
+	mchan->chan.device = ddev;
+	dma_cookie_init(&mchan->chan);
+
+	INIT_LIST_HEAD(&mchan->free);
+	INIT_LIST_HEAD(&mchan->prepared);
+	INIT_LIST_HEAD(&mchan->active);
+	INIT_LIST_HEAD(&mchan->completed);
+	INIT_LIST_HEAD(&mchan->queued);
+
+	spin_lock_init(&mchan->lock);
+	list_add_tail(&mchan->chan.device_node, &ddev->channels);
+	dmadev->ddev.chancnt++;
+	return 0;
+}
+
+static void hidma_issue_task(unsigned long arg)
+{
+	struct hidma_dev *dmadev = (struct hidma_dev *)arg;
+
+	pm_runtime_get_sync(dmadev->ddev.dev);
+	hidma_ll_start(dmadev->lldev);
+}
+
+static void hidma_issue_pending(struct dma_chan *dmach)
+{
+	struct hidma_chan *mchan = to_hidma_chan(dmach);
+	struct hidma_dev *dmadev = mchan->dmadev;
+	unsigned long flags;
+	struct hidma_desc *qdesc, *next;
+	int status;
+
+	spin_lock_irqsave(&mchan->lock, flags);
+	list_for_each_entry_safe(qdesc, next, &mchan->queued, node) {
+		hidma_ll_queue_request(dmadev->lldev, qdesc->tre_ch);
+		list_move_tail(&qdesc->node, &mchan->active);
+	}
+
+	if (!mchan->running) {
+		struct hidma_desc *desc = list_first_entry(&mchan->active,
+							   struct hidma_desc,
+							   node);
+		mchan->running = desc;
+	}
+	spin_unlock_irqrestore(&mchan->lock, flags);
+
+	/* PM will be released in hidma_callback function. */
+	status = pm_runtime_get(dmadev->ddev.dev);
+	if (status < 0)
+		tasklet_schedule(&dmadev->task);
+	else
+		hidma_ll_start(dmadev->lldev);
+}
+
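+/*
+ * A completed cookie is treated as successful if it lies at or before the
+ * last successfully completed cookie; cookies in the window
+ * (last_success, last_used] are reported as errors. The comparison
+ * accounts for cookie wrap-around.
+ */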
+static inline bool hidma_txn_is_success(dma_cookie_t cookie,
+		dma_cookie_t last_success, dma_cookie_t last_used)
+{
+	if (last_success <= last_used) {
+		if ((cookie <= last_success) || (cookie > last_used))
+			return true;
+	} else {
+		if ((cookie <= last_success) && (cookie > last_used))
+			return true;
+	}
+	return false;
+}
+
+static enum dma_status hidma_tx_status(struct dma_chan *dmach,
+				       dma_cookie_t cookie,
+				       struct dma_tx_state *txstate)
+{
+	struct hidma_chan *mchan = to_hidma_chan(dmach);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(dmach, cookie, txstate);
+	if (ret == DMA_COMPLETE) {
+		bool is_success;
+
+		is_success = hidma_txn_is_success(cookie, mchan->last_success,
+						  dmach->cookie);
+		return is_success ? ret : DMA_ERROR;
+	}
+
+	if (mchan->paused && (ret == DMA_IN_PROGRESS)) {
+		unsigned long flags;
+		dma_cookie_t runcookie;
+
+		spin_lock_irqsave(&mchan->lock, flags);
+		if (mchan->running)
+			runcookie = mchan->running->desc.cookie;
+		else
+			runcookie = -EINVAL;
+
+		if (runcookie == cookie)
+			ret = DMA_PAUSED;
+
+		spin_unlock_irqrestore(&mchan->lock, flags);
+	}
+
+	return ret;
+}
+
+/*
+ * Submit descriptor to hardware.
+ * Lock the PM for each descriptor we are sending.
+ */
+static dma_cookie_t hidma_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+	struct hidma_chan *mchan = to_hidma_chan(txd->chan);
+	struct hidma_dev *dmadev = mchan->dmadev;
+	struct hidma_desc *mdesc;
+	unsigned long irqflags;
+	dma_cookie_t cookie;
+
+	pm_runtime_get_sync(dmadev->ddev.dev);
+	if (!hidma_ll_isenabled(dmadev->lldev)) {
+		pm_runtime_mark_last_busy(dmadev->ddev.dev);
+		pm_runtime_put_autosuspend(dmadev->ddev.dev);
+		return -ENODEV;
+	}
+	pm_runtime_mark_last_busy(dmadev->ddev.dev);
+	pm_runtime_put_autosuspend(dmadev->ddev.dev);
+
+	mdesc = container_of(txd, struct hidma_desc, desc);
+	spin_lock_irqsave(&mchan->lock, irqflags);
+
+	/* Move descriptor to queued */
+	list_move_tail(&mdesc->node, &mchan->queued);
+
+	/* Update cookie */
+	cookie = dma_cookie_assign(txd);
+
+	spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+	return cookie;
+}
+
+static int hidma_alloc_chan_resources(struct dma_chan *dmach)
+{
+	struct hidma_chan *mchan = to_hidma_chan(dmach);
+	struct hidma_dev *dmadev = mchan->dmadev;
+	struct hidma_desc *mdesc, *tmp;
+	unsigned long irqflags;
+	LIST_HEAD(descs);
+	unsigned int i;
+	int rc = 0;
+
+	if (mchan->allocated)
+		return 0;
+
+	/* Alloc descriptors for this channel */
+	for (i = 0; i < dmadev->nr_descriptors; i++) {
+		mdesc = kzalloc(sizeof(struct hidma_desc), GFP_NOWAIT);
+		if (!mdesc) {
+			rc = -ENOMEM;
+			break;
+		}
+		dma_async_tx_descriptor_init(&mdesc->desc, dmach);
+		mdesc->desc.tx_submit = hidma_tx_submit;
+
+		rc = hidma_ll_request(dmadev->lldev, mchan->dma_sig,
+				      "DMA engine", hidma_callback, mdesc,
+				      &mdesc->tre_ch);
+		if (rc) {
+			dev_err(dmach->device->dev,
+				"channel alloc failed at %u\n", i);
+			kfree(mdesc);
+			break;
+		}
+		list_add_tail(&mdesc->node, &descs);
+	}
+
+	if (rc) {
+		/* return the allocated descriptors */
+		list_for_each_entry_safe(mdesc, tmp, &descs, node) {
+			hidma_ll_free(dmadev->lldev, mdesc->tre_ch);
+			kfree(mdesc);
+		}
+		return rc;
+	}
+
+	spin_lock_irqsave(&mchan->lock, irqflags);
+	list_splice_tail_init(&descs, &mchan->free);
+	mchan->allocated = true;
+	spin_unlock_irqrestore(&mchan->lock, irqflags);
+	return 1;
+}
+
+static struct dma_async_tx_descriptor *
+hidma_prep_dma_memcpy(struct dma_chan *dmach, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct hidma_chan *mchan = to_hidma_chan(dmach);
+	struct hidma_desc *mdesc = NULL;
+	struct hidma_dev *mdma = mchan->dmadev;
+	unsigned long irqflags;
+
+	/* Get free descriptor */
+	spin_lock_irqsave(&mchan->lock, irqflags);
+	if (!list_empty(&mchan->free)) {
+		mdesc = list_first_entry(&mchan->free, struct hidma_desc, node);
+		list_del(&mdesc->node);
+	}
+	spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+	if (!mdesc)
+		return NULL;
+
+	hidma_ll_set_transfer_params(mdma->lldev, mdesc->tre_ch,
+				     src, dest, len, flags,
+				     HIDMA_TRE_MEMCPY);
+
+	/* Place descriptor in prepared list */
+	spin_lock_irqsave(&mchan->lock, irqflags);
+	list_add_tail(&mdesc->node, &mchan->prepared);
+	spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+	return &mdesc->desc;
+}
+
+static struct dma_async_tx_descriptor *
+hidma_prep_dma_memset(struct dma_chan *dmach, dma_addr_t dest, int value,
+		size_t len, unsigned long flags)
+{
+	struct hidma_chan *mchan = to_hidma_chan(dmach);
+	struct hidma_desc *mdesc = NULL;
+	struct hidma_dev *mdma = mchan->dmadev;
+	unsigned long irqflags;
+
+	/* Get free descriptor */
+	spin_lock_irqsave(&mchan->lock, irqflags);
+	if (!list_empty(&mchan->free)) {
+		mdesc = list_first_entry(&mchan->free, struct hidma_desc, node);
+		list_del(&mdesc->node);
+	}
+	spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+	if (!mdesc)
+		return NULL;
+
+	hidma_ll_set_transfer_params(mdma->lldev, mdesc->tre_ch,
+				     value, dest, len, flags,
+				     HIDMA_TRE_MEMSET);
+
+	/* Place descriptor in prepared list */
+	spin_lock_irqsave(&mchan->lock, irqflags);
+	list_add_tail(&mdesc->node, &mchan->prepared);
+	spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+	return &mdesc->desc;
+}
+
+static int hidma_terminate_channel(struct dma_chan *chan)
+{
+	struct hidma_chan *mchan = to_hidma_chan(chan);
+	struct hidma_dev *dmadev = to_hidma_dev(mchan->chan.device);
+	struct hidma_desc *tmp, *mdesc;
+	unsigned long irqflags;
+	LIST_HEAD(list);
+	int rc;
+
+	pm_runtime_get_sync(dmadev->ddev.dev);
+	/* give completed requests a chance to finish */
+	hidma_process_completed(mchan);
+
+	spin_lock_irqsave(&mchan->lock, irqflags);
+	mchan->last_success = 0;
+	list_splice_init(&mchan->active, &list);
+	list_splice_init(&mchan->prepared, &list);
+	list_splice_init(&mchan->completed, &list);
+	list_splice_init(&mchan->queued, &list);
+	spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+	/* this suspends the existing transfer */
+	rc = hidma_ll_disable(dmadev->lldev);
+	if (rc) {
+		dev_err(dmadev->ddev.dev, "channel did not pause\n");
+		goto out;
+	}
+
+	/* return all user requests */
+	list_for_each_entry_safe(mdesc, tmp, &list, node) {
+		struct dma_async_tx_descriptor *txd = &mdesc->desc;
+
+		dma_descriptor_unmap(txd);
+		dmaengine_desc_get_callback_invoke(txd, NULL);
+		dma_run_dependencies(txd);
+
+		/* move myself to free_list */
+		list_move(&mdesc->node, &mchan->free);
+	}
+
+	rc = hidma_ll_enable(dmadev->lldev);
+out:
+	pm_runtime_mark_last_busy(dmadev->ddev.dev);
+	pm_runtime_put_autosuspend(dmadev->ddev.dev);
+	return rc;
+}
+
+static int hidma_terminate_all(struct dma_chan *chan)
+{
+	struct hidma_chan *mchan = to_hidma_chan(chan);
+	struct hidma_dev *dmadev = to_hidma_dev(mchan->chan.device);
+	int rc;
+
+	rc = hidma_terminate_channel(chan);
+	if (rc)
+		return rc;
+
+	/* reinitialize the hardware */
+	pm_runtime_get_sync(dmadev->ddev.dev);
+	rc = hidma_ll_setup(dmadev->lldev);
+	pm_runtime_mark_last_busy(dmadev->ddev.dev);
+	pm_runtime_put_autosuspend(dmadev->ddev.dev);
+	return rc;
+}
+
+static void hidma_free_chan_resources(struct dma_chan *dmach)
+{
+	struct hidma_chan *mchan = to_hidma_chan(dmach);
+	struct hidma_dev *mdma = mchan->dmadev;
+	struct hidma_desc *mdesc, *tmp;
+	unsigned long irqflags;
+	LIST_HEAD(descs);
+
+	/* terminate running transactions and free descriptors */
+	hidma_terminate_channel(dmach);
+
+	spin_lock_irqsave(&mchan->lock, irqflags);
+
+	/* move the free descriptors to a local list */
+	list_splice_tail_init(&mchan->free, &descs);
+
+	/* Free descriptors */
+	list_for_each_entry_safe(mdesc, tmp, &descs, node) {
+		hidma_ll_free(mdma->lldev, mdesc->tre_ch);
+		list_del(&mdesc->node);
+		kfree(mdesc);
+	}
+
+	mchan->allocated = 0;
+	spin_unlock_irqrestore(&mchan->lock, irqflags);
+}
+
+static int hidma_pause(struct dma_chan *chan)
+{
+	struct hidma_chan *mchan;
+	struct hidma_dev *dmadev;
+
+	mchan = to_hidma_chan(chan);
+	dmadev = to_hidma_dev(mchan->chan.device);
+	if (!mchan->paused) {
+		pm_runtime_get_sync(dmadev->ddev.dev);
+		if (hidma_ll_disable(dmadev->lldev))
+			dev_warn(dmadev->ddev.dev, "channel did not stop\n");
+		mchan->paused = true;
+		pm_runtime_mark_last_busy(dmadev->ddev.dev);
+		pm_runtime_put_autosuspend(dmadev->ddev.dev);
+	}
+	return 0;
+}
+
+static int hidma_resume(struct dma_chan *chan)
+{
+	struct hidma_chan *mchan;
+	struct hidma_dev *dmadev;
+	int rc = 0;
+
+	mchan = to_hidma_chan(chan);
+	dmadev = to_hidma_dev(mchan->chan.device);
+	if (mchan->paused) {
+		pm_runtime_get_sync(dmadev->ddev.dev);
+		rc = hidma_ll_enable(dmadev->lldev);
+		if (!rc)
+			mchan->paused = false;
+		else
+			dev_err(dmadev->ddev.dev,
+				"failed to resume the channel\n");
+		pm_runtime_mark_last_busy(dmadev->ddev.dev);
+		pm_runtime_put_autosuspend(dmadev->ddev.dev);
+	}
+	return rc;
+}
+
+static irqreturn_t hidma_chirq_handler(int chirq, void *arg)
+{
+	struct hidma_lldev *lldev = arg;
+
+	/*
+	 * All interrupts are request driven.
+	 * HW doesn't send an interrupt by itself.
+	 */
+	return hidma_ll_inthandler(chirq, lldev);
+}
+
+#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
+static irqreturn_t hidma_chirq_handler_msi(int chirq, void *arg)
+{
+	struct hidma_lldev **lldevp = arg;
+	struct hidma_dev *dmadev = to_hidma_dev_from_lldev(lldevp);
+
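+	/* translate the MSI vector number into a per-channel cause bit */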
+	return hidma_ll_inthandler_msi(chirq, *lldevp,
+				       1 << (chirq - dmadev->msi_virqbase));
+}
+#endif
+
+static ssize_t hidma_show_values(struct device *dev,
+				 struct device_attribute *attr, char *buf)
+{
+	struct hidma_dev *mdev = dev_get_drvdata(dev);
+
+	buf[0] = 0;
+
+	if (strcmp(attr->attr.name, "chid") == 0)
+		sprintf(buf, "%d\n", mdev->chidx);
+
+	return strlen(buf);
+}
+
+static inline void hidma_sysfs_uninit(struct hidma_dev *dev)
+{
+	device_remove_file(dev->ddev.dev, dev->chid_attrs);
+}
+
+static struct device_attribute*
+hidma_create_sysfs_entry(struct hidma_dev *dev, char *name, int mode)
+{
+	struct device_attribute *attrs;
+	char *name_copy;
+
+	attrs = devm_kmalloc(dev->ddev.dev, sizeof(struct device_attribute),
+			     GFP_KERNEL);
+	if (!attrs)
+		return NULL;
+
+	name_copy = devm_kstrdup(dev->ddev.dev, name, GFP_KERNEL);
+	if (!name_copy)
+		return NULL;
+
+	attrs->attr.name = name_copy;
+	attrs->attr.mode = mode;
+	attrs->show = hidma_show_values;
+	sysfs_attr_init(&attrs->attr);
+
+	return attrs;
+}
+
+static int hidma_sysfs_init(struct hidma_dev *dev)
+{
+	dev->chid_attrs = hidma_create_sysfs_entry(dev, "chid", S_IRUGO);
+	if (!dev->chid_attrs)
+		return -ENOMEM;
+
+	return device_create_file(dev->ddev.dev, dev->chid_attrs);
+}
+
+#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
+static void hidma_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
+{
+	struct device *dev = msi_desc_to_dev(desc);
+	struct hidma_dev *dmadev = dev_get_drvdata(dev);
+
+	if (!desc->platform.msi_index) {
+		writel(msg->address_lo, dmadev->dev_evca + 0x118);
+		writel(msg->address_hi, dmadev->dev_evca + 0x11C);
+		writel(msg->data, dmadev->dev_evca + 0x120);
+	}
+}
+#endif
+
+static void hidma_free_msis(struct hidma_dev *dmadev)
+{
+#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
+	struct device *dev = dmadev->ddev.dev;
+	struct msi_desc *desc;
+
+	/* free the allocated MSI interrupts */
+	for_each_msi_entry(desc, dev)
+		devm_free_irq(dev, desc->irq, &dmadev->lldev);
+
+	platform_msi_domain_free_irqs(dev);
+#endif
+}
+
+static int hidma_request_msi(struct hidma_dev *dmadev,
+			     struct platform_device *pdev)
+{
+#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
+	int rc;
+	struct msi_desc *desc;
+	struct msi_desc *failed_desc = NULL;
+
+	rc = platform_msi_domain_alloc_irqs(&pdev->dev, HIDMA_MSI_INTS,
+					    hidma_write_msi_msg);
+	if (rc)
+		return rc;
+
+	for_each_msi_entry(desc, &pdev->dev) {
+		if (!desc->platform.msi_index)
+			dmadev->msi_virqbase = desc->irq;
+
+		rc = devm_request_irq(&pdev->dev, desc->irq,
+				       hidma_chirq_handler_msi,
+				       0, "qcom-hidma-msi",
+				       &dmadev->lldev);
+		if (rc) {
+			failed_desc = desc;
+			break;
+		}
+	}
+
+	if (rc) {
+		/* free allocated MSI interrupts above */
+		for_each_msi_entry(desc, &pdev->dev) {
+			if (desc == failed_desc)
+				break;
+			devm_free_irq(&pdev->dev, desc->irq,
+				      &dmadev->lldev);
+		}
+	} else {
+		/* MSI setup succeeded; switch the low level driver to MSI mode */
+		hidma_ll_setup_irq(dmadev->lldev, true);
+	}
+	if (rc)
+		dev_warn(&pdev->dev,
+			 "failed to request MSI irq, falling back to wired IRQ\n");
+	return rc;
+#else
+	return -EINVAL;
+#endif
+}
+
+static bool hidma_test_capability(struct device *dev, enum hidma_cap test_cap)
+{
+	enum hidma_cap cap;
+
+	cap = (enum hidma_cap) device_get_match_data(dev);
+	return cap ? ((cap & test_cap) > 0) : 0;
+}
+
+static int hidma_probe(struct platform_device *pdev)
+{
+	struct hidma_dev *dmadev;
+	struct resource *trca_resource;
+	struct resource *evca_resource;
+	int chirq;
+	void __iomem *evca;
+	void __iomem *trca;
+	int rc;
+	bool msi;
+
+	pm_runtime_set_autosuspend_delay(&pdev->dev, HIDMA_AUTOSUSPEND_TIMEOUT);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+
+	trca_resource = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	trca = devm_ioremap_resource(&pdev->dev, trca_resource);
+	if (IS_ERR(trca)) {
+		rc = -ENOMEM;
+		goto bailout;
+	}
+
+	evca_resource = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	evca = devm_ioremap_resource(&pdev->dev, evca_resource);
+	if (IS_ERR(evca)) {
+		rc = -ENOMEM;
+		goto bailout;
+	}
+
+	/*
+	 * This driver only handles the channel IRQs.
+	 * Common IRQ is handled by the management driver.
+	 */
+	chirq = platform_get_irq(pdev, 0);
+	if (chirq < 0) {
+		rc = -ENODEV;
+		goto bailout;
+	}
+
+	dmadev = devm_kzalloc(&pdev->dev, sizeof(*dmadev), GFP_KERNEL);
+	if (!dmadev) {
+		rc = -ENOMEM;
+		goto bailout;
+	}
+
+	INIT_LIST_HEAD(&dmadev->ddev.channels);
+	spin_lock_init(&dmadev->lock);
+	dmadev->ddev.dev = &pdev->dev;
+	pm_runtime_get_sync(dmadev->ddev.dev);
+
+	dma_cap_set(DMA_MEMCPY, dmadev->ddev.cap_mask);
+	dma_cap_set(DMA_MEMSET, dmadev->ddev.cap_mask);
+	if (WARN_ON(!pdev->dev.dma_mask)) {
+		rc = -ENXIO;
+		goto dmafree;
+	}
+
+	dmadev->dev_evca = evca;
+	dmadev->evca_resource = evca_resource;
+	dmadev->dev_trca = trca;
+	dmadev->trca_resource = trca_resource;
+	dmadev->ddev.device_prep_dma_memcpy = hidma_prep_dma_memcpy;
+	dmadev->ddev.device_prep_dma_memset = hidma_prep_dma_memset;
+	dmadev->ddev.device_alloc_chan_resources = hidma_alloc_chan_resources;
+	dmadev->ddev.device_free_chan_resources = hidma_free_chan_resources;
+	dmadev->ddev.device_tx_status = hidma_tx_status;
+	dmadev->ddev.device_issue_pending = hidma_issue_pending;
+	dmadev->ddev.device_pause = hidma_pause;
+	dmadev->ddev.device_resume = hidma_resume;
+	dmadev->ddev.device_terminate_all = hidma_terminate_all;
+	dmadev->ddev.copy_align = 8;
+
+	/*
+	 * Determine the MSI capability of the platform. Old HW doesn't
+	 * support MSI.
+	 */
+	msi = hidma_test_capability(&pdev->dev, HIDMA_MSI_CAP);
+	device_property_read_u32(&pdev->dev, "desc-count",
+				 &dmadev->nr_descriptors);
+
+	if (nr_desc_prm) {
+		dev_info(&pdev->dev, "overriding number of descriptors as %d\n",
+			 nr_desc_prm);
+		dmadev->nr_descriptors = nr_desc_prm;
+	}
+
+	if (!dmadev->nr_descriptors)
+		dmadev->nr_descriptors = HIDMA_NR_DEFAULT_DESC;
+
+	if (hidma_test_capability(&pdev->dev, HIDMA_IDENTITY_CAP))
+		dmadev->chidx = readl(dmadev->dev_trca + 0x40);
+	else
+		dmadev->chidx = readl(dmadev->dev_trca + 0x28);
+
+	/* Set DMA mask to 64 bits. */
+	rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (rc) {
+		dev_warn(&pdev->dev, "unable to set coherent mask to 64 bits\n");
+		rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+		if (rc)
+			goto dmafree;
+	}
+
+	dmadev->lldev = hidma_ll_init(dmadev->ddev.dev,
+				      dmadev->nr_descriptors, dmadev->dev_trca,
+				      dmadev->dev_evca, dmadev->chidx);
+	if (!dmadev->lldev) {
+		rc = -EPROBE_DEFER;
+		goto dmafree;
+	}
+
+	platform_set_drvdata(pdev, dmadev);
+	if (msi)
+		rc = hidma_request_msi(dmadev, pdev);
+
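+	/* fall back to the wired channel interrupt if MSI is unavailable */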
+	if (!msi || rc) {
+		hidma_ll_setup_irq(dmadev->lldev, false);
+		rc = devm_request_irq(&pdev->dev, chirq, hidma_chirq_handler,
+				      0, "qcom-hidma", dmadev->lldev);
+		if (rc)
+			goto uninit;
+	}
+
+	INIT_LIST_HEAD(&dmadev->ddev.channels);
+	rc = hidma_chan_init(dmadev, 0);
+	if (rc)
+		goto uninit;
+
+	rc = dma_async_device_register(&dmadev->ddev);
+	if (rc)
+		goto uninit;
+
+	dmadev->irq = chirq;
+	tasklet_init(&dmadev->task, hidma_issue_task, (unsigned long)dmadev);
+	hidma_debug_init(dmadev);
+	hidma_sysfs_init(dmadev);
+	dev_info(&pdev->dev, "HI-DMA engine driver registration complete\n");
+	pm_runtime_mark_last_busy(dmadev->ddev.dev);
+	pm_runtime_put_autosuspend(dmadev->ddev.dev);
+	return 0;
+
+uninit:
+	if (msi)
+		hidma_free_msis(dmadev);
+
+	hidma_debug_uninit(dmadev);
+	hidma_ll_uninit(dmadev->lldev);
+dmafree:
+	if (dmadev)
+		hidma_free(dmadev);
+bailout:
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+	return rc;
+}
+
+static void hidma_shutdown(struct platform_device *pdev)
+{
+	struct hidma_dev *dmadev = platform_get_drvdata(pdev);
+
+	dev_info(dmadev->ddev.dev, "HI-DMA engine shutdown\n");
+
+	pm_runtime_get_sync(dmadev->ddev.dev);
+	if (hidma_ll_disable(dmadev->lldev))
+		dev_warn(dmadev->ddev.dev, "channel did not stop\n");
+	pm_runtime_mark_last_busy(dmadev->ddev.dev);
+	pm_runtime_put_autosuspend(dmadev->ddev.dev);
+}
+
+static int hidma_remove(struct platform_device *pdev)
+{
+	struct hidma_dev *dmadev = platform_get_drvdata(pdev);
+
+	pm_runtime_get_sync(dmadev->ddev.dev);
+	dma_async_device_unregister(&dmadev->ddev);
+	if (!dmadev->lldev->msi_support)
+		devm_free_irq(dmadev->ddev.dev, dmadev->irq, dmadev->lldev);
+	else
+		hidma_free_msis(dmadev);
+
+	tasklet_kill(&dmadev->task);
+	hidma_sysfs_uninit(dmadev);
+	hidma_debug_uninit(dmadev);
+	hidma_ll_uninit(dmadev->lldev);
+	hidma_free(dmadev);
+
+	dev_info(&pdev->dev, "HI-DMA engine removed\n");
+	pm_runtime_put_sync_suspend(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+#if IS_ENABLED(CONFIG_ACPI)
+static const struct acpi_device_id hidma_acpi_ids[] = {
+	{"QCOM8061"},
+	{"QCOM8062", HIDMA_MSI_CAP},
+	{"QCOM8063", (HIDMA_MSI_CAP | HIDMA_IDENTITY_CAP)},
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, hidma_acpi_ids);
+#endif
+
+static const struct of_device_id hidma_match[] = {
+	{.compatible = "qcom,hidma-1.0",},
+	{.compatible = "qcom,hidma-1.1", .data = (void *)(HIDMA_MSI_CAP),},
+	{.compatible = "qcom,hidma-1.2",
+	 .data = (void *)(HIDMA_MSI_CAP | HIDMA_IDENTITY_CAP),},
+	{},
+};
+MODULE_DEVICE_TABLE(of, hidma_match);
+
+static struct platform_driver hidma_driver = {
+	.probe = hidma_probe,
+	.remove = hidma_remove,
+	.shutdown = hidma_shutdown,
+	.driver = {
+		   .name = "hidma",
+		   .of_match_table = hidma_match,
+		   .acpi_match_table = ACPI_PTR(hidma_acpi_ids),
+	},
+};
+
+module_platform_driver(hidma_driver);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/qcom/hidma.h b/drivers/dma/qcom/hidma.h
new file mode 100644
index 0000000..5f9966e
--- /dev/null
+++ b/drivers/dma/qcom/hidma.h
@@ -0,0 +1,171 @@
+/*
+ * Qualcomm Technologies HIDMA data structures
+ *
+ * Copyright (c) 2014-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef QCOM_HIDMA_H
+#define QCOM_HIDMA_H
+
+#include <linux/kfifo.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+
+#define HIDMA_TRE_SIZE			32 /* each TRE is 32 bytes  */
+#define HIDMA_TRE_CFG_IDX		0
+#define HIDMA_TRE_LEN_IDX		1
+#define HIDMA_TRE_SRC_LOW_IDX		2
+#define HIDMA_TRE_SRC_HI_IDX		3
+#define HIDMA_TRE_DEST_LOW_IDX		4
+#define HIDMA_TRE_DEST_HI_IDX		5
+
+enum tre_type {
+	HIDMA_TRE_MEMCPY = 3,
+	HIDMA_TRE_MEMSET = 4,
+};
+
+struct hidma_tre {
+	atomic_t allocated;		/* if this channel is allocated	    */
+	bool queued;			/* flag whether this is pending     */
+	u16 status;			/* status			    */
+	u32 idx;			/* index of the tre		    */
+	u32 dma_sig;			/* signature of the tre		    */
+	const char *dev_name;		/* name of the device		    */
+	void (*callback)(void *data);	/* requester callback		    */
+	void *data;			/* Data associated with this channel*/
+	struct hidma_lldev *lldev;	/* lldma device pointer		    */
+	u32 tre_local[HIDMA_TRE_SIZE / sizeof(u32) + 1]; /* TRE local copy  */
+	u32 tre_index;			/* the offset where this was written*/
+	u32 int_flags;			/* interrupt flags		    */
+	u8 err_info;			/* error record in this transfer    */
+	u8 err_code;			/* completion code		    */
+};
+
+struct hidma_lldev {
+	bool msi_support;		/* flag indicating MSI support    */
+	bool initialized;		/* initialized flag               */
+	u8 trch_state;			/* trch_state of the device	  */
+	u8 evch_state;			/* evch_state of the device	  */
+	u8 chidx;			/* channel index in the core	  */
+	u32 nr_tres;			/* max number of configs          */
+	spinlock_t lock;		/* reentrancy                     */
+	struct hidma_tre *trepool;	/* trepool of user configs */
+	struct device *dev;		/* device			  */
+	void __iomem *trca;		/* Transfer Channel address       */
+	void __iomem *evca;		/* Event Channel address          */
+	struct hidma_tre
+		**pending_tre_list;	/* Pointers to pending TREs	  */
+	atomic_t pending_tre_count;	/* Number of TREs pending	  */
+
+	void *tre_ring;			/* TRE ring			  */
+	dma_addr_t tre_dma;		/* TRE ring to be shared with HW  */
+	u32 tre_ring_size;		/* Byte size of the ring	  */
+	u32 tre_processed_off;		/* last processed TRE		  */
+
+	void *evre_ring;		/* EVRE ring			   */
+	dma_addr_t evre_dma;		/* EVRE ring to be shared with HW  */
+	u32 evre_ring_size;		/* Byte size of the ring	   */
+	u32 evre_processed_off;		/* last processed EVRE		   */
+
+	u32 tre_write_offset;           /* TRE write location              */
+	struct tasklet_struct task;	/* task delivering notifications   */
+	DECLARE_KFIFO_PTR(handoff_fifo,
+		struct hidma_tre *);    /* pending TREs FIFO               */
+};
+
+struct hidma_desc {
+	struct dma_async_tx_descriptor	desc;
+	/* link list node for this channel*/
+	struct list_head		node;
+	u32				tre_ch;
+};
+
+struct hidma_chan {
+	bool				paused;
+	bool				allocated;
+	char				dbg_name[16];
+	u32				dma_sig;
+	dma_cookie_t			last_success;
+
+	/*
+	 * active descriptor on this channel
+	 * It is used by the DMA complete notification to
+	 * locate the descriptor that initiated the transfer.
+	 */
+	struct dentry			*debugfs;
+	struct dentry			*stats;
+	struct hidma_dev		*dmadev;
+	struct hidma_desc		*running;
+
+	struct dma_chan			chan;
+	struct list_head		free;
+	struct list_head		prepared;
+	struct list_head		queued;
+	struct list_head		active;
+	struct list_head		completed;
+
+	/* Lock for this structure */
+	spinlock_t			lock;
+};
+
+struct hidma_dev {
+	int				irq;
+	int				chidx;
+	u32				nr_descriptors;
+	int				msi_virqbase;
+
+	struct hidma_lldev		*lldev;
+	void				__iomem *dev_trca;
+	struct resource			*trca_resource;
+	void				__iomem *dev_evca;
+	struct resource			*evca_resource;
+
+	/* used to protect the pending channel list*/
+	spinlock_t			lock;
+	struct dma_device		ddev;
+
+	struct dentry			*debugfs;
+	struct dentry			*stats;
+
+	/* sysfs entry for the channel id */
+	struct device_attribute		*chid_attrs;
+
+	/* Task delivering issue_pending */
+	struct tasklet_struct		task;
+};
+
+int hidma_ll_request(struct hidma_lldev *llhndl, u32 dev_id,
+			const char *dev_name,
+			void (*callback)(void *data), void *data, u32 *tre_ch);
+
+void hidma_ll_free(struct hidma_lldev *llhndl, u32 tre_ch);
+enum dma_status hidma_ll_status(struct hidma_lldev *llhndl, u32 tre_ch);
+bool hidma_ll_isenabled(struct hidma_lldev *llhndl);
+void hidma_ll_queue_request(struct hidma_lldev *llhndl, u32 tre_ch);
+void hidma_ll_start(struct hidma_lldev *llhndl);
+int hidma_ll_disable(struct hidma_lldev *lldev);
+int hidma_ll_enable(struct hidma_lldev *llhndl);
+void hidma_ll_set_transfer_params(struct hidma_lldev *llhndl, u32 tre_ch,
+	dma_addr_t src, dma_addr_t dest, u32 len, u32 flags, u32 txntype);
+void hidma_ll_setup_irq(struct hidma_lldev *lldev, bool msi);
+int hidma_ll_setup(struct hidma_lldev *lldev);
+struct hidma_lldev *hidma_ll_init(struct device *dev, u32 max_channels,
+			void __iomem *trca, void __iomem *evca,
+			u8 chidx);
+int hidma_ll_uninit(struct hidma_lldev *llhndl);
+irqreturn_t hidma_ll_inthandler(int irq, void *arg);
+irqreturn_t hidma_ll_inthandler_msi(int irq, void *arg, int cause);
+void hidma_cleanup_pending_tre(struct hidma_lldev *llhndl, u8 err_info,
+				u8 err_code);
+int hidma_debug_init(struct hidma_dev *dmadev);
+void hidma_debug_uninit(struct hidma_dev *dmadev);
+#endif
diff --git a/drivers/dma/qcom/hidma_dbg.c b/drivers/dma/qcom/hidma_dbg.c
new file mode 100644
index 0000000..3bdcb80
--- /dev/null
+++ b/drivers/dma/qcom/hidma_dbg.c
@@ -0,0 +1,217 @@
+/*
+ * Qualcomm Technologies HIDMA debug file
+ *
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/list.h>
+#include <linux/pm_runtime.h>
+
+#include "hidma.h"
+
+static void hidma_ll_chstats(struct seq_file *s, void *llhndl, u32 tre_ch)
+{
+	struct hidma_lldev *lldev = llhndl;
+	struct hidma_tre *tre;
+	u32 length;
+	dma_addr_t src_start;
+	dma_addr_t dest_start;
+	u32 *tre_local;
+
+	if (tre_ch >= lldev->nr_tres) {
+		dev_err(lldev->dev, "invalid TRE number in chstats:%d", tre_ch);
+		return;
+	}
+	tre = &lldev->trepool[tre_ch];
+	seq_printf(s, "------Channel %d -----\n", tre_ch);
+	seq_printf(s, "allocated=%d\n", atomic_read(&tre->allocated));
+	seq_printf(s, "queued = 0x%x\n", tre->queued);
+	seq_printf(s, "err_info = 0x%x\n", tre->err_info);
+	seq_printf(s, "err_code = 0x%x\n", tre->err_code);
+	seq_printf(s, "status = 0x%x\n", tre->status);
+	seq_printf(s, "idx = 0x%x\n", tre->idx);
+	seq_printf(s, "dma_sig = 0x%x\n", tre->dma_sig);
+	seq_printf(s, "dev_name=%s\n", tre->dev_name);
+	seq_printf(s, "callback=%p\n", tre->callback);
+	seq_printf(s, "data=%p\n", tre->data);
+	seq_printf(s, "tre_index = 0x%x\n", tre->tre_index);
+
+	tre_local = &tre->tre_local[0];
+	src_start = tre_local[HIDMA_TRE_SRC_LOW_IDX];
+	src_start = ((u64) (tre_local[HIDMA_TRE_SRC_HI_IDX]) << 32) + src_start;
+	dest_start = tre_local[HIDMA_TRE_DEST_LOW_IDX];
+	dest_start += ((u64) (tre_local[HIDMA_TRE_DEST_HI_IDX]) << 32);
+	length = tre_local[HIDMA_TRE_LEN_IDX];
+
+	seq_printf(s, "src=%pap\n", &src_start);
+	seq_printf(s, "dest=%pap\n", &dest_start);
+	seq_printf(s, "length = 0x%x\n", length);
+}
+
+static void hidma_ll_devstats(struct seq_file *s, void *llhndl)
+{
+	struct hidma_lldev *lldev = llhndl;
+
+	seq_puts(s, "------Device -----\n");
+	seq_printf(s, "lldev init = 0x%x\n", lldev->initialized);
+	seq_printf(s, "trch_state = 0x%x\n", lldev->trch_state);
+	seq_printf(s, "evch_state = 0x%x\n", lldev->evch_state);
+	seq_printf(s, "chidx = 0x%x\n", lldev->chidx);
+	seq_printf(s, "nr_tres = 0x%x\n", lldev->nr_tres);
+	seq_printf(s, "trca=%p\n", lldev->trca);
+	seq_printf(s, "tre_ring=%p\n", lldev->tre_ring);
+	seq_printf(s, "tre_ring_handle=%pap\n", &lldev->tre_dma);
+	seq_printf(s, "tre_ring_size = 0x%x\n", lldev->tre_ring_size);
+	seq_printf(s, "tre_processed_off = 0x%x\n", lldev->tre_processed_off);
+	seq_printf(s, "pending_tre_count=%d\n",
+			atomic_read(&lldev->pending_tre_count));
+	seq_printf(s, "evca=%p\n", lldev->evca);
+	seq_printf(s, "evre_ring=%p\n", lldev->evre_ring);
+	seq_printf(s, "evre_ring_handle=%pap\n", &lldev->evre_dma);
+	seq_printf(s, "evre_ring_size = 0x%x\n", lldev->evre_ring_size);
+	seq_printf(s, "evre_processed_off = 0x%x\n", lldev->evre_processed_off);
+	seq_printf(s, "tre_write_offset = 0x%x\n", lldev->tre_write_offset);
+}
+
+/*
+ * hidma_chan_stats: display HIDMA channel statistics
+ *
+ * Display the statistics for the current HIDMA virtual channel device.
+ */
+static int hidma_chan_stats(struct seq_file *s, void *unused)
+{
+	struct hidma_chan *mchan = s->private;
+	struct hidma_desc *mdesc;
+	struct hidma_dev *dmadev = mchan->dmadev;
+
+	pm_runtime_get_sync(dmadev->ddev.dev);
+	seq_printf(s, "paused=%u\n", mchan->paused);
+	seq_printf(s, "dma_sig=%u\n", mchan->dma_sig);
+	seq_puts(s, "prepared\n");
+	list_for_each_entry(mdesc, &mchan->prepared, node)
+		hidma_ll_chstats(s, mchan->dmadev->lldev, mdesc->tre_ch);
+
+	seq_puts(s, "active\n");
+	list_for_each_entry(mdesc, &mchan->active, node)
+		hidma_ll_chstats(s, mchan->dmadev->lldev, mdesc->tre_ch);
+
+	seq_puts(s, "completed\n");
+	list_for_each_entry(mdesc, &mchan->completed, node)
+		hidma_ll_chstats(s, mchan->dmadev->lldev, mdesc->tre_ch);
+
+	hidma_ll_devstats(s, mchan->dmadev->lldev);
+	pm_runtime_mark_last_busy(dmadev->ddev.dev);
+	pm_runtime_put_autosuspend(dmadev->ddev.dev);
+	return 0;
+}
+
+/*
+ * hidma_dma_info: display HIDMA device info
+ *
+ * Display the info for the current HIDMA device.
+ */
+static int hidma_dma_info(struct seq_file *s, void *unused)
+{
+	struct hidma_dev *dmadev = s->private;
+	resource_size_t sz;
+
+	seq_printf(s, "nr_descriptors=%d\n", dmadev->nr_descriptors);
+	seq_printf(s, "dev_trca=%p\n", &dmadev->dev_trca);
+	seq_printf(s, "dev_trca_phys=%pa\n", &dmadev->trca_resource->start);
+	sz = resource_size(dmadev->trca_resource);
+	seq_printf(s, "dev_trca_size=%pa\n", &sz);
+	seq_printf(s, "dev_evca=%p\n", &dmadev->dev_evca);
+	seq_printf(s, "dev_evca_phys=%pa\n", &dmadev->evca_resource->start);
+	sz = resource_size(dmadev->evca_resource);
+	seq_printf(s, "dev_evca_size=%pa\n", &sz);
+	return 0;
+}
+
+static int hidma_chan_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hidma_chan_stats, inode->i_private);
+}
+
+static int hidma_dma_info_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hidma_dma_info, inode->i_private);
+}
+
+static const struct file_operations hidma_chan_fops = {
+	.open = hidma_chan_stats_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static const struct file_operations hidma_dma_fops = {
+	.open = hidma_dma_info_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void hidma_debug_uninit(struct hidma_dev *dmadev)
+{
+	debugfs_remove_recursive(dmadev->debugfs);
+}
+
+int hidma_debug_init(struct hidma_dev *dmadev)
+{
+	int rc = 0;
+	int chidx = 0;
+	struct list_head *position = NULL;
+
+	dmadev->debugfs = debugfs_create_dir(dev_name(dmadev->ddev.dev), NULL);
+	if (!dmadev->debugfs) {
+		rc = -ENODEV;
+		return rc;
+	}
+
+	/* walk through the virtual channel list */
+	list_for_each(position, &dmadev->ddev.channels) {
+		struct hidma_chan *chan;
+
+		chan = list_entry(position, struct hidma_chan,
+				  chan.device_node);
+		sprintf(chan->dbg_name, "chan%d", chidx);
+		chan->debugfs = debugfs_create_dir(chan->dbg_name,
+						   dmadev->debugfs);
+		if (!chan->debugfs) {
+			rc = -ENOMEM;
+			goto cleanup;
+		}
+		chan->stats = debugfs_create_file("stats", S_IRUGO,
+						  chan->debugfs, chan,
+						  &hidma_chan_fops);
+		if (!chan->stats) {
+			rc = -ENOMEM;
+			goto cleanup;
+		}
+		chidx++;
+	}
+
+	dmadev->stats = debugfs_create_file("stats", S_IRUGO,
+					    dmadev->debugfs, dmadev,
+					    &hidma_dma_fops);
+	if (!dmadev->stats) {
+		rc = -ENOMEM;
+		goto cleanup;
+	}
+
+	return 0;
+cleanup:
+	hidma_debug_uninit(dmadev);
+	return rc;
+}
diff --git a/drivers/dma/qcom/hidma_ll.c b/drivers/dma/qcom/hidma_ll.c
new file mode 100644
index 0000000..7c6e2ff
--- /dev/null
+++ b/drivers/dma/qcom/hidma_ll.c
@@ -0,0 +1,865 @@
+/*
+ * Qualcomm Technologies HIDMA DMA engine low level code
+ *
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/atomic.h>
+#include <linux/iopoll.h>
+#include <linux/kfifo.h>
+#include <linux/bitops.h>
+
+#include "hidma.h"
+
+#define HIDMA_EVRE_SIZE			16	/* each EVRE is 16 bytes */
+
+#define HIDMA_TRCA_CTRLSTS_REG			0x000
+#define HIDMA_TRCA_RING_LOW_REG		0x008
+#define HIDMA_TRCA_RING_HIGH_REG		0x00C
+#define HIDMA_TRCA_RING_LEN_REG		0x010
+#define HIDMA_TRCA_DOORBELL_REG		0x400
+
+#define HIDMA_EVCA_CTRLSTS_REG			0x000
+#define HIDMA_EVCA_INTCTRL_REG			0x004
+#define HIDMA_EVCA_RING_LOW_REG		0x008
+#define HIDMA_EVCA_RING_HIGH_REG		0x00C
+#define HIDMA_EVCA_RING_LEN_REG		0x010
+#define HIDMA_EVCA_WRITE_PTR_REG		0x020
+#define HIDMA_EVCA_DOORBELL_REG		0x400
+
+#define HIDMA_EVCA_IRQ_STAT_REG		0x100
+#define HIDMA_EVCA_IRQ_CLR_REG			0x108
+#define HIDMA_EVCA_IRQ_EN_REG			0x110
+
+#define HIDMA_EVRE_CFG_IDX			0
+
+#define HIDMA_EVRE_ERRINFO_BIT_POS		24
+#define HIDMA_EVRE_CODE_BIT_POS		28
+
+#define HIDMA_EVRE_ERRINFO_MASK		GENMASK(3, 0)
+#define HIDMA_EVRE_CODE_MASK			GENMASK(3, 0)
+
+#define HIDMA_CH_CONTROL_MASK			GENMASK(7, 0)
+#define HIDMA_CH_STATE_MASK			GENMASK(7, 0)
+#define HIDMA_CH_STATE_BIT_POS			0x8
+
+#define HIDMA_IRQ_EV_CH_EOB_IRQ_BIT_POS	0
+#define HIDMA_IRQ_EV_CH_WR_RESP_BIT_POS	1
+#define HIDMA_IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS	9
+#define HIDMA_IRQ_TR_CH_DATA_RD_ER_BIT_POS	10
+#define HIDMA_IRQ_TR_CH_DATA_WR_ER_BIT_POS	11
+#define HIDMA_IRQ_TR_CH_INVALID_TRE_BIT_POS	14
+
+#define ENABLE_IRQS (BIT(HIDMA_IRQ_EV_CH_EOB_IRQ_BIT_POS)	| \
+		     BIT(HIDMA_IRQ_EV_CH_WR_RESP_BIT_POS)	| \
+		     BIT(HIDMA_IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS)	| \
+		     BIT(HIDMA_IRQ_TR_CH_DATA_RD_ER_BIT_POS)	| \
+		     BIT(HIDMA_IRQ_TR_CH_DATA_WR_ER_BIT_POS)	| \
+		     BIT(HIDMA_IRQ_TR_CH_INVALID_TRE_BIT_POS))
+
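+/* advance a ring offset (iter) by size bytes, wrapping around at ring_size */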
+#define HIDMA_INCREMENT_ITERATOR(iter, size, ring_size)	\
+do {								\
+	iter += size;						\
+	if (iter >= ring_size)					\
+		iter -= ring_size;				\
+} while (0)
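+
+/*
+ * Illustrative note (not driver code): HIDMA_INCREMENT_ITERATOR is a plain
+ * ring-buffer advance.  For example, with HIDMA_EVRE_SIZE == 16 and a
+ * four-entry event ring (ring_size == 64), an iterator at offset 48 advances
+ * to 64 and immediately wraps back to 0.
+ */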
+
+#define HIDMA_CH_STATE(val)	\
+	((val >> HIDMA_CH_STATE_BIT_POS) & HIDMA_CH_STATE_MASK)
+
+#define HIDMA_ERR_INT_MASK				\
+	(BIT(HIDMA_IRQ_TR_CH_INVALID_TRE_BIT_POS)   |	\
+	 BIT(HIDMA_IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS) |	\
+	 BIT(HIDMA_IRQ_EV_CH_WR_RESP_BIT_POS)	    |	\
+	 BIT(HIDMA_IRQ_TR_CH_DATA_RD_ER_BIT_POS)    |	\
+	 BIT(HIDMA_IRQ_TR_CH_DATA_WR_ER_BIT_POS))
+
+enum ch_command {
+	HIDMA_CH_DISABLE = 0,
+	HIDMA_CH_ENABLE = 1,
+	HIDMA_CH_SUSPEND = 2,
+	HIDMA_CH_RESET = 9,
+};
+
+enum ch_state {
+	HIDMA_CH_DISABLED = 0,
+	HIDMA_CH_ENABLED = 1,
+	HIDMA_CH_RUNNING = 2,
+	HIDMA_CH_SUSPENDED = 3,
+	HIDMA_CH_STOPPED = 4,
+};
+
+enum err_code {
+	HIDMA_EVRE_STATUS_COMPLETE = 1,
+	HIDMA_EVRE_STATUS_ERROR = 4,
+};
+
+static int hidma_is_chan_enabled(int state)
+{
+	switch (state) {
+	case HIDMA_CH_ENABLED:
+	case HIDMA_CH_RUNNING:
+		return true;
+	default:
+		return false;
+	}
+}
+
+void hidma_ll_free(struct hidma_lldev *lldev, u32 tre_ch)
+{
+	struct hidma_tre *tre;
+
+	if (tre_ch >= lldev->nr_tres) {
+		dev_err(lldev->dev, "invalid TRE number in free:%d", tre_ch);
+		return;
+	}
+
+	tre = &lldev->trepool[tre_ch];
+	if (atomic_read(&tre->allocated) != true) {
+		dev_err(lldev->dev, "trying to free an unused TRE:%d", tre_ch);
+		return;
+	}
+
+	atomic_set(&tre->allocated, 0);
+}
+
+int hidma_ll_request(struct hidma_lldev *lldev, u32 sig, const char *dev_name,
+		     void (*callback)(void *data), void *data, u32 *tre_ch)
+{
+	unsigned int i;
+	struct hidma_tre *tre;
+	u32 *tre_local;
+
+	if (!tre_ch || !lldev)
+		return -EINVAL;
+
+	/* need to have at least one empty spot in the queue */
+	for (i = 0; i < lldev->nr_tres - 1; i++) {
+		if (atomic_add_unless(&lldev->trepool[i].allocated, 1, 1))
+			break;
+	}
+
+	if (i == (lldev->nr_tres - 1))
+		return -ENOMEM;
+
+	tre = &lldev->trepool[i];
+	tre->dma_sig = sig;
+	tre->dev_name = dev_name;
+	tre->callback = callback;
+	tre->data = data;
+	tre->idx = i;
+	tre->status = 0;
+	tre->queued = 0;
+	tre->err_code = 0;
+	tre->err_info = 0;
+	tre->lldev = lldev;
+	tre_local = &tre->tre_local[0];
+	tre_local[HIDMA_TRE_CFG_IDX] = (lldev->chidx & 0xFF) << 8;
+	tre_local[HIDMA_TRE_CFG_IDX] |= BIT(16);	/* set IEOB */
+	*tre_ch = i;
+	if (callback)
+		callback(data);
+	return 0;
+}
+
+/*
+ * Multiple TREs may be queued and waiting in the pending queue.
+ */
+static void hidma_ll_tre_complete(unsigned long arg)
+{
+	struct hidma_lldev *lldev = (struct hidma_lldev *)arg;
+	struct hidma_tre *tre;
+
+	while (kfifo_out(&lldev->handoff_fifo, &tre, 1)) {
+		/* call the user if it has been read by the hardware */
+		if (tre->callback)
+			tre->callback(tre->data);
+	}
+}
+
+static int hidma_post_completed(struct hidma_lldev *lldev, u8 err_info,
+				u8 err_code)
+{
+	struct hidma_tre *tre;
+	unsigned long flags;
+	u32 tre_iterator;
+
+	spin_lock_irqsave(&lldev->lock, flags);
+
+	tre_iterator = lldev->tre_processed_off;
+	tre = lldev->pending_tre_list[tre_iterator / HIDMA_TRE_SIZE];
+	if (!tre) {
+		spin_unlock_irqrestore(&lldev->lock, flags);
+		dev_warn(lldev->dev, "tre_index [%d] and tre out of sync\n",
+			 tre_iterator / HIDMA_TRE_SIZE);
+		return -EINVAL;
+	}
+	lldev->pending_tre_list[tre->tre_index] = NULL;
+
+	/*
+	 * Keep track of pending TREs that SW is expecting to receive
+	 * from HW. We got one now. Decrement our counter.
+	 */
+	if (atomic_dec_return(&lldev->pending_tre_count) < 0) {
+		dev_warn(lldev->dev, "tre count mismatch on completion");
+		atomic_set(&lldev->pending_tre_count, 0);
+	}
+
+	HIDMA_INCREMENT_ITERATOR(tre_iterator, HIDMA_TRE_SIZE,
+				 lldev->tre_ring_size);
+	lldev->tre_processed_off = tre_iterator;
+	spin_unlock_irqrestore(&lldev->lock, flags);
+
+	tre->err_info = err_info;
+	tre->err_code = err_code;
+	tre->queued = 0;
+
+	kfifo_put(&lldev->handoff_fifo, tre);
+	tasklet_schedule(&lldev->task);
+
+	return 0;
+}
+
+/*
+ * Called to handle the interrupt for the channel.
+ * Return the number of EVREs consumed on this run (a positive number if any
+ * TREs/EVREs were processed).
+ * Return 0 if there is nothing to consume or no pending TREs/EVREs found.
+ */
+static int hidma_handle_tre_completion(struct hidma_lldev *lldev)
+{
+	u32 evre_ring_size = lldev->evre_ring_size;
+	u32 err_info, err_code, evre_write_off;
+	u32 evre_iterator;
+	u32 num_completed = 0;
+
+	evre_write_off = readl_relaxed(lldev->evca + HIDMA_EVCA_WRITE_PTR_REG);
+	evre_iterator = lldev->evre_processed_off;
+
+	if ((evre_write_off > evre_ring_size) ||
+	    (evre_write_off % HIDMA_EVRE_SIZE)) {
+		dev_err(lldev->dev, "HW reports invalid EVRE write offset\n");
+		return 0;
+	}
+
+	/*
+	 * By the time control reaches here the number of EVREs and TREs
+	 * may not match. Only consume the ones that the hardware has told us about.
+	 */
+	while ((evre_iterator != evre_write_off)) {
+		u32 *current_evre = lldev->evre_ring + evre_iterator;
+		u32 cfg;
+
+		cfg = current_evre[HIDMA_EVRE_CFG_IDX];
+		err_info = cfg >> HIDMA_EVRE_ERRINFO_BIT_POS;
+		err_info &= HIDMA_EVRE_ERRINFO_MASK;
+		err_code =
+		    (cfg >> HIDMA_EVRE_CODE_BIT_POS) & HIDMA_EVRE_CODE_MASK;
+
+		if (hidma_post_completed(lldev, err_info, err_code))
+			break;
+
+		HIDMA_INCREMENT_ITERATOR(evre_iterator, HIDMA_EVRE_SIZE,
+					 evre_ring_size);
+
+		/*
+		 * Read the new event descriptor written by the HW.
+		 * As we are processing the delivered events, other events
+		 * get queued to the SW for processing.
+		 */
+		evre_write_off =
+		    readl_relaxed(lldev->evca + HIDMA_EVCA_WRITE_PTR_REG);
+		num_completed++;
+
+		/*
+		 * An error interrupt might have arrived while we are processing
+		 * the completed interrupt.
+		 */
+		if (!hidma_ll_isenabled(lldev))
+			break;
+	}
+
+	if (num_completed) {
+		u32 evre_read_off = (lldev->evre_processed_off +
+				     HIDMA_EVRE_SIZE * num_completed);
+		evre_read_off = evre_read_off % evre_ring_size;
+		writel(evre_read_off, lldev->evca + HIDMA_EVCA_DOORBELL_REG);
+
+		/* record the last processed EVRE offset */
+		lldev->evre_processed_off = evre_read_off;
+	}
+
+	return num_completed;
+}
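+
+/*
+ * Worked example (illustrative only): with a four-entry event ring
+ * (evre_ring_size == 64) and evre_processed_off == 48, completing two EVREs
+ * above rings the event doorbell with (48 + 2 * 16) % 64 == 16, which also
+ * becomes the new evre_processed_off.
+ */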
+
+void hidma_cleanup_pending_tre(struct hidma_lldev *lldev, u8 err_info,
+			       u8 err_code)
+{
+	while (atomic_read(&lldev->pending_tre_count)) {
+		if (hidma_post_completed(lldev, err_info, err_code))
+			break;
+	}
+}
+
+static int hidma_ll_reset(struct hidma_lldev *lldev)
+{
+	u32 val;
+	int ret;
+
+	val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG);
+	val &= ~(HIDMA_CH_CONTROL_MASK << 16);
+	val |= HIDMA_CH_RESET << 16;
+	writel(val, lldev->trca + HIDMA_TRCA_CTRLSTS_REG);
+
+	/*
+	 * Allow the DMA logic to quiesce after the reset: poll the channel
+	 * state every 1 ms, for up to 10 ms, until it reads back as disabled.
+	 */
+	ret = readl_poll_timeout(lldev->trca + HIDMA_TRCA_CTRLSTS_REG, val,
+				 HIDMA_CH_STATE(val) == HIDMA_CH_DISABLED,
+				 1000, 10000);
+	if (ret) {
+		dev_err(lldev->dev, "transfer channel did not reset\n");
+		return ret;
+	}
+
+	val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG);
+	val &= ~(HIDMA_CH_CONTROL_MASK << 16);
+	val |= HIDMA_CH_RESET << 16;
+	writel(val, lldev->evca + HIDMA_EVCA_CTRLSTS_REG);
+
+	/*
+	 * Allow the DMA logic to quiesce after the reset: poll the channel
+	 * state every 1 ms, for up to 10 ms, until it reads back as disabled.
+	 */
+	ret = readl_poll_timeout(lldev->evca + HIDMA_EVCA_CTRLSTS_REG, val,
+				 HIDMA_CH_STATE(val) == HIDMA_CH_DISABLED,
+				 1000, 10000);
+	if (ret)
+		return ret;
+
+	lldev->trch_state = HIDMA_CH_DISABLED;
+	lldev->evch_state = HIDMA_CH_DISABLED;
+	return 0;
+}
+
+/*
+ * The interrupt handler for HIDMA will try to consume as many pending
+ * EVRE from the event queue as possible. Each EVRE has an associated
+ * TRE that holds the user interface parameters. EVRE reports the
+ * result of the transaction. Hardware guarantees ordering between EVREs
+ * and TREs. We use last processed offset to figure out which TRE is
+ * associated with which EVRE. If two TREs are consumed by HW, the EVREs
+ * are in order in the event ring.
+ *
+ * This handler does one pass to consume EVREs. Other EVREs may be delivered
+ * while it is working; it will try to consume those incoming EVREs one more
+ * time before returning.
+ *
+ * For unprocessed EVREs, hardware will trigger another interrupt until
+ * all the interrupt bits are cleared.
+ *
+ * Hardware guarantees that by the time interrupt is observed, all data
+ * transactions in flight are delivered to their respective places and
+ * are visible to the CPU.
+ *
+ * On demand paging for IOMMU is only supported for PCIe via PRI
+ * (Page Request Interface) not for HIDMA. All other hardware instances
+ * including HIDMA work on pinned DMA addresses.
+ *
+ * HIDMA is not aware of IOMMU presence since it follows the DMA API. All
+ * IOMMU latency will be built into the data movement time. By the time
+ * interrupt happens, IOMMU lookups + data movement has already taken place.
+ *
+ * In a typical PCI endpoint ISR, the first read is traditionally relied on
+ * to flush all outstanding requests to the destination; that concept does
+ * not apply to this hardware.
+ */
+static void hidma_ll_int_handler_internal(struct hidma_lldev *lldev, int cause)
+{
+	unsigned long irqflags;
+
+	if (cause & HIDMA_ERR_INT_MASK) {
+		dev_err(lldev->dev, "error 0x%x, disabling...\n",
+				cause);
+
+		/* Clear out pending interrupts */
+		writel(cause, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG);
+
+		/* No further submissions. */
+		hidma_ll_disable(lldev);
+
+		/* Complete the pending transactions and notify the client. */
+		hidma_cleanup_pending_tre(lldev, 0xFF,
+					  HIDMA_EVRE_STATUS_ERROR);
+
+		return;
+	}
+
+	spin_lock_irqsave(&lldev->lock, irqflags);
+	writel_relaxed(cause, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG);
+	spin_unlock_irqrestore(&lldev->lock, irqflags);
+
+	/*
+	 * Fine tuned for this HW...
+	 *
+	 * This ISR has been designed for this particular hardware. Relaxed
+	 * read and write accessors are used for performance reasons due to
+	 * interrupt delivery guarantees. Do not copy this code blindly and
+	 * expect that to work.
+	 *
+	 * Try to consume as many EVREs as possible.
+	 */
+	hidma_handle_tre_completion(lldev);
+}
+
+irqreturn_t hidma_ll_inthandler(int chirq, void *arg)
+{
+	struct hidma_lldev *lldev = arg;
+	u32 status;
+	u32 enable;
+	u32 cause;
+
+	status = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG);
+	enable = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+	cause = status & enable;
+
+	while (cause) {
+		hidma_ll_int_handler_internal(lldev, cause);
+
+		/*
+		 * Another interrupt might have arrived while we are
+		 * processing this one. Read the new cause.
+		 */
+		status = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG);
+		enable = readl_relaxed(lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+		cause = status & enable;
+	}
+
+	return IRQ_HANDLED;
+}
+
+irqreturn_t hidma_ll_inthandler_msi(int chirq, void *arg, int cause)
+{
+	struct hidma_lldev *lldev = arg;
+
+	hidma_ll_int_handler_internal(lldev, cause);
+	return IRQ_HANDLED;
+}
+
+int hidma_ll_enable(struct hidma_lldev *lldev)
+{
+	u32 val;
+	int ret;
+
+	val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG);
+	val &= ~(HIDMA_CH_CONTROL_MASK << 16);
+	val |= HIDMA_CH_ENABLE << 16;
+	writel(val, lldev->evca + HIDMA_EVCA_CTRLSTS_REG);
+
+	ret = readl_poll_timeout(lldev->evca + HIDMA_EVCA_CTRLSTS_REG, val,
+				 hidma_is_chan_enabled(HIDMA_CH_STATE(val)),
+				 1000, 10000);
+	if (ret) {
+		dev_err(lldev->dev, "event channel did not get enabled\n");
+		return ret;
+	}
+
+	val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG);
+	val &= ~(HIDMA_CH_CONTROL_MASK << 16);
+	val |= HIDMA_CH_ENABLE << 16;
+	writel(val, lldev->trca + HIDMA_TRCA_CTRLSTS_REG);
+
+	ret = readl_poll_timeout(lldev->trca + HIDMA_TRCA_CTRLSTS_REG, val,
+				 hidma_is_chan_enabled(HIDMA_CH_STATE(val)),
+				 1000, 10000);
+	if (ret) {
+		dev_err(lldev->dev, "transfer channel did not get enabled\n");
+		return ret;
+	}
+
+	lldev->trch_state = HIDMA_CH_ENABLED;
+	lldev->evch_state = HIDMA_CH_ENABLED;
+
+	/* enable irqs */
+	writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+
+	return 0;
+}
+
+void hidma_ll_start(struct hidma_lldev *lldev)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&lldev->lock, irqflags);
+	writel(lldev->tre_write_offset, lldev->trca + HIDMA_TRCA_DOORBELL_REG);
+	spin_unlock_irqrestore(&lldev->lock, irqflags);
+}
+
+bool hidma_ll_isenabled(struct hidma_lldev *lldev)
+{
+	u32 val;
+
+	val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG);
+	lldev->trch_state = HIDMA_CH_STATE(val);
+	val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG);
+	lldev->evch_state = HIDMA_CH_STATE(val);
+
+	/* the engine counts as enabled only if both channels are enabled */
+	if (hidma_is_chan_enabled(lldev->trch_state) &&
+	    hidma_is_chan_enabled(lldev->evch_state))
+		return true;
+
+	return false;
+}
+
+void hidma_ll_queue_request(struct hidma_lldev *lldev, u32 tre_ch)
+{
+	struct hidma_tre *tre;
+	unsigned long flags;
+
+	tre = &lldev->trepool[tre_ch];
+
+	/* copy the TRE into its location in the TRE ring */
+	spin_lock_irqsave(&lldev->lock, flags);
+	tre->tre_index = lldev->tre_write_offset / HIDMA_TRE_SIZE;
+	lldev->pending_tre_list[tre->tre_index] = tre;
+	memcpy(lldev->tre_ring + lldev->tre_write_offset,
+			&tre->tre_local[0], HIDMA_TRE_SIZE);
+	tre->err_code = 0;
+	tre->err_info = 0;
+	tre->queued = 1;
+	atomic_inc(&lldev->pending_tre_count);
+	lldev->tre_write_offset = (lldev->tre_write_offset + HIDMA_TRE_SIZE)
+					% lldev->tre_ring_size;
+	spin_unlock_irqrestore(&lldev->lock, flags);
+}
+
+/*
+ * Note that even though the channel is stopped here, a transaction that is
+ * already in flight will still complete and invoke its callback. Stopping
+ * only prevents further requests from being submitted.
+ */
+int hidma_ll_disable(struct hidma_lldev *lldev)
+{
+	u32 val;
+	int ret;
+
+	/* The channel needs to be in working state */
+	if (!hidma_ll_isenabled(lldev))
+		return 0;
+
+	val = readl(lldev->trca + HIDMA_TRCA_CTRLSTS_REG);
+	val &= ~(HIDMA_CH_CONTROL_MASK << 16);
+	val |= HIDMA_CH_SUSPEND << 16;
+	writel(val, lldev->trca + HIDMA_TRCA_CTRLSTS_REG);
+
+	/*
+	 * Wait for the suspend to be confirmed: poll the channel state
+	 * every 1 ms, for up to 10 ms.
+	 */
+	ret = readl_poll_timeout(lldev->trca + HIDMA_TRCA_CTRLSTS_REG, val,
+				 HIDMA_CH_STATE(val) == HIDMA_CH_SUSPENDED,
+				 1000, 10000);
+	if (ret)
+		return ret;
+
+	val = readl(lldev->evca + HIDMA_EVCA_CTRLSTS_REG);
+	val &= ~(HIDMA_CH_CONTROL_MASK << 16);
+	val |= HIDMA_CH_SUSPEND << 16;
+	writel(val, lldev->evca + HIDMA_EVCA_CTRLSTS_REG);
+
+	/*
+	 * Wait for the suspend to be confirmed: poll the channel state
+	 * every 1 ms, for up to 10 ms.
+	 */
+	ret = readl_poll_timeout(lldev->evca + HIDMA_EVCA_CTRLSTS_REG, val,
+				 HIDMA_CH_STATE(val) == HIDMA_CH_SUSPENDED,
+				 1000, 10000);
+	if (ret)
+		return ret;
+
+	lldev->trch_state = HIDMA_CH_SUSPENDED;
+	lldev->evch_state = HIDMA_CH_SUSPENDED;
+
+	/* disable interrupts */
+	writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+	return 0;
+}
+
+void hidma_ll_set_transfer_params(struct hidma_lldev *lldev, u32 tre_ch,
+				  dma_addr_t src, dma_addr_t dest, u32 len,
+				  u32 flags, u32 txntype)
+{
+	struct hidma_tre *tre;
+	u32 *tre_local;
+
+	if (tre_ch >= lldev->nr_tres) {
+		dev_err(lldev->dev, "invalid TRE number in transfer params:%d",
+			tre_ch);
+		return;
+	}
+
+	tre = &lldev->trepool[tre_ch];
+	if (atomic_read(&tre->allocated) != true) {
+		dev_err(lldev->dev, "trying to set params on an unused TRE:%d",
+			tre_ch);
+		return;
+	}
+
+	tre_local = &tre->tre_local[0];
+	tre_local[HIDMA_TRE_CFG_IDX] &= ~GENMASK(7, 0);
+	tre_local[HIDMA_TRE_CFG_IDX] |= txntype;
+	tre_local[HIDMA_TRE_LEN_IDX] = len;
+	tre_local[HIDMA_TRE_SRC_LOW_IDX] = lower_32_bits(src);
+	tre_local[HIDMA_TRE_SRC_HI_IDX] = upper_32_bits(src);
+	tre_local[HIDMA_TRE_DEST_LOW_IDX] = lower_32_bits(dest);
+	tre_local[HIDMA_TRE_DEST_HI_IDX] = upper_32_bits(dest);
+	tre->int_flags = flags;
+}
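+
+/*
+ * Illustrative sketch (not part of this driver): the expected calling order
+ * for the low-level API in this file, roughly as an upper-layer DMA driver
+ * would use it.  Locking and error handling are omitted; src, dest, len,
+ * txntype, my_callback and my_data are placeholders supplied by the caller.
+ *
+ *	u32 tre_ch;
+ *
+ *	if (!hidma_ll_request(lldev, 0, "client", my_callback, my_data,
+ *			      &tre_ch)) {
+ *		hidma_ll_set_transfer_params(lldev, tre_ch, src, dest, len,
+ *					     0, txntype);
+ *		hidma_ll_queue_request(lldev, tre_ch);
+ *		hidma_ll_start(lldev);		-- ring the transfer doorbell
+ *	}
+ */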
+
+/*
+ * Called during initialization and after an error condition
+ * to restore hardware state.
+ */
+int hidma_ll_setup(struct hidma_lldev *lldev)
+{
+	int rc;
+	u64 addr;
+	u32 val;
+	u32 nr_tres = lldev->nr_tres;
+
+	atomic_set(&lldev->pending_tre_count, 0);
+	lldev->tre_processed_off = 0;
+	lldev->evre_processed_off = 0;
+	lldev->tre_write_offset = 0;
+
+	/* disable interrupts */
+	writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+
+	/* clear all pending interrupts */
+	val = readl(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG);
+	writel(val, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG);
+
+	rc = hidma_ll_reset(lldev);
+	if (rc)
+		return rc;
+
+	/*
+	 * Clear all pending interrupts again.
+	 * Otherwise, we observe reset complete interrupts.
+	 */
+	val = readl(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG);
+	writel(val, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG);
+
+	/* disable interrupts again after reset */
+	writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+
+	addr = lldev->tre_dma;
+	writel(lower_32_bits(addr), lldev->trca + HIDMA_TRCA_RING_LOW_REG);
+	writel(upper_32_bits(addr), lldev->trca + HIDMA_TRCA_RING_HIGH_REG);
+	writel(lldev->tre_ring_size, lldev->trca + HIDMA_TRCA_RING_LEN_REG);
+
+	addr = lldev->evre_dma;
+	writel(lower_32_bits(addr), lldev->evca + HIDMA_EVCA_RING_LOW_REG);
+	writel(upper_32_bits(addr), lldev->evca + HIDMA_EVCA_RING_HIGH_REG);
+	writel(HIDMA_EVRE_SIZE * nr_tres,
+			lldev->evca + HIDMA_EVCA_RING_LEN_REG);
+
+	/* configure interrupts */
+	hidma_ll_setup_irq(lldev, lldev->msi_support);
+
+	return hidma_ll_enable(lldev);
+}
+
+void hidma_ll_setup_irq(struct hidma_lldev *lldev, bool msi)
+{
+	u32 val;
+
+	lldev->msi_support = msi;
+
+	/* disable interrupts again after reset */
+	writel(0, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG);
+	writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+
+	/* support IRQ by default */
+	val = readl(lldev->evca + HIDMA_EVCA_INTCTRL_REG);
+	val &= ~0xF;
+	if (!lldev->msi_support)
+		val = val | 0x1;
+	writel(val, lldev->evca + HIDMA_EVCA_INTCTRL_REG);
+
+	/* clear all pending interrupts and enable them */
+	writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG);
+	writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+}
+
+struct hidma_lldev *hidma_ll_init(struct device *dev, u32 nr_tres,
+				  void __iomem *trca, void __iomem *evca,
+				  u8 chidx)
+{
+	u32 required_bytes;
+	struct hidma_lldev *lldev;
+	int rc;
+	size_t sz;
+
+	if (!trca || !evca || !dev || !nr_tres)
+		return NULL;
+
+	/* need at least four TREs */
+	if (nr_tres < 4)
+		return NULL;
+
+	/* need an extra space */
+	nr_tres += 1;
+
+	lldev = devm_kzalloc(dev, sizeof(struct hidma_lldev), GFP_KERNEL);
+	if (!lldev)
+		return NULL;
+
+	lldev->evca = evca;
+	lldev->trca = trca;
+	lldev->dev = dev;
+	sz = sizeof(struct hidma_tre);
+	lldev->trepool = devm_kcalloc(lldev->dev, nr_tres, sz, GFP_KERNEL);
+	if (!lldev->trepool)
+		return NULL;
+
+	required_bytes = sizeof(lldev->pending_tre_list[0]);
+	lldev->pending_tre_list = devm_kcalloc(dev, nr_tres, required_bytes,
+					       GFP_KERNEL);
+	if (!lldev->pending_tre_list)
+		return NULL;
+
+	sz = (HIDMA_TRE_SIZE + 1) * nr_tres;
+	lldev->tre_ring = dmam_alloc_coherent(dev, sz, &lldev->tre_dma,
+					      GFP_KERNEL);
+	if (!lldev->tre_ring)
+		return NULL;
+
+	memset(lldev->tre_ring, 0, (HIDMA_TRE_SIZE + 1) * nr_tres);
+	lldev->tre_ring_size = HIDMA_TRE_SIZE * nr_tres;
+	lldev->nr_tres = nr_tres;
+
+	/* the TRE ring has to be TRE_SIZE aligned */
+	if (!IS_ALIGNED(lldev->tre_dma, HIDMA_TRE_SIZE)) {
+		u8 tre_ring_shift;
+
+		tre_ring_shift = lldev->tre_dma % HIDMA_TRE_SIZE;
+		tre_ring_shift = HIDMA_TRE_SIZE - tre_ring_shift;
+		lldev->tre_dma += tre_ring_shift;
+		lldev->tre_ring += tre_ring_shift;
+	}
+
+	sz = (HIDMA_EVRE_SIZE + 1) * nr_tres;
+	lldev->evre_ring = dmam_alloc_coherent(dev, sz, &lldev->evre_dma,
+					       GFP_KERNEL);
+	if (!lldev->evre_ring)
+		return NULL;
+
+	memset(lldev->evre_ring, 0, (HIDMA_EVRE_SIZE + 1) * nr_tres);
+	lldev->evre_ring_size = HIDMA_EVRE_SIZE * nr_tres;
+
+	/* the EVRE ring has to be EVRE_SIZE aligned */
+	if (!IS_ALIGNED(lldev->evre_dma, HIDMA_EVRE_SIZE)) {
+		u8 evre_ring_shift;
+
+		evre_ring_shift = lldev->evre_dma % HIDMA_EVRE_SIZE;
+		evre_ring_shift = HIDMA_EVRE_SIZE - evre_ring_shift;
+		lldev->evre_dma += evre_ring_shift;
+		lldev->evre_ring += evre_ring_shift;
+	}
+	lldev->nr_tres = nr_tres;
+	lldev->chidx = chidx;
+
+	sz = nr_tres * sizeof(struct hidma_tre *);
+	rc = kfifo_alloc(&lldev->handoff_fifo, sz, GFP_KERNEL);
+	if (rc)
+		return NULL;
+
+	rc = hidma_ll_setup(lldev);
+	if (rc)
+		return NULL;
+
+	spin_lock_init(&lldev->lock);
+	tasklet_init(&lldev->task, hidma_ll_tre_complete, (unsigned long)lldev);
+	lldev->initialized = 1;
+	writel(ENABLE_IRQS, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+	return lldev;
+}
+
+int hidma_ll_uninit(struct hidma_lldev *lldev)
+{
+	u32 required_bytes;
+	int rc = 0;
+	u32 val;
+
+	if (!lldev)
+		return -ENODEV;
+
+	if (!lldev->initialized)
+		return 0;
+
+	lldev->initialized = 0;
+
+	required_bytes = sizeof(struct hidma_tre) * lldev->nr_tres;
+	tasklet_kill(&lldev->task);
+	memset(lldev->trepool, 0, required_bytes);
+	lldev->trepool = NULL;
+	atomic_set(&lldev->pending_tre_count, 0);
+	lldev->tre_write_offset = 0;
+
+	rc = hidma_ll_reset(lldev);
+
+	/*
+	 * Clear all pending interrupts again.
+	 * Otherwise, we observe reset complete interrupts.
+	 */
+	val = readl(lldev->evca + HIDMA_EVCA_IRQ_STAT_REG);
+	writel(val, lldev->evca + HIDMA_EVCA_IRQ_CLR_REG);
+	writel(0, lldev->evca + HIDMA_EVCA_IRQ_EN_REG);
+	return rc;
+}
+
+enum dma_status hidma_ll_status(struct hidma_lldev *lldev, u32 tre_ch)
+{
+	enum dma_status ret = DMA_ERROR;
+	struct hidma_tre *tre;
+	unsigned long flags;
+	u8 err_code;
+
+	spin_lock_irqsave(&lldev->lock, flags);
+
+	tre = &lldev->trepool[tre_ch];
+	err_code = tre->err_code;
+
+	if (err_code & HIDMA_EVRE_STATUS_COMPLETE)
+		ret = DMA_COMPLETE;
+	else if (err_code & HIDMA_EVRE_STATUS_ERROR)
+		ret = DMA_ERROR;
+	else
+		ret = DMA_IN_PROGRESS;
+	spin_unlock_irqrestore(&lldev->lock, flags);
+
+	return ret;
+}
diff --git a/drivers/dma/qcom/hidma_mgmt.c b/drivers/dma/qcom/hidma_mgmt.c
new file mode 100644
index 0000000..d64edeb
--- /dev/null
+++ b/drivers/dma/qcom/hidma_mgmt.c
@@ -0,0 +1,431 @@
+/*
+ * Qualcomm Technologies HIDMA DMA engine Management interface
+ *
+ * Copyright (c) 2015-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/acpi.h>
+#include <linux/of.h>
+#include <linux/property.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+#include <linux/bitops.h>
+#include <linux/dma-mapping.h>
+
+#include "hidma_mgmt.h"
+
+#define HIDMA_QOS_N_OFFSET		0x700
+#define HIDMA_CFG_OFFSET		0x400
+#define HIDMA_MAX_BUS_REQ_LEN_OFFSET	0x41C
+#define HIDMA_MAX_XACTIONS_OFFSET	0x420
+#define HIDMA_HW_VERSION_OFFSET	0x424
+#define HIDMA_CHRESET_TIMEOUT_OFFSET	0x418
+
+#define HIDMA_MAX_WR_XACTIONS_MASK	GENMASK(4, 0)
+#define HIDMA_MAX_RD_XACTIONS_MASK	GENMASK(4, 0)
+#define HIDMA_WEIGHT_MASK		GENMASK(6, 0)
+#define HIDMA_MAX_BUS_REQ_LEN_MASK	GENMASK(15, 0)
+#define HIDMA_CHRESET_TIMEOUT_MASK	GENMASK(19, 0)
+
+#define HIDMA_MAX_WR_XACTIONS_BIT_POS	16
+#define HIDMA_MAX_BUS_WR_REQ_BIT_POS	16
+#define HIDMA_WRR_BIT_POS		8
+#define HIDMA_PRIORITY_BIT_POS		15
+
+#define HIDMA_AUTOSUSPEND_TIMEOUT	2000
+#define HIDMA_MAX_CHANNEL_WEIGHT	15
+
+static unsigned int max_write_request;
+module_param(max_write_request, uint, 0644);
+MODULE_PARM_DESC(max_write_request,
+		"maximum write burst (default: ACPI/DT value)");
+
+static unsigned int max_read_request;
+module_param(max_read_request, uint, 0644);
+MODULE_PARM_DESC(max_read_request,
+		"maximum read burst (default: ACPI/DT value)");
+
+static unsigned int max_wr_xactions;
+module_param(max_wr_xactions, uint, 0644);
+MODULE_PARM_DESC(max_wr_xactions,
+	"maximum number of write transactions (default: ACPI/DT value)");
+
+static unsigned int max_rd_xactions;
+module_param(max_rd_xactions, uint, 0644);
+MODULE_PARM_DESC(max_rd_xactions,
+	"maximum number of read transactions (default: ACPI/DT value)");
+
+int hidma_mgmt_setup(struct hidma_mgmt_dev *mgmtdev)
+{
+	unsigned int i;
+	u32 val;
+
+	if (!is_power_of_2(mgmtdev->max_write_request) ||
+	    (mgmtdev->max_write_request < 128) ||
+	    (mgmtdev->max_write_request > 1024)) {
+		dev_err(&mgmtdev->pdev->dev, "invalid write request %d\n",
+			mgmtdev->max_write_request);
+		return -EINVAL;
+	}
+
+	if (!is_power_of_2(mgmtdev->max_read_request) ||
+	    (mgmtdev->max_read_request < 128) ||
+	    (mgmtdev->max_read_request > 1024)) {
+		dev_err(&mgmtdev->pdev->dev, "invalid read request %d\n",
+			mgmtdev->max_read_request);
+		return -EINVAL;
+	}
+
+	if (mgmtdev->max_wr_xactions > HIDMA_MAX_WR_XACTIONS_MASK) {
+		dev_err(&mgmtdev->pdev->dev,
+			"max_wr_xactions cannot be bigger than %ld\n",
+			HIDMA_MAX_WR_XACTIONS_MASK);
+		return -EINVAL;
+	}
+
+	if (mgmtdev->max_rd_xactions > HIDMA_MAX_RD_XACTIONS_MASK) {
+		dev_err(&mgmtdev->pdev->dev,
+			"max_rd_xactions cannot be bigger than %ld\n",
+			HIDMA_MAX_RD_XACTIONS_MASK);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < mgmtdev->dma_channels; i++) {
+		if (mgmtdev->priority[i] > 1) {
+			dev_err(&mgmtdev->pdev->dev,
+				"priority can be 0 or 1\n");
+			return -EINVAL;
+		}
+
+		if (mgmtdev->weight[i] > HIDMA_MAX_CHANNEL_WEIGHT) {
+			dev_err(&mgmtdev->pdev->dev,
+				"max value of weight can be %d.\n",
+				HIDMA_MAX_CHANNEL_WEIGHT);
+			return -EINVAL;
+		}
+
+		/* weight needs to be at least one */
+		if (mgmtdev->weight[i] == 0)
+			mgmtdev->weight[i] = 1;
+	}
+
+	pm_runtime_get_sync(&mgmtdev->pdev->dev);
+	val = readl(mgmtdev->virtaddr + HIDMA_MAX_BUS_REQ_LEN_OFFSET);
+	val &= ~(HIDMA_MAX_BUS_REQ_LEN_MASK << HIDMA_MAX_BUS_WR_REQ_BIT_POS);
+	val |= mgmtdev->max_write_request << HIDMA_MAX_BUS_WR_REQ_BIT_POS;
+	val &= ~HIDMA_MAX_BUS_REQ_LEN_MASK;
+	val |= mgmtdev->max_read_request;
+	writel(val, mgmtdev->virtaddr + HIDMA_MAX_BUS_REQ_LEN_OFFSET);
+
+	val = readl(mgmtdev->virtaddr + HIDMA_MAX_XACTIONS_OFFSET);
+	val &= ~(HIDMA_MAX_WR_XACTIONS_MASK << HIDMA_MAX_WR_XACTIONS_BIT_POS);
+	val |= mgmtdev->max_wr_xactions << HIDMA_MAX_WR_XACTIONS_BIT_POS;
+	val &= ~HIDMA_MAX_RD_XACTIONS_MASK;
+	val |= mgmtdev->max_rd_xactions;
+	writel(val, mgmtdev->virtaddr + HIDMA_MAX_XACTIONS_OFFSET);
+
+	mgmtdev->hw_version =
+	    readl(mgmtdev->virtaddr + HIDMA_HW_VERSION_OFFSET);
+	mgmtdev->hw_version_major = (mgmtdev->hw_version >> 28) & 0xF;
+	mgmtdev->hw_version_minor = (mgmtdev->hw_version >> 16) & 0xF;
+
+	for (i = 0; i < mgmtdev->dma_channels; i++) {
+		u32 weight = mgmtdev->weight[i];
+		u32 priority = mgmtdev->priority[i];
+
+		val = readl(mgmtdev->virtaddr + HIDMA_QOS_N_OFFSET + (4 * i));
+		val &= ~(1 << HIDMA_PRIORITY_BIT_POS);
+		val |= (priority & 0x1) << HIDMA_PRIORITY_BIT_POS;
+		val &= ~(HIDMA_WEIGHT_MASK << HIDMA_WRR_BIT_POS);
+		val |= (weight & HIDMA_WEIGHT_MASK) << HIDMA_WRR_BIT_POS;
+		writel(val, mgmtdev->virtaddr + HIDMA_QOS_N_OFFSET + (4 * i));
+	}
+
+	val = readl(mgmtdev->virtaddr + HIDMA_CHRESET_TIMEOUT_OFFSET);
+	val &= ~HIDMA_CHRESET_TIMEOUT_MASK;
+	val |= mgmtdev->chreset_timeout_cycles & HIDMA_CHRESET_TIMEOUT_MASK;
+	writel(val, mgmtdev->virtaddr + HIDMA_CHRESET_TIMEOUT_OFFSET);
+
+	pm_runtime_mark_last_busy(&mgmtdev->pdev->dev);
+	pm_runtime_put_autosuspend(&mgmtdev->pdev->dev);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hidma_mgmt_setup);
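+
+/*
+ * Worked example (illustrative only): with max_write_request == 1024 and
+ * max_read_request == 128, hidma_mgmt_setup() programs MAX_BUS_REQ_LEN to
+ * (1024 << 16) | 128 == 0x04000080 -- write length in the upper half-word,
+ * read length in the lower half-word (other bits assumed zero here).
+ */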
+
+static int hidma_mgmt_probe(struct platform_device *pdev)
+{
+	struct hidma_mgmt_dev *mgmtdev;
+	struct resource *res;
+	void __iomem *virtaddr;
+	int irq;
+	int rc;
+	u32 val;
+
+	pm_runtime_set_autosuspend_delay(&pdev->dev, HIDMA_AUTOSUSPEND_TIMEOUT);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_get_sync(&pdev->dev);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	virtaddr = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(virtaddr)) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "irq resources not found\n");
+		rc = irq;
+		goto out;
+	}
+
+	mgmtdev = devm_kzalloc(&pdev->dev, sizeof(*mgmtdev), GFP_KERNEL);
+	if (!mgmtdev) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	mgmtdev->pdev = pdev;
+	mgmtdev->addrsize = resource_size(res);
+	mgmtdev->virtaddr = virtaddr;
+
+	rc = device_property_read_u32(&pdev->dev, "dma-channels",
+				      &mgmtdev->dma_channels);
+	if (rc) {
+		dev_err(&pdev->dev, "number of channels missing\n");
+		goto out;
+	}
+
+	rc = device_property_read_u32(&pdev->dev,
+				      "channel-reset-timeout-cycles",
+				      &mgmtdev->chreset_timeout_cycles);
+	if (rc) {
+		dev_err(&pdev->dev, "channel reset timeout missing\n");
+		goto out;
+	}
+
+	rc = device_property_read_u32(&pdev->dev, "max-write-burst-bytes",
+				      &mgmtdev->max_write_request);
+	if (rc) {
+		dev_err(&pdev->dev, "max-write-burst-bytes missing\n");
+		goto out;
+	}
+
+	if (max_write_request &&
+			(max_write_request != mgmtdev->max_write_request)) {
+		dev_info(&pdev->dev, "overriding max-write-burst-bytes: %d\n",
+			max_write_request);
+		mgmtdev->max_write_request = max_write_request;
+	} else
+		max_write_request = mgmtdev->max_write_request;
+
+	rc = device_property_read_u32(&pdev->dev, "max-read-burst-bytes",
+				      &mgmtdev->max_read_request);
+	if (rc) {
+		dev_err(&pdev->dev, "max-read-burst-bytes missing\n");
+		goto out;
+	}
+	if (max_read_request &&
+			(max_read_request != mgmtdev->max_read_request)) {
+		dev_info(&pdev->dev, "overriding max-read-burst-bytes: %d\n",
+			max_read_request);
+		mgmtdev->max_read_request = max_read_request;
+	} else
+		max_read_request = mgmtdev->max_read_request;
+
+	rc = device_property_read_u32(&pdev->dev, "max-write-transactions",
+				      &mgmtdev->max_wr_xactions);
+	if (rc) {
+		dev_err(&pdev->dev, "max-write-transactions missing\n");
+		goto out;
+	}
+	if (max_wr_xactions &&
+			(max_wr_xactions != mgmtdev->max_wr_xactions)) {
+		dev_info(&pdev->dev, "overriding max-write-transactions: %d\n",
+			max_wr_xactions);
+		mgmtdev->max_wr_xactions = max_wr_xactions;
+	} else
+		max_wr_xactions = mgmtdev->max_wr_xactions;
+
+	rc = device_property_read_u32(&pdev->dev, "max-read-transactions",
+				      &mgmtdev->max_rd_xactions);
+	if (rc) {
+		dev_err(&pdev->dev, "max-read-transactions missing\n");
+		goto out;
+	}
+	if (max_rd_xactions &&
+			(max_rd_xactions != mgmtdev->max_rd_xactions)) {
+		dev_info(&pdev->dev, "overriding max-read-transactions: %d\n",
+			max_rd_xactions);
+		mgmtdev->max_rd_xactions = max_rd_xactions;
+	} else
+		max_rd_xactions = mgmtdev->max_rd_xactions;
+
+	mgmtdev->priority = devm_kcalloc(&pdev->dev,
+					 mgmtdev->dma_channels,
+					 sizeof(*mgmtdev->priority),
+					 GFP_KERNEL);
+	if (!mgmtdev->priority) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	mgmtdev->weight = devm_kcalloc(&pdev->dev,
+				       mgmtdev->dma_channels,
+				       sizeof(*mgmtdev->weight), GFP_KERNEL);
+	if (!mgmtdev->weight) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	rc = hidma_mgmt_setup(mgmtdev);
+	if (rc) {
+		dev_err(&pdev->dev, "setup failed\n");
+		goto out;
+	}
+
+	/* start the HW */
+	val = readl(mgmtdev->virtaddr + HIDMA_CFG_OFFSET);
+	val |= 1;
+	writel(val, mgmtdev->virtaddr + HIDMA_CFG_OFFSET);
+
+	rc = hidma_mgmt_init_sys(mgmtdev);
+	if (rc) {
+		dev_err(&pdev->dev, "sysfs setup failed\n");
+		goto out;
+	}
+
+	dev_info(&pdev->dev,
+		 "HW rev: %d.%d @ %pa with %d physical channels\n",
+		 mgmtdev->hw_version_major, mgmtdev->hw_version_minor,
+		 &res->start, mgmtdev->dma_channels);
+
+	platform_set_drvdata(pdev, mgmtdev);
+	pm_runtime_mark_last_busy(&pdev->dev);
+	pm_runtime_put_autosuspend(&pdev->dev);
+	return 0;
+out:
+	pm_runtime_put_sync_suspend(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+	return rc;
+}
+
+#if IS_ENABLED(CONFIG_ACPI)
+static const struct acpi_device_id hidma_mgmt_acpi_ids[] = {
+	{"QCOM8060"},
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, hidma_mgmt_acpi_ids);
+#endif
+
+static const struct of_device_id hidma_mgmt_match[] = {
+	{.compatible = "qcom,hidma-mgmt-1.0",},
+	{},
+};
+MODULE_DEVICE_TABLE(of, hidma_mgmt_match);
+
+static struct platform_driver hidma_mgmt_driver = {
+	.probe = hidma_mgmt_probe,
+	.driver = {
+		   .name = "hidma-mgmt",
+		   .of_match_table = hidma_mgmt_match,
+		   .acpi_match_table = ACPI_PTR(hidma_mgmt_acpi_ids),
+	},
+};
+
+#if defined(CONFIG_OF) && defined(CONFIG_OF_IRQ)
+static int object_counter;
+
+static int __init hidma_mgmt_of_populate_channels(struct device_node *np)
+{
+	struct platform_device *pdev_parent = of_find_device_by_node(np);
+	struct platform_device_info pdevinfo;
+	struct device_node *child;
+	struct resource *res;
+	int ret = 0;
+
+	/* allocate a resource array */
+	res = kcalloc(3, sizeof(*res), GFP_KERNEL);
+	if (!res)
+		return -ENOMEM;
+
+	for_each_available_child_of_node(np, child) {
+		struct platform_device *new_pdev;
+
+		ret = of_address_to_resource(child, 0, &res[0]);
+		if (ret)
+			goto out;
+
+		ret = of_address_to_resource(child, 1, &res[1]);
+		if (ret)
+			goto out;
+
+		ret = of_irq_to_resource(child, 0, &res[2]);
+		if (ret <= 0)
+			goto out;
+
+		memset(&pdevinfo, 0, sizeof(pdevinfo));
+		pdevinfo.fwnode = &child->fwnode;
+		pdevinfo.parent = pdev_parent ? &pdev_parent->dev : NULL;
+		pdevinfo.name = child->name;
+		pdevinfo.id = object_counter++;
+		pdevinfo.res = res;
+		pdevinfo.num_res = 3;
+		pdevinfo.data = NULL;
+		pdevinfo.size_data = 0;
+		pdevinfo.dma_mask = DMA_BIT_MASK(64);
+		new_pdev = platform_device_register_full(&pdevinfo);
+		if (IS_ERR(new_pdev)) {
+			ret = PTR_ERR(new_pdev);
+			goto out;
+		}
+		of_node_get(child);
+		new_pdev->dev.of_node = child;
+		of_dma_configure(&new_pdev->dev, child, true);
+		/*
+		 * It is assumed that calling of_msi_configure is safe on
+		 * platforms with or without MSI support.
+		 */
+		of_msi_configure(&new_pdev->dev, child);
+		of_node_put(child);
+	}
+out:
+	kfree(res);
+
+	return ret;
+}
+#endif
+
+static int __init hidma_mgmt_init(void)
+{
+#if defined(CONFIG_OF) && defined(CONFIG_OF_IRQ)
+	struct device_node *child;
+
+	for_each_matching_node(child, hidma_mgmt_match) {
+		/* device tree based firmware here */
+		hidma_mgmt_of_populate_channels(child);
+	}
+#endif
+	platform_driver_register(&hidma_mgmt_driver);
+
+	return 0;
+}
+module_init(hidma_mgmt_init);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/qcom/hidma_mgmt.h b/drivers/dma/qcom/hidma_mgmt.h
new file mode 100644
index 0000000..f7daf33
--- /dev/null
+++ b/drivers/dma/qcom/hidma_mgmt.h
@@ -0,0 +1,39 @@
+/*
+ * Qualcomm Technologies HIDMA Management common header
+ *
+ * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+struct hidma_mgmt_dev {
+	u8 hw_version_major;
+	u8 hw_version_minor;
+
+	u32 max_wr_xactions;
+	u32 max_rd_xactions;
+	u32 max_write_request;
+	u32 max_read_request;
+	u32 dma_channels;
+	u32 chreset_timeout_cycles;
+	u32 hw_version;
+	u32 *priority;
+	u32 *weight;
+
+	/* Hardware device constants */
+	void __iomem *virtaddr;
+	resource_size_t addrsize;
+
+	struct kobject **chroots;
+	struct platform_device *pdev;
+};
+
+int hidma_mgmt_init_sys(struct hidma_mgmt_dev *dev);
+int hidma_mgmt_setup(struct hidma_mgmt_dev *mgmtdev);
diff --git a/drivers/dma/qcom/hidma_mgmt_sys.c b/drivers/dma/qcom/hidma_mgmt_sys.c
new file mode 100644
index 0000000..cbb89ea
--- /dev/null
+++ b/drivers/dma/qcom/hidma_mgmt_sys.c
@@ -0,0 +1,293 @@
+/*
+ * Qualcomm Technologies HIDMA Management SYS interface
+ *
+ * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/sysfs.h>
+#include <linux/platform_device.h>
+
+#include "hidma_mgmt.h"
+
+struct hidma_chan_attr {
+	struct hidma_mgmt_dev *mdev;
+	int index;
+	struct kobj_attribute attr;
+};
+
+struct hidma_mgmt_fileinfo {
+	char *name;
+	int mode;
+	int (*get)(struct hidma_mgmt_dev *mdev);
+	int (*set)(struct hidma_mgmt_dev *mdev, u64 val);
+};
+
+#define IMPLEMENT_GETSET(name)					\
+static int get_##name(struct hidma_mgmt_dev *mdev)		\
+{								\
+	return mdev->name;					\
+}								\
+static int set_##name(struct hidma_mgmt_dev *mdev, u64 val)	\
+{								\
+	u64 tmp;						\
+	int rc;							\
+								\
+	tmp = mdev->name;					\
+	mdev->name = val;					\
+	rc = hidma_mgmt_setup(mdev);				\
+	if (rc)							\
+		mdev->name = tmp;				\
+	return rc;						\
+}
+
+#define DECLARE_ATTRIBUTE(name, mode)				\
+	{#name, mode, get_##name, set_##name}
+
+IMPLEMENT_GETSET(hw_version_major)
+IMPLEMENT_GETSET(hw_version_minor)
+IMPLEMENT_GETSET(max_wr_xactions)
+IMPLEMENT_GETSET(max_rd_xactions)
+IMPLEMENT_GETSET(max_write_request)
+IMPLEMENT_GETSET(max_read_request)
+IMPLEMENT_GETSET(dma_channels)
+IMPLEMENT_GETSET(chreset_timeout_cycles)
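+
+/*
+ * For illustration, IMPLEMENT_GETSET(dma_channels) above expands to:
+ *
+ *	static int get_dma_channels(struct hidma_mgmt_dev *mdev)
+ *	{
+ *		return mdev->dma_channels;
+ *	}
+ *	static int set_dma_channels(struct hidma_mgmt_dev *mdev, u64 val)
+ *	{
+ *		u64 tmp;
+ *		int rc;
+ *
+ *		tmp = mdev->dma_channels;
+ *		mdev->dma_channels = val;
+ *		rc = hidma_mgmt_setup(mdev);
+ *		if (rc)
+ *			mdev->dma_channels = tmp;
+ *		return rc;
+ *	}
+ *
+ * and DECLARE_ATTRIBUTE(dma_channels, S_IRUGO), used in the table below,
+ * yields the initializer
+ *	{ "dma_channels", S_IRUGO, get_dma_channels, set_dma_channels }
+ */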
+
+static int set_priority(struct hidma_mgmt_dev *mdev, unsigned int i, u64 val)
+{
+	u64 tmp;
+	int rc;
+
+	if (i >= mdev->dma_channels)
+		return -EINVAL;
+
+	tmp = mdev->priority[i];
+	mdev->priority[i] = val;
+	rc = hidma_mgmt_setup(mdev);
+	if (rc)
+		mdev->priority[i] = tmp;
+	return rc;
+}
+
+static int set_weight(struct hidma_mgmt_dev *mdev, unsigned int i, u64 val)
+{
+	u64 tmp;
+	int rc;
+
+	if (i >= mdev->dma_channels)
+		return -EINVAL;
+
+	tmp = mdev->weight[i];
+	mdev->weight[i] = val;
+	rc = hidma_mgmt_setup(mdev);
+	if (rc)
+		mdev->weight[i] = tmp;
+	return rc;
+}
+
+static struct hidma_mgmt_fileinfo hidma_mgmt_files[] = {
+	DECLARE_ATTRIBUTE(hw_version_major, S_IRUGO),
+	DECLARE_ATTRIBUTE(hw_version_minor, S_IRUGO),
+	DECLARE_ATTRIBUTE(dma_channels, S_IRUGO),
+	DECLARE_ATTRIBUTE(chreset_timeout_cycles, S_IRUGO),
+	DECLARE_ATTRIBUTE(max_wr_xactions, S_IRUGO),
+	DECLARE_ATTRIBUTE(max_rd_xactions, S_IRUGO),
+	DECLARE_ATTRIBUTE(max_write_request, S_IRUGO),
+	DECLARE_ATTRIBUTE(max_read_request, S_IRUGO),
+};
+
+static ssize_t show_values(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	struct hidma_mgmt_dev *mdev = dev_get_drvdata(dev);
+	unsigned int i;
+
+	buf[0] = 0;
+
+	for (i = 0; i < ARRAY_SIZE(hidma_mgmt_files); i++) {
+		if (strcmp(attr->attr.name, hidma_mgmt_files[i].name) == 0) {
+			sprintf(buf, "%d\n", hidma_mgmt_files[i].get(mdev));
+			break;
+		}
+	}
+	return strlen(buf);
+}
+
+static ssize_t set_values(struct device *dev, struct device_attribute *attr,
+			  const char *buf, size_t count)
+{
+	struct hidma_mgmt_dev *mdev = dev_get_drvdata(dev);
+	unsigned long tmp;
+	unsigned int i;
+	int rc;
+
+	rc = kstrtoul(buf, 0, &tmp);
+	if (rc)
+		return rc;
+
+	for (i = 0; i < ARRAY_SIZE(hidma_mgmt_files); i++) {
+		if (strcmp(attr->attr.name, hidma_mgmt_files[i].name) == 0) {
+			rc = hidma_mgmt_files[i].set(mdev, tmp);
+			if (rc)
+				return rc;
+
+			break;
+		}
+	}
+	return count;
+}
+
+static ssize_t show_values_channel(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *buf)
+{
+	struct hidma_chan_attr *chattr;
+	struct hidma_mgmt_dev *mdev;
+
+	buf[0] = 0;
+	chattr = container_of(attr, struct hidma_chan_attr, attr);
+	mdev = chattr->mdev;
+	if (strcmp(attr->attr.name, "priority") == 0)
+		sprintf(buf, "%d\n", mdev->priority[chattr->index]);
+	else if (strcmp(attr->attr.name, "weight") == 0)
+		sprintf(buf, "%d\n", mdev->weight[chattr->index]);
+
+	return strlen(buf);
+}
+
+static ssize_t set_values_channel(struct kobject *kobj,
+				  struct kobj_attribute *attr, const char *buf,
+				  size_t count)
+{
+	struct hidma_chan_attr *chattr;
+	struct hidma_mgmt_dev *mdev;
+	unsigned long tmp;
+	int rc;
+
+	chattr = container_of(attr, struct hidma_chan_attr, attr);
+	mdev = chattr->mdev;
+
+	rc = kstrtoul(buf, 0, &tmp);
+	if (rc)
+		return rc;
+
+	if (strcmp(attr->attr.name, "priority") == 0) {
+		rc = set_priority(mdev, chattr->index, tmp);
+		if (rc)
+			return rc;
+	} else if (strcmp(attr->attr.name, "weight") == 0) {
+		rc = set_weight(mdev, chattr->index, tmp);
+		if (rc)
+			return rc;
+	}
+	return count;
+}
+
+static int create_sysfs_entry(struct hidma_mgmt_dev *dev, char *name, int mode)
+{
+	struct device_attribute *attrs;
+	char *name_copy;
+
+	attrs = devm_kmalloc(&dev->pdev->dev,
+			     sizeof(struct device_attribute), GFP_KERNEL);
+	if (!attrs)
+		return -ENOMEM;
+
+	name_copy = devm_kstrdup(&dev->pdev->dev, name, GFP_KERNEL);
+	if (!name_copy)
+		return -ENOMEM;
+
+	attrs->attr.name = name_copy;
+	attrs->attr.mode = mode;
+	attrs->show = show_values;
+	attrs->store = set_values;
+	sysfs_attr_init(&attrs->attr);
+
+	return device_create_file(&dev->pdev->dev, attrs);
+}
+
+static int create_sysfs_entry_channel(struct hidma_mgmt_dev *mdev, char *name,
+				      int mode, int index,
+				      struct kobject *parent)
+{
+	struct hidma_chan_attr *chattr;
+	char *name_copy;
+
+	chattr = devm_kmalloc(&mdev->pdev->dev, sizeof(*chattr), GFP_KERNEL);
+	if (!chattr)
+		return -ENOMEM;
+
+	name_copy = devm_kstrdup(&mdev->pdev->dev, name, GFP_KERNEL);
+	if (!name_copy)
+		return -ENOMEM;
+
+	chattr->mdev = mdev;
+	chattr->index = index;
+	chattr->attr.attr.name = name_copy;
+	chattr->attr.attr.mode = mode;
+	chattr->attr.show = show_values_channel;
+	chattr->attr.store = set_values_channel;
+	sysfs_attr_init(&chattr->attr.attr);
+
+	return sysfs_create_file(parent, &chattr->attr.attr);
+}
+
+int hidma_mgmt_init_sys(struct hidma_mgmt_dev *mdev)
+{
+	unsigned int i;
+	int rc;
+	int required;
+	struct kobject *chanops;
+
+	required = sizeof(*mdev->chroots) * mdev->dma_channels;
+	mdev->chroots = devm_kmalloc(&mdev->pdev->dev, required, GFP_KERNEL);
+	if (!mdev->chroots)
+		return -ENOMEM;
+
+	chanops = kobject_create_and_add("chanops", &mdev->pdev->dev.kobj);
+	if (!chanops)
+		return -ENOMEM;
+
+	/* create each channel directory here */
+	for (i = 0; i < mdev->dma_channels; i++) {
+		char name[20];
+
+		snprintf(name, sizeof(name), "chan%d", i);
+		mdev->chroots[i] = kobject_create_and_add(name, chanops);
+		if (!mdev->chroots[i])
+			return -ENOMEM;
+	}
+
+	/* populate common parameters */
+	for (i = 0; i < ARRAY_SIZE(hidma_mgmt_files); i++) {
+		rc = create_sysfs_entry(mdev, hidma_mgmt_files[i].name,
+					hidma_mgmt_files[i].mode);
+		if (rc)
+			return rc;
+	}
+
+	/* populate parameters that are per channel */
+	for (i = 0; i < mdev->dma_channels; i++) {
+		rc = create_sysfs_entry_channel(mdev, "priority",
+						(S_IRUGO | S_IWUGO), i,
+						mdev->chroots[i]);
+		if (rc)
+			return rc;
+
+		rc = create_sysfs_entry_channel(mdev, "weight",
+						(S_IRUGO | S_IWUGO), i,
+						mdev->chroots[i]);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hidma_mgmt_init_sys);
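+
+/*
+ * Resulting sysfs layout (illustrative, derived from the code above),
+ * relative to the management platform device's directory:
+ *
+ *	dma_channels, hw_version_major, ...	read-only attributes
+ *	chanops/chan0/priority			per-channel, read-write
+ *	chanops/chan0/weight
+ *	chanops/chan1/priority
+ *	...
+ */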
diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c
new file mode 100644
index 0000000..64744eb
--- /dev/null
+++ b/drivers/dma/s3c24xx-dma.c
@@ -0,0 +1,1435 @@
+/*
+ * S3C24XX DMA handling
+ *
+ * Copyright (c) 2013 Heiko Stuebner <heiko@sntech.de>
+ *
+ * based on amba-pl08x.c
+ *
+ * Copyright (c) 2006 ARM Ltd.
+ * Copyright (c) 2010 ST-Ericsson SA
+ *
+ * Author: Peter Pearse <peter.pearse@arm.com>
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * The DMA controllers in S3C24XX SoCs have a varying number of DMA signals
+ * that can be routed to any of the 4 to 8 hardware-channels.
+ *
+ * Therefore on these DMA controllers the number of channels
+ * and the number of incoming DMA signals are two totally different things.
+ * It is usually not possible to theoretically handle all physical signals,
+ * so a multiplexing scheme with possible denial of use is necessary.
+ *
+ * Open items:
+ * - bursts
+ */
+
+#include <linux/platform_device.h>
+#include <linux/types.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/slab.h>
+#include <linux/platform_data/dma-s3c24xx.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+#define MAX_DMA_CHANNELS	8
+
+#define S3C24XX_DISRC			0x00
+#define S3C24XX_DISRCC			0x04
+#define S3C24XX_DISRCC_INC_INCREMENT	0
+#define S3C24XX_DISRCC_INC_FIXED	BIT(0)
+#define S3C24XX_DISRCC_LOC_AHB		0
+#define S3C24XX_DISRCC_LOC_APB		BIT(1)
+
+#define S3C24XX_DIDST			0x08
+#define S3C24XX_DIDSTC			0x0c
+#define S3C24XX_DIDSTC_INC_INCREMENT	0
+#define S3C24XX_DIDSTC_INC_FIXED	BIT(0)
+#define S3C24XX_DIDSTC_LOC_AHB		0
+#define S3C24XX_DIDSTC_LOC_APB		BIT(1)
+#define S3C24XX_DIDSTC_INT_TC0		0
+#define S3C24XX_DIDSTC_INT_RELOAD	BIT(2)
+
+#define S3C24XX_DCON			0x10
+
+#define S3C24XX_DCON_TC_MASK		0xfffff
+#define S3C24XX_DCON_DSZ_BYTE		(0 << 20)
+#define S3C24XX_DCON_DSZ_HALFWORD	(1 << 20)
+#define S3C24XX_DCON_DSZ_WORD		(2 << 20)
+#define S3C24XX_DCON_DSZ_MASK		(3 << 20)
+#define S3C24XX_DCON_DSZ_SHIFT		20
+#define S3C24XX_DCON_AUTORELOAD		0
+#define S3C24XX_DCON_NORELOAD		BIT(22)
+#define S3C24XX_DCON_HWTRIG		BIT(23)
+#define S3C24XX_DCON_HWSRC_SHIFT	24
+#define S3C24XX_DCON_SERV_SINGLE	0
+#define S3C24XX_DCON_SERV_WHOLE		BIT(27)
+#define S3C24XX_DCON_TSZ_UNIT		0
+#define S3C24XX_DCON_TSZ_BURST4		BIT(28)
+#define S3C24XX_DCON_INT		BIT(29)
+#define S3C24XX_DCON_SYNC_PCLK		0
+#define S3C24XX_DCON_SYNC_HCLK		BIT(30)
+#define S3C24XX_DCON_DEMAND		0
+#define S3C24XX_DCON_HANDSHAKE		BIT(31)
+
+#define S3C24XX_DSTAT			0x14
+#define S3C24XX_DSTAT_STAT_BUSY		BIT(20)
+#define S3C24XX_DSTAT_CURRTC_MASK	0xfffff
+
+#define S3C24XX_DMASKTRIG		0x20
+#define S3C24XX_DMASKTRIG_SWTRIG	BIT(0)
+#define S3C24XX_DMASKTRIG_ON		BIT(1)
+#define S3C24XX_DMASKTRIG_STOP		BIT(2)
+
+#define S3C24XX_DMAREQSEL		0x24
+#define S3C24XX_DMAREQSEL_HW		BIT(0)
+
+/*
+ * S3C2410, S3C2440 and S3C2442 SoCs cannot select any physical channel
+ * for a DMA source. Instead only specific channels are valid.
+ * All of these SoCs have 4 physical channels and the number of request
+ * source bits is 3. Additionally we also need 1 bit to mark the channel
+ * as valid.
+ * Therefore we separate the chansel element of the channel data into 4
+ * parts of 4 bits each, to hold the information if the channel is valid
+ * and the hw request source to use.
+ *
+ * Example:
+ * SDI is valid on channels 0, 2 and 3 - with varying hw request sources.
+ * For it the chansel field would look like
+ *
+ * ((BIT(3) | 1) << 3 * 4) | // channel 3, with request source 1
+ * ((BIT(3) | 2) << 2 * 4) | // channel 2, with request source 2
+ * ((BIT(3) | 2) << 0 * 4)   // channel 0, with request source 2
+ */
+#define S3C24XX_CHANSEL_WIDTH		4
+#define S3C24XX_CHANSEL_VALID		BIT(3)
+#define S3C24XX_CHANSEL_REQ_MASK	7
+
+/*
+ * struct soc_data - vendor-specific config parameters for individual SoCs
+ * @stride: spacing between the registers of each channel
+ * @has_reqsel: does the controller use the newer request-selection mechanism
+ * @has_clocks: are controllable dma-clocks present
+ */
+struct soc_data {
+	int stride;
+	bool has_reqsel;
+	bool has_clocks;
+};
+
+/*
+ * enum s3c24xx_dma_chan_state - holds the virtual channel states
+ * @S3C24XX_DMA_CHAN_IDLE: the channel is idle
+ * @S3C24XX_DMA_CHAN_RUNNING: the channel has allocated a physical transport
+ * channel and is running a transfer on it
+ * @S3C24XX_DMA_CHAN_WAITING: the channel is waiting for a physical transport
+ * channel to become available (only pertains to memcpy channels)
+ */
+enum s3c24xx_dma_chan_state {
+	S3C24XX_DMA_CHAN_IDLE,
+	S3C24XX_DMA_CHAN_RUNNING,
+	S3C24XX_DMA_CHAN_WAITING,
+};
+
+/*
+ * struct s3c24xx_sg - structure containing data per sg
+ * @src_addr: src address of sg
+ * @dst_addr: dst address of sg
+ * @len: transfer len in bytes
+ * @node: node for txd's dsg_list
+ */
+struct s3c24xx_sg {
+	dma_addr_t src_addr;
+	dma_addr_t dst_addr;
+	size_t len;
+	struct list_head node;
+};
+
+/*
+ * struct s3c24xx_txd - wrapper for struct dma_async_tx_descriptor
+ * @vd: virtual DMA descriptor
+ * @dsg_list: list of children sg's
+ * @at: sg currently being transferred
+ * @width: transfer width
+ * @disrcc: value for source control register
+ * @didstc: value for destination control register
+ * @dcon: base value for dcon register
+ * @cyclic: indicate cyclic transfer
+ */
+struct s3c24xx_txd {
+	struct virt_dma_desc vd;
+	struct list_head dsg_list;
+	struct list_head *at;
+	u8 width;
+	u32 disrcc;
+	u32 didstc;
+	u32 dcon;
+	bool cyclic;
+};
+
+struct s3c24xx_dma_chan;
+
+/*
+ * struct s3c24xx_dma_phy - holder for the physical channels
+ * @id: physical index to this channel
+ * @valid: does the channel have all required elements
+ * @base: virtual memory base (remapped) for this channel
+ * @irq: interrupt for this channel
+ * @clk: clock for this channel
+ * @lock: a lock to use when altering an instance of this struct
+ * @serving: virtual channel currently being served by this physical channel
+ * @host: a pointer to the host (internal use)
+ */
+struct s3c24xx_dma_phy {
+	unsigned int			id;
+	bool				valid;
+	void __iomem			*base;
+	int				irq;
+	struct clk			*clk;
+	spinlock_t			lock;
+	struct s3c24xx_dma_chan		*serving;
+	struct s3c24xx_dma_engine	*host;
+};
+
+/*
+ * struct s3c24xx_dma_chan - this structure wraps a DMA ENGINE channel
+ * @id: the id of the channel
+ * @name: name of the channel
+ * @vc: wrapped virtual channel
+ * @phy: the physical channel utilized by this channel, if there is one
+ * @cfg: slave configuration for RX/TX set via the runtime config
+ * @at: active transaction on this channel
+ * @host: a pointer to the host (internal use)
+ * @state: whether the channel is idle, running etc
+ * @slave: whether this channel is a device (slave) or for memcpy
+ */
+struct s3c24xx_dma_chan {
+	int id;
+	const char *name;
+	struct virt_dma_chan vc;
+	struct s3c24xx_dma_phy *phy;
+	struct dma_slave_config cfg;
+	struct s3c24xx_txd *at;
+	struct s3c24xx_dma_engine *host;
+	enum s3c24xx_dma_chan_state state;
+	bool slave;
+};
+
+/*
+ * struct s3c24xx_dma_engine - the local state holder for the S3C24XX
+ * @pdev: the corresponding platform device
+ * @pdata: platform data passed in from the platform/machine
+ * @sdata: the SoC-specific configuration data for this variant
+ * @base: virtual memory base (remapped)
+ * @slave: slave engine for this instance
+ * @memcpy: memcpy engine for this instance
+ * @phy_chans: array of data for the physical channels
+ */
+struct s3c24xx_dma_engine {
+	struct platform_device			*pdev;
+	const struct s3c24xx_dma_platdata	*pdata;
+	struct soc_data				*sdata;
+	void __iomem				*base;
+	struct dma_device			slave;
+	struct dma_device			memcpy;
+	struct s3c24xx_dma_phy			*phy_chans;
+};
+
+/*
+ * Physical channel handling
+ */
+
+/*
+ * Check whether a certain channel is busy or not.
+ */
+static int s3c24xx_dma_phy_busy(struct s3c24xx_dma_phy *phy)
+{
+	unsigned int val = readl(phy->base + S3C24XX_DSTAT);
+	return val & S3C24XX_DSTAT_STAT_BUSY;
+}
+
+static bool s3c24xx_dma_phy_valid(struct s3c24xx_dma_chan *s3cchan,
+				  struct s3c24xx_dma_phy *phy)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+	struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id];
+	int phyvalid;
+
+	/* every phy is valid for memcpy channels */
+	if (!s3cchan->slave)
+		return true;
+
+	/* On newer variants all phys can be used for all virtual channels */
+	if (s3cdma->sdata->has_reqsel)
+		return true;
+
+	phyvalid = (cdata->chansel >> (phy->id * S3C24XX_CHANSEL_WIDTH));
+	return (phyvalid & S3C24XX_CHANSEL_VALID) ? true : false;
+}
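+
+/*
+ * Worked example (illustrative only): using the SDI example from the chansel
+ * comment near the top of this file, a virtual channel asking for
+ * phy->id == 2 shifts chansel right by 8; the result has
+ * S3C24XX_CHANSEL_VALID set and a request source of
+ * (BIT(3) | 2) & S3C24XX_CHANSEL_REQ_MASK == 2.
+ */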
+
+/*
+ * Allocate a physical channel for a virtual channel
+ *
+ * Try to locate a physical channel to be used for this transfer. If all
+ * are taken return NULL and the requester will have to cope by using
+ * some fallback PIO mode or retrying later.
+ */
+static
+struct s3c24xx_dma_phy *s3c24xx_dma_get_phy(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	struct s3c24xx_dma_phy *phy = NULL;
+	unsigned long flags;
+	int i;
+	int ret;
+
+	for (i = 0; i < s3cdma->pdata->num_phy_channels; i++) {
+		phy = &s3cdma->phy_chans[i];
+
+		if (!phy->valid)
+			continue;
+
+		if (!s3c24xx_dma_phy_valid(s3cchan, phy))
+			continue;
+
+		spin_lock_irqsave(&phy->lock, flags);
+
+		if (!phy->serving) {
+			phy->serving = s3cchan;
+			spin_unlock_irqrestore(&phy->lock, flags);
+			break;
+		}
+
+		spin_unlock_irqrestore(&phy->lock, flags);
+	}
+
+	/* No physical channel available, cope with it */
+	if (i == s3cdma->pdata->num_phy_channels) {
+		dev_warn(&s3cdma->pdev->dev, "no phy channel available\n");
+		return NULL;
+	}
+
+	/* start the phy clock */
+	if (s3cdma->sdata->has_clocks) {
+		ret = clk_enable(phy->clk);
+		if (ret) {
+			dev_err(&s3cdma->pdev->dev, "could not enable clock for channel %d, err %d\n",
+				phy->id, ret);
+			phy->serving = NULL;
+			return NULL;
+		}
+	}
+
+	return phy;
+}
+
+/*
+ * Mark the physical channel as free.
+ *
+ * This drops the link between the physical and virtual channel.
+ */
+static inline void s3c24xx_dma_put_phy(struct s3c24xx_dma_phy *phy)
+{
+	struct s3c24xx_dma_engine *s3cdma = phy->host;
+
+	if (s3cdma->sdata->has_clocks)
+		clk_disable(phy->clk);
+
+	phy->serving = NULL;
+}
+
+/*
+ * Stops the channel by writing the stop bit.
+ * This should not be used for an on-going transfer, but as a method of
+ * shutting down a channel (eg, when it's no longer used) or terminating a
+ * transfer.
+ */
+static void s3c24xx_dma_terminate_phy(struct s3c24xx_dma_phy *phy)
+{
+	writel(S3C24XX_DMASKTRIG_STOP, phy->base + S3C24XX_DMASKTRIG);
+}
+
+/*
+ * Virtual channel handling
+ */
+
+static inline
+struct s3c24xx_dma_chan *to_s3c24xx_dma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct s3c24xx_dma_chan, vc.chan);
+}
+
+static u32 s3c24xx_dma_getbytes_chan(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_phy *phy = s3cchan->phy;
+	struct s3c24xx_txd *txd = s3cchan->at;
+	u32 tc = readl(phy->base + S3C24XX_DSTAT) & S3C24XX_DSTAT_CURRTC_MASK;
+
+	return tc * txd->width;
+}
+
+static int s3c24xx_dma_set_runtime_config(struct dma_chan *chan,
+				  struct dma_slave_config *config)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	unsigned long flags;
+	int ret = 0;
+
+	/* Reject definitely invalid configurations */
+	if (config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES ||
+	    config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)
+		return -EINVAL;
+
+	spin_lock_irqsave(&s3cchan->vc.lock, flags);
+
+	if (!s3cchan->slave) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	s3cchan->cfg = *config;
+
+out:
+	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+	return ret;
+}
+
+/*
+ * Transfer handling
+ */
+
+static inline
+struct s3c24xx_txd *to_s3c24xx_txd(struct dma_async_tx_descriptor *tx)
+{
+	return container_of(tx, struct s3c24xx_txd, vd.tx);
+}
+
+static struct s3c24xx_txd *s3c24xx_dma_get_txd(void)
+{
+	struct s3c24xx_txd *txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+
+	if (txd) {
+		INIT_LIST_HEAD(&txd->dsg_list);
+		txd->dcon = S3C24XX_DCON_INT | S3C24XX_DCON_NORELOAD;
+	}
+
+	return txd;
+}
+
+static void s3c24xx_dma_free_txd(struct s3c24xx_txd *txd)
+{
+	struct s3c24xx_sg *dsg, *_dsg;
+
+	list_for_each_entry_safe(dsg, _dsg, &txd->dsg_list, node) {
+		list_del(&dsg->node);
+		kfree(dsg);
+	}
+
+	kfree(txd);
+}
+
+static void s3c24xx_dma_start_next_sg(struct s3c24xx_dma_chan *s3cchan,
+				       struct s3c24xx_txd *txd)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	struct s3c24xx_dma_phy *phy = s3cchan->phy;
+	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+	struct s3c24xx_sg *dsg = list_entry(txd->at, struct s3c24xx_sg, node);
+	u32 dcon = txd->dcon;
+	u32 val;
+
+	/* transfer-size and -count from len and width */
+	switch (txd->width) {
+	case 1:
+		dcon |= S3C24XX_DCON_DSZ_BYTE | dsg->len;
+		break;
+	case 2:
+		dcon |= S3C24XX_DCON_DSZ_HALFWORD | (dsg->len / 2);
+		break;
+	case 4:
+		dcon |= S3C24XX_DCON_DSZ_WORD | (dsg->len / 4);
+		break;
+	}
+
+	if (s3cchan->slave) {
+		struct s3c24xx_dma_channel *cdata =
+					&pdata->channels[s3cchan->id];
+
+		if (s3cdma->sdata->has_reqsel) {
+			writel_relaxed((cdata->chansel << 1) |
+							S3C24XX_DMAREQSEL_HW,
+					phy->base + S3C24XX_DMAREQSEL);
+		} else {
+			int csel = cdata->chansel >> (phy->id *
+							S3C24XX_CHANSEL_WIDTH);
+
+			csel &= S3C24XX_CHANSEL_REQ_MASK;
+			dcon |= csel << S3C24XX_DCON_HWSRC_SHIFT;
+			dcon |= S3C24XX_DCON_HWTRIG;
+		}
+	} else {
+		if (s3cdma->sdata->has_reqsel)
+			writel_relaxed(0, phy->base + S3C24XX_DMAREQSEL);
+	}
+
+	writel_relaxed(dsg->src_addr, phy->base + S3C24XX_DISRC);
+	writel_relaxed(txd->disrcc, phy->base + S3C24XX_DISRCC);
+	writel_relaxed(dsg->dst_addr, phy->base + S3C24XX_DIDST);
+	writel_relaxed(txd->didstc, phy->base + S3C24XX_DIDSTC);
+	writel_relaxed(dcon, phy->base + S3C24XX_DCON);
+
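+	/* Clear the stop bit and switch the channel on. */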
+	val = readl_relaxed(phy->base + S3C24XX_DMASKTRIG);
+	val &= ~S3C24XX_DMASKTRIG_STOP;
+	val |= S3C24XX_DMASKTRIG_ON;
+
+	/* trigger the dma operation for memcpy transfers */
+	if (!s3cchan->slave)
+		val |= S3C24XX_DMASKTRIG_SWTRIG;
+
+	writel(val, phy->base + S3C24XX_DMASKTRIG);
+}
+
+/*
+ * Set the initial DMA register values and start first sg.
+ */
+static void s3c24xx_dma_start_next_txd(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_phy *phy = s3cchan->phy;
+	struct virt_dma_desc *vd = vchan_next_desc(&s3cchan->vc);
+	struct s3c24xx_txd *txd = to_s3c24xx_txd(&vd->tx);
+
+	list_del(&txd->vd.node);
+
+	s3cchan->at = txd;
+
+	/* Wait for channel inactive */
+	while (s3c24xx_dma_phy_busy(phy))
+		cpu_relax();
+
+	/* point to the first element of the sg list */
+	txd->at = txd->dsg_list.next;
+	s3c24xx_dma_start_next_sg(s3cchan, txd);
+}
+
+static void s3c24xx_dma_free_txd_list(struct s3c24xx_dma_engine *s3cdma,
+				struct s3c24xx_dma_chan *s3cchan)
+{
+	LIST_HEAD(head);
+
+	vchan_get_all_descriptors(&s3cchan->vc, &head);
+	vchan_dma_desc_free_list(&s3cchan->vc, &head);
+}
+
+/*
+ * Try to allocate a physical channel.  When successful, assign it to
+ * this virtual channel, and initiate the next descriptor.  The
+ * virtual channel lock must be held at this point.
+ */
+static void s3c24xx_dma_phy_alloc_and_start(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	struct s3c24xx_dma_phy *phy;
+
+	phy = s3c24xx_dma_get_phy(s3cchan);
+	if (!phy) {
+		dev_dbg(&s3cdma->pdev->dev, "no physical channel available for xfer on %s\n",
+			s3cchan->name);
+		s3cchan->state = S3C24XX_DMA_CHAN_WAITING;
+		return;
+	}
+
+	dev_dbg(&s3cdma->pdev->dev, "allocated physical channel %d for xfer on %s\n",
+		phy->id, s3cchan->name);
+
+	s3cchan->phy = phy;
+	s3cchan->state = S3C24XX_DMA_CHAN_RUNNING;
+
+	s3c24xx_dma_start_next_txd(s3cchan);
+}
+
+static void s3c24xx_dma_phy_reassign_start(struct s3c24xx_dma_phy *phy,
+	struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+
+	dev_dbg(&s3cdma->pdev->dev, "reassigned physical channel %d for xfer on %s\n",
+		phy->id, s3cchan->name);
+
+	/*
+	 * We do this without taking the lock; we're really only concerned
+	 * about whether this pointer is NULL or not, and we're guaranteed
+	 * that this will only be called when it _already_ is non-NULL.
+	 */
+	phy->serving = s3cchan;
+	s3cchan->phy = phy;
+	s3cchan->state = S3C24XX_DMA_CHAN_RUNNING;
+	s3c24xx_dma_start_next_txd(s3cchan);
+}
+
+/*
+ * Free a physical DMA channel, potentially reallocating it to another
+ * virtual channel if we have any pending.
+ */
+static void s3c24xx_dma_phy_free(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	struct s3c24xx_dma_chan *p, *next;
+
+retry:
+	next = NULL;
+
+	/* Find a waiting virtual channel for the next transfer. */
+	list_for_each_entry(p, &s3cdma->memcpy.channels, vc.chan.device_node)
+		if (p->state == S3C24XX_DMA_CHAN_WAITING) {
+			next = p;
+			break;
+		}
+
+	if (!next) {
+		list_for_each_entry(p, &s3cdma->slave.channels,
+				    vc.chan.device_node)
+			if (p->state == S3C24XX_DMA_CHAN_WAITING &&
+				      s3c24xx_dma_phy_valid(p, s3cchan->phy)) {
+				next = p;
+				break;
+			}
+	}
+
+	/* Ensure that the physical channel is stopped */
+	s3c24xx_dma_terminate_phy(s3cchan->phy);
+
+	if (next) {
+		bool success;
+
+		/*
+		 * Eww.  We know this isn't going to deadlock
+		 * but lockdep probably doesn't.
+		 */
+		spin_lock(&next->vc.lock);
+		/* Re-check the state now that we have the lock */
+		success = next->state == S3C24XX_DMA_CHAN_WAITING;
+		if (success)
+			s3c24xx_dma_phy_reassign_start(s3cchan->phy, next);
+		spin_unlock(&next->vc.lock);
+
+		/* If the state changed, try to find another channel */
+		if (!success)
+			goto retry;
+	} else {
+		/* No more jobs, so free up the physical channel */
+		s3c24xx_dma_put_phy(s3cchan->phy);
+	}
+
+	s3cchan->phy = NULL;
+	s3cchan->state = S3C24XX_DMA_CHAN_IDLE;
+}
+
+static void s3c24xx_dma_desc_free(struct virt_dma_desc *vd)
+{
+	struct s3c24xx_txd *txd = to_s3c24xx_txd(&vd->tx);
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(vd->tx.chan);
+
+	if (!s3cchan->slave)
+		dma_descriptor_unmap(&vd->tx);
+
+	s3c24xx_dma_free_txd(txd);
+}
+
+static irqreturn_t s3c24xx_dma_irq(int irq, void *data)
+{
+	struct s3c24xx_dma_phy *phy = data;
+	struct s3c24xx_dma_chan *s3cchan = phy->serving;
+	struct s3c24xx_txd *txd;
+
+	dev_dbg(&phy->host->pdev->dev, "interrupt on channel %d\n", phy->id);
+
+	/*
+	 * Interrupts happen to notify the completion of a transfer and the
+	 * channel should have moved into its stop state already on its own.
+	 * Therefore interrupts on channels not bound to a virtual channel
+	 * should never happen. Nevertheless send a terminate command to the
+	 * channel if the unlikely case happens.
+	 */
+	if (unlikely(!s3cchan)) {
+		dev_err(&phy->host->pdev->dev, "interrupt on unused channel %d\n",
+			phy->id);
+
+		s3c24xx_dma_terminate_phy(phy);
+
+		return IRQ_HANDLED;
+	}
+
+	spin_lock(&s3cchan->vc.lock);
+	txd = s3cchan->at;
+	if (txd) {
+		/* when more sg's are in this txd, start the next one */
+		if (!list_is_last(txd->at, &txd->dsg_list)) {
+			txd->at = txd->at->next;
+			if (txd->cyclic)
+				vchan_cyclic_callback(&txd->vd);
+			s3c24xx_dma_start_next_sg(s3cchan, txd);
+		} else if (!txd->cyclic) {
+			s3cchan->at = NULL;
+			vchan_cookie_complete(&txd->vd);
+
+			/*
+			 * And start the next descriptor (if any),
+			 * otherwise free this channel.
+			 */
+			if (vchan_next_desc(&s3cchan->vc))
+				s3c24xx_dma_start_next_txd(s3cchan);
+			else
+				s3c24xx_dma_phy_free(s3cchan);
+		} else {
+			vchan_cyclic_callback(&txd->vd);
+
+			/* Cyclic: reset at beginning */
+			txd->at = txd->dsg_list.next;
+			s3c24xx_dma_start_next_sg(s3cchan, txd);
+		}
+	}
+	spin_unlock(&s3cchan->vc.lock);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * The DMA ENGINE API
+ */
+
+static int s3c24xx_dma_terminate_all(struct dma_chan *chan)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&s3cchan->vc.lock, flags);
+
+	if (!s3cchan->phy && !s3cchan->at) {
+		dev_err(&s3cdma->pdev->dev, "trying to terminate already stopped channel %d\n",
+			s3cchan->id);
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	s3cchan->state = S3C24XX_DMA_CHAN_IDLE;
+
+	/* Mark physical channel as free */
+	if (s3cchan->phy)
+		s3c24xx_dma_phy_free(s3cchan);
+
+	/* Dequeue current job */
+	if (s3cchan->at) {
+		vchan_terminate_vdesc(&s3cchan->at->vd);
+		s3cchan->at = NULL;
+	}
+
+	/* Dequeue jobs not yet fired as well */
+	s3c24xx_dma_free_txd_list(s3cdma, s3cchan);
+unlock:
+	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+
+	return ret;
+}
+
+static void s3c24xx_dma_synchronize(struct dma_chan *chan)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+
+	vchan_synchronize(&s3cchan->vc);
+}
+
+static void s3c24xx_dma_free_chan_resources(struct dma_chan *chan)
+{
+	/* Ensure all queued descriptors are freed */
+	vchan_free_chan_resources(to_virt_chan(chan));
+}
+
+static enum dma_status s3c24xx_dma_tx_status(struct dma_chan *chan,
+		dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_txd *txd;
+	struct s3c24xx_sg *dsg;
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+	enum dma_status ret;
+	size_t bytes = 0;
+
+	spin_lock_irqsave(&s3cchan->vc.lock, flags);
+	ret = dma_cookie_status(chan, cookie, txstate);
+
+	/*
+	 * There's no point calculating the residue if there's
+	 * no txstate to store the value.
+	 */
+	if (ret == DMA_COMPLETE || !txstate) {
+		spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+		return ret;
+	}
+
+	vd = vchan_find_desc(&s3cchan->vc, cookie);
+	if (vd) {
+		/* On the issued list, so hasn't been processed yet */
+		txd = to_s3c24xx_txd(&vd->tx);
+
+		list_for_each_entry(dsg, &txd->dsg_list, node)
+			bytes += dsg->len;
+	} else {
+		/*
+		 * Currently running, so sum over the pending sg's and
+		 * the currently active one.
+		 */
+		txd = s3cchan->at;
+
+		dsg = list_entry(txd->at, struct s3c24xx_sg, node);
+		list_for_each_entry_from(dsg, &txd->dsg_list, node)
+			bytes += dsg->len;
+
+		bytes += s3c24xx_dma_getbytes_chan(s3cchan);
+	}
+	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+
+	/*
+	 * This cookie is not complete yet.
+	 * Report the number of bytes left in the active transaction and queue.
+	 */
+	dma_set_residue(txstate, bytes);
+
+	/* Whether waiting or running, we're in progress */
+	return ret;
+}
+
+/*
+ * Initialize a descriptor to be used by memcpy submit
+ */
+static struct dma_async_tx_descriptor *s3c24xx_dma_prep_memcpy(
+		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	struct s3c24xx_txd *txd;
+	struct s3c24xx_sg *dsg;
+	int src_mod, dest_mod;
+
+	dev_dbg(&s3cdma->pdev->dev, "prepare memcpy of %zu bytes from %s\n",
+			len, s3cchan->name);
+
+	if ((len & S3C24XX_DCON_TC_MASK) != len) {
+		dev_err(&s3cdma->pdev->dev, "memcpy size %zu too large\n", len);
+		return NULL;
+	}
+
+	txd = s3c24xx_dma_get_txd();
+	if (!txd)
+		return NULL;
+
+	dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT);
+	if (!dsg) {
+		s3c24xx_dma_free_txd(txd);
+		return NULL;
+	}
+	list_add_tail(&dsg->node, &txd->dsg_list);
+
+	dsg->src_addr = src;
+	dsg->dst_addr = dest;
+	dsg->len = len;
+
+	/*
+	 * Determine a suitable transfer width.
+	 * The DMA controller cannot fetch/store information which is not
+	 * naturally aligned on the bus, i.e., a 4 byte fetch must start at
+	 * an address divisible by 4 - more generally addr % width must be 0.
+	 */
+	src_mod = src % 4;
+	dest_mod = dest % 4;
+	switch (len % 4) {
+	case 0:
+		txd->width = (src_mod == 0 && dest_mod == 0) ? 4 : 1;
+		break;
+	case 2:
+		txd->width = ((src_mod == 2 || src_mod == 0) &&
+			      (dest_mod == 2 || dest_mod == 0)) ? 2 : 1;
+		break;
+	default:
+		txd->width = 1;
+		break;
+	}
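+	/*
+	 * For example (illustrative): a 6 byte copy from an address with
+	 * src % 4 == 2 to a 4 byte aligned destination uses the halfword
+	 * width, while an odd length or an odd source/destination address
+	 * falls back to single byte transfers.
+	 */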
+
+	txd->disrcc = S3C24XX_DISRCC_LOC_AHB | S3C24XX_DISRCC_INC_INCREMENT;
+	txd->didstc = S3C24XX_DIDSTC_LOC_AHB | S3C24XX_DIDSTC_INC_INCREMENT;
+	txd->dcon |= S3C24XX_DCON_DEMAND | S3C24XX_DCON_SYNC_HCLK |
+		     S3C24XX_DCON_SERV_WHOLE;
+
+	return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *s3c24xx_dma_prep_dma_cyclic(
+	struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period,
+	enum dma_transfer_direction direction, unsigned long flags)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+	struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id];
+	struct s3c24xx_txd *txd;
+	struct s3c24xx_sg *dsg;
+	unsigned sg_len;
+	dma_addr_t slave_addr;
+	u32 hwcfg = 0;
+	int i;
+
+	dev_dbg(&s3cdma->pdev->dev,
+		"prepare cyclic transaction of %zu bytes with period %zu from %s\n",
+		size, period, s3cchan->name);
+
+	if (!is_slave_direction(direction)) {
+		dev_err(&s3cdma->pdev->dev,
+			"direction %d unsupported\n", direction);
+		return NULL;
+	}
+
+	txd = s3c24xx_dma_get_txd();
+	if (!txd)
+		return NULL;
+
+	txd->cyclic = 1;
+
+	if (cdata->handshake)
+		txd->dcon |= S3C24XX_DCON_HANDSHAKE;
+
+	switch (cdata->bus) {
+	case S3C24XX_DMA_APB:
+		txd->dcon |= S3C24XX_DCON_SYNC_PCLK;
+		hwcfg |= S3C24XX_DISRCC_LOC_APB;
+		break;
+	case S3C24XX_DMA_AHB:
+		txd->dcon |= S3C24XX_DCON_SYNC_HCLK;
+		hwcfg |= S3C24XX_DISRCC_LOC_AHB;
+		break;
+	}
+
+	/*
+	 * Always assume our peripheral destination is a fixed
+	 * address in memory.
+	 */
+	hwcfg |= S3C24XX_DISRCC_INC_FIXED;
+
+	/*
+	 * Individual dma operations are requested by the slave,
+	 * so serve only single atomic operations (S3C24XX_DCON_SERV_SINGLE).
+	 */
+	txd->dcon |= S3C24XX_DCON_SERV_SINGLE;
+
+	if (direction == DMA_MEM_TO_DEV) {
+		txd->disrcc = S3C24XX_DISRCC_LOC_AHB |
+			      S3C24XX_DISRCC_INC_INCREMENT;
+		txd->didstc = hwcfg;
+		slave_addr = s3cchan->cfg.dst_addr;
+		txd->width = s3cchan->cfg.dst_addr_width;
+	} else {
+		txd->disrcc = hwcfg;
+		txd->didstc = S3C24XX_DIDSTC_LOC_AHB |
+			      S3C24XX_DIDSTC_INC_INCREMENT;
+		slave_addr = s3cchan->cfg.src_addr;
+		txd->width = s3cchan->cfg.src_addr_width;
+	}
+
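+	/* Build one scatterlist entry per period of the cyclic buffer. */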
+	sg_len = size / period;
+
+	for (i = 0; i < sg_len; i++) {
+		dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT);
+		if (!dsg) {
+			s3c24xx_dma_free_txd(txd);
+			return NULL;
+		}
+		list_add_tail(&dsg->node, &txd->dsg_list);
+
+		dsg->len = period;
+		/* Check last period length */
+		if (i == sg_len - 1)
+			dsg->len = size - period * i;
+		if (direction == DMA_MEM_TO_DEV) {
+			dsg->src_addr = addr + period * i;
+			dsg->dst_addr = slave_addr;
+		} else { /* DMA_DEV_TO_MEM */
+			dsg->src_addr = slave_addr;
+			dsg->dst_addr = addr + period * i;
+		}
+	}
+
+	return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *s3c24xx_dma_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+	struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id];
+	struct s3c24xx_txd *txd;
+	struct s3c24xx_sg *dsg;
+	struct scatterlist *sg;
+	dma_addr_t slave_addr;
+	u32 hwcfg = 0;
+	int tmp;
+
+	dev_dbg(&s3cdma->pdev->dev, "prepare transaction of %d bytes from %s\n",
+			sg_dma_len(sgl), s3cchan->name);
+
+	txd = s3c24xx_dma_get_txd();
+	if (!txd)
+		return NULL;
+
+	if (cdata->handshake)
+		txd->dcon |= S3C24XX_DCON_HANDSHAKE;
+
+	switch (cdata->bus) {
+	case S3C24XX_DMA_APB:
+		txd->dcon |= S3C24XX_DCON_SYNC_PCLK;
+		hwcfg |= S3C24XX_DISRCC_LOC_APB;
+		break;
+	case S3C24XX_DMA_AHB:
+		txd->dcon |= S3C24XX_DCON_SYNC_HCLK;
+		hwcfg |= S3C24XX_DISRCC_LOC_AHB;
+		break;
+	}
+
+	/*
+	 * Always assume our peripheral destination is a fixed
+	 * address in memory.
+	 */
+	hwcfg |= S3C24XX_DISRCC_INC_FIXED;
+
+	/*
+	 * Individual dma operations are requested by the slave,
+	 * so serve only single atomic operations (S3C24XX_DCON_SERV_SINGLE).
+	 */
+	txd->dcon |= S3C24XX_DCON_SERV_SINGLE;
+
+	if (direction == DMA_MEM_TO_DEV) {
+		txd->disrcc = S3C24XX_DISRCC_LOC_AHB |
+			      S3C24XX_DISRCC_INC_INCREMENT;
+		txd->didstc = hwcfg;
+		slave_addr = s3cchan->cfg.dst_addr;
+		txd->width = s3cchan->cfg.dst_addr_width;
+	} else if (direction == DMA_DEV_TO_MEM) {
+		txd->disrcc = hwcfg;
+		txd->didstc = S3C24XX_DIDSTC_LOC_AHB |
+			      S3C24XX_DIDSTC_INC_INCREMENT;
+		slave_addr = s3cchan->cfg.src_addr;
+		txd->width = s3cchan->cfg.src_addr_width;
+	} else {
+		s3c24xx_dma_free_txd(txd);
+		dev_err(&s3cdma->pdev->dev,
+			"direction %d unsupported\n", direction);
+		return NULL;
+	}
+
+	for_each_sg(sgl, sg, sg_len, tmp) {
+		dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT);
+		if (!dsg) {
+			s3c24xx_dma_free_txd(txd);
+			return NULL;
+		}
+		list_add_tail(&dsg->node, &txd->dsg_list);
+
+		dsg->len = sg_dma_len(sg);
+		if (direction == DMA_MEM_TO_DEV) {
+			dsg->src_addr = sg_dma_address(sg);
+			dsg->dst_addr = slave_addr;
+		} else { /* DMA_DEV_TO_MEM */
+			dsg->src_addr = slave_addr;
+			dsg->dst_addr = sg_dma_address(sg);
+		}
+	}
+
+	return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
+}
+
+/*
+ * Move all submitted descriptors to the issued list and, if the channel
+ * is idle, try to allocate a physical channel and start the transfer.
+ */
+static void s3c24xx_dma_issue_pending(struct dma_chan *chan)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&s3cchan->vc.lock, flags);
+	if (vchan_issue_pending(&s3cchan->vc)) {
+		if (!s3cchan->phy && s3cchan->state != S3C24XX_DMA_CHAN_WAITING)
+			s3c24xx_dma_phy_alloc_and_start(s3cchan);
+	}
+	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+}
+
+/*
+ * Bringup and teardown
+ */
+
+/*
+ * Initialise the DMAC memcpy/slave channels.
+ * A local channel wrapper is allocated for each one to hold the required data.
+ */
+static int s3c24xx_dma_init_virtual_channels(struct s3c24xx_dma_engine *s3cdma,
+		struct dma_device *dmadev, unsigned int channels, bool slave)
+{
+	struct s3c24xx_dma_chan *chan;
+	int i;
+
+	INIT_LIST_HEAD(&dmadev->channels);
+
+	/*
+	 * Register as many virtual channels as we have physical channels;
+	 * we won't always be able to use all of them, but the code will
+	 * have to cope with that situation.
+	 */
+	for (i = 0; i < channels; i++) {
+		chan = devm_kzalloc(dmadev->dev, sizeof(*chan), GFP_KERNEL);
+		if (!chan)
+			return -ENOMEM;
+
+		chan->id = i;
+		chan->host = s3cdma;
+		chan->state = S3C24XX_DMA_CHAN_IDLE;
+
+		if (slave) {
+			chan->slave = true;
+			chan->name = kasprintf(GFP_KERNEL, "slave%d", i);
+			if (!chan->name)
+				return -ENOMEM;
+		} else {
+			chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i);
+			if (!chan->name)
+				return -ENOMEM;
+		}
+		dev_dbg(dmadev->dev,
+			 "initialize virtual channel \"%s\"\n",
+			 chan->name);
+
+		chan->vc.desc_free = s3c24xx_dma_desc_free;
+		vchan_init(&chan->vc, dmadev);
+	}
+	dev_info(dmadev->dev, "initialized %d virtual %s channels\n",
+		 i, slave ? "slave" : "memcpy");
+	return i;
+}
+
+static void s3c24xx_dma_free_virtual_channels(struct dma_device *dmadev)
+{
+	struct s3c24xx_dma_chan *chan = NULL;
+	struct s3c24xx_dma_chan *next;
+
+	list_for_each_entry_safe(chan,
+				 next, &dmadev->channels, vc.chan.device_node) {
+		list_del(&chan->vc.chan.device_node);
+		tasklet_kill(&chan->vc.task);
+	}
+}
+
+/* s3c2410, s3c2440 and s3c2442 have a 0x40 stride without separate clocks */
+static struct soc_data soc_s3c2410 = {
+	.stride = 0x40,
+	.has_reqsel = false,
+	.has_clocks = false,
+};
+
+/* s3c2412 and s3c2413 have a 0x40 stride and dmareqsel mechanism */
+static struct soc_data soc_s3c2412 = {
+	.stride = 0x40,
+	.has_reqsel = true,
+	.has_clocks = true,
+};
+
+/* s3c2443 and following have a 0x100 stride and dmareqsel mechanism */
+static struct soc_data soc_s3c2443 = {
+	.stride = 0x100,
+	.has_reqsel = true,
+	.has_clocks = true,
+};
+
+static const struct platform_device_id s3c24xx_dma_driver_ids[] = {
+	{
+		.name		= "s3c2410-dma",
+		.driver_data	= (kernel_ulong_t)&soc_s3c2410,
+	}, {
+		.name		= "s3c2412-dma",
+		.driver_data	= (kernel_ulong_t)&soc_s3c2412,
+	}, {
+		.name		= "s3c2443-dma",
+		.driver_data	= (kernel_ulong_t)&soc_s3c2443,
+	},
+	{ },
+};
+
+static struct soc_data *s3c24xx_dma_get_soc_data(struct platform_device *pdev)
+{
+	return (struct soc_data *)
+			 platform_get_device_id(pdev)->driver_data;
+}
+
+static int s3c24xx_dma_probe(struct platform_device *pdev)
+{
+	const struct s3c24xx_dma_platdata *pdata = dev_get_platdata(&pdev->dev);
+	struct s3c24xx_dma_engine *s3cdma;
+	struct soc_data *sdata;
+	struct resource *res;
+	int ret;
+	int i;
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "platform data missing\n");
+		return -ENODEV;
+	}
+
+	/* Basic sanity check */
+	if (pdata->num_phy_channels > MAX_DMA_CHANNELS) {
+		dev_err(&pdev->dev, "too many dma channels %d, max %d\n",
+			pdata->num_phy_channels, MAX_DMA_CHANNELS);
+		return -EINVAL;
+	}
+
+	sdata = s3c24xx_dma_get_soc_data(pdev);
+	if (!sdata)
+		return -EINVAL;
+
+	s3cdma = devm_kzalloc(&pdev->dev, sizeof(*s3cdma), GFP_KERNEL);
+	if (!s3cdma)
+		return -ENOMEM;
+
+	s3cdma->pdev = pdev;
+	s3cdma->pdata = pdata;
+	s3cdma->sdata = sdata;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	s3cdma->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(s3cdma->base))
+		return PTR_ERR(s3cdma->base);
+
+	s3cdma->phy_chans = devm_kcalloc(&pdev->dev,
+					      pdata->num_phy_channels,
+					      sizeof(struct s3c24xx_dma_phy),
+					      GFP_KERNEL);
+	if (!s3cdma->phy_chans)
+		return -ENOMEM;
+
+	/* acquire irqs and clocks for all physical channels */
+	for (i = 0; i < pdata->num_phy_channels; i++) {
+		struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
+		char clk_name[6];
+
+		phy->id = i;
+		phy->base = s3cdma->base + (i * sdata->stride);
+		phy->host = s3cdma;
+
+		phy->irq = platform_get_irq(pdev, i);
+		if (phy->irq < 0) {
+			dev_err(&pdev->dev, "failed to get irq %d, err %d\n",
+				i, phy->irq);
+			continue;
+		}
+
+		ret = devm_request_irq(&pdev->dev, phy->irq, s3c24xx_dma_irq,
+				       0, pdev->name, phy);
+		if (ret) {
+			dev_err(&pdev->dev, "Unable to request irq for channel %d, error %d\n",
+				i, ret);
+			continue;
+		}
+
+		if (sdata->has_clocks) {
+			sprintf(clk_name, "dma.%d", i);
+			phy->clk = devm_clk_get(&pdev->dev, clk_name);
+			if (IS_ERR(phy->clk)) {
+				dev_err(&pdev->dev, "unable to acquire clock for channel %d, error %ld\n",
+					i, PTR_ERR(phy->clk));
+				continue;
+			}
+
+			ret = clk_prepare(phy->clk);
+			if (ret) {
+				dev_err(&pdev->dev, "clock for phy %d failed, error %d\n",
+					i, ret);
+				continue;
+			}
+		}
+
+		spin_lock_init(&phy->lock);
+		phy->valid = true;
+
+		dev_dbg(&pdev->dev, "physical channel %d is %s\n",
+			i, s3c24xx_dma_phy_busy(phy) ? "BUSY" : "FREE");
+	}
+
+	/* Initialize memcpy engine */
+	dma_cap_set(DMA_MEMCPY, s3cdma->memcpy.cap_mask);
+	dma_cap_set(DMA_PRIVATE, s3cdma->memcpy.cap_mask);
+	s3cdma->memcpy.dev = &pdev->dev;
+	s3cdma->memcpy.device_free_chan_resources =
+					s3c24xx_dma_free_chan_resources;
+	s3cdma->memcpy.device_prep_dma_memcpy = s3c24xx_dma_prep_memcpy;
+	s3cdma->memcpy.device_tx_status = s3c24xx_dma_tx_status;
+	s3cdma->memcpy.device_issue_pending = s3c24xx_dma_issue_pending;
+	s3cdma->memcpy.device_config = s3c24xx_dma_set_runtime_config;
+	s3cdma->memcpy.device_terminate_all = s3c24xx_dma_terminate_all;
+	s3cdma->memcpy.device_synchronize = s3c24xx_dma_synchronize;
+
+	/* Initialize slave engine for SoC internal dedicated peripherals */
+	dma_cap_set(DMA_SLAVE, s3cdma->slave.cap_mask);
+	dma_cap_set(DMA_CYCLIC, s3cdma->slave.cap_mask);
+	dma_cap_set(DMA_PRIVATE, s3cdma->slave.cap_mask);
+	s3cdma->slave.dev = &pdev->dev;
+	s3cdma->slave.device_free_chan_resources =
+					s3c24xx_dma_free_chan_resources;
+	s3cdma->slave.device_tx_status = s3c24xx_dma_tx_status;
+	s3cdma->slave.device_issue_pending = s3c24xx_dma_issue_pending;
+	s3cdma->slave.device_prep_slave_sg = s3c24xx_dma_prep_slave_sg;
+	s3cdma->slave.device_prep_dma_cyclic = s3c24xx_dma_prep_dma_cyclic;
+	s3cdma->slave.device_config = s3c24xx_dma_set_runtime_config;
+	s3cdma->slave.device_terminate_all = s3c24xx_dma_terminate_all;
+	s3cdma->slave.device_synchronize = s3c24xx_dma_synchronize;
+	s3cdma->slave.filter.map = pdata->slave_map;
+	s3cdma->slave.filter.mapcnt = pdata->slavecnt;
+	s3cdma->slave.filter.fn = s3c24xx_dma_filter;
+
+	/* Register as many memcpy channels as there are physical channels */
+	ret = s3c24xx_dma_init_virtual_channels(s3cdma, &s3cdma->memcpy,
+						pdata->num_phy_channels, false);
+	if (ret <= 0) {
+		dev_warn(&pdev->dev,
+			 "%s failed to enumerate memcpy channels - %d\n",
+			 __func__, ret);
+		goto err_memcpy;
+	}
+
+	/* Register slave channels */
+	ret = s3c24xx_dma_init_virtual_channels(s3cdma, &s3cdma->slave,
+				pdata->num_channels, true);
+	if (ret <= 0) {
+		dev_warn(&pdev->dev,
+			"%s failed to enumerate slave channels - %d\n",
+				__func__, ret);
+		goto err_slave;
+	}
+
+	ret = dma_async_device_register(&s3cdma->memcpy);
+	if (ret) {
+		dev_warn(&pdev->dev,
+			"%s failed to register memcpy as an async device - %d\n",
+			__func__, ret);
+		goto err_memcpy_reg;
+	}
+
+	ret = dma_async_device_register(&s3cdma->slave);
+	if (ret) {
+		dev_warn(&pdev->dev,
+			"%s failed to register slave as an async device - %d\n",
+			__func__, ret);
+		goto err_slave_reg;
+	}
+
+	platform_set_drvdata(pdev, s3cdma);
+	dev_info(&pdev->dev, "Loaded dma driver with %d physical channels\n",
+		 pdata->num_phy_channels);
+
+	return 0;
+
+err_slave_reg:
+	dma_async_device_unregister(&s3cdma->memcpy);
+err_memcpy_reg:
+	s3c24xx_dma_free_virtual_channels(&s3cdma->slave);
+err_slave:
+	s3c24xx_dma_free_virtual_channels(&s3cdma->memcpy);
+err_memcpy:
+	if (sdata->has_clocks)
+		for (i = 0; i < pdata->num_phy_channels; i++) {
+			struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
+			if (phy->valid)
+				clk_unprepare(phy->clk);
+		}
+
+	return ret;
+}
+
+static void s3c24xx_dma_free_irq(struct platform_device *pdev,
+				struct s3c24xx_dma_engine *s3cdma)
+{
+	int i;
+
+	for (i = 0; i < s3cdma->pdata->num_phy_channels; i++) {
+		struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
+
+		devm_free_irq(&pdev->dev, phy->irq, phy);
+	}
+}
+
+static int s3c24xx_dma_remove(struct platform_device *pdev)
+{
+	const struct s3c24xx_dma_platdata *pdata = dev_get_platdata(&pdev->dev);
+	struct s3c24xx_dma_engine *s3cdma = platform_get_drvdata(pdev);
+	struct soc_data *sdata = s3c24xx_dma_get_soc_data(pdev);
+	int i;
+
+	dma_async_device_unregister(&s3cdma->slave);
+	dma_async_device_unregister(&s3cdma->memcpy);
+
+	s3c24xx_dma_free_irq(pdev, s3cdma);
+
+	s3c24xx_dma_free_virtual_channels(&s3cdma->slave);
+	s3c24xx_dma_free_virtual_channels(&s3cdma->memcpy);
+
+	if (sdata->has_clocks)
+		for (i = 0; i < pdata->num_phy_channels; i++) {
+			struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
+			if (phy->valid)
+				clk_unprepare(phy->clk);
+		}
+
+	return 0;
+}
+
+static struct platform_driver s3c24xx_dma_driver = {
+	.driver		= {
+		.name	= "s3c24xx-dma",
+	},
+	.id_table	= s3c24xx_dma_driver_ids,
+	.probe		= s3c24xx_dma_probe,
+	.remove		= s3c24xx_dma_remove,
+};
+
+module_platform_driver(s3c24xx_dma_driver);
+
+bool s3c24xx_dma_filter(struct dma_chan *chan, void *param)
+{
+	struct s3c24xx_dma_chan *s3cchan;
+
+	if (chan->device->dev->driver != &s3c24xx_dma_driver.driver)
+		return false;
+
+	s3cchan = to_s3c24xx_dma_chan(chan);
+
+	return s3cchan->id == (uintptr_t)param;
+}
+EXPORT_SYMBOL(s3c24xx_dma_filter);
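+
+/*
+ * Illustrative usage sketch (not part of the original driver): a client
+ * could claim a specific slave channel through the generic filter
+ * interface, where "id" below stands for the requested virtual channel
+ * number:
+ *
+ *	dma_cap_mask_t mask;
+ *	struct dma_chan *chan;
+ *
+ *	dma_cap_zero(mask);
+ *	dma_cap_set(DMA_SLAVE, mask);
+ *	chan = dma_request_channel(mask, s3c24xx_dma_filter,
+ *				   (void *)(uintptr_t)id);
+ */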
+
+MODULE_DESCRIPTION("S3C24XX DMA Driver");
+MODULE_AUTHOR("Heiko Stuebner");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/sa11x0-dma.c b/drivers/dma/sa11x0-dma.c
new file mode 100644
index 0000000..b31d07c
--- /dev/null
+++ b/drivers/dma/sa11x0-dma.c
@@ -0,0 +1,1117 @@
+/*
+ * SA11x0 DMAengine support
+ *
+ * Copyright (C) 2012 Russell King
+ *   Derived in part from arch/arm/mach-sa1100/dma.c,
+ *   Copyright (C) 2000, 2001 by Nicolas Pitre
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/sched.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/sa11x0-dma.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "virt-dma.h"
+
+#define NR_PHY_CHAN	6
+#define DMA_ALIGN	3
+#define DMA_MAX_SIZE	0x1fff
+#define DMA_CHUNK_SIZE	0x1000
+
+#define DMA_DDAR	0x00
+#define DMA_DCSR_S	0x04
+#define DMA_DCSR_C	0x08
+#define DMA_DCSR_R	0x0c
+#define DMA_DBSA	0x10
+#define DMA_DBTA	0x14
+#define DMA_DBSB	0x18
+#define DMA_DBTB	0x1c
+#define DMA_SIZE	0x20
+
+#define DCSR_RUN	(1 << 0)
+#define DCSR_IE		(1 << 1)
+#define DCSR_ERROR	(1 << 2)
+#define DCSR_DONEA	(1 << 3)
+#define DCSR_STRTA	(1 << 4)
+#define DCSR_DONEB	(1 << 5)
+#define DCSR_STRTB	(1 << 6)
+#define DCSR_BIU	(1 << 7)
+
+#define DDAR_RW		(1 << 0)	/* 0 = W, 1 = R */
+#define DDAR_E		(1 << 1)	/* 0 = LE, 1 = BE */
+#define DDAR_BS		(1 << 2)	/* 0 = BS4, 1 = BS8 */
+#define DDAR_DW		(1 << 3)	/* 0 = 8b, 1 = 16b */
+#define DDAR_Ser0UDCTr	(0x0 << 4)
+#define DDAR_Ser0UDCRc	(0x1 << 4)
+#define DDAR_Ser1SDLCTr	(0x2 << 4)
+#define DDAR_Ser1SDLCRc	(0x3 << 4)
+#define DDAR_Ser1UARTTr	(0x4 << 4)
+#define DDAR_Ser1UARTRc	(0x5 << 4)
+#define DDAR_Ser2ICPTr	(0x6 << 4)
+#define DDAR_Ser2ICPRc	(0x7 << 4)
+#define DDAR_Ser3UARTTr	(0x8 << 4)
+#define DDAR_Ser3UARTRc	(0x9 << 4)
+#define DDAR_Ser4MCP0Tr	(0xa << 4)
+#define DDAR_Ser4MCP0Rc	(0xb << 4)
+#define DDAR_Ser4MCP1Tr	(0xc << 4)
+#define DDAR_Ser4MCP1Rc	(0xd << 4)
+#define DDAR_Ser4SSPTr	(0xe << 4)
+#define DDAR_Ser4SSPRc	(0xf << 4)
+
+struct sa11x0_dma_sg {
+	u32			addr;
+	u32			len;
+};
+
+struct sa11x0_dma_desc {
+	struct virt_dma_desc	vd;
+
+	u32			ddar;
+	size_t			size;
+	unsigned		period;
+	bool			cyclic;
+
+	unsigned		sglen;
+	struct sa11x0_dma_sg	sg[0];
+};
+
+struct sa11x0_dma_phy;
+
+struct sa11x0_dma_chan {
+	struct virt_dma_chan	vc;
+
+	/* protected by c->vc.lock */
+	struct sa11x0_dma_phy	*phy;
+	enum dma_status		status;
+
+	/* protected by d->lock */
+	struct list_head	node;
+
+	u32			ddar;
+	const char		*name;
+};
+
+struct sa11x0_dma_phy {
+	void __iomem		*base;
+	struct sa11x0_dma_dev	*dev;
+	unsigned		num;
+
+	struct sa11x0_dma_chan	*vchan;
+
+	/* Protected by c->vc.lock */
+	unsigned		sg_load;
+	struct sa11x0_dma_desc	*txd_load;
+	unsigned		sg_done;
+	struct sa11x0_dma_desc	*txd_done;
+	u32			dbs[2];
+	u32			dbt[2];
+	u32			dcsr;
+};
+
+struct sa11x0_dma_dev {
+	struct dma_device	slave;
+	void __iomem		*base;
+	spinlock_t		lock;
+	struct tasklet_struct	task;
+	struct list_head	chan_pending;
+	struct sa11x0_dma_phy	phy[NR_PHY_CHAN];
+};
+
+static struct sa11x0_dma_chan *to_sa11x0_dma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct sa11x0_dma_chan, vc.chan);
+}
+
+static struct sa11x0_dma_dev *to_sa11x0_dma(struct dma_device *dmadev)
+{
+	return container_of(dmadev, struct sa11x0_dma_dev, slave);
+}
+
+static struct sa11x0_dma_desc *sa11x0_dma_next_desc(struct sa11x0_dma_chan *c)
+{
+	struct virt_dma_desc *vd = vchan_next_desc(&c->vc);
+
+	return vd ? container_of(vd, struct sa11x0_dma_desc, vd) : NULL;
+}
+
+static void sa11x0_dma_free_desc(struct virt_dma_desc *vd)
+{
+	kfree(container_of(vd, struct sa11x0_dma_desc, vd));
+}
+
+static void sa11x0_dma_start_desc(struct sa11x0_dma_phy *p, struct sa11x0_dma_desc *txd)
+{
+	list_del(&txd->vd.node);
+	p->txd_load = txd;
+	p->sg_load = 0;
+
+	dev_vdbg(p->dev->slave.dev, "pchan %u: txd %p[%x]: starting: DDAR:%x\n",
+		p->num, &txd->vd, txd->vd.tx.cookie, txd->ddar);
+}
+
+static void noinline sa11x0_dma_start_sg(struct sa11x0_dma_phy *p,
+	struct sa11x0_dma_chan *c)
+{
+	struct sa11x0_dma_desc *txd = p->txd_load;
+	struct sa11x0_dma_sg *sg;
+	void __iomem *base = p->base;
+	unsigned dbsx, dbtx;
+	u32 dcsr;
+
+	if (!txd)
+		return;
+
+	dcsr = readl_relaxed(base + DMA_DCSR_R);
+
+	/* Don't try to load the next transfer if both buffers are started */
+	if ((dcsr & (DCSR_STRTA | DCSR_STRTB)) == (DCSR_STRTA | DCSR_STRTB))
+		return;
+
+	if (p->sg_load == txd->sglen) {
+		if (!txd->cyclic) {
+			struct sa11x0_dma_desc *txn = sa11x0_dma_next_desc(c);
+
+			/*
+			 * We have reached the end of the current descriptor.
+			 * Peek at the next descriptor, and if compatible with
+			 * the current, start processing it.
+			 */
+			if (txn && txn->ddar == txd->ddar) {
+				txd = txn;
+				sa11x0_dma_start_desc(p, txn);
+			} else {
+				p->txd_load = NULL;
+				return;
+			}
+		} else {
+			/* Cyclic: reset back to beginning */
+			p->sg_load = 0;
+		}
+	}
+
+	sg = &txd->sg[p->sg_load++];
+
+	/* Select buffer to load according to channel status */
+	if (((dcsr & (DCSR_BIU | DCSR_STRTB)) == (DCSR_BIU | DCSR_STRTB)) ||
+	    ((dcsr & (DCSR_BIU | DCSR_STRTA)) == 0)) {
+		dbsx = DMA_DBSA;
+		dbtx = DMA_DBTA;
+		dcsr = DCSR_STRTA | DCSR_IE | DCSR_RUN;
+	} else {
+		dbsx = DMA_DBSB;
+		dbtx = DMA_DBTB;
+		dcsr = DCSR_STRTB | DCSR_IE | DCSR_RUN;
+	}
+
+	writel_relaxed(sg->addr, base + dbsx);
+	writel_relaxed(sg->len, base + dbtx);
+	writel(dcsr, base + DMA_DCSR_S);
+
+	dev_dbg(p->dev->slave.dev, "pchan %u: load: DCSR:%02x DBS%c:%08x DBT%c:%08x\n",
+		p->num, dcsr,
+		'A' + (dbsx == DMA_DBSB), sg->addr,
+		'A' + (dbtx == DMA_DBTB), sg->len);
+}
+
+static void noinline sa11x0_dma_complete(struct sa11x0_dma_phy *p,
+	struct sa11x0_dma_chan *c)
+{
+	struct sa11x0_dma_desc *txd = p->txd_done;
+
+	if (++p->sg_done == txd->sglen) {
+		if (!txd->cyclic) {
+			vchan_cookie_complete(&txd->vd);
+
+			p->sg_done = 0;
+			p->txd_done = p->txd_load;
+
+			if (!p->txd_done)
+				tasklet_schedule(&p->dev->task);
+		} else {
+			if ((p->sg_done % txd->period) == 0)
+				vchan_cyclic_callback(&txd->vd);
+
+			/* Cyclic: reset back to beginning */
+			p->sg_done = 0;
+		}
+	}
+
+	sa11x0_dma_start_sg(p, c);
+}
+
+static irqreturn_t sa11x0_dma_irq(int irq, void *dev_id)
+{
+	struct sa11x0_dma_phy *p = dev_id;
+	struct sa11x0_dma_dev *d = p->dev;
+	struct sa11x0_dma_chan *c;
+	u32 dcsr;
+
+	dcsr = readl_relaxed(p->base + DMA_DCSR_R);
+	if (!(dcsr & (DCSR_ERROR | DCSR_DONEA | DCSR_DONEB)))
+		return IRQ_NONE;
+
+	/* Clear reported status bits */
+	writel_relaxed(dcsr & (DCSR_ERROR | DCSR_DONEA | DCSR_DONEB),
+		p->base + DMA_DCSR_C);
+
+	dev_dbg(d->slave.dev, "pchan %u: irq: DCSR:%02x\n", p->num, dcsr);
+
+	if (dcsr & DCSR_ERROR) {
+		dev_err(d->slave.dev, "pchan %u: error. DCSR:%02x DDAR:%08x DBSA:%08x DBTA:%08x DBSB:%08x DBTB:%08x\n",
+			p->num, dcsr,
+			readl_relaxed(p->base + DMA_DDAR),
+			readl_relaxed(p->base + DMA_DBSA),
+			readl_relaxed(p->base + DMA_DBTA),
+			readl_relaxed(p->base + DMA_DBSB),
+			readl_relaxed(p->base + DMA_DBTB));
+	}
+
+	c = p->vchan;
+	if (c) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&c->vc.lock, flags);
+		/*
+		 * Now that we're holding the lock, check that the vchan
+		 * really is associated with this pchan before touching the
+		 * hardware.  This should always succeed, because we won't
+		 * change p->vchan or c->phy while the channel is actively
+		 * transferring.
+		 */
+		if (c->phy == p) {
+			if (dcsr & DCSR_DONEA)
+				sa11x0_dma_complete(p, c);
+			if (dcsr & DCSR_DONEB)
+				sa11x0_dma_complete(p, c);
+		}
+		spin_unlock_irqrestore(&c->vc.lock, flags);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void sa11x0_dma_start_txd(struct sa11x0_dma_chan *c)
+{
+	struct sa11x0_dma_desc *txd = sa11x0_dma_next_desc(c);
+
+	/* If the issued list is empty, we have no further txds to process */
+	if (txd) {
+		struct sa11x0_dma_phy *p = c->phy;
+
+		sa11x0_dma_start_desc(p, txd);
+		p->txd_done = txd;
+		p->sg_done = 0;
+
+		/* The channel should not have any transfers started */
+		WARN_ON(readl_relaxed(p->base + DMA_DCSR_R) &
+				      (DCSR_STRTA | DCSR_STRTB));
+
+		/* Clear the run and start bits before changing DDAR */
+		writel_relaxed(DCSR_RUN | DCSR_STRTA | DCSR_STRTB,
+			       p->base + DMA_DCSR_C);
+		writel_relaxed(txd->ddar, p->base + DMA_DDAR);
+
+		/* Try to start both buffers */
+		sa11x0_dma_start_sg(p, c);
+		sa11x0_dma_start_sg(p, c);
+	}
+}
+
+static void sa11x0_dma_tasklet(unsigned long arg)
+{
+	struct sa11x0_dma_dev *d = (struct sa11x0_dma_dev *)arg;
+	struct sa11x0_dma_phy *p;
+	struct sa11x0_dma_chan *c;
+	unsigned pch, pch_alloc = 0;
+
+	dev_dbg(d->slave.dev, "tasklet enter\n");
+
+	list_for_each_entry(c, &d->slave.channels, vc.chan.device_node) {
+		spin_lock_irq(&c->vc.lock);
+		p = c->phy;
+		if (p && !p->txd_done) {
+			sa11x0_dma_start_txd(c);
+			if (!p->txd_done) {
+				/* No current txd associated with this channel */
+				dev_dbg(d->slave.dev, "pchan %u: free\n", p->num);
+
+				/* Mark this channel free */
+				c->phy = NULL;
+				p->vchan = NULL;
+			}
+		}
+		spin_unlock_irq(&c->vc.lock);
+	}
+
+	spin_lock_irq(&d->lock);
+	for (pch = 0; pch < NR_PHY_CHAN; pch++) {
+		p = &d->phy[pch];
+
+		if (p->vchan == NULL && !list_empty(&d->chan_pending)) {
+			c = list_first_entry(&d->chan_pending,
+				struct sa11x0_dma_chan, node);
+			list_del_init(&c->node);
+
+			pch_alloc |= 1 << pch;
+
+			/* Mark this channel allocated */
+			p->vchan = c;
+
+			dev_dbg(d->slave.dev, "pchan %u: alloc vchan %p\n", pch, &c->vc);
+		}
+	}
+	spin_unlock_irq(&d->lock);
+
+	for (pch = 0; pch < NR_PHY_CHAN; pch++) {
+		if (pch_alloc & (1 << pch)) {
+			p = &d->phy[pch];
+			c = p->vchan;
+
+			spin_lock_irq(&c->vc.lock);
+			c->phy = p;
+
+			sa11x0_dma_start_txd(c);
+			spin_unlock_irq(&c->vc.lock);
+		}
+	}
+
+	dev_dbg(d->slave.dev, "tasklet exit\n");
+}
+
+
+static void sa11x0_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+	struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device);
+	unsigned long flags;
+
+	spin_lock_irqsave(&d->lock, flags);
+	list_del_init(&c->node);
+	spin_unlock_irqrestore(&d->lock, flags);
+
+	vchan_free_chan_resources(&c->vc);
+}
+
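+/*
+ * Read back the current DMA position for residue calculation: the buffer
+ * start address register (DBSA or DBSB) selected according to the channel
+ * status bits.
+ */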
+static dma_addr_t sa11x0_dma_pos(struct sa11x0_dma_phy *p)
+{
+	unsigned reg;
+	u32 dcsr;
+
+	dcsr = readl_relaxed(p->base + DMA_DCSR_R);
+
+	if ((dcsr & (DCSR_BIU | DCSR_STRTA)) == DCSR_STRTA ||
+	    (dcsr & (DCSR_BIU | DCSR_STRTB)) == DCSR_BIU)
+		reg = DMA_DBSA;
+	else
+		reg = DMA_DBSB;
+
+	return readl_relaxed(p->base + reg);
+}
+
+static enum dma_status sa11x0_dma_tx_status(struct dma_chan *chan,
+	dma_cookie_t cookie, struct dma_tx_state *state)
+{
+	struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+	struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device);
+	struct sa11x0_dma_phy *p;
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+	enum dma_status ret;
+
+	ret = dma_cookie_status(&c->vc.chan, cookie, state);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	if (!state)
+		return c->status;
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+	p = c->phy;
+
+	/*
+	 * If the cookie is on our issue queue, then the residue is
+	 * its total size.
+	 */
+	vd = vchan_find_desc(&c->vc, cookie);
+	if (vd) {
+		state->residue = container_of(vd, struct sa11x0_dma_desc, vd)->size;
+	} else if (!p) {
+		state->residue = 0;
+	} else {
+		struct sa11x0_dma_desc *txd;
+		size_t bytes = 0;
+
+		if (p->txd_done && p->txd_done->vd.tx.cookie == cookie)
+			txd = p->txd_done;
+		else if (p->txd_load && p->txd_load->vd.tx.cookie == cookie)
+			txd = p->txd_load;
+		else
+			txd = NULL;
+
+		ret = c->status;
+		if (txd) {
+			dma_addr_t addr = sa11x0_dma_pos(p);
+			unsigned i;
+
+			dev_vdbg(d->slave.dev, "tx_status: addr:%pad\n", &addr);
+
+			for (i = 0; i < txd->sglen; i++) {
+				dev_vdbg(d->slave.dev, "tx_status: [%u] %x+%x\n",
+					i, txd->sg[i].addr, txd->sg[i].len);
+				if (addr >= txd->sg[i].addr &&
+				    addr < txd->sg[i].addr + txd->sg[i].len) {
+					unsigned len;
+
+					len = txd->sg[i].len -
+						(addr - txd->sg[i].addr);
+					dev_vdbg(d->slave.dev, "tx_status: [%u] +%x\n",
+						i, len);
+					bytes += len;
+					i++;
+					break;
+				}
+			}
+			for (; i < txd->sglen; i++) {
+				dev_vdbg(d->slave.dev, "tx_status: [%u] %x+%x ++\n",
+					i, txd->sg[i].addr, txd->sg[i].len);
+				bytes += txd->sg[i].len;
+			}
+		}
+		state->residue = bytes;
+	}
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+
+	dev_vdbg(d->slave.dev, "tx_status: bytes 0x%x\n", state->residue);
+
+	return ret;
+}
+
+/*
+ * Move pending txds to the issued list, and re-init pending list.
+ * If not already pending, add this channel to the list of pending
+ * channels and trigger the tasklet to run.
+ */
+static void sa11x0_dma_issue_pending(struct dma_chan *chan)
+{
+	struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+	struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device);
+	unsigned long flags;
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+	if (vchan_issue_pending(&c->vc)) {
+		if (!c->phy) {
+			spin_lock(&d->lock);
+			if (list_empty(&c->node)) {
+				list_add_tail(&c->node, &d->chan_pending);
+				tasklet_schedule(&d->task);
+				dev_dbg(d->slave.dev, "vchan %p: issued\n", &c->vc);
+			}
+			spin_unlock(&d->lock);
+		}
+	} else
+		dev_dbg(d->slave.dev, "vchan %p: nothing to issue\n", &c->vc);
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+}
+
+static struct dma_async_tx_descriptor *sa11x0_dma_prep_slave_sg(
+	struct dma_chan *chan, struct scatterlist *sg, unsigned int sglen,
+	enum dma_transfer_direction dir, unsigned long flags, void *context)
+{
+	struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+	struct sa11x0_dma_desc *txd;
+	struct scatterlist *sgent;
+	unsigned i, j = sglen;
+	size_t size = 0;
+
+	/* SA11x0 channels can only operate in their native direction */
+	if (dir != (c->ddar & DDAR_RW ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV)) {
+		dev_err(chan->device->dev, "vchan %p: bad DMA direction: DDAR:%08x dir:%u\n",
+			&c->vc, c->ddar, dir);
+		return NULL;
+	}
+
+	/* Do not allow zero-sized txds */
+	if (sglen == 0)
+		return NULL;
+
+	for_each_sg(sg, sgent, sglen, i) {
+		dma_addr_t addr = sg_dma_address(sgent);
+		unsigned int len = sg_dma_len(sgent);
+
+		if (len > DMA_MAX_SIZE)
+			j += DIV_ROUND_UP(len, DMA_MAX_SIZE & ~DMA_ALIGN) - 1;
+		if (addr & DMA_ALIGN) {
+			dev_dbg(chan->device->dev, "vchan %p: bad buffer alignment: %pad\n",
+				&c->vc, &addr);
+			return NULL;
+		}
+	}
+
+	txd = kzalloc(struct_size(txd, sg, j), GFP_ATOMIC);
+	if (!txd) {
+		dev_dbg(chan->device->dev, "vchan %p: kzalloc failed\n", &c->vc);
+		return NULL;
+	}
+
+	j = 0;
+	for_each_sg(sg, sgent, sglen, i) {
+		dma_addr_t addr = sg_dma_address(sgent);
+		unsigned len = sg_dma_len(sgent);
+
+		size += len;
+
+		do {
+			unsigned tlen = len;
+
+			/*
+			 * Check whether the transfer will fit.  If not, try
+			 * to split the transfer up such that we end up with
+			 * equal chunks - but make sure that we preserve the
+			 * alignment.  This avoids small segments.
+			 */
+			if (tlen > DMA_MAX_SIZE) {
+				unsigned mult = DIV_ROUND_UP(tlen,
+					DMA_MAX_SIZE & ~DMA_ALIGN);
+
+				tlen = (tlen / mult) & ~DMA_ALIGN;
+			}
+
+			txd->sg[j].addr = addr;
+			txd->sg[j].len = tlen;
+
+			addr += tlen;
+			len -= tlen;
+			j++;
+		} while (len);
+	}
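+	/*
+	 * For example (illustrative): a 0x3000 byte segment above exceeds
+	 * DMA_MAX_SIZE and ends up as two 0x1800 byte chunks, each length
+	 * a multiple of four so the alignment is preserved.
+	 */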
+
+	txd->ddar = c->ddar;
+	txd->size = size;
+	txd->sglen = j;
+
+	dev_dbg(chan->device->dev, "vchan %p: txd %p: size %zu nr %u\n",
+		&c->vc, &txd->vd, txd->size, txd->sglen);
+
+	return vchan_tx_prep(&c->vc, &txd->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *sa11x0_dma_prep_dma_cyclic(
+	struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period,
+	enum dma_transfer_direction dir, unsigned long flags)
+{
+	struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+	struct sa11x0_dma_desc *txd;
+	unsigned i, j, k, sglen, sgperiod;
+
+	/* SA11x0 channels can only operate in their native direction */
+	if (dir != (c->ddar & DDAR_RW ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV)) {
+		dev_err(chan->device->dev, "vchan %p: bad DMA direction: DDAR:%08x dir:%u\n",
+			&c->vc, c->ddar, dir);
+		return NULL;
+	}
+
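+	/*
+	 * A period may need to be split into several hardware sg entries
+	 * when it exceeds the controller's maximum transfer size.
+	 */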
+	sgperiod = DIV_ROUND_UP(period, DMA_MAX_SIZE & ~DMA_ALIGN);
+	sglen = size * sgperiod / period;
+
+	/* Do not allow zero-sized txds */
+	if (sglen == 0)
+		return NULL;
+
+	txd = kzalloc(struct_size(txd, sg, sglen), GFP_ATOMIC);
+	if (!txd) {
+		dev_dbg(chan->device->dev, "vchan %p: kzalloc failed\n", &c->vc);
+		return NULL;
+	}
+
+	for (i = k = 0; i < size / period; i++) {
+		size_t tlen, len = period;
+
+		for (j = 0; j < sgperiod; j++, k++) {
+			tlen = len;
+
+			if (tlen > DMA_MAX_SIZE) {
+				unsigned mult = DIV_ROUND_UP(tlen, DMA_MAX_SIZE & ~DMA_ALIGN);
+				tlen = (tlen / mult) & ~DMA_ALIGN;
+			}
+
+			txd->sg[k].addr = addr;
+			txd->sg[k].len = tlen;
+			addr += tlen;
+			len -= tlen;
+		}
+
+		WARN_ON(len != 0);
+	}
+
+	WARN_ON(k != sglen);
+
+	txd->ddar = c->ddar;
+	txd->size = size;
+	txd->sglen = sglen;
+	txd->cyclic = 1;
+	txd->period = sgperiod;
+
+	return vchan_tx_prep(&c->vc, &txd->vd, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+}
+
+static int sa11x0_dma_device_config(struct dma_chan *chan,
+				    struct dma_slave_config *cfg)
+{
+	struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+	u32 ddar = c->ddar & ((0xf << 4) | DDAR_RW);
+	dma_addr_t addr;
+	enum dma_slave_buswidth width;
+	u32 maxburst;
+
+	if (ddar & DDAR_RW) {
+		addr = cfg->src_addr;
+		width = cfg->src_addr_width;
+		maxburst = cfg->src_maxburst;
+	} else {
+		addr = cfg->dst_addr;
+		width = cfg->dst_addr_width;
+		maxburst = cfg->dst_maxburst;
+	}
+
+	if ((width != DMA_SLAVE_BUSWIDTH_1_BYTE &&
+	     width != DMA_SLAVE_BUSWIDTH_2_BYTES) ||
+	    (maxburst != 4 && maxburst != 8))
+		return -EINVAL;
+
+	if (width == DMA_SLAVE_BUSWIDTH_2_BYTES)
+		ddar |= DDAR_DW;
+	if (maxburst == 8)
+		ddar |= DDAR_BS;
+
+	dev_dbg(c->vc.chan.device->dev, "vchan %p: dma_slave_config addr %pad width %u burst %u\n",
+		&c->vc, &addr, width, maxburst);
+
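+	/* Fold the device port address into the two split DDAR address fields. */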
+	c->ddar = ddar | (addr & 0xf0000000) | (addr & 0x003ffffc) << 6;
+
+	return 0;
+}
+
+static int sa11x0_dma_device_pause(struct dma_chan *chan)
+{
+	struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+	struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device);
+	struct sa11x0_dma_phy *p;
+	LIST_HEAD(head);
+	unsigned long flags;
+
+	dev_dbg(d->slave.dev, "vchan %p: pause\n", &c->vc);
+	spin_lock_irqsave(&c->vc.lock, flags);
+	if (c->status == DMA_IN_PROGRESS) {
+		c->status = DMA_PAUSED;
+
+		p = c->phy;
+		if (p) {
+			writel(DCSR_RUN | DCSR_IE, p->base + DMA_DCSR_C);
+		} else {
+			spin_lock(&d->lock);
+			list_del_init(&c->node);
+			spin_unlock(&d->lock);
+		}
+	}
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+
+	return 0;
+}
+
+static int sa11x0_dma_device_resume(struct dma_chan *chan)
+{
+	struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+	struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device);
+	struct sa11x0_dma_phy *p;
+	LIST_HEAD(head);
+	unsigned long flags;
+
+	dev_dbg(d->slave.dev, "vchan %p: resume\n", &c->vc);
+	spin_lock_irqsave(&c->vc.lock, flags);
+	if (c->status == DMA_PAUSED) {
+		c->status = DMA_IN_PROGRESS;
+
+		p = c->phy;
+		if (p) {
+			writel(DCSR_RUN | DCSR_IE, p->base + DMA_DCSR_S);
+		} else if (!list_empty(&c->vc.desc_issued)) {
+			spin_lock(&d->lock);
+			list_add_tail(&c->node, &d->chan_pending);
+			spin_unlock(&d->lock);
+		}
+	}
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+
+	return 0;
+}
+
+static int sa11x0_dma_device_terminate_all(struct dma_chan *chan)
+{
+	struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+	struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device);
+	struct sa11x0_dma_phy *p;
+	LIST_HEAD(head);
+	unsigned long flags;
+
+	dev_dbg(d->slave.dev, "vchan %p: terminate all\n", &c->vc);
+	/* Clear the tx descriptor lists */
+	spin_lock_irqsave(&c->vc.lock, flags);
+	vchan_get_all_descriptors(&c->vc, &head);
+
+	p = c->phy;
+	if (p) {
+		dev_dbg(d->slave.dev, "pchan %u: terminating\n", p->num);
+		/* vchan is assigned to a pchan - stop the channel */
+		writel(DCSR_RUN | DCSR_IE |
+		       DCSR_STRTA | DCSR_DONEA |
+		       DCSR_STRTB | DCSR_DONEB,
+		       p->base + DMA_DCSR_C);
+
+		if (p->txd_load) {
+			if (p->txd_load != p->txd_done)
+				list_add_tail(&p->txd_load->vd.node, &head);
+			p->txd_load = NULL;
+		}
+		if (p->txd_done) {
+			list_add_tail(&p->txd_done->vd.node, &head);
+			p->txd_done = NULL;
+		}
+		c->phy = NULL;
+		spin_lock(&d->lock);
+		p->vchan = NULL;
+		spin_unlock(&d->lock);
+		tasklet_schedule(&d->task);
+	}
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+	vchan_dma_desc_free_list(&c->vc, &head);
+
+	return 0;
+}
+
+struct sa11x0_dma_channel_desc {
+	u32 ddar;
+	const char *name;
+};
+
+#define CD(d1, d2) { .ddar = DDAR_##d1 | d2, .name = #d1 }
+static const struct sa11x0_dma_channel_desc chan_desc[] = {
+	CD(Ser0UDCTr, 0),
+	CD(Ser0UDCRc, DDAR_RW),
+	CD(Ser1SDLCTr, 0),
+	CD(Ser1SDLCRc, DDAR_RW),
+	CD(Ser1UARTTr, 0),
+	CD(Ser1UARTRc, DDAR_RW),
+	CD(Ser2ICPTr, 0),
+	CD(Ser2ICPRc, DDAR_RW),
+	CD(Ser3UARTTr, 0),
+	CD(Ser3UARTRc, DDAR_RW),
+	CD(Ser4MCP0Tr, 0),
+	CD(Ser4MCP0Rc, DDAR_RW),
+	CD(Ser4MCP1Tr, 0),
+	CD(Ser4MCP1Rc, DDAR_RW),
+	CD(Ser4SSPTr, 0),
+	CD(Ser4SSPRc, DDAR_RW),
+};
+
+static const struct dma_slave_map sa11x0_dma_map[] = {
+	{ "sa11x0-ir", "tx", "Ser2ICPTr" },
+	{ "sa11x0-ir", "rx", "Ser2ICPRc" },
+	{ "sa11x0-ssp", "tx", "Ser4SSPTr" },
+	{ "sa11x0-ssp", "rx", "Ser4SSPRc" },
+};
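+
+/*
+ * Illustrative sketch (not part of the original driver): with the slave
+ * map above, a client driver such as "sa11x0-ssp" can request its channel
+ * by name through the generic API, e.g.:
+ *
+ *	struct dma_chan *chan = dma_request_chan(dev, "rx");
+ *
+ * where "dev" is the client's own struct device.
+ */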
+
+static int sa11x0_dma_init_dmadev(struct dma_device *dmadev,
+	struct device *dev)
+{
+	unsigned i;
+
+	INIT_LIST_HEAD(&dmadev->channels);
+	dmadev->dev = dev;
+	dmadev->device_free_chan_resources = sa11x0_dma_free_chan_resources;
+	dmadev->device_config = sa11x0_dma_device_config;
+	dmadev->device_pause = sa11x0_dma_device_pause;
+	dmadev->device_resume = sa11x0_dma_device_resume;
+	dmadev->device_terminate_all = sa11x0_dma_device_terminate_all;
+	dmadev->device_tx_status = sa11x0_dma_tx_status;
+	dmadev->device_issue_pending = sa11x0_dma_issue_pending;
+
+	for (i = 0; i < ARRAY_SIZE(chan_desc); i++) {
+		struct sa11x0_dma_chan *c;
+
+		c = kzalloc(sizeof(*c), GFP_KERNEL);
+		if (!c) {
+			dev_err(dev, "no memory for channel %u\n", i);
+			return -ENOMEM;
+		}
+
+		c->status = DMA_IN_PROGRESS;
+		c->ddar = chan_desc[i].ddar;
+		c->name = chan_desc[i].name;
+		INIT_LIST_HEAD(&c->node);
+
+		c->vc.desc_free = sa11x0_dma_free_desc;
+		vchan_init(&c->vc, dmadev);
+	}
+
+	return dma_async_device_register(dmadev);
+}
+
+static int sa11x0_dma_request_irq(struct platform_device *pdev, int nr,
+	void *data)
+{
+	int irq = platform_get_irq(pdev, nr);
+
+	if (irq <= 0)
+		return -ENXIO;
+
+	return request_irq(irq, sa11x0_dma_irq, 0, dev_name(&pdev->dev), data);
+}
+
+static void sa11x0_dma_free_irq(struct platform_device *pdev, int nr,
+	void *data)
+{
+	int irq = platform_get_irq(pdev, nr);
+	if (irq > 0)
+		free_irq(irq, data);
+}
+
+static void sa11x0_dma_free_channels(struct dma_device *dmadev)
+{
+	struct sa11x0_dma_chan *c, *cn;
+
+	list_for_each_entry_safe(c, cn, &dmadev->channels, vc.chan.device_node) {
+		list_del(&c->vc.chan.device_node);
+		tasklet_kill(&c->vc.task);
+		kfree(c);
+	}
+}
+
+static int sa11x0_dma_probe(struct platform_device *pdev)
+{
+	struct sa11x0_dma_dev *d;
+	struct resource *res;
+	unsigned i;
+	int ret;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENXIO;
+
+	d = kzalloc(sizeof(*d), GFP_KERNEL);
+	if (!d) {
+		ret = -ENOMEM;
+		goto err_alloc;
+	}
+
+	spin_lock_init(&d->lock);
+	INIT_LIST_HEAD(&d->chan_pending);
+
+	d->slave.filter.fn = sa11x0_dma_filter_fn;
+	d->slave.filter.mapcnt = ARRAY_SIZE(sa11x0_dma_map);
+	d->slave.filter.map = sa11x0_dma_map;
+
+	d->base = ioremap(res->start, resource_size(res));
+	if (!d->base) {
+		ret = -ENOMEM;
+		goto err_ioremap;
+	}
+
+	tasklet_init(&d->task, sa11x0_dma_tasklet, (unsigned long)d);
+
+	for (i = 0; i < NR_PHY_CHAN; i++) {
+		struct sa11x0_dma_phy *p = &d->phy[i];
+
+		p->dev = d;
+		p->num = i;
+		p->base = d->base + i * DMA_SIZE;
+		writel_relaxed(DCSR_RUN | DCSR_IE | DCSR_ERROR |
+			DCSR_DONEA | DCSR_STRTA | DCSR_DONEB | DCSR_STRTB,
+			p->base + DMA_DCSR_C);
+		writel_relaxed(0, p->base + DMA_DDAR);
+
+		ret = sa11x0_dma_request_irq(pdev, i, p);
+		if (ret) {
+			while (i) {
+				i--;
+				sa11x0_dma_free_irq(pdev, i, &d->phy[i]);
+			}
+			goto err_irq;
+		}
+	}
+
+	dma_cap_set(DMA_SLAVE, d->slave.cap_mask);
+	dma_cap_set(DMA_CYCLIC, d->slave.cap_mask);
+	d->slave.device_prep_slave_sg = sa11x0_dma_prep_slave_sg;
+	d->slave.device_prep_dma_cyclic = sa11x0_dma_prep_dma_cyclic;
+	d->slave.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	d->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+	d->slave.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+				   BIT(DMA_SLAVE_BUSWIDTH_2_BYTES);
+	d->slave.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+				   BIT(DMA_SLAVE_BUSWIDTH_2_BYTES);
+	ret = sa11x0_dma_init_dmadev(&d->slave, &pdev->dev);
+	if (ret) {
+		dev_warn(d->slave.dev, "failed to register slave async device: %d\n",
+			ret);
+		goto err_slave_reg;
+	}
+
+	platform_set_drvdata(pdev, d);
+	return 0;
+
+ err_slave_reg:
+	sa11x0_dma_free_channels(&d->slave);
+	for (i = 0; i < NR_PHY_CHAN; i++)
+		sa11x0_dma_free_irq(pdev, i, &d->phy[i]);
+ err_irq:
+	tasklet_kill(&d->task);
+	iounmap(d->base);
+ err_ioremap:
+	kfree(d);
+ err_alloc:
+	return ret;
+}
+
+static int sa11x0_dma_remove(struct platform_device *pdev)
+{
+	struct sa11x0_dma_dev *d = platform_get_drvdata(pdev);
+	unsigned pch;
+
+	dma_async_device_unregister(&d->slave);
+
+	sa11x0_dma_free_channels(&d->slave);
+	for (pch = 0; pch < NR_PHY_CHAN; pch++)
+		sa11x0_dma_free_irq(pdev, pch, &d->phy[pch]);
+	tasklet_kill(&d->task);
+	iounmap(d->base);
+	kfree(d);
+
+	return 0;
+}
+
+static int sa11x0_dma_suspend(struct device *dev)
+{
+	struct sa11x0_dma_dev *d = dev_get_drvdata(dev);
+	unsigned pch;
+
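+	/*
+	 * Stop each physical channel and save enough state (buffer
+	 * addresses, transfer counts and start bits) to reprogram it on
+	 * resume.
+	 */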
+	for (pch = 0; pch < NR_PHY_CHAN; pch++) {
+		struct sa11x0_dma_phy *p = &d->phy[pch];
+		u32 dcsr, saved_dcsr;
+
+		dcsr = saved_dcsr = readl_relaxed(p->base + DMA_DCSR_R);
+		if (dcsr & DCSR_RUN) {
+			writel(DCSR_RUN | DCSR_IE, p->base + DMA_DCSR_C);
+			dcsr = readl_relaxed(p->base + DMA_DCSR_R);
+		}
+
+		saved_dcsr &= DCSR_RUN | DCSR_IE;
+		if (dcsr & DCSR_BIU) {
+			p->dbs[0] = readl_relaxed(p->base + DMA_DBSB);
+			p->dbt[0] = readl_relaxed(p->base + DMA_DBTB);
+			p->dbs[1] = readl_relaxed(p->base + DMA_DBSA);
+			p->dbt[1] = readl_relaxed(p->base + DMA_DBTA);
+			saved_dcsr |= (dcsr & DCSR_STRTA ? DCSR_STRTB : 0) |
+				      (dcsr & DCSR_STRTB ? DCSR_STRTA : 0);
+		} else {
+			p->dbs[0] = readl_relaxed(p->base + DMA_DBSA);
+			p->dbt[0] = readl_relaxed(p->base + DMA_DBTA);
+			p->dbs[1] = readl_relaxed(p->base + DMA_DBSB);
+			p->dbt[1] = readl_relaxed(p->base + DMA_DBTB);
+			saved_dcsr |= dcsr & (DCSR_STRTA | DCSR_STRTB);
+		}
+		p->dcsr = saved_dcsr;
+
+		writel(DCSR_STRTA | DCSR_STRTB, p->base + DMA_DCSR_C);
+	}
+
+	return 0;
+}
+
+static int sa11x0_dma_resume(struct device *dev)
+{
+	struct sa11x0_dma_dev *d = dev_get_drvdata(dev);
+	unsigned pch;
+
+	for (pch = 0; pch < NR_PHY_CHAN; pch++) {
+		struct sa11x0_dma_phy *p = &d->phy[pch];
+		struct sa11x0_dma_desc *txd = NULL;
+		u32 dcsr = readl_relaxed(p->base + DMA_DCSR_R);
+
+		WARN_ON(dcsr & (DCSR_BIU | DCSR_STRTA | DCSR_STRTB | DCSR_RUN));
+
+		if (p->txd_done)
+			txd = p->txd_done;
+		else if (p->txd_load)
+			txd = p->txd_load;
+
+		if (!txd)
+			continue;
+
+		writel_relaxed(txd->ddar, p->base + DMA_DDAR);
+
+		writel_relaxed(p->dbs[0], p->base + DMA_DBSA);
+		writel_relaxed(p->dbt[0], p->base + DMA_DBTA);
+		writel_relaxed(p->dbs[1], p->base + DMA_DBSB);
+		writel_relaxed(p->dbt[1], p->base + DMA_DBTB);
+		writel_relaxed(p->dcsr, p->base + DMA_DCSR_S);
+	}
+
+	return 0;
+}
+
+static const struct dev_pm_ops sa11x0_dma_pm_ops = {
+	.suspend_noirq = sa11x0_dma_suspend,
+	.resume_noirq = sa11x0_dma_resume,
+	.freeze_noirq = sa11x0_dma_suspend,
+	.thaw_noirq = sa11x0_dma_resume,
+	.poweroff_noirq = sa11x0_dma_suspend,
+	.restore_noirq = sa11x0_dma_resume,
+};
+
+static struct platform_driver sa11x0_dma_driver = {
+	.driver = {
+		.name	= "sa11x0-dma",
+		.pm	= &sa11x0_dma_pm_ops,
+	},
+	.probe		= sa11x0_dma_probe,
+	.remove		= sa11x0_dma_remove,
+};
+
+bool sa11x0_dma_filter_fn(struct dma_chan *chan, void *param)
+{
+	if (chan->device->dev->driver == &sa11x0_dma_driver.driver) {
+		struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
+		const char *p = param;
+
+		return !strcmp(c->name, p);
+	}
+	return false;
+}
+EXPORT_SYMBOL(sa11x0_dma_filter_fn);
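+
+/*
+ * Illustrative usage sketch (not part of the original driver): the filter
+ * matches on the channel names listed in chan_desc[], so a client could
+ * request, say, the SSP transmit channel with:
+ *
+ *	dma_cap_mask_t mask;
+ *	struct dma_chan *chan;
+ *
+ *	dma_cap_zero(mask);
+ *	dma_cap_set(DMA_SLAVE, mask);
+ *	chan = dma_request_channel(mask, sa11x0_dma_filter_fn, "Ser4SSPTr");
+ */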
+
+static int __init sa11x0_dma_init(void)
+{
+	return platform_driver_register(&sa11x0_dma_driver);
+}
+subsys_initcall(sa11x0_dma_init);
+
+static void __exit sa11x0_dma_exit(void)
+{
+	platform_driver_unregister(&sa11x0_dma_driver);
+}
+module_exit(sa11x0_dma_exit);
+
+MODULE_AUTHOR("Russell King");
+MODULE_DESCRIPTION("SA-11x0 DMA driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:sa11x0-dma");
diff --git a/drivers/dma/sh/Kconfig b/drivers/dma/sh/Kconfig
new file mode 100644
index 0000000..6e0685f
--- /dev/null
+++ b/drivers/dma/sh/Kconfig
@@ -0,0 +1,63 @@
+#
+# DMA engine configuration for sh
+#
+
+config RENESAS_DMA
+	bool
+	select DMA_ENGINE
+
+#
+# DMA Engine Helpers
+#
+
+config SH_DMAE_BASE
+	bool "Renesas SuperH DMA Engine support"
+	depends on SUPERH || ARCH_RENESAS || COMPILE_TEST
+	depends on !SUPERH || SH_DMA
+	depends on !SH_DMA_API
+	default y
+	select RENESAS_DMA
+	help
+	  Enable support for the Renesas SuperH DMA controllers.
+
+#
+# DMA Controllers
+#
+
+config SH_DMAE
+	tristate "Renesas SuperH DMAC support"
+	depends on SH_DMAE_BASE
+	help
+	  Enable support for the Renesas SuperH DMA controllers.
+
+if SH_DMAE
+
+config SH_DMAE_R8A73A4
+	def_bool y
+	depends on ARCH_R8A73A4
+	depends on OF
+
+endif
+
+config RCAR_DMAC
+	tristate "Renesas R-Car Gen2 DMA Controller"
+	depends on ARCH_RENESAS || COMPILE_TEST
+	select RENESAS_DMA
+	help
+	  This driver supports the general purpose DMA controller found in the
+	  Renesas R-Car second generation SoCs.
+
+config RENESAS_USB_DMAC
+	tristate "Renesas USB-DMA Controller"
+	depends on ARCH_RENESAS || COMPILE_TEST
+	select RENESAS_DMA
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  This driver supports the USB-DMA controller found in the Renesas
+	  SoCs.
+
+config SUDMAC
+	tristate "Renesas SUDMAC support"
+	depends on SH_DMAE_BASE
+	help
+	  Enable support for the Renesas SUDMAC controllers.
diff --git a/drivers/dma/sh/Makefile b/drivers/dma/sh/Makefile
new file mode 100644
index 0000000..7d7c949
--- /dev/null
+++ b/drivers/dma/sh/Makefile
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# DMA Engine Helpers
+#
+
+obj-$(CONFIG_SH_DMAE_BASE) += shdma-base.o shdma-of.o
+
+#
+# DMA Controllers
+#
+
+shdma-y := shdmac.o
+shdma-$(CONFIG_SH_DMAE_R8A73A4) += shdma-r8a73a4.o
+shdma-objs := $(shdma-y)
+obj-$(CONFIG_SH_DMAE) += shdma.o
+
+obj-$(CONFIG_RCAR_DMAC) += rcar-dmac.o
+obj-$(CONFIG_RENESAS_USB_DMAC) += usb-dmac.o
+obj-$(CONFIG_SUDMAC) += sudmac.o
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
new file mode 100644
index 0000000..48ee35e
--- /dev/null
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -0,0 +1,1939 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Renesas R-Car Gen2 DMA Controller Driver
+ *
+ * Copyright (C) 2014 Renesas Electronics Inc.
+ *
+ * Author: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "../dmaengine.h"
+
+/*
+ * struct rcar_dmac_xfer_chunk - Descriptor for a hardware transfer
+ * @node: entry in the parent's chunks list
+ * @src_addr: device source address
+ * @dst_addr: device destination address
+ * @size: transfer size in bytes
+ */
+struct rcar_dmac_xfer_chunk {
+	struct list_head node;
+
+	dma_addr_t src_addr;
+	dma_addr_t dst_addr;
+	u32 size;
+};
+
+/*
+ * struct rcar_dmac_hw_desc - Hardware descriptor for a transfer chunk
+ * @sar: value of the SAR register (source address)
+ * @dar: value of the DAR register (destination address)
+ * @tcr: value of the TCR register (transfer count)
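+ * @reserved: padding rounding the descriptor entry up to 16 bytes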
+ */
+struct rcar_dmac_hw_desc {
+	u32 sar;
+	u32 dar;
+	u32 tcr;
+	u32 reserved;
+} __attribute__((__packed__));
+
+/*
+ * struct rcar_dmac_desc - R-Car Gen2 DMA Transfer Descriptor
+ * @async_tx: base DMA asynchronous transaction descriptor
+ * @direction: direction of the DMA transfer
+ * @xfer_shift: log2 of the transfer size
+ * @chcr: value of the channel configuration register for this transfer
+ * @node: entry in the channel's descriptors lists
+ * @chunks: list of transfer chunks for this transfer
+ * @running: the transfer chunk being currently processed
+ * @nchunks: number of transfer chunks for this transfer
+ * @hwdescs.use: whether the transfer descriptor uses hardware descriptors
+ * @hwdescs.mem: hardware descriptors memory for the transfer
+ * @hwdescs.dma: device address of the hardware descriptors memory
+ * @hwdescs.size: size of the hardware descriptors in bytes
+ * @size: transfer size in bytes
+ * @cyclic: when set indicates that the DMA transfer is cyclic
+ */
+struct rcar_dmac_desc {
+	struct dma_async_tx_descriptor async_tx;
+	enum dma_transfer_direction direction;
+	unsigned int xfer_shift;
+	u32 chcr;
+
+	struct list_head node;
+	struct list_head chunks;
+	struct rcar_dmac_xfer_chunk *running;
+	unsigned int nchunks;
+
+	struct {
+		bool use;
+		struct rcar_dmac_hw_desc *mem;
+		dma_addr_t dma;
+		size_t size;
+	} hwdescs;
+
+	unsigned int size;
+	bool cyclic;
+};
+
+#define to_rcar_dmac_desc(d)	container_of(d, struct rcar_dmac_desc, async_tx)
+
+/*
+ * struct rcar_dmac_desc_page - One page worth of descriptors
+ * @node: entry in the channel's pages list
+ * @descs: array of DMA descriptors
+ * @chunks: array of transfer chunk descriptors
+ */
+struct rcar_dmac_desc_page {
+	struct list_head node;
+
+	union {
+		struct rcar_dmac_desc descs[0];
+		struct rcar_dmac_xfer_chunk chunks[0];
+	};
+};
+
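+/*
+ * Each allocated page holds either DMA descriptors or transfer chunk
+ * descriptors, never both; the macros below give the number of entries of
+ * each kind that fit in a single page.
+ */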
+#define RCAR_DMAC_DESCS_PER_PAGE					\
+	((PAGE_SIZE - offsetof(struct rcar_dmac_desc_page, descs)) /	\
+	sizeof(struct rcar_dmac_desc))
+#define RCAR_DMAC_XFER_CHUNKS_PER_PAGE					\
+	((PAGE_SIZE - offsetof(struct rcar_dmac_desc_page, chunks)) /	\
+	sizeof(struct rcar_dmac_xfer_chunk))
+
+/*
+ * struct rcar_dmac_chan_slave - Slave configuration
+ * @slave_addr: slave memory address
+ * @xfer_size: size (in bytes) of hardware transfers
+ */
+struct rcar_dmac_chan_slave {
+	phys_addr_t slave_addr;
+	unsigned int xfer_size;
+};
+
+/*
+ * struct rcar_dmac_chan_map - Map of slave device phys to dma address
+ * @addr: slave dma address
+ * @dir: direction of mapping
+ * @slave: slave configuration that is mapped
+ */
+struct rcar_dmac_chan_map {
+	dma_addr_t addr;
+	enum dma_data_direction dir;
+	struct rcar_dmac_chan_slave slave;
+};
+
+/*
+ * struct rcar_dmac_chan - R-Car Gen2 DMA Controller Channel
+ * @chan: base DMA channel object
+ * @iomem: channel I/O memory base
+ * @index: index of this channel in the controller
+ * @irq: channel IRQ
+ * @src: slave memory address and size on the source side
+ * @dst: slave memory address and size on the destination side
+ * @mid_rid: hardware MID/RID for the DMA client using this channel
+ * @lock: protects the channel CHCR register and the desc members
+ * @desc.free: list of free descriptors
+ * @desc.pending: list of pending descriptors (submitted with tx_submit)
+ * @desc.active: list of active descriptors (activated with issue_pending)
+ * @desc.done: list of completed descriptors
+ * @desc.wait: list of descriptors waiting for an ack
+ * @desc.running: the descriptor being processed (a member of the active list)
+ * @desc.chunks_free: list of free transfer chunk descriptors
+ * @desc.pages: list of pages used by allocated descriptors
+ */
+struct rcar_dmac_chan {
+	struct dma_chan chan;
+	void __iomem *iomem;
+	unsigned int index;
+	int irq;
+
+	struct rcar_dmac_chan_slave src;
+	struct rcar_dmac_chan_slave dst;
+	struct rcar_dmac_chan_map map;
+	int mid_rid;
+
+	spinlock_t lock;
+
+	struct {
+		struct list_head free;
+		struct list_head pending;
+		struct list_head active;
+		struct list_head done;
+		struct list_head wait;
+		struct rcar_dmac_desc *running;
+
+		struct list_head chunks_free;
+
+		struct list_head pages;
+	} desc;
+};
+
+#define to_rcar_dmac_chan(c)	container_of(c, struct rcar_dmac_chan, chan)
+
+/*
+ * struct rcar_dmac - R-Car Gen2 DMA Controller
+ * @engine: base DMA engine object
+ * @dev: the hardware device
+ * @iomem: remapped I/O memory base
+ * @n_channels: number of available channels
+ * @channels: array of DMAC channels
+ * @modules: bitmask of client modules in use
+ */
+struct rcar_dmac {
+	struct dma_device engine;
+	struct device *dev;
+	void __iomem *iomem;
+
+	unsigned int n_channels;
+	struct rcar_dmac_chan *channels;
+
+	DECLARE_BITMAP(modules, 256);
+};
+
+#define to_rcar_dmac(d)		container_of(d, struct rcar_dmac, engine)
+
+/* -----------------------------------------------------------------------------
+ * Registers
+ */
+
+#define RCAR_DMAC_CHAN_OFFSET(i)	(0x8000 + 0x80 * (i))
+
+#define RCAR_DMAISTA			0x0020
+#define RCAR_DMASEC			0x0030
+#define RCAR_DMAOR			0x0060
+#define RCAR_DMAOR_PRI_FIXED		(0 << 8)
+#define RCAR_DMAOR_PRI_ROUND_ROBIN	(3 << 8)
+#define RCAR_DMAOR_AE			(1 << 2)
+#define RCAR_DMAOR_DME			(1 << 0)
+#define RCAR_DMACHCLR			0x0080
+#define RCAR_DMADPSEC			0x00a0
+
+#define RCAR_DMASAR			0x0000
+#define RCAR_DMADAR			0x0004
+#define RCAR_DMATCR			0x0008
+#define RCAR_DMATCR_MASK		0x00ffffff
+#define RCAR_DMATSR			0x0028
+#define RCAR_DMACHCR			0x000c
+#define RCAR_DMACHCR_CAE		(1 << 31)
+#define RCAR_DMACHCR_CAIE		(1 << 30)
+#define RCAR_DMACHCR_DPM_DISABLED	(0 << 28)
+#define RCAR_DMACHCR_DPM_ENABLED	(1 << 28)
+#define RCAR_DMACHCR_DPM_REPEAT		(2 << 28)
+#define RCAR_DMACHCR_DPM_INFINITE	(3 << 28)
+#define RCAR_DMACHCR_RPT_SAR		(1 << 27)
+#define RCAR_DMACHCR_RPT_DAR		(1 << 26)
+#define RCAR_DMACHCR_RPT_TCR		(1 << 25)
+#define RCAR_DMACHCR_DPB		(1 << 22)
+#define RCAR_DMACHCR_DSE		(1 << 19)
+#define RCAR_DMACHCR_DSIE		(1 << 18)
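+/*
+ * The transfer size is encoded in two separate CHCR bit fields (bits 4:3 and
+ * bits 21:20), hence the split encoding of the TS_* values below.
+ */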
+#define RCAR_DMACHCR_TS_1B		((0 << 20) | (0 << 3))
+#define RCAR_DMACHCR_TS_2B		((0 << 20) | (1 << 3))
+#define RCAR_DMACHCR_TS_4B		((0 << 20) | (2 << 3))
+#define RCAR_DMACHCR_TS_16B		((0 << 20) | (3 << 3))
+#define RCAR_DMACHCR_TS_32B		((1 << 20) | (0 << 3))
+#define RCAR_DMACHCR_TS_64B		((1 << 20) | (1 << 3))
+#define RCAR_DMACHCR_TS_8B		((1 << 20) | (3 << 3))
+#define RCAR_DMACHCR_DM_FIXED		(0 << 14)
+#define RCAR_DMACHCR_DM_INC		(1 << 14)
+#define RCAR_DMACHCR_DM_DEC		(2 << 14)
+#define RCAR_DMACHCR_SM_FIXED		(0 << 12)
+#define RCAR_DMACHCR_SM_INC		(1 << 12)
+#define RCAR_DMACHCR_SM_DEC		(2 << 12)
+#define RCAR_DMACHCR_RS_AUTO		(4 << 8)
+#define RCAR_DMACHCR_RS_DMARS		(8 << 8)
+#define RCAR_DMACHCR_IE			(1 << 2)
+#define RCAR_DMACHCR_TE			(1 << 1)
+#define RCAR_DMACHCR_DE			(1 << 0)
+#define RCAR_DMATCRB			0x0018
+#define RCAR_DMATSRB			0x0038
+#define RCAR_DMACHCRB			0x001c
+#define RCAR_DMACHCRB_DCNT(n)		((n) << 24)
+#define RCAR_DMACHCRB_DPTR_MASK		(0xff << 16)
+#define RCAR_DMACHCRB_DPTR_SHIFT	16
+#define RCAR_DMACHCRB_DRST		(1 << 15)
+#define RCAR_DMACHCRB_DTS		(1 << 8)
+#define RCAR_DMACHCRB_SLM_NORMAL	(0 << 4)
+#define RCAR_DMACHCRB_SLM_CLK(n)	((8 | (n)) << 4)
+#define RCAR_DMACHCRB_PRI(n)		((n) << 0)
+#define RCAR_DMARS			0x0040
+#define RCAR_DMABUFCR			0x0048
+#define RCAR_DMABUFCR_MBU(n)		((n) << 16)
+#define RCAR_DMABUFCR_ULB(n)		((n) << 0)
+#define RCAR_DMADPBASE			0x0050
+#define RCAR_DMADPBASE_MASK		0xfffffff0
+#define RCAR_DMADPBASE_SEL		(1 << 0)
+#define RCAR_DMADPCR			0x0054
+#define RCAR_DMADPCR_DIPT(n)		((n) << 24)
+#define RCAR_DMAFIXSAR			0x0010
+#define RCAR_DMAFIXDAR			0x0014
+#define RCAR_DMAFIXDPBASE		0x0060
+
+/* Hardcode the MEMCPY transfer size to 4 bytes. */
+#define RCAR_DMAC_MEMCPY_XFER_SIZE	4
+
+/* -----------------------------------------------------------------------------
+ * Device access
+ */
+
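+/*
+ * DMAOR and the per-channel DMARS register are 16 bits wide while all other
+ * registers are accessed as 32-bit values, hence the mixed readw/writew and
+ * readl/writel accesses in the helpers below.
+ */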
+static void rcar_dmac_write(struct rcar_dmac *dmac, u32 reg, u32 data)
+{
+	if (reg == RCAR_DMAOR)
+		writew(data, dmac->iomem + reg);
+	else
+		writel(data, dmac->iomem + reg);
+}
+
+static u32 rcar_dmac_read(struct rcar_dmac *dmac, u32 reg)
+{
+	if (reg == RCAR_DMAOR)
+		return readw(dmac->iomem + reg);
+	else
+		return readl(dmac->iomem + reg);
+}
+
+static u32 rcar_dmac_chan_read(struct rcar_dmac_chan *chan, u32 reg)
+{
+	if (reg == RCAR_DMARS)
+		return readw(chan->iomem + reg);
+	else
+		return readl(chan->iomem + reg);
+}
+
+static void rcar_dmac_chan_write(struct rcar_dmac_chan *chan, u32 reg, u32 data)
+{
+	if (reg == RCAR_DMARS)
+		writew(data, chan->iomem + reg);
+	else
+		writel(data, chan->iomem + reg);
+}
+
+/* -----------------------------------------------------------------------------
+ * Initialization and configuration
+ */
+
+static bool rcar_dmac_chan_is_busy(struct rcar_dmac_chan *chan)
+{
+	u32 chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR);
+
+	return !!(chcr & (RCAR_DMACHCR_DE | RCAR_DMACHCR_TE));
+}
+
+static void rcar_dmac_chan_start_xfer(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_desc *desc = chan->desc.running;
+	u32 chcr = desc->chcr;
+
+	WARN_ON_ONCE(rcar_dmac_chan_is_busy(chan));
+
+	if (chan->mid_rid >= 0)
+		rcar_dmac_chan_write(chan, RCAR_DMARS, chan->mid_rid);
+
+	if (desc->hwdescs.use) {
+		struct rcar_dmac_xfer_chunk *chunk =
+			list_first_entry(&desc->chunks,
+					 struct rcar_dmac_xfer_chunk, node);
+
+		dev_dbg(chan->chan.device->dev,
+			"chan%u: queue desc %p: %u@%pad\n",
+			chan->index, desc, desc->nchunks, &desc->hwdescs.dma);
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		rcar_dmac_chan_write(chan, RCAR_DMAFIXSAR,
+				     chunk->src_addr >> 32);
+		rcar_dmac_chan_write(chan, RCAR_DMAFIXDAR,
+				     chunk->dst_addr >> 32);
+		rcar_dmac_chan_write(chan, RCAR_DMAFIXDPBASE,
+				     desc->hwdescs.dma >> 32);
+#endif
+		rcar_dmac_chan_write(chan, RCAR_DMADPBASE,
+				     (desc->hwdescs.dma & 0xfffffff0) |
+				     RCAR_DMADPBASE_SEL);
+		rcar_dmac_chan_write(chan, RCAR_DMACHCRB,
+				     RCAR_DMACHCRB_DCNT(desc->nchunks - 1) |
+				     RCAR_DMACHCRB_DRST);
+
+		/*
+		 * Errata: When descriptor memory is accessed through an IOMMU
+		 * the DMADAR register isn't initialized automatically from the
+		 * first descriptor at the beginning of the transfer by the
+		 * DMAC as it should be. Initialize it manually with the
+		 * destination address of the first chunk.
+		 */
+		rcar_dmac_chan_write(chan, RCAR_DMADAR,
+				     chunk->dst_addr & 0xffffffff);
+
+		/*
+		 * Program the descriptor stage interrupt to occur after the end
+		 * of the first stage.
+		 */
+		rcar_dmac_chan_write(chan, RCAR_DMADPCR, RCAR_DMADPCR_DIPT(1));
+
+		chcr |= RCAR_DMACHCR_RPT_SAR | RCAR_DMACHCR_RPT_DAR
+		     |  RCAR_DMACHCR_RPT_TCR | RCAR_DMACHCR_DPB;
+
+		/*
+		 * If the descriptor isn't cyclic enable normal descriptor mode
+		 * and the transfer completion interrupt.
+		 */
+		if (!desc->cyclic)
+			chcr |= RCAR_DMACHCR_DPM_ENABLED | RCAR_DMACHCR_IE;
+		/*
+		 * If the descriptor is cyclic and has a callback enable the
+		 * descriptor stage interrupt in infinite repeat mode.
+		 */
+		else if (desc->async_tx.callback)
+			chcr |= RCAR_DMACHCR_DPM_INFINITE | RCAR_DMACHCR_DSIE;
+		/*
+		 * Otherwise just select infinite repeat mode without any
+		 * interrupt.
+		 */
+		else
+			chcr |= RCAR_DMACHCR_DPM_INFINITE;
+	} else {
+		struct rcar_dmac_xfer_chunk *chunk = desc->running;
+
+		dev_dbg(chan->chan.device->dev,
+			"chan%u: queue chunk %p: %u@%pad -> %pad\n",
+			chan->index, chunk, chunk->size, &chunk->src_addr,
+			&chunk->dst_addr);
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		rcar_dmac_chan_write(chan, RCAR_DMAFIXSAR,
+				     chunk->src_addr >> 32);
+		rcar_dmac_chan_write(chan, RCAR_DMAFIXDAR,
+				     chunk->dst_addr >> 32);
+#endif
+		rcar_dmac_chan_write(chan, RCAR_DMASAR,
+				     chunk->src_addr & 0xffffffff);
+		rcar_dmac_chan_write(chan, RCAR_DMADAR,
+				     chunk->dst_addr & 0xffffffff);
+		rcar_dmac_chan_write(chan, RCAR_DMATCR,
+				     chunk->size >> desc->xfer_shift);
+
+		chcr |= RCAR_DMACHCR_DPM_DISABLED | RCAR_DMACHCR_IE;
+	}
+
+	rcar_dmac_chan_write(chan, RCAR_DMACHCR,
+			     chcr | RCAR_DMACHCR_DE | RCAR_DMACHCR_CAIE);
+}
+
+static int rcar_dmac_init(struct rcar_dmac *dmac)
+{
+	u16 dmaor;
+
+	/* Clear all channels and enable the DMAC globally. */
+	rcar_dmac_write(dmac, RCAR_DMACHCLR, GENMASK(dmac->n_channels - 1, 0));
+	rcar_dmac_write(dmac, RCAR_DMAOR,
+			RCAR_DMAOR_PRI_FIXED | RCAR_DMAOR_DME);
+
+	dmaor = rcar_dmac_read(dmac, RCAR_DMAOR);
+	if ((dmaor & (RCAR_DMAOR_AE | RCAR_DMAOR_DME)) != RCAR_DMAOR_DME) {
+		dev_warn(dmac->dev, "DMAOR initialization failed.\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptors submission
+ */
+
+static dma_cookie_t rcar_dmac_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct rcar_dmac_chan *chan = to_rcar_dmac_chan(tx->chan);
+	struct rcar_dmac_desc *desc = to_rcar_dmac_desc(tx);
+	unsigned long flags;
+	dma_cookie_t cookie;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	cookie = dma_cookie_assign(tx);
+
+	dev_dbg(chan->chan.device->dev, "chan%u: submit #%d@%p\n",
+		chan->index, tx->cookie, desc);
+
+	list_add_tail(&desc->node, &chan->desc.pending);
+	desc->running = list_first_entry(&desc->chunks,
+					 struct rcar_dmac_xfer_chunk, node);
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	return cookie;
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptors allocation and free
+ */
+
+/*
+ * rcar_dmac_desc_alloc - Allocate a page worth of DMA descriptors
+ * @chan: the DMA channel
+ * @gfp: allocation flags
+ */
+static int rcar_dmac_desc_alloc(struct rcar_dmac_chan *chan, gfp_t gfp)
+{
+	struct rcar_dmac_desc_page *page;
+	unsigned long flags;
+	LIST_HEAD(list);
+	unsigned int i;
+
+	page = (void *)get_zeroed_page(gfp);
+	if (!page)
+		return -ENOMEM;
+
+	for (i = 0; i < RCAR_DMAC_DESCS_PER_PAGE; ++i) {
+		struct rcar_dmac_desc *desc = &page->descs[i];
+
+		dma_async_tx_descriptor_init(&desc->async_tx, &chan->chan);
+		desc->async_tx.tx_submit = rcar_dmac_tx_submit;
+		INIT_LIST_HEAD(&desc->chunks);
+
+		list_add_tail(&desc->node, &list);
+	}
+
+	spin_lock_irqsave(&chan->lock, flags);
+	list_splice_tail(&list, &chan->desc.free);
+	list_add_tail(&page->node, &chan->desc.pages);
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	return 0;
+}
+
+/*
+ * rcar_dmac_desc_put - Release a DMA transfer descriptor
+ * @chan: the DMA channel
+ * @desc: the descriptor
+ *
+ * Put the descriptor and its transfer chunk descriptors back in the channel's
+ * free descriptors lists. The descriptor's chunks list will be reinitialized to
+ * an empty list as a result.
+ *
+ * The descriptor must have been removed from the channel's lists before calling
+ * this function.
+ */
+static void rcar_dmac_desc_put(struct rcar_dmac_chan *chan,
+			       struct rcar_dmac_desc *desc)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	list_splice_tail_init(&desc->chunks, &chan->desc.chunks_free);
+	list_add(&desc->node, &chan->desc.free);
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+static void rcar_dmac_desc_recycle_acked(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_desc *desc, *_desc;
+	unsigned long flags;
+	LIST_HEAD(list);
+
+	/*
+	 * We have to temporarily move all descriptors from the wait list to a
+	 * local list as iterating over the wait list, even with
+	 * list_for_each_entry_safe, isn't safe if we release the channel lock
+	 * around the rcar_dmac_desc_put() call.
+	 */
+	spin_lock_irqsave(&chan->lock, flags);
+	list_splice_init(&chan->desc.wait, &list);
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	list_for_each_entry_safe(desc, _desc, &list, node) {
+		if (async_tx_test_ack(&desc->async_tx)) {
+			list_del(&desc->node);
+			rcar_dmac_desc_put(chan, desc);
+		}
+	}
+
+	if (list_empty(&list))
+		return;
+
+	/* Put the remaining descriptors back in the wait list. */
+	spin_lock_irqsave(&chan->lock, flags);
+	list_splice(&list, &chan->desc.wait);
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/*
+ * rcar_dmac_desc_get - Allocate a descriptor for a DMA transfer
+ * @chan: the DMA channel
+ *
+ * Locking: This function must be called in a non-atomic context.
+ *
+ * Return: A pointer to the allocated descriptor or NULL if no descriptor can
+ * be allocated.
+ */
+static struct rcar_dmac_desc *rcar_dmac_desc_get(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_desc *desc;
+	unsigned long flags;
+	int ret;
+
+	/* Recycle acked descriptors before attempting allocation. */
+	rcar_dmac_desc_recycle_acked(chan);
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	while (list_empty(&chan->desc.free)) {
+		/*
+		 * No free descriptors, allocate a page worth of them and try
+		 * again, as someone else could race us to get the newly
+		 * allocated descriptors. If the allocation fails return an
+		 * error.
+		 */
+		spin_unlock_irqrestore(&chan->lock, flags);
+		ret = rcar_dmac_desc_alloc(chan, GFP_NOWAIT);
+		if (ret < 0)
+			return NULL;
+		spin_lock_irqsave(&chan->lock, flags);
+	}
+
+	desc = list_first_entry(&chan->desc.free, struct rcar_dmac_desc, node);
+	list_del(&desc->node);
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	return desc;
+}
+
+/*
+ * rcar_dmac_xfer_chunk_alloc - Allocate a page worth of transfer chunks
+ * @chan: the DMA channel
+ * @gfp: allocation flags
+ */
+static int rcar_dmac_xfer_chunk_alloc(struct rcar_dmac_chan *chan, gfp_t gfp)
+{
+	struct rcar_dmac_desc_page *page;
+	unsigned long flags;
+	LIST_HEAD(list);
+	unsigned int i;
+
+	page = (void *)get_zeroed_page(gfp);
+	if (!page)
+		return -ENOMEM;
+
+	for (i = 0; i < RCAR_DMAC_XFER_CHUNKS_PER_PAGE; ++i) {
+		struct rcar_dmac_xfer_chunk *chunk = &page->chunks[i];
+
+		list_add_tail(&chunk->node, &list);
+	}
+
+	spin_lock_irqsave(&chan->lock, flags);
+	list_splice_tail(&list, &chan->desc.chunks_free);
+	list_add_tail(&page->node, &chan->desc.pages);
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	return 0;
+}
+
+/*
+ * rcar_dmac_xfer_chunk_get - Allocate a transfer chunk for a DMA transfer
+ * @chan: the DMA channel
+ *
+ * Locking: This function must be called in a non-atomic context.
+ *
+ * Return: A pointer to the allocated transfer chunk descriptor or NULL if no
+ * descriptor can be allocated.
+ */
+static struct rcar_dmac_xfer_chunk *
+rcar_dmac_xfer_chunk_get(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_xfer_chunk *chunk;
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	while (list_empty(&chan->desc.chunks_free)) {
+		/*
+		 * No free chunks, allocate a page worth of them and try
+		 * again, as someone else could race us to get the newly
+		 * allocated chunks. If the allocation fails return an
+		 * error.
+		 */
+		spin_unlock_irqrestore(&chan->lock, flags);
+		ret = rcar_dmac_xfer_chunk_alloc(chan, GFP_NOWAIT);
+		if (ret < 0)
+			return NULL;
+		spin_lock_irqsave(&chan->lock, flags);
+	}
+
+	chunk = list_first_entry(&chan->desc.chunks_free,
+				 struct rcar_dmac_xfer_chunk, node);
+	list_del(&chunk->node);
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	return chunk;
+}
+
+static void rcar_dmac_realloc_hwdesc(struct rcar_dmac_chan *chan,
+				     struct rcar_dmac_desc *desc, size_t size)
+{
+	/*
+	 * dma_alloc_coherent() allocates memory in page size increments. To
+	 * avoid reallocating the hardware descriptors when the allocated size
+	 * wouldn't change, align the requested size to a multiple of the
+	 * page size.
+	 */
+	size = PAGE_ALIGN(size);
+
+	if (desc->hwdescs.size == size)
+		return;
+
+	if (desc->hwdescs.mem) {
+		dma_free_coherent(chan->chan.device->dev, desc->hwdescs.size,
+				  desc->hwdescs.mem, desc->hwdescs.dma);
+		desc->hwdescs.mem = NULL;
+		desc->hwdescs.size = 0;
+	}
+
+	if (!size)
+		return;
+
+	desc->hwdescs.mem = dma_alloc_coherent(chan->chan.device->dev, size,
+					       &desc->hwdescs.dma, GFP_NOWAIT);
+	if (!desc->hwdescs.mem)
+		return;
+
+	desc->hwdescs.size = size;
+}
+
+static int rcar_dmac_fill_hwdesc(struct rcar_dmac_chan *chan,
+				 struct rcar_dmac_desc *desc)
+{
+	struct rcar_dmac_xfer_chunk *chunk;
+	struct rcar_dmac_hw_desc *hwdesc;
+
+	rcar_dmac_realloc_hwdesc(chan, desc, desc->nchunks * sizeof(*hwdesc));
+
+	hwdesc = desc->hwdescs.mem;
+	if (!hwdesc)
+		return -ENOMEM;
+
+	list_for_each_entry(chunk, &desc->chunks, node) {
+		hwdesc->sar = chunk->src_addr;
+		hwdesc->dar = chunk->dst_addr;
+		hwdesc->tcr = chunk->size >> desc->xfer_shift;
+		hwdesc++;
+	}
+
+	return 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * Stop and reset
+ */
+static void rcar_dmac_chcr_de_barrier(struct rcar_dmac_chan *chan)
+{
+	u32 chcr;
+	unsigned int i;
+
+	/*
+	 * Ensure that the DE bit actually reads back as 0 after clearing it;
+	 * the channel may take some time to stop.
+	 */
+	for (i = 0; i < 1024; i++) {
+		chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR);
+		if (!(chcr & RCAR_DMACHCR_DE))
+			return;
+		udelay(1);
+	}
+
+	dev_err(chan->chan.device->dev, "CHCR DE check error\n");
+}
+
+static void rcar_dmac_clear_chcr_de(struct rcar_dmac_chan *chan)
+{
+	u32 chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR);
+
+	/* set DE=0 and flush remaining data */
+	rcar_dmac_chan_write(chan, RCAR_DMACHCR, (chcr & ~RCAR_DMACHCR_DE));
+
+	/* make sure all remaining data was flushed */
+	rcar_dmac_chcr_de_barrier(chan);
+}
+
+static void rcar_dmac_chan_halt(struct rcar_dmac_chan *chan)
+{
+	u32 chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR);
+
+	chcr &= ~(RCAR_DMACHCR_DSE | RCAR_DMACHCR_DSIE | RCAR_DMACHCR_IE |
+		  RCAR_DMACHCR_TE | RCAR_DMACHCR_DE |
+		  RCAR_DMACHCR_CAE | RCAR_DMACHCR_CAIE);
+	rcar_dmac_chan_write(chan, RCAR_DMACHCR, chcr);
+	rcar_dmac_chcr_de_barrier(chan);
+}
+
+static void rcar_dmac_chan_reinit(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_desc *desc, *_desc;
+	unsigned long flags;
+	LIST_HEAD(descs);
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	/* Move all non-free descriptors to the local lists. */
+	list_splice_init(&chan->desc.pending, &descs);
+	list_splice_init(&chan->desc.active, &descs);
+	list_splice_init(&chan->desc.done, &descs);
+	list_splice_init(&chan->desc.wait, &descs);
+
+	chan->desc.running = NULL;
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	list_for_each_entry_safe(desc, _desc, &descs, node) {
+		list_del(&desc->node);
+		rcar_dmac_desc_put(chan, desc);
+	}
+}
+
+static void rcar_dmac_stop_all_chan(struct rcar_dmac *dmac)
+{
+	unsigned int i;
+
+	/* Stop all channels. */
+	for (i = 0; i < dmac->n_channels; ++i) {
+		struct rcar_dmac_chan *chan = &dmac->channels[i];
+
+		/* Stop and reinitialize the channel. */
+		spin_lock_irq(&chan->lock);
+		rcar_dmac_chan_halt(chan);
+		spin_unlock_irq(&chan->lock);
+	}
+}
+
+static int rcar_dmac_chan_pause(struct dma_chan *chan)
+{
+	unsigned long flags;
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+
+	spin_lock_irqsave(&rchan->lock, flags);
+	rcar_dmac_clear_chcr_de(rchan);
+	spin_unlock_irqrestore(&rchan->lock, flags);
+
+	return 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptors preparation
+ */
+
+static void rcar_dmac_chan_configure_desc(struct rcar_dmac_chan *chan,
+					  struct rcar_dmac_desc *desc)
+{
+	static const u32 chcr_ts[] = {
+		RCAR_DMACHCR_TS_1B, RCAR_DMACHCR_TS_2B,
+		RCAR_DMACHCR_TS_4B, RCAR_DMACHCR_TS_8B,
+		RCAR_DMACHCR_TS_16B, RCAR_DMACHCR_TS_32B,
+		RCAR_DMACHCR_TS_64B,
+	};
+
+	unsigned int xfer_size;
+	u32 chcr;
+
+	switch (desc->direction) {
+	case DMA_DEV_TO_MEM:
+		chcr = RCAR_DMACHCR_DM_INC | RCAR_DMACHCR_SM_FIXED
+		     | RCAR_DMACHCR_RS_DMARS;
+		xfer_size = chan->src.xfer_size;
+		break;
+
+	case DMA_MEM_TO_DEV:
+		chcr = RCAR_DMACHCR_DM_FIXED | RCAR_DMACHCR_SM_INC
+		     | RCAR_DMACHCR_RS_DMARS;
+		xfer_size = chan->dst.xfer_size;
+		break;
+
+	case DMA_MEM_TO_MEM:
+	default:
+		chcr = RCAR_DMACHCR_DM_INC | RCAR_DMACHCR_SM_INC
+		     | RCAR_DMACHCR_RS_AUTO;
+		xfer_size = RCAR_DMAC_MEMCPY_XFER_SIZE;
+		break;
+	}
+
+	desc->xfer_shift = ilog2(xfer_size);
+	desc->chcr = chcr | chcr_ts[desc->xfer_shift];
+}
+
+/*
+ * rcar_dmac_chan_prep_sg - prepare transfer descriptors from an SG list
+ *
+ * Common routine for public (MEMCPY) and slave DMA. The MEMCPY case is also
+ * converted to scatter-gather to guarantee consistent locking and correct
+ * list manipulation. For slave DMA, direction carries the usual meaning: the
+ * SG list describes RAM and dev_addr contains the slave address, e.g. a FIFO
+ * I/O register. For MEMCPY, direction equals DMA_MEM_TO_MEM, and the SG list
+ * contains a single element that points at the source buffer.
+ */
+static struct dma_async_tx_descriptor *
+rcar_dmac_chan_prep_sg(struct rcar_dmac_chan *chan, struct scatterlist *sgl,
+		       unsigned int sg_len, dma_addr_t dev_addr,
+		       enum dma_transfer_direction dir, unsigned long dma_flags,
+		       bool cyclic)
+{
+	struct rcar_dmac_xfer_chunk *chunk;
+	struct rcar_dmac_desc *desc;
+	struct scatterlist *sg;
+	unsigned int nchunks = 0;
+	unsigned int max_chunk_size;
+	unsigned int full_size = 0;
+	bool cross_boundary = false;
+	unsigned int i;
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	u32 high_dev_addr;
+	u32 high_mem_addr;
+#endif
+
+	desc = rcar_dmac_desc_get(chan);
+	if (!desc)
+		return NULL;
+
+	desc->async_tx.flags = dma_flags;
+	desc->async_tx.cookie = -EBUSY;
+
+	desc->cyclic = cyclic;
+	desc->direction = dir;
+
+	rcar_dmac_chan_configure_desc(chan, desc);
+
+	max_chunk_size = RCAR_DMATCR_MASK << desc->xfer_shift;
+
+	/*
+	 * Allocate and fill the transfer chunk descriptors. We own the only
+	 * reference to the DMA descriptor, there's no need for locking.
+	 */
+	for_each_sg(sgl, sg, sg_len, i) {
+		dma_addr_t mem_addr = sg_dma_address(sg);
+		unsigned int len = sg_dma_len(sg);
+
+		full_size += len;
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		if (i == 0) {
+			high_dev_addr = dev_addr >> 32;
+			high_mem_addr = mem_addr >> 32;
+		}
+
+		if ((dev_addr >> 32 != high_dev_addr) ||
+		    (mem_addr >> 32 != high_mem_addr))
+			cross_boundary = true;
+#endif
+		while (len) {
+			unsigned int size = min(len, max_chunk_size);
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+			/*
+			 * Prevent individual transfers from crossing 4GB
+			 * boundaries.
+			 */
+			if (dev_addr >> 32 != (dev_addr + size - 1) >> 32) {
+				size = ALIGN(dev_addr, 1ULL << 32) - dev_addr;
+				cross_boundary = true;
+			}
+			if (mem_addr >> 32 != (mem_addr + size - 1) >> 32) {
+				size = ALIGN(mem_addr, 1ULL << 32) - mem_addr;
+				cross_boundary = true;
+			}
+#endif
+
+			chunk = rcar_dmac_xfer_chunk_get(chan);
+			if (!chunk) {
+				rcar_dmac_desc_put(chan, desc);
+				return NULL;
+			}
+
+			if (dir == DMA_DEV_TO_MEM) {
+				chunk->src_addr = dev_addr;
+				chunk->dst_addr = mem_addr;
+			} else {
+				chunk->src_addr = mem_addr;
+				chunk->dst_addr = dev_addr;
+			}
+
+			chunk->size = size;
+
+			dev_dbg(chan->chan.device->dev,
+				"chan%u: chunk %p/%p sgl %u@%p, %u/%u %pad -> %pad\n",
+				chan->index, chunk, desc, i, sg, size, len,
+				&chunk->src_addr, &chunk->dst_addr);
+
+			mem_addr += size;
+			if (dir == DMA_MEM_TO_MEM)
+				dev_addr += size;
+
+			len -= size;
+
+			list_add_tail(&chunk->node, &desc->chunks);
+			nchunks++;
+		}
+	}
+
+	desc->nchunks = nchunks;
+	desc->size = full_size;
+
+	/*
+	 * Use hardware descriptor lists if possible when more than one chunk
+	 * needs to be transferred (otherwise they don't make much sense).
+	 *
+	 * When hardware descriptors are used, the source and destination
+	 * addresses must be located in the same 4GiB region of the 40-bit
+	 * address space; cross_boundary records whether that constraint has
+	 * been violated.
+	 */
+	desc->hwdescs.use = !cross_boundary && nchunks > 1;
+	if (desc->hwdescs.use) {
+		if (rcar_dmac_fill_hwdesc(chan, desc) < 0)
+			desc->hwdescs.use = false;
+	}
+
+	return &desc->async_tx;
+}
+
+/* -----------------------------------------------------------------------------
+ * DMA engine operations
+ */
+
+static int rcar_dmac_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+	int ret;
+
+	INIT_LIST_HEAD(&rchan->desc.chunks_free);
+	INIT_LIST_HEAD(&rchan->desc.pages);
+
+	/* Preallocate descriptors. */
+	ret = rcar_dmac_xfer_chunk_alloc(rchan, GFP_KERNEL);
+	if (ret < 0)
+		return -ENOMEM;
+
+	ret = rcar_dmac_desc_alloc(rchan, GFP_KERNEL);
+	if (ret < 0)
+		return -ENOMEM;
+
+	return pm_runtime_get_sync(chan->device->dev);
+}
+
+static void rcar_dmac_free_chan_resources(struct dma_chan *chan)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+	struct rcar_dmac *dmac = to_rcar_dmac(chan->device);
+	struct rcar_dmac_chan_map *map = &rchan->map;
+	struct rcar_dmac_desc_page *page, *_page;
+	struct rcar_dmac_desc *desc;
+	LIST_HEAD(list);
+
+	/* Protect against ISR */
+	spin_lock_irq(&rchan->lock);
+	rcar_dmac_chan_halt(rchan);
+	spin_unlock_irq(&rchan->lock);
+
+	/*
+	 * Now no new interrupts will occur, but one might already be
+	 * running. Wait for it to finish before freeing resources.
+	 */
+	synchronize_irq(rchan->irq);
+
+	if (rchan->mid_rid >= 0) {
+		/* The caller is holding dma_list_mutex */
+		clear_bit(rchan->mid_rid, dmac->modules);
+		rchan->mid_rid = -EINVAL;
+	}
+
+	list_splice_init(&rchan->desc.free, &list);
+	list_splice_init(&rchan->desc.pending, &list);
+	list_splice_init(&rchan->desc.active, &list);
+	list_splice_init(&rchan->desc.done, &list);
+	list_splice_init(&rchan->desc.wait, &list);
+
+	rchan->desc.running = NULL;
+
+	list_for_each_entry(desc, &list, node)
+		rcar_dmac_realloc_hwdesc(rchan, desc, 0);
+
+	list_for_each_entry_safe(page, _page, &rchan->desc.pages, node) {
+		list_del(&page->node);
+		free_page((unsigned long)page);
+	}
+
+	/* Remove slave mapping if present. */
+	if (map->slave.xfer_size) {
+		dma_unmap_resource(chan->device->dev, map->addr,
+				   map->slave.xfer_size, map->dir, 0);
+		map->slave.xfer_size = 0;
+	}
+
+	pm_runtime_put(chan->device->dev);
+}
+
+static struct dma_async_tx_descriptor *
+rcar_dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest,
+			  dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+	struct scatterlist sgl;
+
+	if (!len)
+		return NULL;
+
+	sg_init_table(&sgl, 1);
+	sg_set_page(&sgl, pfn_to_page(PFN_DOWN(dma_src)), len,
+		    offset_in_page(dma_src));
+	sg_dma_address(&sgl) = dma_src;
+	sg_dma_len(&sgl) = len;
+
+	return rcar_dmac_chan_prep_sg(rchan, &sgl, 1, dma_dest,
+				      DMA_MEM_TO_MEM, flags, false);
+}
+
+static int rcar_dmac_map_slave_addr(struct dma_chan *chan,
+				    enum dma_transfer_direction dir)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+	struct rcar_dmac_chan_map *map = &rchan->map;
+	phys_addr_t dev_addr;
+	size_t dev_size;
+	enum dma_data_direction dev_dir;
+
+	if (dir == DMA_DEV_TO_MEM) {
+		dev_addr = rchan->src.slave_addr;
+		dev_size = rchan->src.xfer_size;
+		dev_dir = DMA_TO_DEVICE;
+	} else {
+		dev_addr = rchan->dst.slave_addr;
+		dev_size = rchan->dst.xfer_size;
+		dev_dir = DMA_FROM_DEVICE;
+	}
+
+	/* Reuse current map if possible. */
+	if (dev_addr == map->slave.slave_addr &&
+	    dev_size == map->slave.xfer_size &&
+	    dev_dir == map->dir)
+		return 0;
+
+	/* Remove old mapping if present. */
+	if (map->slave.xfer_size)
+		dma_unmap_resource(chan->device->dev, map->addr,
+				   map->slave.xfer_size, map->dir, 0);
+	map->slave.xfer_size = 0;
+
+	/* Create new slave address map. */
+	map->addr = dma_map_resource(chan->device->dev, dev_addr, dev_size,
+				     dev_dir, 0);
+
+	if (dma_mapping_error(chan->device->dev, map->addr)) {
+		dev_err(chan->device->dev,
+			"chan%u: failed to map %zx@%pap", rchan->index,
+			dev_size, &dev_addr);
+		return -EIO;
+	}
+
+	dev_dbg(chan->device->dev, "chan%u: map %zx@%pap to %pad dir: %s\n",
+		rchan->index, dev_size, &dev_addr, &map->addr,
+		dev_dir == DMA_TO_DEVICE ? "DMA_TO_DEVICE" : "DMA_FROM_DEVICE");
+
+	map->slave.slave_addr = dev_addr;
+	map->slave.xfer_size = dev_size;
+	map->dir = dev_dir;
+
+	return 0;
+}
+
+static struct dma_async_tx_descriptor *
+rcar_dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+			unsigned int sg_len, enum dma_transfer_direction dir,
+			unsigned long flags, void *context)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+
+	/* Someone calling slave DMA on a generic channel? */
+	if (rchan->mid_rid < 0 || !sg_len) {
+		dev_warn(chan->device->dev,
+			 "%s: bad parameter: len=%d, id=%d\n",
+			 __func__, sg_len, rchan->mid_rid);
+		return NULL;
+	}
+
+	if (rcar_dmac_map_slave_addr(chan, dir))
+		return NULL;
+
+	return rcar_dmac_chan_prep_sg(rchan, sgl, sg_len, rchan->map.addr,
+				      dir, flags, false);
+}
+
+#define RCAR_DMAC_MAX_SG_LEN	32
+
+static struct dma_async_tx_descriptor *
+rcar_dmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr,
+			  size_t buf_len, size_t period_len,
+			  enum dma_transfer_direction dir, unsigned long flags)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+	struct dma_async_tx_descriptor *desc;
+	struct scatterlist *sgl;
+	unsigned int sg_len;
+	unsigned int i;
+
+	/* Someone calling slave DMA on a generic channel? */
+	if (rchan->mid_rid < 0 || buf_len < period_len) {
+		dev_warn(chan->device->dev,
+			"%s: bad parameter: buf_len=%zu, period_len=%zu, id=%d\n",
+			__func__, buf_len, period_len, rchan->mid_rid);
+		return NULL;
+	}
+
+	if (rcar_dmac_map_slave_addr(chan, dir))
+		return NULL;
+
+	sg_len = buf_len / period_len;
+	if (sg_len > RCAR_DMAC_MAX_SG_LEN) {
+		dev_err(chan->device->dev,
+			"chan%u: sg length %d exceds limit %d",
+			rchan->index, sg_len, RCAR_DMAC_MAX_SG_LEN);
+		return NULL;
+	}
+
+	/*
+	 * Allocate the sg list dynamically as it would consume too much stack
+	 * space.
+	 */
+	sgl = kcalloc(sg_len, sizeof(*sgl), GFP_NOWAIT);
+	if (!sgl)
+		return NULL;
+
+	sg_init_table(sgl, sg_len);
+
+	for (i = 0; i < sg_len; ++i) {
+		dma_addr_t src = buf_addr + (period_len * i);
+
+		sg_set_page(&sgl[i], pfn_to_page(PFN_DOWN(src)), period_len,
+			    offset_in_page(src));
+		sg_dma_address(&sgl[i]) = src;
+		sg_dma_len(&sgl[i]) = period_len;
+	}
+
+	desc = rcar_dmac_chan_prep_sg(rchan, sgl, sg_len, rchan->map.addr,
+				      dir, flags, true);
+
+	kfree(sgl);
+	return desc;
+}
+
+static int rcar_dmac_device_config(struct dma_chan *chan,
+				   struct dma_slave_config *cfg)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+
+	/*
+	 * We could lock this, but you shouldn't be configuring the
+	 * channel while using it...
+	 */
+	rchan->src.slave_addr = cfg->src_addr;
+	rchan->dst.slave_addr = cfg->dst_addr;
+	rchan->src.xfer_size = cfg->src_addr_width;
+	rchan->dst.xfer_size = cfg->dst_addr_width;
+
+	return 0;
+}
+
+static int rcar_dmac_chan_terminate_all(struct dma_chan *chan)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&rchan->lock, flags);
+	rcar_dmac_chan_halt(rchan);
+	spin_unlock_irqrestore(&rchan->lock, flags);
+
+	/*
+	 * FIXME: No new interrupt can occur now, but the IRQ thread might still
+	 * be running.
+	 */
+
+	rcar_dmac_chan_reinit(rchan);
+
+	return 0;
+}
+
+static unsigned int rcar_dmac_chan_get_residue(struct rcar_dmac_chan *chan,
+					       dma_cookie_t cookie)
+{
+	struct rcar_dmac_desc *desc = chan->desc.running;
+	struct rcar_dmac_xfer_chunk *running = NULL;
+	struct rcar_dmac_xfer_chunk *chunk;
+	enum dma_status status;
+	unsigned int residue = 0;
+	unsigned int dptr = 0;
+
+	if (!desc)
+		return 0;
+
+	/*
+	 * If the cookie corresponds to a descriptor that has been completed
+	 * there is no residue. The same check has already been performed by the
+	 * caller but without holding the channel lock, so the descriptor could
+	 * now be complete.
+	 */
+	status = dma_cookie_status(&chan->chan, cookie, NULL);
+	if (status == DMA_COMPLETE)
+		return 0;
+
+	/*
+	 * If the cookie doesn't correspond to the currently running transfer
+	 * then the descriptor hasn't been processed yet, and the residue is
+	 * equal to the full descriptor size.
+	 * A client driver may also call this function before
+	 * rcar_dmac_isr_channel_thread() has run. In that case "desc.running"
+	 * already points at the next descriptor and the completed descriptor
+	 * sits on the done list, so if the cookie matches a descriptor on the
+	 * done list we can assume the residue is zero.
+	 */
+	if (cookie != desc->async_tx.cookie) {
+		list_for_each_entry(desc, &chan->desc.done, node) {
+			if (cookie == desc->async_tx.cookie)
+				return 0;
+		}
+		list_for_each_entry(desc, &chan->desc.pending, node) {
+			if (cookie == desc->async_tx.cookie)
+				return desc->size;
+		}
+		list_for_each_entry(desc, &chan->desc.active, node) {
+			if (cookie == desc->async_tx.cookie)
+				return desc->size;
+		}
+
+		/*
+		 * No descriptor found for the cookie, there's thus no residue.
+		 * This shouldn't happen if the calling driver passes a correct
+		 * cookie value.
+		 */
+		WARN(1, "No descriptor for cookie!");
+		return 0;
+	}
+
+	/*
+	 * In descriptor mode the descriptor running pointer is not maintained
+	 * by the interrupt handler, find the running descriptor from the
+	 * descriptor pointer field in the CHCRB register. In non-descriptor
+	 * mode just use the running descriptor pointer.
+	 */
+	if (desc->hwdescs.use) {
+		dptr = (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) &
+			RCAR_DMACHCRB_DPTR_MASK) >> RCAR_DMACHCRB_DPTR_SHIFT;
+		if (dptr == 0)
+			dptr = desc->nchunks;
+		dptr--;
+		WARN_ON(dptr >= desc->nchunks);
+	} else {
+		running = desc->running;
+	}
+
+	/* Compute the size of all chunks still to be transferred. */
+	list_for_each_entry_reverse(chunk, &desc->chunks, node) {
+		if (chunk == running || ++dptr == desc->nchunks)
+			break;
+
+		residue += chunk->size;
+	}
+
+	/* Add the residue for the current chunk. */
+	residue += rcar_dmac_chan_read(chan, RCAR_DMATCRB) << desc->xfer_shift;
+
+	return residue;
+}
+
+static enum dma_status rcar_dmac_tx_status(struct dma_chan *chan,
+					   dma_cookie_t cookie,
+					   struct dma_tx_state *txstate)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+	enum dma_status status;
+	unsigned long flags;
+	unsigned int residue;
+
+	status = dma_cookie_status(chan, cookie, txstate);
+	if (status == DMA_COMPLETE || !txstate)
+		return status;
+
+	spin_lock_irqsave(&rchan->lock, flags);
+	residue = rcar_dmac_chan_get_residue(rchan, cookie);
+	spin_unlock_irqrestore(&rchan->lock, flags);
+
+	/* if there's no residue, the cookie is complete */
+	if (!residue)
+		return DMA_COMPLETE;
+
+	dma_set_residue(txstate, residue);
+
+	return status;
+}
+
+static void rcar_dmac_issue_pending(struct dma_chan *chan)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&rchan->lock, flags);
+
+	if (list_empty(&rchan->desc.pending))
+		goto done;
+
+	/* Append the pending list to the active list. */
+	list_splice_tail_init(&rchan->desc.pending, &rchan->desc.active);
+
+	/*
+	 * If no transfer is running pick the first descriptor from the active
+	 * list and start the transfer.
+	 */
+	if (!rchan->desc.running) {
+		struct rcar_dmac_desc *desc;
+
+		desc = list_first_entry(&rchan->desc.active,
+					struct rcar_dmac_desc, node);
+		rchan->desc.running = desc;
+
+		rcar_dmac_chan_start_xfer(rchan);
+	}
+
+done:
+	spin_unlock_irqrestore(&rchan->lock, flags);
+}
+
+static void rcar_dmac_device_synchronize(struct dma_chan *chan)
+{
+	struct rcar_dmac_chan *rchan = to_rcar_dmac_chan(chan);
+
+	synchronize_irq(rchan->irq);
+}
+
+/* -----------------------------------------------------------------------------
+ * IRQ handling
+ */
+
+static irqreturn_t rcar_dmac_isr_desc_stage_end(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_desc *desc = chan->desc.running;
+	unsigned int stage;
+
+	if (WARN_ON(!desc || !desc->cyclic)) {
+		/*
+		 * This should never happen, there should always be a running
+		 * cyclic descriptor when a descriptor stage end interrupt is
+		 * triggered. Warn and return.
+		 */
+		return IRQ_NONE;
+	}
+
+	/* Program the interrupt pointer to the next stage. */
+	stage = (rcar_dmac_chan_read(chan, RCAR_DMACHCRB) &
+		 RCAR_DMACHCRB_DPTR_MASK) >> RCAR_DMACHCRB_DPTR_SHIFT;
+	rcar_dmac_chan_write(chan, RCAR_DMADPCR, RCAR_DMADPCR_DIPT(stage));
+
+	return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t rcar_dmac_isr_transfer_end(struct rcar_dmac_chan *chan)
+{
+	struct rcar_dmac_desc *desc = chan->desc.running;
+	irqreturn_t ret = IRQ_WAKE_THREAD;
+
+	if (WARN_ON_ONCE(!desc)) {
+		/*
+		 * This should never happen, there should always be a running
+		 * descriptor when a transfer end interrupt is triggered. Warn
+		 * and return.
+		 */
+		return IRQ_NONE;
+	}
+
+	/*
+	 * The transfer end interrupt isn't generated for each chunk when using
+	 * descriptor mode. Only update the running chunk pointer in
+	 * non-descriptor mode.
+	 */
+	if (!desc->hwdescs.use) {
+		/*
+		 * If we haven't completed the last transfer chunk simply move
+		 * to the next one. Only wake the IRQ thread if the transfer is
+		 * cyclic.
+		 */
+		if (!list_is_last(&desc->running->node, &desc->chunks)) {
+			desc->running = list_next_entry(desc->running, node);
+			if (!desc->cyclic)
+				ret = IRQ_HANDLED;
+			goto done;
+		}
+
+		/*
+		 * We've completed the last transfer chunk. If the transfer is
+		 * cyclic, move back to the first one.
+		 */
+		if (desc->cyclic) {
+			desc->running =
+				list_first_entry(&desc->chunks,
+						 struct rcar_dmac_xfer_chunk,
+						 node);
+			goto done;
+		}
+	}
+
+	/* The descriptor is complete, move it to the done list. */
+	list_move_tail(&desc->node, &chan->desc.done);
+
+	/* Queue the next descriptor, if any. */
+	if (!list_empty(&chan->desc.active))
+		chan->desc.running = list_first_entry(&chan->desc.active,
+						      struct rcar_dmac_desc,
+						      node);
+	else
+		chan->desc.running = NULL;
+
+done:
+	if (chan->desc.running)
+		rcar_dmac_chan_start_xfer(chan);
+
+	return ret;
+}
+
+static irqreturn_t rcar_dmac_isr_channel(int irq, void *dev)
+{
+	u32 mask = RCAR_DMACHCR_DSE | RCAR_DMACHCR_TE;
+	struct rcar_dmac_chan *chan = dev;
+	irqreturn_t ret = IRQ_NONE;
+	bool reinit = false;
+	u32 chcr;
+
+	spin_lock(&chan->lock);
+
+	chcr = rcar_dmac_chan_read(chan, RCAR_DMACHCR);
+	if (chcr & RCAR_DMACHCR_CAE) {
+		struct rcar_dmac *dmac = to_rcar_dmac(chan->chan.device);
+
+		/*
+		 * We don't need to call rcar_dmac_chan_halt() because the
+		 * channel is already stopped in the error case. We only need
+		 * to clear the register and check the DE bit as recovery.
+		 */
+		rcar_dmac_write(dmac, RCAR_DMACHCLR, 1 << chan->index);
+		rcar_dmac_chcr_de_barrier(chan);
+		reinit = true;
+		goto spin_lock_end;
+	}
+
+	if (chcr & RCAR_DMACHCR_TE)
+		mask |= RCAR_DMACHCR_DE;
+	rcar_dmac_chan_write(chan, RCAR_DMACHCR, chcr & ~mask);
+	if (mask & RCAR_DMACHCR_DE)
+		rcar_dmac_chcr_de_barrier(chan);
+
+	if (chcr & RCAR_DMACHCR_DSE)
+		ret |= rcar_dmac_isr_desc_stage_end(chan);
+
+	if (chcr & RCAR_DMACHCR_TE)
+		ret |= rcar_dmac_isr_transfer_end(chan);
+
+spin_lock_end:
+	spin_unlock(&chan->lock);
+
+	if (reinit) {
+		dev_err(chan->chan.device->dev, "Channel Address Error\n");
+
+		rcar_dmac_chan_reinit(chan);
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
+
+static irqreturn_t rcar_dmac_isr_channel_thread(int irq, void *dev)
+{
+	struct rcar_dmac_chan *chan = dev;
+	struct rcar_dmac_desc *desc;
+	struct dmaengine_desc_callback cb;
+
+	spin_lock_irq(&chan->lock);
+
+	/* For cyclic transfers notify the user after every chunk. */
+	if (chan->desc.running && chan->desc.running->cyclic) {
+		desc = chan->desc.running;
+		dmaengine_desc_get_callback(&desc->async_tx, &cb);
+
+		if (dmaengine_desc_callback_valid(&cb)) {
+			spin_unlock_irq(&chan->lock);
+			dmaengine_desc_callback_invoke(&cb, NULL);
+			spin_lock_irq(&chan->lock);
+		}
+	}
+
+	/*
+	 * Call the callback function for all descriptors on the done list and
+	 * move them to the ack wait list.
+	 */
+	while (!list_empty(&chan->desc.done)) {
+		desc = list_first_entry(&chan->desc.done, struct rcar_dmac_desc,
+					node);
+		dma_cookie_complete(&desc->async_tx);
+		list_del(&desc->node);
+
+		dmaengine_desc_get_callback(&desc->async_tx, &cb);
+		if (dmaengine_desc_callback_valid(&cb)) {
+			spin_unlock_irq(&chan->lock);
+			/*
+			 * We own the only reference to this descriptor, we can
+			 * safely dereference it without holding the channel
+			 * lock.
+			 */
+			dmaengine_desc_callback_invoke(&cb, NULL);
+			spin_lock_irq(&chan->lock);
+		}
+
+		list_add_tail(&desc->node, &chan->desc.wait);
+	}
+
+	spin_unlock_irq(&chan->lock);
+
+	/* Recycle all acked descriptors. */
+	rcar_dmac_desc_recycle_acked(chan);
+
+	return IRQ_HANDLED;
+}
+
+/* -----------------------------------------------------------------------------
+ * OF xlate and channel filter
+ */
+
+static bool rcar_dmac_chan_filter(struct dma_chan *chan, void *arg)
+{
+	struct rcar_dmac *dmac = to_rcar_dmac(chan->device);
+	struct of_phandle_args *dma_spec = arg;
+
+	/*
+	 * FIXME: Using a filter on OF platforms makes no sense. The OF xlate
+	 * function knows which device it wants to allocate a channel from,
+	 * and would be perfectly capable of selecting the channel it wants.
+	 * Forcing it to call dma_request_channel() and iterate through all
+	 * channels from all controllers is just pointless.
+	 */
+	if (chan->device->device_config != rcar_dmac_device_config ||
+	    dma_spec->np != chan->device->dev->of_node)
+		return false;
+
+	return !test_and_set_bit(dma_spec->args[0], dmac->modules);
+}
+
+static struct dma_chan *rcar_dmac_of_xlate(struct of_phandle_args *dma_spec,
+					   struct of_dma *ofdma)
+{
+	struct rcar_dmac_chan *rchan;
+	struct dma_chan *chan;
+	dma_cap_mask_t mask;
+
+	if (dma_spec->args_count != 1)
+		return NULL;
+
+	/* Only slave DMA channels can be allocated via DT */
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	chan = dma_request_channel(mask, rcar_dmac_chan_filter, dma_spec);
+	if (!chan)
+		return NULL;
+
+	rchan = to_rcar_dmac_chan(chan);
+	rchan->mid_rid = dma_spec->args[0];
+
+	return chan;
+}
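+
+/*
+ * Device tree usage (illustrative sketch only): a client node references this
+ * controller with a single specifier cell carrying the MID/RID value, e.g.
+ *
+ *	dmas = <&dmac0 0x21>, <&dmac0 0x22>;
+ *	dma-names = "tx", "rx";
+ *
+ * The MID/RID numbers above are made up; real values come from the SoC
+ * documentation.
+ */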
+
+/* -----------------------------------------------------------------------------
+ * Power management
+ */
+
+#ifdef CONFIG_PM
+static int rcar_dmac_runtime_suspend(struct device *dev)
+{
+	return 0;
+}
+
+static int rcar_dmac_runtime_resume(struct device *dev)
+{
+	struct rcar_dmac *dmac = dev_get_drvdata(dev);
+
+	return rcar_dmac_init(dmac);
+}
+#endif
+
+static const struct dev_pm_ops rcar_dmac_pm = {
+	/*
+	 * TODO for system sleep/resume:
+	 *   - Wait for the current transfer to complete and stop the device,
+	 *   - Resume transfers, if any.
+	 */
+	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				      pm_runtime_force_resume)
+	SET_RUNTIME_PM_OPS(rcar_dmac_runtime_suspend, rcar_dmac_runtime_resume,
+			   NULL)
+};
+
+/* -----------------------------------------------------------------------------
+ * Probe and remove
+ */
+
+static int rcar_dmac_chan_probe(struct rcar_dmac *dmac,
+				struct rcar_dmac_chan *rchan,
+				unsigned int index)
+{
+	struct platform_device *pdev = to_platform_device(dmac->dev);
+	struct dma_chan *chan = &rchan->chan;
+	char pdev_irqname[5];
+	char *irqname;
+	int ret;
+
+	rchan->index = index;
+	rchan->iomem = dmac->iomem + RCAR_DMAC_CHAN_OFFSET(index);
+	rchan->mid_rid = -EINVAL;
+
+	spin_lock_init(&rchan->lock);
+
+	INIT_LIST_HEAD(&rchan->desc.free);
+	INIT_LIST_HEAD(&rchan->desc.pending);
+	INIT_LIST_HEAD(&rchan->desc.active);
+	INIT_LIST_HEAD(&rchan->desc.done);
+	INIT_LIST_HEAD(&rchan->desc.wait);
+
+	/* Request the channel interrupt. */
+	sprintf(pdev_irqname, "ch%u", index);
+	rchan->irq = platform_get_irq_byname(pdev, pdev_irqname);
+	if (rchan->irq < 0) {
+		dev_err(dmac->dev, "no IRQ specified for channel %u\n", index);
+		return -ENODEV;
+	}
+
+	irqname = devm_kasprintf(dmac->dev, GFP_KERNEL, "%s:%u",
+				 dev_name(dmac->dev), index);
+	if (!irqname)
+		return -ENOMEM;
+
+	/*
+	 * Initialize the DMA engine channel and add it to the DMA engine
+	 * channels list.
+	 */
+	chan->device = &dmac->engine;
+	dma_cookie_init(chan);
+
+	list_add_tail(&chan->device_node, &dmac->engine.channels);
+
+	ret = devm_request_threaded_irq(dmac->dev, rchan->irq,
+					rcar_dmac_isr_channel,
+					rcar_dmac_isr_channel_thread, 0,
+					irqname, rchan);
+	if (ret) {
+		dev_err(dmac->dev, "failed to request IRQ %u (%d)\n",
+			rchan->irq, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int rcar_dmac_parse_of(struct device *dev, struct rcar_dmac *dmac)
+{
+	struct device_node *np = dev->of_node;
+	int ret;
+
+	ret = of_property_read_u32(np, "dma-channels", &dmac->n_channels);
+	if (ret < 0) {
+		dev_err(dev, "unable to read dma-channels property\n");
+		return ret;
+	}
+
+	if (dmac->n_channels <= 0 || dmac->n_channels >= 100) {
+		dev_err(dev, "invalid number of channels %u\n",
+			dmac->n_channels);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int rcar_dmac_probe(struct platform_device *pdev)
+{
+	const enum dma_slave_buswidth widths = DMA_SLAVE_BUSWIDTH_1_BYTE |
+		DMA_SLAVE_BUSWIDTH_2_BYTES | DMA_SLAVE_BUSWIDTH_4_BYTES |
+		DMA_SLAVE_BUSWIDTH_8_BYTES | DMA_SLAVE_BUSWIDTH_16_BYTES |
+		DMA_SLAVE_BUSWIDTH_32_BYTES | DMA_SLAVE_BUSWIDTH_64_BYTES;
+	unsigned int channels_offset = 0;
+	struct dma_device *engine;
+	struct rcar_dmac *dmac;
+	struct resource *mem;
+	unsigned int i;
+	int ret;
+
+	dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
+	if (!dmac)
+		return -ENOMEM;
+
+	dmac->dev = &pdev->dev;
+	platform_set_drvdata(pdev, dmac);
+	dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40));
+
+	ret = rcar_dmac_parse_of(&pdev->dev, dmac);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * A still unconfirmed hardware bug prevents the IPMMU microTLB 0 from
+	 * being flushed correctly, resulting in memory corruption. DMAC 0
+	 * channel 0 is connected to microTLB 0 on currently supported
+	 * platforms, so we can't use it with the IPMMU. As the IOMMU API
+	 * operates at the device level we can't disable it selectively, so
+	 * ignore channel 0 for now if the device is part of an IOMMU group.
+	 */
+	if (pdev->dev.iommu_group) {
+		dmac->n_channels--;
+		channels_offset = 1;
+	}
+
+	dmac->channels = devm_kcalloc(&pdev->dev, dmac->n_channels,
+				      sizeof(*dmac->channels), GFP_KERNEL);
+	if (!dmac->channels)
+		return -ENOMEM;
+
+	/* Request resources. */
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	dmac->iomem = devm_ioremap_resource(&pdev->dev, mem);
+	if (IS_ERR(dmac->iomem))
+		return PTR_ERR(dmac->iomem);
+
+	/* Enable runtime PM and initialize the device. */
+	pm_runtime_enable(&pdev->dev);
+	ret = pm_runtime_get_sync(&pdev->dev);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret);
+		return ret;
+	}
+
+	ret = rcar_dmac_init(dmac);
+	pm_runtime_put(&pdev->dev);
+
+	if (ret) {
+		dev_err(&pdev->dev, "failed to reset device\n");
+		goto error;
+	}
+
+	/* Initialize engine */
+	engine = &dmac->engine;
+
+	dma_cap_set(DMA_MEMCPY, engine->cap_mask);
+	dma_cap_set(DMA_SLAVE, engine->cap_mask);
+
+	engine->dev		= &pdev->dev;
+	engine->copy_align	= ilog2(RCAR_DMAC_MEMCPY_XFER_SIZE);
+
+	engine->src_addr_widths	= widths;
+	engine->dst_addr_widths	= widths;
+	engine->directions	= BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
+	engine->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+	engine->device_alloc_chan_resources	= rcar_dmac_alloc_chan_resources;
+	engine->device_free_chan_resources	= rcar_dmac_free_chan_resources;
+	engine->device_prep_dma_memcpy		= rcar_dmac_prep_dma_memcpy;
+	engine->device_prep_slave_sg		= rcar_dmac_prep_slave_sg;
+	engine->device_prep_dma_cyclic		= rcar_dmac_prep_dma_cyclic;
+	engine->device_config			= rcar_dmac_device_config;
+	engine->device_pause			= rcar_dmac_chan_pause;
+	engine->device_terminate_all		= rcar_dmac_chan_terminate_all;
+	engine->device_tx_status		= rcar_dmac_tx_status;
+	engine->device_issue_pending		= rcar_dmac_issue_pending;
+	engine->device_synchronize		= rcar_dmac_device_synchronize;
+
+	INIT_LIST_HEAD(&engine->channels);
+
+	for (i = 0; i < dmac->n_channels; ++i) {
+		ret = rcar_dmac_chan_probe(dmac, &dmac->channels[i],
+					   i + channels_offset);
+		if (ret < 0)
+			goto error;
+	}
+
+	/* Register the DMAC as a DMA provider for DT. */
+	ret = of_dma_controller_register(pdev->dev.of_node, rcar_dmac_of_xlate,
+					 NULL);
+	if (ret < 0)
+		goto error;
+
+	/*
+	 * Register the DMA engine device.
+	 *
+	 * Default transfer size of 32 bytes requires 32-byte alignment.
+	 */
+	ret = dma_async_device_register(engine);
+	if (ret < 0)
+		goto error;
+
+	return 0;
+
+error:
+	of_dma_controller_free(pdev->dev.of_node);
+	pm_runtime_disable(&pdev->dev);
+	return ret;
+}
+
+static int rcar_dmac_remove(struct platform_device *pdev)
+{
+	struct rcar_dmac *dmac = platform_get_drvdata(pdev);
+
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&dmac->engine);
+
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+static void rcar_dmac_shutdown(struct platform_device *pdev)
+{
+	struct rcar_dmac *dmac = platform_get_drvdata(pdev);
+
+	rcar_dmac_stop_all_chan(dmac);
+}
+
+static const struct of_device_id rcar_dmac_of_ids[] = {
+	{ .compatible = "renesas,rcar-dmac", },
+	{ /* Sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, rcar_dmac_of_ids);
+
+static struct platform_driver rcar_dmac_driver = {
+	.driver		= {
+		.pm	= &rcar_dmac_pm,
+		.name	= "rcar-dmac",
+		.of_match_table = rcar_dmac_of_ids,
+	},
+	.probe		= rcar_dmac_probe,
+	.remove		= rcar_dmac_remove,
+	.shutdown	= rcar_dmac_shutdown,
+};
+
+module_platform_driver(rcar_dmac_driver);
+
+MODULE_DESCRIPTION("R-Car Gen2 DMA Controller Driver");
+MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/sh/shdma-arm.h b/drivers/dma/sh/shdma-arm.h
new file mode 100644
index 0000000..a1b0ef4
--- /dev/null
+++ b/drivers/dma/sh/shdma-arm.h
@@ -0,0 +1,51 @@
+/*
+ * Renesas SuperH DMA Engine support
+ *
+ * Copyright (C) 2013 Renesas Electronics, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 the GNU General Public License as published by the Free
+ * Software Foundation.
+ */
+
+#ifndef SHDMA_ARM_H
+#define SHDMA_ARM_H
+
+#include "shdma.h"
+
+/* Transmit sizes and respective CHCR register values */
+enum {
+	XMIT_SZ_8BIT		= 0,
+	XMIT_SZ_16BIT		= 1,
+	XMIT_SZ_32BIT		= 2,
+	XMIT_SZ_64BIT		= 7,
+	XMIT_SZ_128BIT		= 3,
+	XMIT_SZ_256BIT		= 4,
+	XMIT_SZ_512BIT		= 5,
+};
+
+/* log2(size / 8) - used to calculate number of transfers */
+#define SH_DMAE_TS_SHIFT {		\
+	[XMIT_SZ_8BIT]		= 0,	\
+	[XMIT_SZ_16BIT]		= 1,	\
+	[XMIT_SZ_32BIT]		= 2,	\
+	[XMIT_SZ_64BIT]		= 3,	\
+	[XMIT_SZ_128BIT]	= 4,	\
+	[XMIT_SZ_256BIT]	= 5,	\
+	[XMIT_SZ_512BIT]	= 6,	\
+}
+
+#define TS_LOW_BIT	0x3 /* --xx */
+#define TS_HI_BIT	0xc /* xx-- */
+
+#define TS_LOW_SHIFT	(3)
+#define TS_HI_SHIFT	(20 - 2)	/* 2 bits for shifted low TS */
+
+#define TS_INDEX2VAL(i) \
+	((((i) & TS_LOW_BIT) << TS_LOW_SHIFT) |\
+	 (((i) & TS_HI_BIT)  << TS_HI_SHIFT))
+
+#define CHCR_TX(xmit_sz) (DM_FIX | SM_INC | RS_ERS | TS_INDEX2VAL((xmit_sz)))
+#define CHCR_RX(xmit_sz) (DM_INC | SM_FIX | RS_ERS | TS_INDEX2VAL((xmit_sz)))
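+
+/*
+ * Editorial worked example (illustrative only): the transfer-size index is
+ * split into CHCR bits [4:3] (low part) and [21:20] (high part), e.g.
+ *
+ *	TS_INDEX2VAL(XMIT_SZ_32BIT) == ((2 & 0x3) << 3) | ((2 & 0xc) << 18)
+ *				    == 0x00000010
+ *	TS_INDEX2VAL(XMIT_SZ_64BIT) == ((7 & 0x3) << 3) | ((7 & 0xc) << 18)
+ *				    == 0x00100018
+ *
+ * CHCR_TX()/CHCR_RX() then OR this with the direction and resource-select bits.
+ */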
+
+#endif
diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c
new file mode 100644
index 0000000..6b5626e
--- /dev/null
+++ b/drivers/dma/sh/shdma-base.c
@@ -0,0 +1,1065 @@
+/*
+ * Dmaengine driver base library for DMA controllers, found on SH-based SoCs
+ *
+ * extracted from shdma.c
+ *
+ * Copyright (C) 2011-2012 Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/delay.h>
+#include <linux/shdma-base.h>
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "../dmaengine.h"
+
+/* DMA descriptor control */
+enum shdma_desc_status {
+	DESC_IDLE,
+	DESC_PREPARED,
+	DESC_SUBMITTED,
+	DESC_COMPLETED,	/* completed, have to call callback */
+	DESC_WAITING,	/* callback called, waiting for ack / re-submit */
+};
+
+#define NR_DESCS_PER_CHANNEL 32
+
+#define to_shdma_chan(c) container_of(c, struct shdma_chan, dma_chan)
+#define to_shdma_dev(d) container_of(d, struct shdma_dev, dma_dev)
+
+/*
+ * For slave DMA we assume that there is a finite number of DMA slaves in the
+ * system, and that each such slave can only use a finite number of channels.
+ * We use slave channel IDs to make sure that no such slave channel ID is
+ * allocated more than once.
+ */
+static unsigned int slave_num = 256;
+module_param(slave_num, uint, 0444);
+
+/* A bitmask with slave_num bits */
+static unsigned long *shdma_slave_used;
+
+/* Called under spin_lock_irq(&schan->chan_lock) */
+static void shdma_chan_xfer_ld_queue(struct shdma_chan *schan)
+{
+	struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
+	const struct shdma_ops *ops = sdev->ops;
+	struct shdma_desc *sdesc;
+
+	/* DMA work check */
+	if (ops->channel_busy(schan))
+		return;
+
+	/* Find the first not transferred descriptor */
+	list_for_each_entry(sdesc, &schan->ld_queue, node)
+		if (sdesc->mark == DESC_SUBMITTED) {
+			ops->start_xfer(schan, sdesc);
+			break;
+		}
+}
+
+static dma_cookie_t shdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct shdma_desc *chunk, *c, *desc =
+		container_of(tx, struct shdma_desc, async_tx);
+	struct shdma_chan *schan = to_shdma_chan(tx->chan);
+	dma_async_tx_callback callback = tx->callback;
+	dma_cookie_t cookie;
+	bool power_up;
+
+	spin_lock_irq(&schan->chan_lock);
+
+	power_up = list_empty(&schan->ld_queue);
+
+	cookie = dma_cookie_assign(tx);
+
+	/* Mark all chunks of this descriptor as submitted, move to the queue */
+	list_for_each_entry_safe(chunk, c, desc->node.prev, node) {
+		/*
+		 * All chunks are on the global ld_free, so we have to find
+		 * the end of the chain ourselves.
+		 */
+		if (chunk != desc && (chunk->mark == DESC_IDLE ||
+				      chunk->async_tx.cookie > 0 ||
+				      chunk->async_tx.cookie == -EBUSY ||
+				      &chunk->node == &schan->ld_free))
+			break;
+		chunk->mark = DESC_SUBMITTED;
+		if (chunk->chunks == 1) {
+			chunk->async_tx.callback = callback;
+			chunk->async_tx.callback_param = tx->callback_param;
+		} else {
+			/* Callback goes to the last chunk */
+			chunk->async_tx.callback = NULL;
+		}
+		chunk->cookie = cookie;
+		list_move_tail(&chunk->node, &schan->ld_queue);
+
+		dev_dbg(schan->dev, "submit #%d@%p on %d\n",
+			tx->cookie, &chunk->async_tx, schan->id);
+	}
+
+	if (power_up) {
+		int ret;
+		schan->pm_state = SHDMA_PM_BUSY;
+
+		ret = pm_runtime_get(schan->dev);
+
+		spin_unlock_irq(&schan->chan_lock);
+		if (ret < 0)
+			dev_err(schan->dev, "%s(): GET = %d\n", __func__, ret);
+
+		pm_runtime_barrier(schan->dev);
+
+		spin_lock_irq(&schan->chan_lock);
+
+		/* Have we been reset while waiting? */
+		if (schan->pm_state != SHDMA_PM_ESTABLISHED) {
+			struct shdma_dev *sdev =
+				to_shdma_dev(schan->dma_chan.device);
+			const struct shdma_ops *ops = sdev->ops;
+			dev_dbg(schan->dev, "Bring up channel %d\n",
+				schan->id);
+			/*
+			 * TODO: .setup_xfer() might fail on some platforms.
+			 * Make it return int then; on error remove chunks from
+			 * the queue again.
+			 */
+			ops->setup_xfer(schan, schan->slave_id);
+
+			if (schan->pm_state == SHDMA_PM_PENDING)
+				shdma_chan_xfer_ld_queue(schan);
+			schan->pm_state = SHDMA_PM_ESTABLISHED;
+		}
+	} else {
+		/*
+		 * Tell .device_issue_pending() not to run the queue; interrupts
+		 * will do it anyway.
+		 */
+		schan->pm_state = SHDMA_PM_PENDING;
+	}
+
+	spin_unlock_irq(&schan->chan_lock);
+
+	return cookie;
+}
+
+/* Called with the channel lock (chan_lock) held */
+static struct shdma_desc *shdma_get_desc(struct shdma_chan *schan)
+{
+	struct shdma_desc *sdesc;
+
+	list_for_each_entry(sdesc, &schan->ld_free, node)
+		if (sdesc->mark != DESC_PREPARED) {
+			BUG_ON(sdesc->mark != DESC_IDLE);
+			list_del(&sdesc->node);
+			return sdesc;
+		}
+
+	return NULL;
+}
+
+static int shdma_setup_slave(struct shdma_chan *schan, dma_addr_t slave_addr)
+{
+	struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
+	const struct shdma_ops *ops = sdev->ops;
+	int ret, match;
+
+	if (schan->dev->of_node) {
+		match = schan->hw_req;
+		ret = ops->set_slave(schan, match, slave_addr, true);
+		if (ret < 0)
+			return ret;
+	} else {
+		match = schan->real_slave_id;
+	}
+
+	if (schan->real_slave_id < 0 || schan->real_slave_id >= slave_num)
+		return -EINVAL;
+
+	if (test_and_set_bit(schan->real_slave_id, shdma_slave_used))
+		return -EBUSY;
+
+	ret = ops->set_slave(schan, match, slave_addr, false);
+	if (ret < 0) {
+		clear_bit(schan->real_slave_id, shdma_slave_used);
+		return ret;
+	}
+
+	schan->slave_id = schan->real_slave_id;
+
+	return 0;
+}
+
+static int shdma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct shdma_chan *schan = to_shdma_chan(chan);
+	struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
+	const struct shdma_ops *ops = sdev->ops;
+	struct shdma_desc *desc;
+	struct shdma_slave *slave = chan->private;
+	int ret, i;
+
+	/*
+	 * This relies on the guarantee from dmaengine that alloc_chan_resources
+	 * never runs concurrently with itself or free_chan_resources.
+	 */
+	if (slave) {
+		/* Legacy mode: .private is set in filter */
+		schan->real_slave_id = slave->slave_id;
+		ret = shdma_setup_slave(schan, 0);
+		if (ret < 0)
+			goto esetslave;
+	} else {
+		/* Normal mode: real_slave_id was set by filter */
+		schan->slave_id = -EINVAL;
+	}
+
+	schan->desc = kcalloc(NR_DESCS_PER_CHANNEL,
+			      sdev->desc_size, GFP_KERNEL);
+	if (!schan->desc) {
+		ret = -ENOMEM;
+		goto edescalloc;
+	}
+	schan->desc_num = NR_DESCS_PER_CHANNEL;
+
+	for (i = 0; i < NR_DESCS_PER_CHANNEL; i++) {
+		desc = ops->embedded_desc(schan->desc, i);
+		dma_async_tx_descriptor_init(&desc->async_tx,
+					     &schan->dma_chan);
+		desc->async_tx.tx_submit = shdma_tx_submit;
+		desc->mark = DESC_IDLE;
+
+		list_add(&desc->node, &schan->ld_free);
+	}
+
+	return NR_DESCS_PER_CHANNEL;
+
+edescalloc:
+	if (slave)
+esetslave:
+		clear_bit(slave->slave_id, shdma_slave_used);
+	chan->private = NULL;
+	return ret;
+}
+
+/*
+ * This is the standard shdma filter function, to be used as a replacement for
+ * the "old" method that relies on the .private pointer.
+ * A valid slave ID must always be passed as the argument; old drivers that
+ * pass ERR_PTR(-EINVAL) as a filter parameter and set the slave ID up in
+ * dma_slave_config need to be updated, so that the slave_id field can be
+ * removed from dma_slave_config. If this filter is used, the slave driver,
+ * after calling dma_request_channel(), will also have to call
+ * dmaengine_slave_config() with .direction, and either .src_addr or .dst_addr
+ * set.
+ *
+ * NOTE: this filter doesn't support multiple DMAC drivers with the DMA_SLAVE
+ * capability! If this becomes a requirement, hardware glue drivers using this
+ * service would have to provide their own filters, which would first check
+ * the device driver, similar to how other DMAC drivers, e.g., sa11x0-dma.c, do
+ * this, and only then, in case of a match, call this common filter.
+ * NOTE 2: This filter function is also used in the DT case by shdma_of_xlate().
+ * In that case the MID-RID value is used for slave channel filtering and is
+ * passed to this function in the "arg" parameter.
+ */
+bool shdma_chan_filter(struct dma_chan *chan, void *arg)
+{
+	struct shdma_chan *schan;
+	struct shdma_dev *sdev;
+	int slave_id = (long)arg;
+	int ret;
+
+	/* Only support channels handled by this driver. */
+	if (chan->device->device_alloc_chan_resources !=
+	    shdma_alloc_chan_resources)
+		return false;
+
+	schan = to_shdma_chan(chan);
+	sdev = to_shdma_dev(chan->device);
+
+	/*
+	 * For DT, the schan->slave_id field is generated by the
+	 * set_slave function from the slave ID that is passed in
+	 * from xlate. For the non-DT case, the slave ID is
+	 * directly passed into the filter function by the driver
+	 */
+	if (schan->dev->of_node) {
+		ret = sdev->ops->set_slave(schan, slave_id, 0, true);
+		if (ret < 0)
+			return false;
+
+		schan->real_slave_id = schan->slave_id;
+		return true;
+	}
+
+	if (slave_id < 0) {
+		/* No slave requested - arbitrary channel */
+		dev_warn(sdev->dma_dev.dev, "invalid slave ID passed to dma_request_slave\n");
+		return true;
+	}
+
+	if (slave_id >= slave_num)
+		return false;
+
+	ret = sdev->ops->set_slave(schan, slave_id, 0, true);
+	if (ret < 0)
+		return false;
+
+	schan->real_slave_id = slave_id;
+
+	return true;
+}
+EXPORT_SYMBOL(shdma_chan_filter);
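+
+/*
+ * Editorial sketch (not part of this patch): a hypothetical non-DT client
+ * using the filter above. The slave ID (42) and the FIFO address are made up
+ * purely for illustration.
+ *
+ *	dma_cap_mask_t mask;
+ *	struct dma_slave_config cfg = {
+ *		.direction	= DMA_DEV_TO_MEM,
+ *		.src_addr	= 0xfe000000,	(hypothetical FIFO address)
+ *		.src_addr_width	= DMA_SLAVE_BUSWIDTH_4_BYTES,
+ *	};
+ *	struct dma_chan *chan;
+ *
+ *	dma_cap_zero(mask);
+ *	dma_cap_set(DMA_SLAVE, mask);
+ *	chan = dma_request_channel(mask, shdma_chan_filter, (void *)(long)42);
+ *	if (chan)
+ *		dmaengine_slave_config(chan, &cfg);
+ */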
+
+static dma_async_tx_callback __ld_cleanup(struct shdma_chan *schan, bool all)
+{
+	struct shdma_desc *desc, *_desc;
+	/* Is the "exposed" head of a chain acked? */
+	bool head_acked = false;
+	dma_cookie_t cookie = 0;
+	dma_async_tx_callback callback = NULL;
+	struct dmaengine_desc_callback cb;
+	unsigned long flags;
+	LIST_HEAD(cyclic_list);
+
+	memset(&cb, 0, sizeof(cb));
+	spin_lock_irqsave(&schan->chan_lock, flags);
+	list_for_each_entry_safe(desc, _desc, &schan->ld_queue, node) {
+		struct dma_async_tx_descriptor *tx = &desc->async_tx;
+
+		BUG_ON(tx->cookie > 0 && tx->cookie != desc->cookie);
+		BUG_ON(desc->mark != DESC_SUBMITTED &&
+		       desc->mark != DESC_COMPLETED &&
+		       desc->mark != DESC_WAITING);
+
+		/*
+		 * queue is ordered, and we use this loop to (1) clean up all
+		 * completed descriptors, and to (2) update descriptor flags of
+		 * any chunks in a (partially) completed chain
+		 */
+		if (!all && desc->mark == DESC_SUBMITTED &&
+		    desc->cookie != cookie)
+			break;
+
+		if (tx->cookie > 0)
+			cookie = tx->cookie;
+
+		if (desc->mark == DESC_COMPLETED && desc->chunks == 1) {
+			if (schan->dma_chan.completed_cookie != desc->cookie - 1)
+				dev_dbg(schan->dev,
+					"Completing cookie %d, expected %d\n",
+					desc->cookie,
+					schan->dma_chan.completed_cookie + 1);
+			schan->dma_chan.completed_cookie = desc->cookie;
+		}
+
+		/* Call callback on the last chunk */
+		if (desc->mark == DESC_COMPLETED && tx->callback) {
+			desc->mark = DESC_WAITING;
+			dmaengine_desc_get_callback(tx, &cb);
+			callback = tx->callback;
+			dev_dbg(schan->dev, "descriptor #%d@%p on %d callback\n",
+				tx->cookie, tx, schan->id);
+			BUG_ON(desc->chunks != 1);
+			break;
+		}
+
+		if (tx->cookie > 0 || tx->cookie == -EBUSY) {
+			if (desc->mark == DESC_COMPLETED) {
+				BUG_ON(tx->cookie < 0);
+				desc->mark = DESC_WAITING;
+			}
+			head_acked = async_tx_test_ack(tx);
+		} else {
+			switch (desc->mark) {
+			case DESC_COMPLETED:
+				desc->mark = DESC_WAITING;
+				/* Fall through */
+			case DESC_WAITING:
+				if (head_acked)
+					async_tx_ack(&desc->async_tx);
+			}
+		}
+
+		dev_dbg(schan->dev, "descriptor %p #%d completed.\n",
+			tx, tx->cookie);
+
+		if (((desc->mark == DESC_COMPLETED ||
+		      desc->mark == DESC_WAITING) &&
+		     async_tx_test_ack(&desc->async_tx)) || all) {
+
+			if (all || !desc->cyclic) {
+				/* Remove from ld_queue list */
+				desc->mark = DESC_IDLE;
+				list_move(&desc->node, &schan->ld_free);
+			} else {
+				/* reuse as cyclic */
+				desc->mark = DESC_SUBMITTED;
+				list_move_tail(&desc->node, &cyclic_list);
+			}
+
+			if (list_empty(&schan->ld_queue)) {
+				dev_dbg(schan->dev, "Bring down channel %d\n", schan->id);
+				pm_runtime_put(schan->dev);
+				schan->pm_state = SHDMA_PM_ESTABLISHED;
+			} else if (schan->pm_state == SHDMA_PM_PENDING) {
+				shdma_chan_xfer_ld_queue(schan);
+			}
+		}
+	}
+
+	if (all && !callback)
+		/*
+		 * Terminating and the loop completed normally: forgive
+		 * uncompleted cookies
+		 */
+		schan->dma_chan.completed_cookie = schan->dma_chan.cookie;
+
+	list_splice_tail(&cyclic_list, &schan->ld_queue);
+
+	spin_unlock_irqrestore(&schan->chan_lock, flags);
+
+	dmaengine_desc_callback_invoke(&cb, NULL);
+
+	return callback;
+}
+
+/*
+ * shdma_chan_ld_cleanup - Clean up link descriptors
+ *
+ * Clean up the ld_queue of DMA channel.
+ */
+static void shdma_chan_ld_cleanup(struct shdma_chan *schan, bool all)
+{
+	while (__ld_cleanup(schan, all))
+		;
+}
+
+/*
+ * shdma_free_chan_resources - Free all resources of the channel.
+ */
+static void shdma_free_chan_resources(struct dma_chan *chan)
+{
+	struct shdma_chan *schan = to_shdma_chan(chan);
+	struct shdma_dev *sdev = to_shdma_dev(chan->device);
+	const struct shdma_ops *ops = sdev->ops;
+	LIST_HEAD(list);
+
+	/* Protect against ISR */
+	spin_lock_irq(&schan->chan_lock);
+	ops->halt_channel(schan);
+	spin_unlock_irq(&schan->chan_lock);
+
+	/* Now no new interrupts will occur */
+
+	/* Prepared and not submitted descriptors can still be on the queue */
+	if (!list_empty(&schan->ld_queue))
+		shdma_chan_ld_cleanup(schan, true);
+
+	if (schan->slave_id >= 0) {
+		/* The caller is holding dma_list_mutex */
+		clear_bit(schan->slave_id, shdma_slave_used);
+		chan->private = NULL;
+	}
+
+	schan->real_slave_id = 0;
+
+	spin_lock_irq(&schan->chan_lock);
+
+	list_splice_init(&schan->ld_free, &list);
+	schan->desc_num = 0;
+
+	spin_unlock_irq(&schan->chan_lock);
+
+	kfree(schan->desc);
+}
+
+/**
+ * shdma_add_desc - get, set up and return one transfer descriptor
+ * @schan:	DMA channel
+ * @flags:	DMA transfer flags
+ * @dst:	destination DMA address, incremented when direction equals
+ *		DMA_DEV_TO_MEM or DMA_MEM_TO_MEM
+ * @src:	source DMA address, incremented when direction equals
+ *		DMA_MEM_TO_DEV or DMA_MEM_TO_MEM
+ * @len:	DMA transfer length
+ * @first:	if *@first is NULL, it is set to the new descriptor, whose cookie
+ *		is then set to -EBUSY
+ * @direction:	needed for slave DMA to decide which address to keep constant,
+ *		equals DMA_MEM_TO_MEM for MEMCPY
+ * Returns the new descriptor on success or NULL on error
+ * Locks: called with the channel lock (chan_lock) held
+ */
+static struct shdma_desc *shdma_add_desc(struct shdma_chan *schan,
+	unsigned long flags, dma_addr_t *dst, dma_addr_t *src, size_t *len,
+	struct shdma_desc **first, enum dma_transfer_direction direction)
+{
+	struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
+	const struct shdma_ops *ops = sdev->ops;
+	struct shdma_desc *new;
+	size_t copy_size = *len;
+
+	if (!copy_size)
+		return NULL;
+
+	/* Allocate the link descriptor from the free list */
+	new = shdma_get_desc(schan);
+	if (!new) {
+		dev_err(schan->dev, "No free link descriptor available\n");
+		return NULL;
+	}
+
+	ops->desc_setup(schan, new, *src, *dst, &copy_size);
+
+	if (!*first) {
+		/* First desc */
+		new->async_tx.cookie = -EBUSY;
+		*first = new;
+	} else {
+		/* Other desc - invisible to the user */
+		new->async_tx.cookie = -EINVAL;
+	}
+
+	dev_dbg(schan->dev,
+		"chaining (%zu/%zu)@%pad -> %pad with %p, cookie %d\n",
+		copy_size, *len, src, dst, &new->async_tx,
+		new->async_tx.cookie);
+
+	new->mark = DESC_PREPARED;
+	new->async_tx.flags = flags;
+	new->direction = direction;
+	new->partial = 0;
+
+	*len -= copy_size;
+	if (direction == DMA_MEM_TO_MEM || direction == DMA_MEM_TO_DEV)
+		*src += copy_size;
+	if (direction == DMA_MEM_TO_MEM || direction == DMA_DEV_TO_MEM)
+		*dst += copy_size;
+
+	return new;
+}
+
+/*
+ * shdma_prep_sg - prepare transfer descriptors from an SG list
+ *
+ * Common routine for public (MEMCPY) and slave DMA. The MEMCPY case is also
+ * converted to scatter-gather to guarantee consistent locking and correct
+ * list manipulation. For slave DMA, direction carries the usual meaning, and,
+ * logically, the SG list is RAM and the addr variable contains the slave
+ * address, e.g., the FIFO I/O register. For MEMCPY, direction equals
+ * DMA_MEM_TO_MEM and the SG list contains only one element and points at the
+ * source buffer.
+ */
+static struct dma_async_tx_descriptor *shdma_prep_sg(struct shdma_chan *schan,
+	struct scatterlist *sgl, unsigned int sg_len, dma_addr_t *addr,
+	enum dma_transfer_direction direction, unsigned long flags, bool cyclic)
+{
+	struct scatterlist *sg;
+	struct shdma_desc *first = NULL, *new = NULL /* compiler... */;
+	LIST_HEAD(tx_list);
+	int chunks = 0;
+	unsigned long irq_flags;
+	int i;
+
+	for_each_sg(sgl, sg, sg_len, i)
+		chunks += DIV_ROUND_UP(sg_dma_len(sg), schan->max_xfer_len);
+
+	/* Have to lock the whole loop to protect against concurrent release */
+	spin_lock_irqsave(&schan->chan_lock, irq_flags);
+
+	/*
+	 * Chaining:
+	 * first descriptor is what the user is dealing with in all API calls, its
+	 *	cookie is at first set to -EBUSY, at tx-submit to a positive
+	 *	number
+	 * if more than one chunk is needed further chunks have cookie = -EINVAL
+	 * the last chunk, if not equal to the first, has cookie = -ENOSPC
+	 * all chunks are linked onto the tx_list head with their .node heads
+	 *	only during this function, then they are immediately spliced
+	 *	back onto the free list in form of a chain
+	 */
+	for_each_sg(sgl, sg, sg_len, i) {
+		dma_addr_t sg_addr = sg_dma_address(sg);
+		size_t len = sg_dma_len(sg);
+
+		if (!len)
+			goto err_get_desc;
+
+		do {
+			dev_dbg(schan->dev, "Add SG #%d@%p[%zu], dma %pad\n",
+				i, sg, len, &sg_addr);
+
+			if (direction == DMA_DEV_TO_MEM)
+				new = shdma_add_desc(schan, flags,
+						&sg_addr, addr, &len, &first,
+						direction);
+			else
+				new = shdma_add_desc(schan, flags,
+						addr, &sg_addr, &len, &first,
+						direction);
+			if (!new)
+				goto err_get_desc;
+
+			new->cyclic = cyclic;
+			if (cyclic)
+				new->chunks = 1;
+			else
+				new->chunks = chunks--;
+			list_add_tail(&new->node, &tx_list);
+		} while (len);
+	}
+
+	if (new != first)
+		new->async_tx.cookie = -ENOSPC;
+
+	/* Put them back on the free list, so they don't get lost */
+	list_splice_tail(&tx_list, &schan->ld_free);
+
+	spin_unlock_irqrestore(&schan->chan_lock, irq_flags);
+
+	return &first->async_tx;
+
+err_get_desc:
+	list_for_each_entry(new, &tx_list, node)
+		new->mark = DESC_IDLE;
+	list_splice(&tx_list, &schan->ld_free);
+
+	spin_unlock_irqrestore(&schan->chan_lock, irq_flags);
+
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *shdma_prep_memcpy(
+	struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src,
+	size_t len, unsigned long flags)
+{
+	struct shdma_chan *schan = to_shdma_chan(chan);
+	struct scatterlist sg;
+
+	if (!chan || !len)
+		return NULL;
+
+	BUG_ON(!schan->desc_num);
+
+	sg_init_table(&sg, 1);
+	sg_set_page(&sg, pfn_to_page(PFN_DOWN(dma_src)), len,
+		    offset_in_page(dma_src));
+	sg_dma_address(&sg) = dma_src;
+	sg_dma_len(&sg) = len;
+
+	return shdma_prep_sg(schan, &sg, 1, &dma_dest, DMA_MEM_TO_MEM,
+			     flags, false);
+}
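+
+/*
+ * Editorial sketch (not part of this patch): minimal MEMCPY use of the above
+ * through the generic dmaengine client API, assuming dst/src are DMA-mapped
+ * addresses and chan is a DMA_MEMCPY-capable channel obtained elsewhere.
+ *
+ *	struct dma_async_tx_descriptor *tx;
+ *	dma_cookie_t cookie;
+ *
+ *	tx = dmaengine_prep_dma_memcpy(chan, dst, src, len,
+ *				       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ *	if (!tx)
+ *		return -EIO;
+ *	cookie = dmaengine_submit(tx);
+ *	dma_async_issue_pending(chan);
+ *	dma_sync_wait(chan, cookie);
+ */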
+
+static struct dma_async_tx_descriptor *shdma_prep_slave_sg(
+	struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
+	enum dma_transfer_direction direction, unsigned long flags, void *context)
+{
+	struct shdma_chan *schan = to_shdma_chan(chan);
+	struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
+	const struct shdma_ops *ops = sdev->ops;
+	int slave_id = schan->slave_id;
+	dma_addr_t slave_addr;
+
+	if (!chan)
+		return NULL;
+
+	BUG_ON(!schan->desc_num);
+
+	/* Someone calling slave DMA on a generic channel? */
+	if (slave_id < 0 || !sg_len) {
+		dev_warn(schan->dev, "%s: bad parameter: len=%d, id=%d\n",
+			 __func__, sg_len, slave_id);
+		return NULL;
+	}
+
+	slave_addr = ops->slave_addr(schan);
+
+	return shdma_prep_sg(schan, sgl, sg_len, &slave_addr,
+			     direction, flags, false);
+}
+
+#define SHDMA_MAX_SG_LEN 32
+
+static struct dma_async_tx_descriptor *shdma_prep_dma_cyclic(
+	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+	size_t period_len, enum dma_transfer_direction direction,
+	unsigned long flags)
+{
+	struct shdma_chan *schan = to_shdma_chan(chan);
+	struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
+	struct dma_async_tx_descriptor *desc;
+	const struct shdma_ops *ops = sdev->ops;
+	unsigned int sg_len = buf_len / period_len;
+	int slave_id = schan->slave_id;
+	dma_addr_t slave_addr;
+	struct scatterlist *sgl;
+	int i;
+
+	if (!chan)
+		return NULL;
+
+	BUG_ON(!schan->desc_num);
+
+	if (sg_len > SHDMA_MAX_SG_LEN) {
+		dev_err(schan->dev, "sg length %d exceds limit %d",
+				sg_len, SHDMA_MAX_SG_LEN);
+		return NULL;
+	}
+
+	/* Someone calling slave DMA on a generic channel? */
+	if (slave_id < 0 || (buf_len < period_len)) {
+		dev_warn(schan->dev,
+			"%s: bad parameter: buf_len=%zu, period_len=%zu, id=%d\n",
+			__func__, buf_len, period_len, slave_id);
+		return NULL;
+	}
+
+	slave_addr = ops->slave_addr(schan);
+
+	/*
+	 * Allocate the sg list dynamically as it would consume too much stack
+	 * space.
+	 */
+	sgl = kcalloc(sg_len, sizeof(*sgl), GFP_KERNEL);
+	if (!sgl)
+		return NULL;
+
+	sg_init_table(sgl, sg_len);
+
+	for (i = 0; i < sg_len; i++) {
+		dma_addr_t src = buf_addr + (period_len * i);
+
+		sg_set_page(&sgl[i], pfn_to_page(PFN_DOWN(src)), period_len,
+			    offset_in_page(src));
+		sg_dma_address(&sgl[i]) = src;
+		sg_dma_len(&sgl[i]) = period_len;
+	}
+
+	desc = shdma_prep_sg(schan, sgl, sg_len, &slave_addr,
+			     direction, flags, true);
+
+	kfree(sgl);
+	return desc;
+}
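+
+/*
+ * Editorial sketch (not part of this patch): how a client (e.g. audio) might
+ * drive the cyclic path above. The buffer and period sizes are arbitrary
+ * examples; buf must already be a DMA-mapped address, and period_done /
+ * substream are hypothetical names.
+ *
+ *	struct dma_async_tx_descriptor *tx;
+ *
+ *	tx = dmaengine_prep_dma_cyclic(chan, buf, 8 * PAGE_SIZE, PAGE_SIZE,
+ *				       DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
+ *	if (tx) {
+ *		tx->callback = period_done;
+ *		tx->callback_param = substream;
+ *		dmaengine_submit(tx);
+ *		dma_async_issue_pending(chan);
+ *	}
+ */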
+
+static int shdma_terminate_all(struct dma_chan *chan)
+{
+	struct shdma_chan *schan = to_shdma_chan(chan);
+	struct shdma_dev *sdev = to_shdma_dev(chan->device);
+	const struct shdma_ops *ops = sdev->ops;
+	unsigned long flags;
+
+	spin_lock_irqsave(&schan->chan_lock, flags);
+	ops->halt_channel(schan);
+
+	if (ops->get_partial && !list_empty(&schan->ld_queue)) {
+		/* Record partial transfer */
+		struct shdma_desc *desc = list_first_entry(&schan->ld_queue,
+							   struct shdma_desc, node);
+		desc->partial = ops->get_partial(schan, desc);
+	}
+
+	spin_unlock_irqrestore(&schan->chan_lock, flags);
+
+	shdma_chan_ld_cleanup(schan, true);
+
+	return 0;
+}
+
+static int shdma_config(struct dma_chan *chan,
+			struct dma_slave_config *config)
+{
+	struct shdma_chan *schan = to_shdma_chan(chan);
+
+	/*
+	 * So far only .slave_id is used, but the slave drivers are
+	 * encouraged to also set a transfer direction and an address.
+	 */
+	if (!config)
+		return -EINVAL;
+
+	/*
+	 * overriding the slave_id through dma_slave_config is deprecated,
+	 * but possibly some out-of-tree drivers still do it.
+	 */
+	if (WARN_ON_ONCE(config->slave_id &&
+			 config->slave_id != schan->real_slave_id))
+		schan->real_slave_id = config->slave_id;
+
+	/*
+	 * We could lock this, but you shouldn't be configuring the
+	 * channel while using it...
+	 */
+	return shdma_setup_slave(schan,
+				 config->direction == DMA_DEV_TO_MEM ?
+				 config->src_addr : config->dst_addr);
+}
+
+static void shdma_issue_pending(struct dma_chan *chan)
+{
+	struct shdma_chan *schan = to_shdma_chan(chan);
+
+	spin_lock_irq(&schan->chan_lock);
+	if (schan->pm_state == SHDMA_PM_ESTABLISHED)
+		shdma_chan_xfer_ld_queue(schan);
+	else
+		schan->pm_state = SHDMA_PM_PENDING;
+	spin_unlock_irq(&schan->chan_lock);
+}
+
+static enum dma_status shdma_tx_status(struct dma_chan *chan,
+					dma_cookie_t cookie,
+					struct dma_tx_state *txstate)
+{
+	struct shdma_chan *schan = to_shdma_chan(chan);
+	enum dma_status status;
+	unsigned long flags;
+
+	shdma_chan_ld_cleanup(schan, false);
+
+	spin_lock_irqsave(&schan->chan_lock, flags);
+
+	status = dma_cookie_status(chan, cookie, txstate);
+
+	/*
+	 * If we don't find the cookie on the queue, it has been aborted and we
+	 * have to report an error.
+	 */
+	if (status != DMA_COMPLETE) {
+		struct shdma_desc *sdesc;
+		status = DMA_ERROR;
+		list_for_each_entry(sdesc, &schan->ld_queue, node)
+			if (sdesc->cookie == cookie) {
+				status = DMA_IN_PROGRESS;
+				break;
+			}
+	}
+
+	spin_unlock_irqrestore(&schan->chan_lock, flags);
+
+	return status;
+}
+
+/* Called from error IRQ or NMI */
+bool shdma_reset(struct shdma_dev *sdev)
+{
+	const struct shdma_ops *ops = sdev->ops;
+	struct shdma_chan *schan;
+	unsigned int handled = 0;
+	int i;
+
+	/* Reset all channels */
+	shdma_for_each_chan(schan, sdev, i) {
+		struct shdma_desc *sdesc;
+		LIST_HEAD(dl);
+
+		if (!schan)
+			continue;
+
+		spin_lock(&schan->chan_lock);
+
+		/* Stop the channel */
+		ops->halt_channel(schan);
+
+		list_splice_init(&schan->ld_queue, &dl);
+
+		if (!list_empty(&dl)) {
+			dev_dbg(schan->dev, "Bring down channel %d\n", schan->id);
+			pm_runtime_put(schan->dev);
+		}
+		schan->pm_state = SHDMA_PM_ESTABLISHED;
+
+		spin_unlock(&schan->chan_lock);
+
+		/* Complete all descriptors */
+		list_for_each_entry(sdesc, &dl, node) {
+			struct dma_async_tx_descriptor *tx = &sdesc->async_tx;
+
+			sdesc->mark = DESC_IDLE;
+			dmaengine_desc_get_callback_invoke(tx, NULL);
+		}
+
+		spin_lock(&schan->chan_lock);
+		list_splice(&dl, &schan->ld_free);
+		spin_unlock(&schan->chan_lock);
+
+		handled++;
+	}
+
+	return !!handled;
+}
+EXPORT_SYMBOL(shdma_reset);
+
+static irqreturn_t chan_irq(int irq, void *dev)
+{
+	struct shdma_chan *schan = dev;
+	const struct shdma_ops *ops =
+		to_shdma_dev(schan->dma_chan.device)->ops;
+	irqreturn_t ret;
+
+	spin_lock(&schan->chan_lock);
+
+	ret = ops->chan_irq(schan, irq) ? IRQ_WAKE_THREAD : IRQ_NONE;
+
+	spin_unlock(&schan->chan_lock);
+
+	return ret;
+}
+
+static irqreturn_t chan_irqt(int irq, void *dev)
+{
+	struct shdma_chan *schan = dev;
+	const struct shdma_ops *ops =
+		to_shdma_dev(schan->dma_chan.device)->ops;
+	struct shdma_desc *sdesc;
+
+	spin_lock_irq(&schan->chan_lock);
+	list_for_each_entry(sdesc, &schan->ld_queue, node) {
+		if (sdesc->mark == DESC_SUBMITTED &&
+		    ops->desc_completed(schan, sdesc)) {
+			dev_dbg(schan->dev, "done #%d@%p\n",
+				sdesc->async_tx.cookie, &sdesc->async_tx);
+			sdesc->mark = DESC_COMPLETED;
+			break;
+		}
+	}
+	/* Next desc */
+	shdma_chan_xfer_ld_queue(schan);
+	spin_unlock_irq(&schan->chan_lock);
+
+	shdma_chan_ld_cleanup(schan, false);
+
+	return IRQ_HANDLED;
+}
+
+int shdma_request_irq(struct shdma_chan *schan, int irq,
+			   unsigned long flags, const char *name)
+{
+	int ret = devm_request_threaded_irq(schan->dev, irq, chan_irq,
+					    chan_irqt, flags, name, schan);
+
+	schan->irq = ret < 0 ? ret : irq;
+
+	return ret;
+}
+EXPORT_SYMBOL(shdma_request_irq);
+
+void shdma_chan_probe(struct shdma_dev *sdev,
+			   struct shdma_chan *schan, int id)
+{
+	schan->pm_state = SHDMA_PM_ESTABLISHED;
+
+	/* reference struct dma_device */
+	schan->dma_chan.device = &sdev->dma_dev;
+	dma_cookie_init(&schan->dma_chan);
+
+	schan->dev = sdev->dma_dev.dev;
+	schan->id = id;
+
+	if (!schan->max_xfer_len)
+		schan->max_xfer_len = PAGE_SIZE;
+
+	spin_lock_init(&schan->chan_lock);
+
+	/* Init descriptor management lists */
+	INIT_LIST_HEAD(&schan->ld_queue);
+	INIT_LIST_HEAD(&schan->ld_free);
+
+	/* Add the channel to DMA device channel list */
+	list_add_tail(&schan->dma_chan.device_node,
+			&sdev->dma_dev.channels);
+	sdev->schan[id] = schan;
+}
+EXPORT_SYMBOL(shdma_chan_probe);
+
+void shdma_chan_remove(struct shdma_chan *schan)
+{
+	list_del(&schan->dma_chan.device_node);
+}
+EXPORT_SYMBOL(shdma_chan_remove);
+
+int shdma_init(struct device *dev, struct shdma_dev *sdev,
+		    int chan_num)
+{
+	struct dma_device *dma_dev = &sdev->dma_dev;
+
+	/*
+	 * Require all callbacks for now; they can trivially be made optional
+	 * later as required.
+	 */
+	if (!sdev->ops ||
+	    !sdev->desc_size ||
+	    !sdev->ops->embedded_desc ||
+	    !sdev->ops->start_xfer ||
+	    !sdev->ops->setup_xfer ||
+	    !sdev->ops->set_slave ||
+	    !sdev->ops->desc_setup ||
+	    !sdev->ops->slave_addr ||
+	    !sdev->ops->channel_busy ||
+	    !sdev->ops->halt_channel ||
+	    !sdev->ops->desc_completed)
+		return -EINVAL;
+
+	sdev->schan = kcalloc(chan_num, sizeof(*sdev->schan), GFP_KERNEL);
+	if (!sdev->schan)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&dma_dev->channels);
+
+	/* Common and MEMCPY operations */
+	dma_dev->device_alloc_chan_resources
+		= shdma_alloc_chan_resources;
+	dma_dev->device_free_chan_resources = shdma_free_chan_resources;
+	dma_dev->device_prep_dma_memcpy = shdma_prep_memcpy;
+	dma_dev->device_tx_status = shdma_tx_status;
+	dma_dev->device_issue_pending = shdma_issue_pending;
+
+	/* Fields compulsory for DMA_SLAVE */
+	dma_dev->device_prep_slave_sg = shdma_prep_slave_sg;
+	dma_dev->device_prep_dma_cyclic = shdma_prep_dma_cyclic;
+	dma_dev->device_config = shdma_config;
+	dma_dev->device_terminate_all = shdma_terminate_all;
+
+	dma_dev->dev = dev;
+
+	return 0;
+}
+EXPORT_SYMBOL(shdma_init);
+
+void shdma_cleanup(struct shdma_dev *sdev)
+{
+	kfree(sdev->schan);
+}
+EXPORT_SYMBOL(shdma_cleanup);
+
+static int __init shdma_enter(void)
+{
+	shdma_slave_used = kcalloc(DIV_ROUND_UP(slave_num, BITS_PER_LONG),
+				   sizeof(long),
+				   GFP_KERNEL);
+	if (!shdma_slave_used)
+		return -ENOMEM;
+	return 0;
+}
+module_init(shdma_enter);
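+
+/*
+ * Editorial note: with the default slave_num of 256 this allocates
+ * DIV_ROUND_UP(256, BITS_PER_LONG) longs for the shdma_slave_used bitmap,
+ * i.e. 32 bytes on both 32-bit and 64-bit kernels.
+ */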
+
+static void __exit shdma_exit(void)
+{
+	kfree(shdma_slave_used);
+}
+module_exit(shdma_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("SH-DMA driver base library");
+MODULE_AUTHOR("Guennadi Liakhovetski <g.liakhovetski@gmx.de>");
diff --git a/drivers/dma/sh/shdma-of.c b/drivers/dma/sh/shdma-of.c
new file mode 100644
index 0000000..f999f9b
--- /dev/null
+++ b/drivers/dma/sh/shdma-of.c
@@ -0,0 +1,79 @@
+/*
+ * SHDMA Device Tree glue
+ *
+ * Copyright (C) 2013 Renesas Electronics Inc.
+ * Author: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/shdma-base.h>
+
+#define to_shdma_chan(c) container_of(c, struct shdma_chan, dma_chan)
+
+static struct dma_chan *shdma_of_xlate(struct of_phandle_args *dma_spec,
+				       struct of_dma *ofdma)
+{
+	u32 id = dma_spec->args[0];
+	dma_cap_mask_t mask;
+	struct dma_chan *chan;
+
+	if (dma_spec->args_count != 1)
+		return NULL;
+
+	dma_cap_zero(mask);
+	/* Only slave DMA channels can be allocated via DT */
+	dma_cap_set(DMA_SLAVE, mask);
+
+	chan = dma_request_channel(mask, shdma_chan_filter,
+				   (void *)(uintptr_t)id);
+	if (chan)
+		to_shdma_chan(chan)->hw_req = id;
+
+	return chan;
+}
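+
+/*
+ * Editorial note: the single DT cell is the MID/RID value (e.g. 0xd1 for
+ * MMC0 Tx in the r8a73a4 slave table later in this series). It is stashed in
+ * ->hw_req here and later handed to the glue driver's .set_slave() callback
+ * via shdma_setup_slave(), which matches it against the slave configuration
+ * (sh_dmae_slave_config.mid_rid in the case of shdmac).
+ */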
+
+static int shdma_of_probe(struct platform_device *pdev)
+{
+	const struct of_dev_auxdata *lookup = dev_get_platdata(&pdev->dev);
+	int ret;
+
+	ret = of_dma_controller_register(pdev->dev.of_node,
+					 shdma_of_xlate, pdev);
+	if (ret < 0)
+		return ret;
+
+	ret = of_platform_populate(pdev->dev.of_node, NULL, lookup, &pdev->dev);
+	if (ret < 0)
+		of_dma_controller_free(pdev->dev.of_node);
+
+	return ret;
+}
+
+static const struct of_device_id shdma_of_match[] = {
+	{ .compatible = "renesas,shdma-mux", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, shdma_of_match);
+
+static struct platform_driver shdma_of = {
+	.driver		= {
+		.name	= "shdma-of",
+		.of_match_table = shdma_of_match,
+	},
+	.probe		= shdma_of_probe,
+};
+
+module_platform_driver(shdma_of);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("SH-DMA driver DT glue");
+MODULE_AUTHOR("Guennadi Liakhovetski <g.liakhovetski@gmx.de>");
diff --git a/drivers/dma/sh/shdma-r8a73a4.c b/drivers/dma/sh/shdma-r8a73a4.c
new file mode 100644
index 0000000..96ea382
--- /dev/null
+++ b/drivers/dma/sh/shdma-r8a73a4.c
@@ -0,0 +1,77 @@
+/*
+ * Renesas SuperH DMA Engine support for r8a73a4 (APE6) SoCs
+ *
+ * Copyright (C) 2013 Renesas Electronics, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify it under the
+ * terms of version 2 the GNU General Public License as published by the Free
+ * Software Foundation.
+ */
+#include <linux/sh_dma.h>
+
+#include "shdma-arm.h"
+
+static const unsigned int dma_ts_shift[] = SH_DMAE_TS_SHIFT;
+
+static const struct sh_dmae_slave_config dma_slaves[] = {
+	{
+		.chcr		= CHCR_TX(XMIT_SZ_32BIT),
+		.mid_rid	= 0xd1,		/* MMC0 Tx */
+	}, {
+		.chcr		= CHCR_RX(XMIT_SZ_32BIT),
+		.mid_rid	= 0xd2,		/* MMC0 Rx */
+	}, {
+		.chcr		= CHCR_TX(XMIT_SZ_32BIT),
+		.mid_rid	= 0xe1,		/* MMC1 Tx */
+	}, {
+		.chcr		= CHCR_RX(XMIT_SZ_32BIT),
+		.mid_rid	= 0xe2,		/* MMC1 Rx */
+	},
+};
+
+#define DMAE_CHANNEL(a, b)				\
+	{						\
+		.offset         = (a) - 0x20,		\
+		.dmars          = (a) - 0x20 + 0x40,	\
+		.chclr_bit	= (b),			\
+		.chclr_offset	= 0x80 - 0x20,		\
+	}
+
+static const struct sh_dmae_channel dma_channels[] = {
+	DMAE_CHANNEL(0x8000, 0),
+	DMAE_CHANNEL(0x8080, 1),
+	DMAE_CHANNEL(0x8100, 2),
+	DMAE_CHANNEL(0x8180, 3),
+	DMAE_CHANNEL(0x8200, 4),
+	DMAE_CHANNEL(0x8280, 5),
+	DMAE_CHANNEL(0x8300, 6),
+	DMAE_CHANNEL(0x8380, 7),
+	DMAE_CHANNEL(0x8400, 8),
+	DMAE_CHANNEL(0x8480, 9),
+	DMAE_CHANNEL(0x8500, 10),
+	DMAE_CHANNEL(0x8580, 11),
+	DMAE_CHANNEL(0x8600, 12),
+	DMAE_CHANNEL(0x8680, 13),
+	DMAE_CHANNEL(0x8700, 14),
+	DMAE_CHANNEL(0x8780, 15),
+	DMAE_CHANNEL(0x8800, 16),
+	DMAE_CHANNEL(0x8880, 17),
+	DMAE_CHANNEL(0x8900, 18),
+	DMAE_CHANNEL(0x8980, 19),
+};
+
+const struct sh_dmae_pdata r8a73a4_dma_pdata = {
+	.slave		= dma_slaves,
+	.slave_num	= ARRAY_SIZE(dma_slaves),
+	.channel	= dma_channels,
+	.channel_num	= ARRAY_SIZE(dma_channels),
+	.ts_low_shift	= TS_LOW_SHIFT,
+	.ts_low_mask	= TS_LOW_BIT << TS_LOW_SHIFT,
+	.ts_high_shift	= TS_HI_SHIFT,
+	.ts_high_mask	= TS_HI_BIT << TS_HI_SHIFT,
+	.ts_shift	= dma_ts_shift,
+	.ts_shift_num	= ARRAY_SIZE(dma_ts_shift),
+	.dmaor_init     = DMAOR_DME,
+	.chclr_present	= 1,
+	.chclr_bitwise	= 1,
+};
diff --git a/drivers/dma/sh/shdma.h b/drivers/dma/sh/shdma.h
new file mode 100644
index 0000000..2c0a969
--- /dev/null
+++ b/drivers/dma/sh/shdma.h
@@ -0,0 +1,72 @@
+/*
+ * Renesas SuperH DMA Engine support
+ *
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+#ifndef __DMA_SHDMA_H
+#define __DMA_SHDMA_H
+
+#include <linux/sh_dma.h>
+#include <linux/shdma-base.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+
+#define SH_DMAE_MAX_CHANNELS 20
+#define SH_DMAE_TCR_MAX 0x00FFFFFF	/* 16MB */
+
+struct device;
+
+struct sh_dmae_chan {
+	struct shdma_chan shdma_chan;
+	const struct sh_dmae_slave_config *config; /* Slave DMA configuration */
+	int xmit_shift;			/* log_2(bytes_per_xfer) */
+	void __iomem *base;
+	char dev_id[16];		/* unique name per DMAC of channel */
+	int pm_error;
+	dma_addr_t slave_addr;
+};
+
+struct sh_dmae_device {
+	struct shdma_dev shdma_dev;
+	struct sh_dmae_chan *chan[SH_DMAE_MAX_CHANNELS];
+	const struct sh_dmae_pdata *pdata;
+	struct list_head node;
+	void __iomem *chan_reg;
+	void __iomem *dmars;
+	unsigned int chcr_offset;
+	u32 chcr_ie_bit;
+};
+
+struct sh_dmae_regs {
+	u32 sar; /* SAR / source address */
+	u32 dar; /* DAR / destination address */
+	u32 tcr; /* TCR / transfer count */
+};
+
+struct sh_dmae_desc {
+	struct sh_dmae_regs hw;
+	struct shdma_desc shdma_desc;
+};
+
+#define to_sh_chan(chan) container_of(chan, struct sh_dmae_chan, shdma_chan)
+#define to_sh_desc(lh) container_of(lh, struct sh_desc, node)
+#define tx_to_sh_desc(tx) container_of(tx, struct sh_desc, async_tx)
+#define to_sh_dev(chan) container_of(chan->shdma_chan.dma_chan.device,\
+				     struct sh_dmae_device, shdma_dev.dma_dev)
+
+#ifdef CONFIG_SH_DMAE_R8A73A4
+extern const struct sh_dmae_pdata r8a73a4_dma_pdata;
+#define r8a73a4_shdma_devid (&r8a73a4_dma_pdata)
+#else
+#define r8a73a4_shdma_devid NULL
+#endif
+
+#endif	/* __DMA_SHDMA_H */
diff --git a/drivers/dma/sh/shdmac.c b/drivers/dma/sh/shdmac.c
new file mode 100644
index 0000000..04a74e0
--- /dev/null
+++ b/drivers/dma/sh/shdmac.c
@@ -0,0 +1,949 @@
+/*
+ * Renesas SuperH DMA Engine support
+ *
+ * based on drivers/dma/fsldma.c
+ *
+ * Copyright (C) 2011-2012 Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * - The SuperH DMAC does not have a hardware DMA chain mode.
+ * - The maximum DMA size is 16MB.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kdebug.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/rculist.h>
+#include <linux/sh_dma.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "../dmaengine.h"
+#include "shdma.h"
+
+/* DMA registers */
+#define SAR	0x00	/* Source Address Register */
+#define DAR	0x04	/* Destination Address Register */
+#define TCR	0x08	/* Transfer Count Register */
+#define CHCR	0x0C	/* Channel Control Register */
+#define DMAOR	0x40	/* DMA Operation Register */
+
+#define TEND	0x18 /* USB-DMAC */
+
+#define SH_DMAE_DRV_NAME "sh-dma-engine"
+
+/* Default MEMCPY transfer size = 2^2 = 4 bytes */
+#define LOG2_DEFAULT_XFER_SIZE	2
+#define SH_DMA_SLAVE_NUMBER 256
+#define SH_DMA_TCR_MAX (16 * 1024 * 1024 - 1)
+
+/*
+ * Used for write-side mutual exclusion for the global device list,
+ * read-side synchronization by way of RCU, and per-controller data.
+ */
+static DEFINE_SPINLOCK(sh_dmae_lock);
+static LIST_HEAD(sh_dmae_devices);
+
+/*
+ * Different DMAC implementations provide different ways to clear DMA channels:
+ * (1) none - no CHCLR registers are available
+ * (2) one CHCLR register per channel - 0 has to be written to it to clear
+ *     channel buffers
+ * (3) one CHCLR per several channels - 1 has to be written to the bit
+ *     corresponding to the specific channel to reset it
+ */
+static void channel_clear(struct sh_dmae_chan *sh_dc)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
+	const struct sh_dmae_channel *chan_pdata = shdev->pdata->channel +
+		sh_dc->shdma_chan.id;
+	u32 val = shdev->pdata->chclr_bitwise ? 1 << chan_pdata->chclr_bit : 0;
+
+	__raw_writel(val, shdev->chan_reg + chan_pdata->chclr_offset);
+}
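+
+/*
+ * Editorial note: with .chclr_bitwise = 1 (case (3) above, e.g. the r8a73a4
+ * pdata in this series) the value written is 1 << chclr_bit into a CHCLR
+ * register shared by several channels; with .chclr_bitwise = 0 (case (2)) a
+ * plain 0 is written to the per-channel CHCLR register.
+ */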
+
+static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg)
+{
+	__raw_writel(data, sh_dc->base + reg);
+}
+
+static u32 sh_dmae_readl(struct sh_dmae_chan *sh_dc, u32 reg)
+{
+	return __raw_readl(sh_dc->base + reg);
+}
+
+static u16 dmaor_read(struct sh_dmae_device *shdev)
+{
+	void __iomem *addr = shdev->chan_reg + DMAOR;
+
+	if (shdev->pdata->dmaor_is_32bit)
+		return __raw_readl(addr);
+	else
+		return __raw_readw(addr);
+}
+
+static void dmaor_write(struct sh_dmae_device *shdev, u16 data)
+{
+	void __iomem *addr = shdev->chan_reg + DMAOR;
+
+	if (shdev->pdata->dmaor_is_32bit)
+		__raw_writel(data, addr);
+	else
+		__raw_writew(data, addr);
+}
+
+static void chcr_write(struct sh_dmae_chan *sh_dc, u32 data)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
+
+	__raw_writel(data, sh_dc->base + shdev->chcr_offset);
+}
+
+static u32 chcr_read(struct sh_dmae_chan *sh_dc)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_dc);
+
+	return __raw_readl(sh_dc->base + shdev->chcr_offset);
+}
+
+/*
+ * Reset DMA controller
+ *
+ * SH7780 has two DMAOR registers
+ */
+static void sh_dmae_ctl_stop(struct sh_dmae_device *shdev)
+{
+	unsigned short dmaor;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sh_dmae_lock, flags);
+
+	dmaor = dmaor_read(shdev);
+	dmaor_write(shdev, dmaor & ~(DMAOR_NMIF | DMAOR_AE | DMAOR_DME));
+
+	spin_unlock_irqrestore(&sh_dmae_lock, flags);
+}
+
+static int sh_dmae_rst(struct sh_dmae_device *shdev)
+{
+	unsigned short dmaor;
+	unsigned long flags;
+
+	spin_lock_irqsave(&sh_dmae_lock, flags);
+
+	dmaor = dmaor_read(shdev) & ~(DMAOR_NMIF | DMAOR_AE | DMAOR_DME);
+
+	if (shdev->pdata->chclr_present) {
+		int i;
+		for (i = 0; i < shdev->pdata->channel_num; i++) {
+			struct sh_dmae_chan *sh_chan = shdev->chan[i];
+			if (sh_chan)
+				channel_clear(sh_chan);
+		}
+	}
+
+	dmaor_write(shdev, dmaor | shdev->pdata->dmaor_init);
+
+	dmaor = dmaor_read(shdev);
+
+	spin_unlock_irqrestore(&sh_dmae_lock, flags);
+
+	if (dmaor & (DMAOR_AE | DMAOR_NMIF)) {
+		dev_warn(shdev->shdma_dev.dma_dev.dev, "Can't initialize DMAOR.\n");
+		return -EIO;
+	}
+	if (shdev->pdata->dmaor_init & ~dmaor)
+		dev_warn(shdev->shdma_dev.dma_dev.dev,
+			 "DMAOR=0x%x hasn't latched the initial value 0x%x.\n",
+			 dmaor, shdev->pdata->dmaor_init);
+	return 0;
+}
+
+static bool dmae_is_busy(struct sh_dmae_chan *sh_chan)
+{
+	u32 chcr = chcr_read(sh_chan);
+
+	if ((chcr & (CHCR_DE | CHCR_TE)) == CHCR_DE)
+		return true; /* working */
+
+	return false; /* waiting */
+}
+
+static unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan, u32 chcr)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+	const struct sh_dmae_pdata *pdata = shdev->pdata;
+	int cnt = ((chcr & pdata->ts_low_mask) >> pdata->ts_low_shift) |
+		((chcr & pdata->ts_high_mask) >> pdata->ts_high_shift);
+
+	if (cnt >= pdata->ts_shift_num)
+		cnt = 0;
+
+	return pdata->ts_shift[cnt];
+}
+
+static u32 log2size_to_chcr(struct sh_dmae_chan *sh_chan, int l2size)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+	const struct sh_dmae_pdata *pdata = shdev->pdata;
+	int i;
+
+	for (i = 0; i < pdata->ts_shift_num; i++)
+		if (pdata->ts_shift[i] == l2size)
+			break;
+
+	if (i == pdata->ts_shift_num)
+		i = 0;
+
+	return ((i << pdata->ts_low_shift) & pdata->ts_low_mask) |
+		((i << pdata->ts_high_shift) & pdata->ts_high_mask);
+}
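+
+/*
+ * Editorial worked example, using the r8a73a4 TS layout from this series
+ * (ts_low_mask = 0x18, ts_low_shift = 3, ts_high_mask = 0x300000,
+ * ts_high_shift = 18):
+ *
+ *	log2size_to_chcr(sh_chan, 2)   -> index 2 (XMIT_SZ_32BIT) -> TS = 0x10
+ *	calc_xmit_shift(sh_chan, 0x10) -> index 2 -> ts_shift[2] == 2
+ *
+ * so xmit_shift is 2 and TCR is programmed in units of 1 << 2 = 4 bytes.
+ */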
+
+static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs *hw)
+{
+	sh_dmae_writel(sh_chan, hw->sar, SAR);
+	sh_dmae_writel(sh_chan, hw->dar, DAR);
+	sh_dmae_writel(sh_chan, hw->tcr >> sh_chan->xmit_shift, TCR);
+}
+
+static void dmae_start(struct sh_dmae_chan *sh_chan)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+	u32 chcr = chcr_read(sh_chan);
+
+	if (shdev->pdata->needs_tend_set)
+		sh_dmae_writel(sh_chan, 0xFFFFFFFF, TEND);
+
+	chcr |= CHCR_DE | shdev->chcr_ie_bit;
+	chcr_write(sh_chan, chcr & ~CHCR_TE);
+}
+
+static void dmae_init(struct sh_dmae_chan *sh_chan)
+{
+	/*
+	 * Default configuration for dual address memory-memory transfer.
+	 */
+	u32 chcr = DM_INC | SM_INC | RS_AUTO | log2size_to_chcr(sh_chan,
+						   LOG2_DEFAULT_XFER_SIZE);
+	sh_chan->xmit_shift = calc_xmit_shift(sh_chan, chcr);
+	chcr_write(sh_chan, chcr);
+}
+
+static int dmae_set_chcr(struct sh_dmae_chan *sh_chan, u32 val)
+{
+	/* If DMA is active, cannot set CHCR. TODO: remove this superfluous check */
+	if (dmae_is_busy(sh_chan))
+		return -EBUSY;
+
+	sh_chan->xmit_shift = calc_xmit_shift(sh_chan, val);
+	chcr_write(sh_chan, val);
+
+	return 0;
+}
+
+static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+	const struct sh_dmae_pdata *pdata = shdev->pdata;
+	const struct sh_dmae_channel *chan_pdata = &pdata->channel[sh_chan->shdma_chan.id];
+	void __iomem *addr = shdev->dmars;
+	unsigned int shift = chan_pdata->dmars_bit;
+
+	if (dmae_is_busy(sh_chan))
+		return -EBUSY;
+
+	if (pdata->no_dmars)
+		return 0;
+
+	/* In the case of a missing DMARS resource use the first memory window */
+	if (!addr)
+		addr = shdev->chan_reg;
+	addr += chan_pdata->dmars;
+
+	__raw_writew((__raw_readw(addr) & (0xff00 >> shift)) | (val << shift),
+		     addr);
+
+	return 0;
+}
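+
+/*
+ * Editorial note: each 16-bit DMARS register typically carries the MID/RID
+ * values for two channels, one per byte, selected by chan_pdata->dmars_bit
+ * (assumed here to be 0 or 8). With shift == 8 the read-modify-write above
+ * preserves the low byte ((0xff00 >> 8) == 0x00ff) and places val in the high
+ * byte; with shift == 0 the high byte is preserved instead.
+ */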
+
+static void sh_dmae_start_xfer(struct shdma_chan *schan,
+			       struct shdma_desc *sdesc)
+{
+	struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan,
+						    shdma_chan);
+	struct sh_dmae_desc *sh_desc = container_of(sdesc,
+					struct sh_dmae_desc, shdma_desc);
+	dev_dbg(sh_chan->shdma_chan.dev, "Queue #%d to %d: %u@%x -> %x\n",
+		sdesc->async_tx.cookie, sh_chan->shdma_chan.id,
+		sh_desc->hw.tcr, sh_desc->hw.sar, sh_desc->hw.dar);
+	/* Get the ld start address from ld_queue */
+	dmae_set_reg(sh_chan, &sh_desc->hw);
+	dmae_start(sh_chan);
+}
+
+static bool sh_dmae_channel_busy(struct shdma_chan *schan)
+{
+	struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan,
+						    shdma_chan);
+	return dmae_is_busy(sh_chan);
+}
+
+static void sh_dmae_setup_xfer(struct shdma_chan *schan,
+			       int slave_id)
+{
+	struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan,
+						    shdma_chan);
+
+	if (slave_id >= 0) {
+		const struct sh_dmae_slave_config *cfg =
+			sh_chan->config;
+
+		dmae_set_dmars(sh_chan, cfg->mid_rid);
+		dmae_set_chcr(sh_chan, cfg->chcr);
+	} else {
+		dmae_init(sh_chan);
+	}
+}
+
+/*
+ * Find a slave channel configuration from the controller list by either a slave
+ * ID in the non-DT case, or by a MID/RID value in the DT case
+ */
+static const struct sh_dmae_slave_config *dmae_find_slave(
+	struct sh_dmae_chan *sh_chan, int match)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+	const struct sh_dmae_pdata *pdata = shdev->pdata;
+	const struct sh_dmae_slave_config *cfg;
+	int i;
+
+	if (!sh_chan->shdma_chan.dev->of_node) {
+		if (match >= SH_DMA_SLAVE_NUMBER)
+			return NULL;
+
+		for (i = 0, cfg = pdata->slave; i < pdata->slave_num; i++, cfg++)
+			if (cfg->slave_id == match)
+				return cfg;
+	} else {
+		for (i = 0, cfg = pdata->slave; i < pdata->slave_num; i++, cfg++)
+			if (cfg->mid_rid == match) {
+				sh_chan->shdma_chan.slave_id = i;
+				return cfg;
+			}
+	}
+
+	return NULL;
+}
+
+static int sh_dmae_set_slave(struct shdma_chan *schan,
+			     int slave_id, dma_addr_t slave_addr, bool try)
+{
+	struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan,
+						    shdma_chan);
+	const struct sh_dmae_slave_config *cfg = dmae_find_slave(sh_chan, slave_id);
+	if (!cfg)
+		return -ENXIO;
+
+	if (!try) {
+		sh_chan->config = cfg;
+		sh_chan->slave_addr = slave_addr ? : cfg->addr;
+	}
+
+	return 0;
+}
+
+static void dmae_halt(struct sh_dmae_chan *sh_chan)
+{
+	struct sh_dmae_device *shdev = to_sh_dev(sh_chan);
+	u32 chcr = chcr_read(sh_chan);
+
+	chcr &= ~(CHCR_DE | CHCR_TE | shdev->chcr_ie_bit);
+	chcr_write(sh_chan, chcr);
+}
+
+static int sh_dmae_desc_setup(struct shdma_chan *schan,
+			      struct shdma_desc *sdesc,
+			      dma_addr_t src, dma_addr_t dst, size_t *len)
+{
+	struct sh_dmae_desc *sh_desc = container_of(sdesc,
+					struct sh_dmae_desc, shdma_desc);
+
+	if (*len > schan->max_xfer_len)
+		*len = schan->max_xfer_len;
+
+	sh_desc->hw.sar = src;
+	sh_desc->hw.dar = dst;
+	sh_desc->hw.tcr = *len;
+
+	return 0;
+}
+
+static void sh_dmae_halt(struct shdma_chan *schan)
+{
+	struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan,
+						    shdma_chan);
+	dmae_halt(sh_chan);
+}
+
+static bool sh_dmae_chan_irq(struct shdma_chan *schan, int irq)
+{
+	struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan,
+						    shdma_chan);
+
+	if (!(chcr_read(sh_chan) & CHCR_TE))
+		return false;
+
+	/* DMA stop */
+	dmae_halt(sh_chan);
+
+	return true;
+}
+
+static size_t sh_dmae_get_partial(struct shdma_chan *schan,
+				  struct shdma_desc *sdesc)
+{
+	struct sh_dmae_chan *sh_chan = container_of(schan, struct sh_dmae_chan,
+						    shdma_chan);
+	struct sh_dmae_desc *sh_desc = container_of(sdesc,
+					struct sh_dmae_desc, shdma_desc);
+	return sh_desc->hw.tcr -
+		(sh_dmae_readl(sh_chan, TCR) << sh_chan->xmit_shift);
+}
+
+/* Called from error IRQ or NMI */
+static bool sh_dmae_reset(struct sh_dmae_device *shdev)
+{
+	bool ret;
+
+	/* halt the dma controller */
+	sh_dmae_ctl_stop(shdev);
+
+	/* We cannot detect which channel caused the error, so we have to reset all */
+	ret = shdma_reset(&shdev->shdma_dev);
+
+	sh_dmae_rst(shdev);
+
+	return ret;
+}
+
+static irqreturn_t sh_dmae_err(int irq, void *data)
+{
+	struct sh_dmae_device *shdev = data;
+
+	if (!(dmaor_read(shdev) & DMAOR_AE))
+		return IRQ_NONE;
+
+	sh_dmae_reset(shdev);
+	return IRQ_HANDLED;
+}
+
+static bool sh_dmae_desc_completed(struct shdma_chan *schan,
+				   struct shdma_desc *sdesc)
+{
+	struct sh_dmae_chan *sh_chan = container_of(schan,
+					struct sh_dmae_chan, shdma_chan);
+	struct sh_dmae_desc *sh_desc = container_of(sdesc,
+					struct sh_dmae_desc, shdma_desc);
+	u32 sar_buf = sh_dmae_readl(sh_chan, SAR);
+	u32 dar_buf = sh_dmae_readl(sh_chan, DAR);
+
+	return	(sdesc->direction == DMA_DEV_TO_MEM &&
+		 (sh_desc->hw.dar + sh_desc->hw.tcr) == dar_buf) ||
+		(sdesc->direction != DMA_DEV_TO_MEM &&
+		 (sh_desc->hw.sar + sh_desc->hw.tcr) == sar_buf);
+}
+
+static bool sh_dmae_nmi_notify(struct sh_dmae_device *shdev)
+{
+	/* Fast path out if NMIF is not asserted for this controller */
+	if ((dmaor_read(shdev) & DMAOR_NMIF) == 0)
+		return false;
+
+	return sh_dmae_reset(shdev);
+}
+
+static int sh_dmae_nmi_handler(struct notifier_block *self,
+			       unsigned long cmd, void *data)
+{
+	struct sh_dmae_device *shdev;
+	int ret = NOTIFY_DONE;
+	bool triggered;
+
+	/*
+	 * Only concern ourselves with NMI events.
+	 *
+	 * Normally we would check the die chain value, but as this needs
+	 * to be architecture independent, check for NMI context instead.
+	 */
+	if (!in_nmi())
+		return NOTIFY_DONE;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(shdev, &sh_dmae_devices, node) {
+		/*
+		 * Only stop if one of the controllers has NMIF asserted,
+		 * we do not want to interfere with regular address error
+		 * handling or NMI events that don't concern the DMACs.
+		 */
+		triggered = sh_dmae_nmi_notify(shdev);
+		if (triggered)
+			ret = NOTIFY_OK;
+	}
+	rcu_read_unlock();
+
+	return ret;
+}
+
+static struct notifier_block sh_dmae_nmi_notifier __read_mostly = {
+	.notifier_call	= sh_dmae_nmi_handler,
+
+	/* Run before NMI debug handler and KGDB */
+	.priority	= 1,
+};
+
+static int sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id,
+					int irq, unsigned long flags)
+{
+	const struct sh_dmae_channel *chan_pdata = &shdev->pdata->channel[id];
+	struct shdma_dev *sdev = &shdev->shdma_dev;
+	struct platform_device *pdev = to_platform_device(sdev->dma_dev.dev);
+	struct sh_dmae_chan *sh_chan;
+	struct shdma_chan *schan;
+	int err;
+
+	sh_chan = devm_kzalloc(sdev->dma_dev.dev, sizeof(struct sh_dmae_chan),
+			       GFP_KERNEL);
+	if (!sh_chan)
+		return -ENOMEM;
+
+	schan = &sh_chan->shdma_chan;
+	schan->max_xfer_len = SH_DMA_TCR_MAX + 1;
+
+	shdma_chan_probe(sdev, schan, id);
+
+	sh_chan->base = shdev->chan_reg + chan_pdata->offset;
+
+	/* set up channel irq */
+	if (pdev->id >= 0)
+		snprintf(sh_chan->dev_id, sizeof(sh_chan->dev_id),
+			 "sh-dmae%d.%d", pdev->id, id);
+	else
+		snprintf(sh_chan->dev_id, sizeof(sh_chan->dev_id),
+			 "sh-dma%d", id);
+
+	err = shdma_request_irq(schan, irq, flags, sh_chan->dev_id);
+	if (err) {
+		dev_err(sdev->dma_dev.dev,
+			"DMA channel %d request_irq error %d\n",
+			id, err);
+		goto err_no_irq;
+	}
+
+	shdev->chan[id] = sh_chan;
+	return 0;
+
+err_no_irq:
+	/* remove from dmaengine device node */
+	shdma_chan_remove(schan);
+	return err;
+}
+
+static void sh_dmae_chan_remove(struct sh_dmae_device *shdev)
+{
+	struct shdma_chan *schan;
+	int i;
+
+	shdma_for_each_chan(schan, &shdev->shdma_dev, i) {
+		BUG_ON(!schan);
+
+		shdma_chan_remove(schan);
+	}
+}
+
+#ifdef CONFIG_PM
+static int sh_dmae_runtime_suspend(struct device *dev)
+{
+	struct sh_dmae_device *shdev = dev_get_drvdata(dev);
+
+	sh_dmae_ctl_stop(shdev);
+	return 0;
+}
+
+static int sh_dmae_runtime_resume(struct device *dev)
+{
+	struct sh_dmae_device *shdev = dev_get_drvdata(dev);
+
+	return sh_dmae_rst(shdev);
+}
+#endif
+
+#ifdef CONFIG_PM_SLEEP
+static int sh_dmae_suspend(struct device *dev)
+{
+	struct sh_dmae_device *shdev = dev_get_drvdata(dev);
+
+	sh_dmae_ctl_stop(shdev);
+	return 0;
+}
+
+static int sh_dmae_resume(struct device *dev)
+{
+	struct sh_dmae_device *shdev = dev_get_drvdata(dev);
+	int i, ret;
+
+	ret = sh_dmae_rst(shdev);
+	if (ret < 0)
+		dev_err(dev, "Failed to reset!\n");
+
+	for (i = 0; i < shdev->pdata->channel_num; i++) {
+		struct sh_dmae_chan *sh_chan = shdev->chan[i];
+
+		if (!sh_chan->shdma_chan.desc_num)
+			continue;
+
+		if (sh_chan->shdma_chan.slave_id >= 0) {
+			const struct sh_dmae_slave_config *cfg = sh_chan->config;
+			dmae_set_dmars(sh_chan, cfg->mid_rid);
+			dmae_set_chcr(sh_chan, cfg->chcr);
+		} else {
+			dmae_init(sh_chan);
+		}
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops sh_dmae_pm = {
+	SET_SYSTEM_SLEEP_PM_OPS(sh_dmae_suspend, sh_dmae_resume)
+	SET_RUNTIME_PM_OPS(sh_dmae_runtime_suspend, sh_dmae_runtime_resume,
+			   NULL)
+};
+
+static dma_addr_t sh_dmae_slave_addr(struct shdma_chan *schan)
+{
+	struct sh_dmae_chan *sh_chan = container_of(schan,
+					struct sh_dmae_chan, shdma_chan);
+
+	/*
+	 * Implicit BUG_ON(!sh_chan->config)
+	 * This is an exclusive slave DMA operation; it may only be called
+	 * after a successful slave configuration.
+	 */
+	return sh_chan->slave_addr;
+}
+
+static struct shdma_desc *sh_dmae_embedded_desc(void *buf, int i)
+{
+	return &((struct sh_dmae_desc *)buf)[i].shdma_desc;
+}
+
+static const struct shdma_ops sh_dmae_shdma_ops = {
+	.desc_completed = sh_dmae_desc_completed,
+	.halt_channel = sh_dmae_halt,
+	.channel_busy = sh_dmae_channel_busy,
+	.slave_addr = sh_dmae_slave_addr,
+	.desc_setup = sh_dmae_desc_setup,
+	.set_slave = sh_dmae_set_slave,
+	.setup_xfer = sh_dmae_setup_xfer,
+	.start_xfer = sh_dmae_start_xfer,
+	.embedded_desc = sh_dmae_embedded_desc,
+	.chan_irq = sh_dmae_chan_irq,
+	.get_partial = sh_dmae_get_partial,
+};
+
+static const struct of_device_id sh_dmae_of_match[] = {
+	{.compatible = "renesas,shdma-r8a73a4", .data = r8a73a4_shdma_devid,},
+	{}
+};
+MODULE_DEVICE_TABLE(of, sh_dmae_of_match);
+
+static int sh_dmae_probe(struct platform_device *pdev)
+{
+	const enum dma_slave_buswidth widths =
+		DMA_SLAVE_BUSWIDTH_1_BYTE   | DMA_SLAVE_BUSWIDTH_2_BYTES |
+		DMA_SLAVE_BUSWIDTH_4_BYTES  | DMA_SLAVE_BUSWIDTH_8_BYTES |
+		DMA_SLAVE_BUSWIDTH_16_BYTES | DMA_SLAVE_BUSWIDTH_32_BYTES;
+	const struct sh_dmae_pdata *pdata;
+	unsigned long chan_flag[SH_DMAE_MAX_CHANNELS] = {};
+	int chan_irq[SH_DMAE_MAX_CHANNELS];
+	unsigned long irqflags = 0;
+	int err, errirq, i, irq_cnt = 0, irqres = 0, irq_cap = 0;
+	struct sh_dmae_device *shdev;
+	struct dma_device *dma_dev;
+	struct resource *chan, *dmars, *errirq_res, *chanirq_res;
+
+	if (pdev->dev.of_node)
+		pdata = of_device_get_match_data(&pdev->dev);
+	else
+		pdata = dev_get_platdata(&pdev->dev);
+
+	/* get platform data */
+	if (!pdata || !pdata->channel_num)
+		return -ENODEV;
+
+	chan = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	/* DMARS area is optional */
+	dmars = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	/*
+	 * IRQ resources:
+	 * 1. There must always be at least one IRQ resource. On SH4 it is
+	 *    the error IRQ, in which case it is the only IRQ in this resource:
+	 *    start == end. If it is the only IRQ resource, all channels also
+	 *    use the same IRQ.
+	 * 2. DMA channel IRQ resources can be specified one per resource or
+	 *    in ranges (start != end).
+	 * 3. Only if all events (channels and, optionally, error) on this
+	 *    controller use the same IRQ may a single IRQ resource be
+	 *    specified; otherwise there must be one IRQ per channel, even if
+	 *    some of them are equal.
+	 * 4. If all IRQs on this controller are equal, or if specific IRQs
+	 *    set IORESOURCE_IRQ_SHAREABLE in their resources, they will be
+	 *    requested with the IRQF_SHARED flag.
+	 */
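+	/*
+	 * Illustration only (IRQ numbers are made up, not from any real
+	 * board): a controller with a dedicated error IRQ and one IRQ
+	 * multiplexed across all channels would pass two IORESOURCE_IRQ
+	 * entries, e.g. DEFINE_RES_IRQ(34) for the error IRQ followed by
+	 * DEFINE_RES_IRQ(35) for the channels, while per-channel IRQs are
+	 * passed as one entry (or one start != end range) per channel after
+	 * the error IRQ resource.
+	 */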
+	errirq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!chan || !errirq_res)
+		return -ENODEV;
+
+	shdev = devm_kzalloc(&pdev->dev, sizeof(struct sh_dmae_device),
+			     GFP_KERNEL);
+	if (!shdev)
+		return -ENOMEM;
+
+	dma_dev = &shdev->shdma_dev.dma_dev;
+
+	shdev->chan_reg = devm_ioremap_resource(&pdev->dev, chan);
+	if (IS_ERR(shdev->chan_reg))
+		return PTR_ERR(shdev->chan_reg);
+	if (dmars) {
+		shdev->dmars = devm_ioremap_resource(&pdev->dev, dmars);
+		if (IS_ERR(shdev->dmars))
+			return PTR_ERR(shdev->dmars);
+	}
+
+	dma_dev->src_addr_widths = widths;
+	dma_dev->dst_addr_widths = widths;
+	dma_dev->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
+	dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+
+	if (!pdata->slave_only)
+		dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+	if (pdata->slave && pdata->slave_num)
+		dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
+
+	/* Default transfer size of 32 bytes requires 32-byte alignment */
+	dma_dev->copy_align = LOG2_DEFAULT_XFER_SIZE;
+
+	shdev->shdma_dev.ops = &sh_dmae_shdma_ops;
+	shdev->shdma_dev.desc_size = sizeof(struct sh_dmae_desc);
+	err = shdma_init(&pdev->dev, &shdev->shdma_dev,
+			      pdata->channel_num);
+	if (err < 0)
+		goto eshdma;
+
+	/* platform data */
+	shdev->pdata = pdata;
+
+	if (pdata->chcr_offset)
+		shdev->chcr_offset = pdata->chcr_offset;
+	else
+		shdev->chcr_offset = CHCR;
+
+	if (pdata->chcr_ie_bit)
+		shdev->chcr_ie_bit = pdata->chcr_ie_bit;
+	else
+		shdev->chcr_ie_bit = CHCR_IE;
+
+	platform_set_drvdata(pdev, shdev);
+
+	pm_runtime_enable(&pdev->dev);
+	err = pm_runtime_get_sync(&pdev->dev);
+	if (err < 0)
+		dev_err(&pdev->dev, "%s(): GET = %d\n", __func__, err);
+
+	spin_lock_irq(&sh_dmae_lock);
+	list_add_tail_rcu(&shdev->node, &sh_dmae_devices);
+	spin_unlock_irq(&sh_dmae_lock);
+
+	/* reset dma controller - only needed as a test */
+	err = sh_dmae_rst(shdev);
+	if (err)
+		goto rst_err;
+
+	if (IS_ENABLED(CONFIG_CPU_SH4) || IS_ENABLED(CONFIG_ARCH_RENESAS)) {
+		chanirq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 1);
+
+		if (!chanirq_res)
+			chanirq_res = errirq_res;
+		else
+			irqres++;
+
+		if (chanirq_res == errirq_res ||
+		    (errirq_res->flags & IORESOURCE_BITS) == IORESOURCE_IRQ_SHAREABLE)
+			irqflags = IRQF_SHARED;
+
+		errirq = errirq_res->start;
+
+		err = devm_request_irq(&pdev->dev, errirq, sh_dmae_err,
+				       irqflags, "DMAC Address Error", shdev);
+		if (err) {
+			dev_err(&pdev->dev,
+				"DMA failed requesting irq #%d, error %d\n",
+				errirq, err);
+			goto eirq_err;
+		}
+	} else {
+		chanirq_res = errirq_res;
+	}
+
+	if (chanirq_res->start == chanirq_res->end &&
+	    !platform_get_resource(pdev, IORESOURCE_IRQ, 1)) {
+		/* Special case - all multiplexed */
+		for (; irq_cnt < pdata->channel_num; irq_cnt++) {
+			if (irq_cnt < SH_DMAE_MAX_CHANNELS) {
+				chan_irq[irq_cnt] = chanirq_res->start;
+				chan_flag[irq_cnt] = IRQF_SHARED;
+			} else {
+				irq_cap = 1;
+				break;
+			}
+		}
+	} else {
+		do {
+			for (i = chanirq_res->start; i <= chanirq_res->end; i++) {
+				if (irq_cnt >= SH_DMAE_MAX_CHANNELS) {
+					irq_cap = 1;
+					break;
+				}
+
+				if ((errirq_res->flags & IORESOURCE_BITS) ==
+				    IORESOURCE_IRQ_SHAREABLE)
+					chan_flag[irq_cnt] = IRQF_SHARED;
+				else
+					chan_flag[irq_cnt] = 0;
+				dev_dbg(&pdev->dev,
+					"Found IRQ %d for channel %d\n",
+					i, irq_cnt);
+				chan_irq[irq_cnt++] = i;
+			}
+
+			if (irq_cnt >= SH_DMAE_MAX_CHANNELS)
+				break;
+
+			chanirq_res = platform_get_resource(pdev,
+						IORESOURCE_IRQ, ++irqres);
+		} while (irq_cnt < pdata->channel_num && chanirq_res);
+	}
+
+	/* Create DMA Channel */
+	for (i = 0; i < irq_cnt; i++) {
+		err = sh_dmae_chan_probe(shdev, i, chan_irq[i], chan_flag[i]);
+		if (err)
+			goto chan_probe_err;
+	}
+
+	if (irq_cap)
+		dev_notice(&pdev->dev, "Attempting to register %d DMA "
+			   "channels when a maximum of %d are supported.\n",
+			   pdata->channel_num, SH_DMAE_MAX_CHANNELS);
+
+	pm_runtime_put(&pdev->dev);
+
+	err = dma_async_device_register(&shdev->shdma_dev.dma_dev);
+	if (err < 0)
+		goto edmadevreg;
+
+	return err;
+
+edmadevreg:
+	pm_runtime_get(&pdev->dev);
+
+chan_probe_err:
+	sh_dmae_chan_remove(shdev);
+
+eirq_err:
+rst_err:
+	spin_lock_irq(&sh_dmae_lock);
+	list_del_rcu(&shdev->node);
+	spin_unlock_irq(&sh_dmae_lock);
+
+	pm_runtime_put(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
+	shdma_cleanup(&shdev->shdma_dev);
+eshdma:
+	synchronize_rcu();
+
+	return err;
+}
+
+static int sh_dmae_remove(struct platform_device *pdev)
+{
+	struct sh_dmae_device *shdev = platform_get_drvdata(pdev);
+	struct dma_device *dma_dev = &shdev->shdma_dev.dma_dev;
+
+	dma_async_device_unregister(dma_dev);
+
+	spin_lock_irq(&sh_dmae_lock);
+	list_del_rcu(&shdev->node);
+	spin_unlock_irq(&sh_dmae_lock);
+
+	pm_runtime_disable(&pdev->dev);
+
+	sh_dmae_chan_remove(shdev);
+	shdma_cleanup(&shdev->shdma_dev);
+
+	synchronize_rcu();
+
+	return 0;
+}
+
+static struct platform_driver sh_dmae_driver = {
+	.driver		= {
+		.pm	= &sh_dmae_pm,
+		.name	= SH_DMAE_DRV_NAME,
+		.of_match_table = sh_dmae_of_match,
+	},
+	.remove		= sh_dmae_remove,
+};
+
+static int __init sh_dmae_init(void)
+{
+	/* Wire up NMI handling */
+	int err = register_die_notifier(&sh_dmae_nmi_notifier);
+	if (err)
+		return err;
+
+	return platform_driver_probe(&sh_dmae_driver, sh_dmae_probe);
+}
+module_init(sh_dmae_init);
+
+static void __exit sh_dmae_exit(void)
+{
+	platform_driver_unregister(&sh_dmae_driver);
+
+	unregister_die_notifier(&sh_dmae_nmi_notifier);
+}
+module_exit(sh_dmae_exit);
+
+MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>");
+MODULE_DESCRIPTION("Renesas SH DMA Engine driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:" SH_DMAE_DRV_NAME);
diff --git a/drivers/dma/sh/sudmac.c b/drivers/dma/sh/sudmac.c
new file mode 100644
index 0000000..69b9564
--- /dev/null
+++ b/drivers/dma/sh/sudmac.c
@@ -0,0 +1,417 @@
+/*
+ * Renesas SUDMAC support
+ *
+ * Copyright (C) 2013 Renesas Solutions Corp.
+ *
+ * based on drivers/dma/sh/shdma.c:
+ * Copyright (C) 2011-2012 Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
+ * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/sudmac.h>
+
+struct sudmac_chan {
+	struct shdma_chan shdma_chan;
+	void __iomem *base;
+	char dev_id[16];	/* unique name per channel of this DMAC */
+
+	u32 offset;		/* for CFG, BA, BBC, CA, CBC, DEN */
+	u32 cfg;
+	u32 dint_end_bit;
+};
+
+struct sudmac_device {
+	struct shdma_dev shdma_dev;
+	struct sudmac_pdata *pdata;
+	void __iomem *chan_reg;
+};
+
+struct sudmac_regs {
+	u32 base_addr;
+	u32 base_byte_count;
+};
+
+struct sudmac_desc {
+	struct sudmac_regs hw;
+	struct shdma_desc shdma_desc;
+};
+
+#define to_chan(schan) container_of(schan, struct sudmac_chan, shdma_chan)
+#define to_desc(sdesc) container_of(sdesc, struct sudmac_desc, shdma_desc)
+#define to_sdev(sc) container_of(sc->shdma_chan.dma_chan.device, \
+				 struct sudmac_device, shdma_dev.dma_dev)
+
+/* SUDMAC registers */
+#define SUDMAC_CH0CFG		0x00
+#define SUDMAC_CH0BA		0x10
+#define SUDMAC_CH0BBC		0x18
+#define SUDMAC_CH0CA		0x20
+#define SUDMAC_CH0CBC		0x28
+#define SUDMAC_CH0DEN		0x30
+#define SUDMAC_DSTSCLR		0x38
+#define SUDMAC_DBUFCTRL		0x3C
+#define SUDMAC_DINTCTRL		0x40
+#define SUDMAC_DINTSTS		0x44
+#define SUDMAC_DINTSTSCLR	0x48
+#define SUDMAC_CH0SHCTRL	0x50
+
+/* Definitions for the sudmac_channel.config */
+#define SUDMAC_SENDBUFM	0x1000 /* b12: Transmit Buffer Mode */
+#define SUDMAC_RCVENDM	0x0100 /* b8: Receive Data Transfer End Mode */
+#define SUDMAC_LBA_WAIT	0x0030 /* b5-4: Local Bus Access Wait */
+
+/* Definitions for the sudmac_channel.dint_end_bit */
+#define SUDMAC_CH1ENDE	0x0002 /* b1: Ch1 DMA Transfer End Int Enable */
+#define SUDMAC_CH0ENDE	0x0001 /* b0: Ch0 DMA Transfer End Int Enable */
+
+#define SUDMAC_DRV_NAME "sudmac"
+
+static void sudmac_writel(struct sudmac_chan *sc, u32 data, u32 reg)
+{
+	iowrite32(data, sc->base + reg);
+}
+
+static u32 sudmac_readl(struct sudmac_chan *sc, u32 reg)
+{
+	return ioread32(sc->base + reg);
+}
+
+static bool sudmac_is_busy(struct sudmac_chan *sc)
+{
+	u32 den = sudmac_readl(sc, SUDMAC_CH0DEN + sc->offset);
+
+	if (den)
+		return true; /* working */
+
+	return false; /* waiting */
+}
+
+static void sudmac_set_reg(struct sudmac_chan *sc, struct sudmac_regs *hw,
+			   struct shdma_desc *sdesc)
+{
+	sudmac_writel(sc, sc->cfg, SUDMAC_CH0CFG + sc->offset);
+	sudmac_writel(sc, hw->base_addr, SUDMAC_CH0BA + sc->offset);
+	sudmac_writel(sc, hw->base_byte_count, SUDMAC_CH0BBC + sc->offset);
+}
+
+static void sudmac_start(struct sudmac_chan *sc)
+{
+	u32 dintctrl = sudmac_readl(sc, SUDMAC_DINTCTRL);
+
+	sudmac_writel(sc, dintctrl | sc->dint_end_bit, SUDMAC_DINTCTRL);
+	sudmac_writel(sc, 1, SUDMAC_CH0DEN + sc->offset);
+}
+
+static void sudmac_start_xfer(struct shdma_chan *schan,
+			      struct shdma_desc *sdesc)
+{
+	struct sudmac_chan *sc = to_chan(schan);
+	struct sudmac_desc *sd = to_desc(sdesc);
+
+	sudmac_set_reg(sc, &sd->hw, sdesc);
+	sudmac_start(sc);
+}
+
+static bool sudmac_channel_busy(struct shdma_chan *schan)
+{
+	struct sudmac_chan *sc = to_chan(schan);
+
+	return sudmac_is_busy(sc);
+}
+
+static void sudmac_setup_xfer(struct shdma_chan *schan, int slave_id)
+{
+}
+
+static const struct sudmac_slave_config *sudmac_find_slave(
+	struct sudmac_chan *sc, int slave_id)
+{
+	struct sudmac_device *sdev = to_sdev(sc);
+	struct sudmac_pdata *pdata = sdev->pdata;
+	const struct sudmac_slave_config *cfg;
+	int i;
+
+	for (i = 0, cfg = pdata->slave; i < pdata->slave_num; i++, cfg++)
+		if (cfg->slave_id == slave_id)
+			return cfg;
+
+	return NULL;
+}
+
+static int sudmac_set_slave(struct shdma_chan *schan, int slave_id,
+			    dma_addr_t slave_addr, bool try)
+{
+	struct sudmac_chan *sc = to_chan(schan);
+	const struct sudmac_slave_config *cfg = sudmac_find_slave(sc, slave_id);
+
+	if (!cfg)
+		return -ENODEV;
+
+	return 0;
+}
+
+static inline void sudmac_dma_halt(struct sudmac_chan *sc)
+{
+	u32 dintctrl = sudmac_readl(sc, SUDMAC_DINTCTRL);
+
+	sudmac_writel(sc, 0, SUDMAC_CH0DEN + sc->offset);
+	sudmac_writel(sc, dintctrl & ~sc->dint_end_bit, SUDMAC_DINTCTRL);
+	sudmac_writel(sc, sc->dint_end_bit, SUDMAC_DINTSTSCLR);
+}
+
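+/*
+ * Set up a single hardware descriptor. Only the memory-side bus address is
+ * recorded (dst for DEV_TO_MEM, src for MEM_TO_DEV); the peripheral side is
+ * fixed in hardware, so no slave address is programmed (see
+ * sudmac_slave_addr()).
+ */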
+static int sudmac_desc_setup(struct shdma_chan *schan,
+			     struct shdma_desc *sdesc,
+			     dma_addr_t src, dma_addr_t dst, size_t *len)
+{
+	struct sudmac_chan *sc = to_chan(schan);
+	struct sudmac_desc *sd = to_desc(sdesc);
+
+	dev_dbg(sc->shdma_chan.dev, "%s: src=%pad, dst=%pad, len=%zu\n",
+		__func__, &src, &dst, *len);
+
+	if (*len > schan->max_xfer_len)
+		*len = schan->max_xfer_len;
+
+	if (dst)
+		sd->hw.base_addr = dst;
+	else if (src)
+		sd->hw.base_addr = src;
+	sd->hw.base_byte_count = *len;
+
+	return 0;
+}
+
+static void sudmac_halt(struct shdma_chan *schan)
+{
+	struct sudmac_chan *sc = to_chan(schan);
+
+	sudmac_dma_halt(sc);
+}
+
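+/*
+ * Claim the interrupt only if this channel's transfer-end bit is set in
+ * DINTSTS, and stop the DMA before returning.
+ */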
+static bool sudmac_chan_irq(struct shdma_chan *schan, int irq)
+{
+	struct sudmac_chan *sc = to_chan(schan);
+	u32 dintsts = sudmac_readl(sc, SUDMAC_DINTSTS);
+
+	if (!(dintsts & sc->dint_end_bit))
+		return false;
+
+	/* DMA stop */
+	sudmac_dma_halt(sc);
+
+	return true;
+}
+
+static size_t sudmac_get_partial(struct shdma_chan *schan,
+				 struct shdma_desc *sdesc)
+{
+	struct sudmac_chan *sc = to_chan(schan);
+	struct sudmac_desc *sd = to_desc(sdesc);
+	u32 current_byte_count = sudmac_readl(sc, SUDMAC_CH0CBC + sc->offset);
+
+	return sd->hw.base_byte_count - current_byte_count;
+}
+
+static bool sudmac_desc_completed(struct shdma_chan *schan,
+				  struct shdma_desc *sdesc)
+{
+	struct sudmac_chan *sc = to_chan(schan);
+	struct sudmac_desc *sd = to_desc(sdesc);
+	u32 current_addr = sudmac_readl(sc, SUDMAC_CH0CA + sc->offset);
+
+	return sd->hw.base_addr + sd->hw.base_byte_count == current_addr;
+}
+
+static int sudmac_chan_probe(struct sudmac_device *su_dev, int id, int irq,
+			     unsigned long flags)
+{
+	struct shdma_dev *sdev = &su_dev->shdma_dev;
+	struct platform_device *pdev = to_platform_device(sdev->dma_dev.dev);
+	struct sudmac_chan *sc;
+	struct shdma_chan *schan;
+	int err;
+
+	sc = devm_kzalloc(&pdev->dev, sizeof(struct sudmac_chan), GFP_KERNEL);
+	if (!sc)
+		return -ENOMEM;
+
+	schan = &sc->shdma_chan;
+	schan->max_xfer_len = 64 * 1024 * 1024 - 1;
+
+	shdma_chan_probe(sdev, schan, id);
+
+	sc->base = su_dev->chan_reg;
+
+	/* get platform_data */
+	sc->offset = su_dev->pdata->channel->offset;
+	if (su_dev->pdata->channel->config & SUDMAC_TX_BUFFER_MODE)
+		sc->cfg |= SUDMAC_SENDBUFM;
+	if (su_dev->pdata->channel->config & SUDMAC_RX_END_MODE)
+		sc->cfg |= SUDMAC_RCVENDM;
+	sc->cfg |= (su_dev->pdata->channel->wait << 4) & SUDMAC_LBA_WAIT;
+
+	if (su_dev->pdata->channel->dint_end_bit & SUDMAC_DMA_BIT_CH0)
+		sc->dint_end_bit |= SUDMAC_CH0ENDE;
+	if (su_dev->pdata->channel->dint_end_bit & SUDMAC_DMA_BIT_CH1)
+		sc->dint_end_bit |= SUDMAC_CH1ENDE;
+
+	/* set up channel irq */
+	if (pdev->id >= 0)
+		snprintf(sc->dev_id, sizeof(sc->dev_id), "sudmac%d.%d",
+			 pdev->id, id);
+	else
+		snprintf(sc->dev_id, sizeof(sc->dev_id), "sudmac%d", id);
+
+	err = shdma_request_irq(schan, irq, flags, sc->dev_id);
+	if (err) {
+		dev_err(sdev->dma_dev.dev,
+			"DMA channel %d request_irq failed %d\n", id, err);
+		goto err_no_irq;
+	}
+
+	return 0;
+
+err_no_irq:
+	/* remove from dmaengine device node */
+	shdma_chan_remove(schan);
+	return err;
+}
+
+static void sudmac_chan_remove(struct sudmac_device *su_dev)
+{
+	struct shdma_chan *schan;
+	int i;
+
+	shdma_for_each_chan(schan, &su_dev->shdma_dev, i) {
+		BUG_ON(!schan);
+
+		shdma_chan_remove(schan);
+	}
+}
+
+static dma_addr_t sudmac_slave_addr(struct shdma_chan *schan)
+{
+	/* SUDMAC doesn't need the address */
+	return 0;
+}
+
+static struct shdma_desc *sudmac_embedded_desc(void *buf, int i)
+{
+	return &((struct sudmac_desc *)buf)[i].shdma_desc;
+}
+
+static const struct shdma_ops sudmac_shdma_ops = {
+	.desc_completed = sudmac_desc_completed,
+	.halt_channel = sudmac_halt,
+	.channel_busy = sudmac_channel_busy,
+	.slave_addr = sudmac_slave_addr,
+	.desc_setup = sudmac_desc_setup,
+	.set_slave = sudmac_set_slave,
+	.setup_xfer = sudmac_setup_xfer,
+	.start_xfer = sudmac_start_xfer,
+	.embedded_desc = sudmac_embedded_desc,
+	.chan_irq = sudmac_chan_irq,
+	.get_partial = sudmac_get_partial,
+};
+
+static int sudmac_probe(struct platform_device *pdev)
+{
+	struct sudmac_pdata *pdata = dev_get_platdata(&pdev->dev);
+	int err, i;
+	struct sudmac_device *su_dev;
+	struct dma_device *dma_dev;
+	struct resource *chan, *irq_res;
+
+	/* get platform data */
+	if (!pdata)
+		return -ENODEV;
+
+	irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!irq_res)
+		return -ENODEV;
+
+	su_dev = devm_kzalloc(&pdev->dev, sizeof(struct sudmac_device),
+			      GFP_KERNEL);
+	if (!su_dev)
+		return -ENOMEM;
+
+	dma_dev = &su_dev->shdma_dev.dma_dev;
+
+	chan = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	su_dev->chan_reg = devm_ioremap_resource(&pdev->dev, chan);
+	if (IS_ERR(su_dev->chan_reg))
+		return PTR_ERR(su_dev->chan_reg);
+
+	dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
+
+	su_dev->shdma_dev.ops = &sudmac_shdma_ops;
+	su_dev->shdma_dev.desc_size = sizeof(struct sudmac_desc);
+	err = shdma_init(&pdev->dev, &su_dev->shdma_dev, pdata->channel_num);
+	if (err < 0)
+		return err;
+
+	/* platform data */
+	su_dev->pdata = dev_get_platdata(&pdev->dev);
+
+	platform_set_drvdata(pdev, su_dev);
+
+	/* Create DMA Channel */
+	for (i = 0; i < pdata->channel_num; i++) {
+		err = sudmac_chan_probe(su_dev, i, irq_res->start, IRQF_SHARED);
+		if (err)
+			goto chan_probe_err;
+	}
+
+	err = dma_async_device_register(&su_dev->shdma_dev.dma_dev);
+	if (err < 0)
+		goto chan_probe_err;
+
+	return err;
+
+chan_probe_err:
+	sudmac_chan_remove(su_dev);
+
+	shdma_cleanup(&su_dev->shdma_dev);
+
+	return err;
+}
+
+static int sudmac_remove(struct platform_device *pdev)
+{
+	struct sudmac_device *su_dev = platform_get_drvdata(pdev);
+	struct dma_device *dma_dev = &su_dev->shdma_dev.dma_dev;
+
+	dma_async_device_unregister(dma_dev);
+	sudmac_chan_remove(su_dev);
+	shdma_cleanup(&su_dev->shdma_dev);
+
+	return 0;
+}
+
+static struct platform_driver sudmac_driver = {
+	.driver		= {
+		.name	= SUDMAC_DRV_NAME,
+	},
+	.probe		= sudmac_probe,
+	.remove		= sudmac_remove,
+};
+module_platform_driver(sudmac_driver);
+
+MODULE_AUTHOR("Yoshihiro Shimoda");
+MODULE_DESCRIPTION("Renesas SUDMAC driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:" SUDMAC_DRV_NAME);
diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c
new file mode 100644
index 0000000..1bb1a8e
--- /dev/null
+++ b/drivers/dma/sh/usb-dmac.c
@@ -0,0 +1,917 @@
+/*
+ * Renesas USB DMA Controller Driver
+ *
+ * Copyright (C) 2015 Renesas Electronics Corporation
+ *
+ * based on rcar-dmac.c
+ * Copyright (C) 2014 Renesas Electronics Inc.
+ * Author: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+/*
+ * struct usb_dmac_sg - Descriptor for a hardware transfer
+ * @mem_addr: memory address
+ * @size: transfer size in bytes
+ */
+struct usb_dmac_sg {
+	dma_addr_t mem_addr;
+	u32 size;
+};
+
+/*
+ * struct usb_dmac_desc - USB DMA Transfer Descriptor
+ * @vd: base virtual channel DMA transaction descriptor
+ * @direction: direction of the DMA transfer
+ * @sg_allocated_len: length of allocated sg
+ * @sg_len: length of sg
+ * @sg_index: index of sg
+ * @residue: residue after the DMAC completed a transfer
+ * @node: node for desc_got and desc_freed
+ * @done_cookie: cookie after the DMAC completed a transfer
+ * @sg: information for the transfer
+ */
+struct usb_dmac_desc {
+	struct virt_dma_desc vd;
+	enum dma_transfer_direction direction;
+	unsigned int sg_allocated_len;
+	unsigned int sg_len;
+	unsigned int sg_index;
+	u32 residue;
+	struct list_head node;
+	dma_cookie_t done_cookie;
+	struct usb_dmac_sg sg[0];
+};
+
+#define to_usb_dmac_desc(vd)	container_of(vd, struct usb_dmac_desc, vd)
+
+/*
+ * struct usb_dmac_chan - USB DMA Controller Channel
+ * @vc: base virtual DMA channel object
+ * @iomem: channel I/O memory base
+ * @index: index of this channel in the controller
+ * @irq: irq number of this channel
+ * @desc: the current descriptor
+ * @descs_allocated: number of descriptors allocated
+ * @desc_got: got descriptors
+ * @desc_freed: freed descriptors after the DMAC completed a transfer
+ */
+struct usb_dmac_chan {
+	struct virt_dma_chan vc;
+	void __iomem *iomem;
+	unsigned int index;
+	int irq;
+	struct usb_dmac_desc *desc;
+	int descs_allocated;
+	struct list_head desc_got;
+	struct list_head desc_freed;
+};
+
+#define to_usb_dmac_chan(c) container_of(c, struct usb_dmac_chan, vc.chan)
+
+/*
+ * struct usb_dmac - USB DMA Controller
+ * @engine: base DMA engine object
+ * @dev: the hardware device
+ * @iomem: remapped I/O memory base
+ * @n_channels: number of available channels
+ * @channels: array of DMAC channels
+ */
+struct usb_dmac {
+	struct dma_device engine;
+	struct device *dev;
+	void __iomem *iomem;
+
+	unsigned int n_channels;
+	struct usb_dmac_chan *channels;
+};
+
+#define to_usb_dmac(d)		container_of(d, struct usb_dmac, engine)
+
+/* -----------------------------------------------------------------------------
+ * Registers
+ */
+
+#define USB_DMAC_CHAN_OFFSET(i)		(0x20 + 0x20 * (i))
+
+#define USB_DMASWR			0x0008
+#define USB_DMASWR_SWR			(1 << 0)
+#define USB_DMAOR			0x0060
+#define USB_DMAOR_AE			(1 << 1)
+#define USB_DMAOR_DME			(1 << 0)
+
+#define USB_DMASAR			0x0000
+#define USB_DMADAR			0x0004
+#define USB_DMATCR			0x0008
+#define USB_DMATCR_MASK			0x00ffffff
+#define USB_DMACHCR			0x0014
+#define USB_DMACHCR_FTE			(1 << 24)
+#define USB_DMACHCR_NULLE		(1 << 16)
+#define USB_DMACHCR_NULL		(1 << 12)
+#define USB_DMACHCR_TS_8B		((0 << 7) | (0 << 6))
+#define USB_DMACHCR_TS_16B		((0 << 7) | (1 << 6))
+#define USB_DMACHCR_TS_32B		((1 << 7) | (0 << 6))
+#define USB_DMACHCR_IE			(1 << 5)
+#define USB_DMACHCR_SP			(1 << 2)
+#define USB_DMACHCR_TE			(1 << 1)
+#define USB_DMACHCR_DE			(1 << 0)
+#define USB_DMATEND			0x0018
+
+/* Hardcode the xfer_shift to 5 (32 bytes) */
+#define USB_DMAC_XFER_SHIFT	5
+#define USB_DMAC_XFER_SIZE	(1 << USB_DMAC_XFER_SHIFT)
+#define USB_DMAC_CHCR_TS	USB_DMACHCR_TS_32B
+#define USB_DMAC_SLAVE_BUSWIDTH	DMA_SLAVE_BUSWIDTH_32_BYTES
+
+/* for descriptors */
+#define USB_DMAC_INITIAL_NR_DESC	16
+#define USB_DMAC_INITIAL_NR_SG		8
+
+/* -----------------------------------------------------------------------------
+ * Device access
+ */
+
+static void usb_dmac_write(struct usb_dmac *dmac, u32 reg, u32 data)
+{
+	writel(data, dmac->iomem + reg);
+}
+
+static u32 usb_dmac_read(struct usb_dmac *dmac, u32 reg)
+{
+	return readl(dmac->iomem + reg);
+}
+
+static u32 usb_dmac_chan_read(struct usb_dmac_chan *chan, u32 reg)
+{
+	return readl(chan->iomem + reg);
+}
+
+static void usb_dmac_chan_write(struct usb_dmac_chan *chan, u32 reg, u32 data)
+{
+	writel(data, chan->iomem + reg);
+}
+
+/* -----------------------------------------------------------------------------
+ * Initialization and configuration
+ */
+
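+/* A channel is busy while DE is set but TE has not been raised yet. */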
+static bool usb_dmac_chan_is_busy(struct usb_dmac_chan *chan)
+{
+	u32 chcr = usb_dmac_chan_read(chan, USB_DMACHCR);
+
+	return (chcr & (USB_DMACHCR_DE | USB_DMACHCR_TE)) == USB_DMACHCR_DE;
+}
+
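+/*
+ * Worked example (derived from the expression below): with the 32-byte
+ * transfer size, size = 36 gives size % 32 = 4, so the result is
+ * 0xffffffff << 28 = 0xf0000000 (only the top four bits set), while any
+ * multiple of 32 yields the full mask 0xffffffff.
+ */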
+static u32 usb_dmac_calc_tend(u32 size)
+{
+	/*
+	 * Please refer to the Figure "Example of Final Transaction Valid
+	 * Data Transfer Enable (EDTEN) Setting" in the data sheet.
+	 */
+	return 0xffffffff << (32 - (size % USB_DMAC_XFER_SIZE ? :
+						USB_DMAC_XFER_SIZE));
+}
+
+/* This function is called with the vc.lock held */
+static void usb_dmac_chan_start_sg(struct usb_dmac_chan *chan,
+				   unsigned int index)
+{
+	struct usb_dmac_desc *desc = chan->desc;
+	struct usb_dmac_sg *sg = desc->sg + index;
+	dma_addr_t src_addr = 0, dst_addr = 0;
+
+	WARN_ON_ONCE(usb_dmac_chan_is_busy(chan));
+
+	if (desc->direction == DMA_DEV_TO_MEM)
+		dst_addr = sg->mem_addr;
+	else
+		src_addr = sg->mem_addr;
+
+	dev_dbg(chan->vc.chan.device->dev,
+		"chan%u: queue sg %p: %u@%pad -> %pad\n",
+		chan->index, sg, sg->size, &src_addr, &dst_addr);
+
+	usb_dmac_chan_write(chan, USB_DMASAR, src_addr & 0xffffffff);
+	usb_dmac_chan_write(chan, USB_DMADAR, dst_addr & 0xffffffff);
+	usb_dmac_chan_write(chan, USB_DMATCR,
+			    DIV_ROUND_UP(sg->size, USB_DMAC_XFER_SIZE));
+	usb_dmac_chan_write(chan, USB_DMATEND, usb_dmac_calc_tend(sg->size));
+
+	usb_dmac_chan_write(chan, USB_DMACHCR, USB_DMAC_CHCR_TS |
+			USB_DMACHCR_NULLE | USB_DMACHCR_IE | USB_DMACHCR_DE);
+}
+
+/* This function is called with the vc.lock held */
+static void usb_dmac_chan_start_desc(struct usb_dmac_chan *chan)
+{
+	struct virt_dma_desc *vd;
+
+	vd = vchan_next_desc(&chan->vc);
+	if (!vd) {
+		chan->desc = NULL;
+		return;
+	}
+
+	/*
+	 * Remove this request from vc->desc_issued. Otherwise, this driver
+	 * would get the same descriptor again from vchan_next_desc() after
+	 * the transfer has completed.
+	 */
+	list_del(&vd->node);
+
+	chan->desc = to_usb_dmac_desc(vd);
+	chan->desc->sg_index = 0;
+	usb_dmac_chan_start_sg(chan, 0);
+}
+
+static int usb_dmac_init(struct usb_dmac *dmac)
+{
+	u16 dmaor;
+
+	/* Enable the DMAC globally. */
+	usb_dmac_write(dmac, USB_DMAOR, USB_DMAOR_DME);
+
+	dmaor = usb_dmac_read(dmac, USB_DMAOR);
+	if ((dmaor & (USB_DMAOR_AE | USB_DMAOR_DME)) != USB_DMAOR_DME) {
+		dev_warn(dmac->dev, "DMAOR initialization failed.\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptors allocation and free
+ */
+static int usb_dmac_desc_alloc(struct usb_dmac_chan *chan, unsigned int sg_len,
+			       gfp_t gfp)
+{
+	struct usb_dmac_desc *desc;
+	unsigned long flags;
+
+	desc = kzalloc(struct_size(desc, sg, sg_len), gfp);
+	if (!desc)
+		return -ENOMEM;
+
+	desc->sg_allocated_len = sg_len;
+	INIT_LIST_HEAD(&desc->node);
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+	list_add_tail(&desc->node, &chan->desc_freed);
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+	return 0;
+}
+
+static void usb_dmac_desc_free(struct usb_dmac_chan *chan)
+{
+	struct usb_dmac_desc *desc, *_desc;
+	LIST_HEAD(list);
+
+	list_splice_init(&chan->desc_freed, &list);
+	list_splice_init(&chan->desc_got, &list);
+
+	list_for_each_entry_safe(desc, _desc, &list, node) {
+		list_del(&desc->node);
+		kfree(desc);
+	}
+	chan->descs_allocated = 0;
+}
+
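+/*
+ * Get a descriptor for a transfer of sg_len segments: reuse the first entry
+ * on desc_freed whose allocation is large enough, otherwise allocate a new
+ * one (which is added to the tail of desc_freed) and move it to desc_got.
+ */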
+static struct usb_dmac_desc *usb_dmac_desc_get(struct usb_dmac_chan *chan,
+					       unsigned int sg_len, gfp_t gfp)
+{
+	struct usb_dmac_desc *desc = NULL;
+	unsigned long flags;
+
+	/* Get a freed descriptor */
+	spin_lock_irqsave(&chan->vc.lock, flags);
+	list_for_each_entry(desc, &chan->desc_freed, node) {
+		if (sg_len <= desc->sg_allocated_len) {
+			list_move_tail(&desc->node, &chan->desc_got);
+			spin_unlock_irqrestore(&chan->vc.lock, flags);
+			return desc;
+		}
+	}
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+	/* Allocate a new descriptor */
+	if (!usb_dmac_desc_alloc(chan, sg_len, gfp)) {
+		/* The newly allocated descriptor was added to the tail of desc_freed */
+		spin_lock_irqsave(&chan->vc.lock, flags);
+		desc = list_last_entry(&chan->desc_freed, struct usb_dmac_desc,
+				       node);
+		list_move_tail(&desc->node, &chan->desc_got);
+		spin_unlock_irqrestore(&chan->vc.lock, flags);
+		return desc;
+	}
+
+	return NULL;
+}
+
+static void usb_dmac_desc_put(struct usb_dmac_chan *chan,
+			      struct usb_dmac_desc *desc)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+	list_move_tail(&desc->node, &chan->desc_freed);
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+/* -----------------------------------------------------------------------------
+ * Stop and reset
+ */
+
+static void usb_dmac_soft_reset(struct usb_dmac_chan *uchan)
+{
+	struct dma_chan *chan = &uchan->vc.chan;
+	struct usb_dmac *dmac = to_usb_dmac(chan->device);
+	int i;
+
+	/* Don't issue a soft reset while any channel is busy */
+	for (i = 0; i < dmac->n_channels; ++i) {
+		if (usb_dmac_chan_is_busy(&dmac->channels[i]))
+			return;
+	}
+
+	usb_dmac_write(dmac, USB_DMAOR, 0);
+	usb_dmac_write(dmac, USB_DMASWR, USB_DMASWR_SWR);
+	udelay(100);
+	usb_dmac_write(dmac, USB_DMASWR, 0);
+	usb_dmac_write(dmac, USB_DMAOR, USB_DMAOR_DME);
+}
+
+static void usb_dmac_chan_halt(struct usb_dmac_chan *chan)
+{
+	u32 chcr = usb_dmac_chan_read(chan, USB_DMACHCR);
+
+	chcr &= ~(USB_DMACHCR_IE | USB_DMACHCR_TE | USB_DMACHCR_DE);
+	usb_dmac_chan_write(chan, USB_DMACHCR, chcr);
+
+	usb_dmac_soft_reset(chan);
+}
+
+static void usb_dmac_stop(struct usb_dmac *dmac)
+{
+	usb_dmac_write(dmac, USB_DMAOR, 0);
+}
+
+/* -----------------------------------------------------------------------------
+ * DMA engine operations
+ */
+
+static int usb_dmac_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+	int ret;
+
+	while (uchan->descs_allocated < USB_DMAC_INITIAL_NR_DESC) {
+		ret = usb_dmac_desc_alloc(uchan, USB_DMAC_INITIAL_NR_SG,
+					  GFP_KERNEL);
+		if (ret < 0) {
+			usb_dmac_desc_free(uchan);
+			return ret;
+		}
+		uchan->descs_allocated++;
+	}
+
+	return pm_runtime_get_sync(chan->device->dev);
+}
+
+static void usb_dmac_free_chan_resources(struct dma_chan *chan)
+{
+	struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+	unsigned long flags;
+
+	/* Protect against ISR */
+	spin_lock_irqsave(&uchan->vc.lock, flags);
+	usb_dmac_chan_halt(uchan);
+	spin_unlock_irqrestore(&uchan->vc.lock, flags);
+
+	usb_dmac_desc_free(uchan);
+	vchan_free_chan_resources(&uchan->vc);
+
+	pm_runtime_put(chan->device->dev);
+}
+
+static struct dma_async_tx_descriptor *
+usb_dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		       unsigned int sg_len, enum dma_transfer_direction dir,
+		       unsigned long dma_flags, void *context)
+{
+	struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+	struct usb_dmac_desc *desc;
+	struct scatterlist *sg;
+	int i;
+
+	if (!sg_len) {
+		dev_warn(chan->device->dev,
+			 "%s: bad parameter: len=%d\n", __func__, sg_len);
+		return NULL;
+	}
+
+	desc = usb_dmac_desc_get(uchan, sg_len, GFP_NOWAIT);
+	if (!desc)
+		return NULL;
+
+	desc->direction = dir;
+	desc->sg_len = sg_len;
+	for_each_sg(sgl, sg, sg_len, i) {
+		desc->sg[i].mem_addr = sg_dma_address(sg);
+		desc->sg[i].size = sg_dma_len(sg);
+	}
+
+	return vchan_tx_prep(&uchan->vc, &desc->vd, dma_flags);
+}
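+
+/*
+ * Illustration only, not part of this driver: a slave consumer would drive
+ * the prep callback above through the generic dmaengine API, roughly:
+ *
+ *	chan = dma_request_chan(dev, "ch0");
+ *	dmaengine_slave_config(chan, &cfg);
+ *	desc = dmaengine_prep_slave_sg(chan, sgl, nents, DMA_MEM_TO_DEV,
+ *				       DMA_PREP_INTERRUPT);
+ *	desc->callback = done_fn;
+ *	dmaengine_submit(desc);
+ *	dma_async_issue_pending(chan);
+ *
+ * The channel name and callback here are placeholders.
+ */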
+
+static int usb_dmac_chan_terminate_all(struct dma_chan *chan)
+{
+	struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+	struct usb_dmac_desc *desc, *_desc;
+	unsigned long flags;
+	LIST_HEAD(head);
+	LIST_HEAD(list);
+
+	spin_lock_irqsave(&uchan->vc.lock, flags);
+	usb_dmac_chan_halt(uchan);
+	vchan_get_all_descriptors(&uchan->vc, &head);
+	if (uchan->desc)
+		uchan->desc = NULL;
+	list_splice_init(&uchan->desc_got, &list);
+	list_for_each_entry_safe(desc, _desc, &list, node)
+		list_move_tail(&desc->node, &uchan->desc_freed);
+	spin_unlock_irqrestore(&uchan->vc.lock, flags);
+	vchan_dma_desc_free_list(&uchan->vc, &head);
+
+	return 0;
+}
+
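+/*
+ * Residue of the scatterlist entry currently in flight: the programmed size
+ * minus how far the DMAC has advanced its memory-side address register
+ * (DAR for DEV_TO_MEM, SAR for MEM_TO_DEV) past the start of the buffer.
+ */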
+static unsigned int usb_dmac_get_current_residue(struct usb_dmac_chan *chan,
+						 struct usb_dmac_desc *desc,
+						 int sg_index)
+{
+	struct usb_dmac_sg *sg = desc->sg + sg_index;
+	u32 mem_addr = sg->mem_addr & 0xffffffff;
+	unsigned int residue = sg->size;
+
+	/*
+	 * We cannot use USB_DMATCR to calculate the residue because it counts
+	 * USB_DMAC_XFER_SIZE (32-byte) transactions rather than bytes.
+	 */
+	if (desc->direction == DMA_DEV_TO_MEM)
+		residue -= usb_dmac_chan_read(chan, USB_DMADAR) - mem_addr;
+	else
+		residue -= usb_dmac_chan_read(chan, USB_DMASAR) - mem_addr;
+
+	return residue;
+}
+
+static u32 usb_dmac_chan_get_residue_if_complete(struct usb_dmac_chan *chan,
+						 dma_cookie_t cookie)
+{
+	struct usb_dmac_desc *desc;
+	u32 residue = 0;
+
+	list_for_each_entry_reverse(desc, &chan->desc_freed, node) {
+		if (desc->done_cookie == cookie) {
+			residue = desc->residue;
+			break;
+		}
+	}
+
+	return residue;
+}
+
+static u32 usb_dmac_chan_get_residue(struct usb_dmac_chan *chan,
+				     dma_cookie_t cookie)
+{
+	u32 residue = 0;
+	struct virt_dma_desc *vd;
+	struct usb_dmac_desc *desc = chan->desc;
+	int i;
+
+	if (!desc) {
+		vd = vchan_find_desc(&chan->vc, cookie);
+		if (!vd)
+			return 0;
+		desc = to_usb_dmac_desc(vd);
+	}
+
+	/* Compute the size of all usb_dmac_sg still to be transferred */
+	for (i = desc->sg_index + 1; i < desc->sg_len; i++)
+		residue += desc->sg[i].size;
+
+	/* Add the residue for the current sg */
+	residue += usb_dmac_get_current_residue(chan, desc, desc->sg_index);
+
+	return residue;
+}
+
+static enum dma_status usb_dmac_tx_status(struct dma_chan *chan,
+					  dma_cookie_t cookie,
+					  struct dma_tx_state *txstate)
+{
+	struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+	enum dma_status status;
+	unsigned int residue = 0;
+	unsigned long flags;
+
+	status = dma_cookie_status(chan, cookie, txstate);
+	/* A client driver may still query the residue after DMA_COMPLETE */
+	if (!txstate)
+		return status;
+
+	spin_lock_irqsave(&uchan->vc.lock, flags);
+	if (status == DMA_COMPLETE)
+		residue = usb_dmac_chan_get_residue_if_complete(uchan, cookie);
+	else
+		residue = usb_dmac_chan_get_residue(uchan, cookie);
+	spin_unlock_irqrestore(&uchan->vc.lock, flags);
+
+	dma_set_residue(txstate, residue);
+
+	return status;
+}
+
+static void usb_dmac_issue_pending(struct dma_chan *chan)
+{
+	struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&uchan->vc.lock, flags);
+	if (vchan_issue_pending(&uchan->vc) && !uchan->desc)
+		usb_dmac_chan_start_desc(uchan);
+	spin_unlock_irqrestore(&uchan->vc.lock, flags);
+}
+
+static void usb_dmac_virt_desc_free(struct virt_dma_desc *vd)
+{
+	struct usb_dmac_desc *desc = to_usb_dmac_desc(vd);
+	struct usb_dmac_chan *chan = to_usb_dmac_chan(vd->tx.chan);
+
+	usb_dmac_desc_put(chan, desc);
+}
+
+/* -----------------------------------------------------------------------------
+ * IRQ handling
+ */
+
+static void usb_dmac_isr_transfer_end(struct usb_dmac_chan *chan)
+{
+	struct usb_dmac_desc *desc = chan->desc;
+
+	BUG_ON(!desc);
+
+	if (++desc->sg_index < desc->sg_len) {
+		usb_dmac_chan_start_sg(chan, desc->sg_index);
+	} else {
+		desc->residue = usb_dmac_get_current_residue(chan, desc,
+							desc->sg_index - 1);
+		desc->done_cookie = desc->vd.tx.cookie;
+		vchan_cookie_complete(&desc->vd);
+
+		/* Start the next transfer if another descriptor is pending */
+		usb_dmac_chan_start_desc(chan);
+	}
+}
+
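+/*
+ * Per-channel interrupt handler: handle TE/SP (transfer end / stopped) and
+ * NULL conditions, clear the corresponding CHCR bits (setting FTE when a
+ * null transfer was flagged), and on a transfer end either start the next
+ * scatterlist segment or complete the descriptor and start the next one.
+ */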
+static irqreturn_t usb_dmac_isr_channel(int irq, void *dev)
+{
+	struct usb_dmac_chan *chan = dev;
+	irqreturn_t ret = IRQ_NONE;
+	u32 mask = 0;
+	u32 chcr;
+	bool xfer_end = false;
+
+	spin_lock(&chan->vc.lock);
+
+	chcr = usb_dmac_chan_read(chan, USB_DMACHCR);
+	if (chcr & (USB_DMACHCR_TE | USB_DMACHCR_SP)) {
+		mask |= USB_DMACHCR_DE | USB_DMACHCR_TE | USB_DMACHCR_SP;
+		if (chcr & USB_DMACHCR_DE)
+			xfer_end = true;
+		ret |= IRQ_HANDLED;
+	}
+	if (chcr & USB_DMACHCR_NULL) {
+		/* A TE interrupt will be raised after we set FTE */
+		mask |= USB_DMACHCR_NULL;
+		chcr |= USB_DMACHCR_FTE;
+		ret |= IRQ_HANDLED;
+	}
+	if (mask)
+		usb_dmac_chan_write(chan, USB_DMACHCR, chcr & ~mask);
+
+	if (xfer_end)
+		usb_dmac_isr_transfer_end(chan);
+
+	spin_unlock(&chan->vc.lock);
+
+	return ret;
+}
+
+/* -----------------------------------------------------------------------------
+ * OF xlate and channel filter
+ */
+
+static bool usb_dmac_chan_filter(struct dma_chan *chan, void *arg)
+{
+	struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+	struct of_phandle_args *dma_spec = arg;
+
+	if (dma_spec->np != chan->device->dev->of_node)
+		return false;
+
+	/* Each USB-DMAC channel is tied to a fixed FIFO of the USB controller */
+	if (uchan->index != dma_spec->args[0])
+		return false;
+
+	return true;
+}
+
+static struct dma_chan *usb_dmac_of_xlate(struct of_phandle_args *dma_spec,
+					  struct of_dma *ofdma)
+{
+	struct dma_chan *chan;
+	dma_cap_mask_t mask;
+
+	if (dma_spec->args_count != 1)
+		return NULL;
+
+	/* Only slave DMA channels can be allocated via DT */
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	chan = dma_request_channel(mask, usb_dmac_chan_filter, dma_spec);
+	if (!chan)
+		return NULL;
+
+	return chan;
+}
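+
+/*
+ * Illustration only (node and property values are placeholders): a client
+ * node references a channel with a single cell holding the channel index
+ * checked by usb_dmac_chan_filter(), e.g.
+ *
+ *	dmas = <&usb_dmac0 0>, <&usb_dmac0 1>;
+ *	dma-names = "ch0", "ch1";
+ */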
+
+/* -----------------------------------------------------------------------------
+ * Power management
+ */
+
+#ifdef CONFIG_PM
+static int usb_dmac_runtime_suspend(struct device *dev)
+{
+	struct usb_dmac *dmac = dev_get_drvdata(dev);
+	int i;
+
+	for (i = 0; i < dmac->n_channels; ++i) {
+		if (!dmac->channels[i].iomem)
+			break;
+		usb_dmac_chan_halt(&dmac->channels[i]);
+	}
+
+	return 0;
+}
+
+static int usb_dmac_runtime_resume(struct device *dev)
+{
+	struct usb_dmac *dmac = dev_get_drvdata(dev);
+
+	return usb_dmac_init(dmac);
+}
+#endif /* CONFIG_PM */
+
+static const struct dev_pm_ops usb_dmac_pm = {
+	SET_RUNTIME_PM_OPS(usb_dmac_runtime_suspend, usb_dmac_runtime_resume,
+			   NULL)
+};
+
+/* -----------------------------------------------------------------------------
+ * Probe and remove
+ */
+
+static int usb_dmac_chan_probe(struct usb_dmac *dmac,
+			       struct usb_dmac_chan *uchan,
+			       unsigned int index)
+{
+	struct platform_device *pdev = to_platform_device(dmac->dev);
+	char pdev_irqname[5];
+	char *irqname;
+	int ret;
+
+	uchan->index = index;
+	uchan->iomem = dmac->iomem + USB_DMAC_CHAN_OFFSET(index);
+
+	/* Request the channel interrupt. */
+	sprintf(pdev_irqname, "ch%u", index);
+	uchan->irq = platform_get_irq_byname(pdev, pdev_irqname);
+	if (uchan->irq < 0) {
+		dev_err(dmac->dev, "no IRQ specified for channel %u\n", index);
+		return -ENODEV;
+	}
+
+	irqname = devm_kasprintf(dmac->dev, GFP_KERNEL, "%s:%u",
+				 dev_name(dmac->dev), index);
+	if (!irqname)
+		return -ENOMEM;
+
+	ret = devm_request_irq(dmac->dev, uchan->irq, usb_dmac_isr_channel,
+			       IRQF_SHARED, irqname, uchan);
+	if (ret) {
+		dev_err(dmac->dev, "failed to request IRQ %u (%d)\n",
+			uchan->irq, ret);
+		return ret;
+	}
+
+	uchan->vc.desc_free = usb_dmac_virt_desc_free;
+	vchan_init(&uchan->vc, &dmac->engine);
+	INIT_LIST_HEAD(&uchan->desc_freed);
+	INIT_LIST_HEAD(&uchan->desc_got);
+
+	return 0;
+}
+
+static int usb_dmac_parse_of(struct device *dev, struct usb_dmac *dmac)
+{
+	struct device_node *np = dev->of_node;
+	int ret;
+
+	ret = of_property_read_u32(np, "dma-channels", &dmac->n_channels);
+	if (ret < 0) {
+		dev_err(dev, "unable to read dma-channels property\n");
+		return ret;
+	}
+
+	if (dmac->n_channels <= 0 || dmac->n_channels >= 100) {
+		dev_err(dev, "invalid number of channels %u\n",
+			dmac->n_channels);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int usb_dmac_probe(struct platform_device *pdev)
+{
+	const enum dma_slave_buswidth widths = USB_DMAC_SLAVE_BUSWIDTH;
+	struct dma_device *engine;
+	struct usb_dmac *dmac;
+	struct resource *mem;
+	unsigned int i;
+	int ret;
+
+	dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
+	if (!dmac)
+		return -ENOMEM;
+
+	dmac->dev = &pdev->dev;
+	platform_set_drvdata(pdev, dmac);
+
+	ret = usb_dmac_parse_of(&pdev->dev, dmac);
+	if (ret < 0)
+		return ret;
+
+	dmac->channels = devm_kcalloc(&pdev->dev, dmac->n_channels,
+				      sizeof(*dmac->channels), GFP_KERNEL);
+	if (!dmac->channels)
+		return -ENOMEM;
+
+	/* Request resources. */
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	dmac->iomem = devm_ioremap_resource(&pdev->dev, mem);
+	if (IS_ERR(dmac->iomem))
+		return PTR_ERR(dmac->iomem);
+
+	/* Enable runtime PM and initialize the device. */
+	pm_runtime_enable(&pdev->dev);
+	ret = pm_runtime_get_sync(&pdev->dev);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret);
+		goto error_pm;
+	}
+
+	ret = usb_dmac_init(dmac);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to reset device\n");
+		goto error;
+	}
+
+	/* Initialize the channels. */
+	INIT_LIST_HEAD(&dmac->engine.channels);
+
+	for (i = 0; i < dmac->n_channels; ++i) {
+		ret = usb_dmac_chan_probe(dmac, &dmac->channels[i], i);
+		if (ret < 0)
+			goto error;
+	}
+
+	/* Register the DMAC as a DMA provider for DT. */
+	ret = of_dma_controller_register(pdev->dev.of_node, usb_dmac_of_xlate,
+					 NULL);
+	if (ret < 0)
+		goto error;
+
+	/*
+	 * Register the DMA engine device.
+	 *
+	 * Default transfer size of 32 bytes requires 32-byte alignment.
+	 */
+	engine = &dmac->engine;
+	dma_cap_set(DMA_SLAVE, engine->cap_mask);
+
+	engine->dev = &pdev->dev;
+
+	engine->src_addr_widths = widths;
+	engine->dst_addr_widths = widths;
+	engine->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
+	engine->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+	engine->device_alloc_chan_resources = usb_dmac_alloc_chan_resources;
+	engine->device_free_chan_resources = usb_dmac_free_chan_resources;
+	engine->device_prep_slave_sg = usb_dmac_prep_slave_sg;
+	engine->device_terminate_all = usb_dmac_chan_terminate_all;
+	engine->device_tx_status = usb_dmac_tx_status;
+	engine->device_issue_pending = usb_dmac_issue_pending;
+
+	ret = dma_async_device_register(engine);
+	if (ret < 0)
+		goto error;
+
+	pm_runtime_put(&pdev->dev);
+	return 0;
+
+error:
+	of_dma_controller_free(pdev->dev.of_node);
+	pm_runtime_put(&pdev->dev);
+error_pm:
+	pm_runtime_disable(&pdev->dev);
+	return ret;
+}
+
+static void usb_dmac_chan_remove(struct usb_dmac *dmac,
+				 struct usb_dmac_chan *uchan)
+{
+	usb_dmac_chan_halt(uchan);
+	devm_free_irq(dmac->dev, uchan->irq, uchan);
+}
+
+static int usb_dmac_remove(struct platform_device *pdev)
+{
+	struct usb_dmac *dmac = platform_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < dmac->n_channels; ++i)
+		usb_dmac_chan_remove(dmac, &dmac->channels[i]);
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&dmac->engine);
+
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+static void usb_dmac_shutdown(struct platform_device *pdev)
+{
+	struct usb_dmac *dmac = platform_get_drvdata(pdev);
+
+	usb_dmac_stop(dmac);
+}
+
+static const struct of_device_id usb_dmac_of_ids[] = {
+	{ .compatible = "renesas,usb-dmac", },
+	{ /* Sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, usb_dmac_of_ids);
+
+static struct platform_driver usb_dmac_driver = {
+	.driver		= {
+		.pm	= &usb_dmac_pm,
+		.name	= "usb-dmac",
+		.of_match_table = usb_dmac_of_ids,
+	},
+	.probe		= usb_dmac_probe,
+	.remove		= usb_dmac_remove,
+	.shutdown	= usb_dmac_shutdown,
+};
+
+module_platform_driver(usb_dmac_driver);
+
+MODULE_DESCRIPTION("Renesas USB DMA Controller Driver");
+MODULE_AUTHOR("Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c
new file mode 100644
index 0000000..a0733ac
--- /dev/null
+++ b/drivers/dma/sirf-dma.c
@@ -0,0 +1,1171 @@
+/*
+ * DMA controller driver for CSR SiRFprimaII
+ *
+ * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company.
+ *
+ * Licensed under GPLv2 or later.
+ */
+
+#include <linux/module.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/pm_runtime.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/clk.h>
+#include <linux/of_dma.h>
+#include <linux/sirfsoc_dma.h>
+
+#include "dmaengine.h"
+
+#define SIRFSOC_DMA_VER_A7V1                    1
+#define SIRFSOC_DMA_VER_A7V2                    2
+#define SIRFSOC_DMA_VER_A6                      4
+
+#define SIRFSOC_DMA_DESCRIPTORS                 16
+#define SIRFSOC_DMA_CHANNELS                    16
+#define SIRFSOC_DMA_TABLE_NUM                   256
+
+#define SIRFSOC_DMA_CH_ADDR                     0x00
+#define SIRFSOC_DMA_CH_XLEN                     0x04
+#define SIRFSOC_DMA_CH_YLEN                     0x08
+#define SIRFSOC_DMA_CH_CTRL                     0x0C
+
+#define SIRFSOC_DMA_WIDTH_0                     0x100
+#define SIRFSOC_DMA_CH_VALID                    0x140
+#define SIRFSOC_DMA_CH_INT                      0x144
+#define SIRFSOC_DMA_INT_EN                      0x148
+#define SIRFSOC_DMA_INT_EN_CLR                  0x14C
+#define SIRFSOC_DMA_CH_LOOP_CTRL                0x150
+#define SIRFSOC_DMA_CH_LOOP_CTRL_CLR            0x154
+#define SIRFSOC_DMA_WIDTH_ATLAS7                0x10
+#define SIRFSOC_DMA_VALID_ATLAS7                0x14
+#define SIRFSOC_DMA_INT_ATLAS7                  0x18
+#define SIRFSOC_DMA_INT_EN_ATLAS7               0x1c
+#define SIRFSOC_DMA_LOOP_CTRL_ATLAS7            0x20
+#define SIRFSOC_DMA_CUR_DATA_ADDR               0x34
+#define SIRFSOC_DMA_MUL_ATLAS7                  0x38
+#define SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7         0x158
+#define SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7     0x15C
+#define SIRFSOC_DMA_IOBG_SCMD_EN		0x800
+#define SIRFSOC_DMA_EARLY_RESP_SET		0x818
+#define SIRFSOC_DMA_EARLY_RESP_CLR		0x81C
+
+#define SIRFSOC_DMA_MODE_CTRL_BIT               4
+#define SIRFSOC_DMA_DIR_CTRL_BIT                5
+#define SIRFSOC_DMA_MODE_CTRL_BIT_ATLAS7        2
+#define SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7       3
+#define SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7         4
+#define SIRFSOC_DMA_TAB_NUM_ATLAS7              7
+#define SIRFSOC_DMA_CHAIN_INT_BIT_ATLAS7        5
+#define SIRFSOC_DMA_CHAIN_FLAG_SHIFT_ATLAS7     25
+#define SIRFSOC_DMA_CHAIN_ADDR_SHIFT            32
+
+#define SIRFSOC_DMA_INT_FINI_INT_ATLAS7         BIT(0)
+#define SIRFSOC_DMA_INT_CNT_INT_ATLAS7          BIT(1)
+#define SIRFSOC_DMA_INT_PAU_INT_ATLAS7          BIT(2)
+#define SIRFSOC_DMA_INT_LOOP_INT_ATLAS7         BIT(3)
+#define SIRFSOC_DMA_INT_INV_INT_ATLAS7          BIT(4)
+#define SIRFSOC_DMA_INT_END_INT_ATLAS7          BIT(5)
+#define SIRFSOC_DMA_INT_ALL_ATLAS7              0x3F
+
+/* The xlen and dma_width registers are specified in 4-byte units */
+#define SIRFSOC_DMA_WORD_LEN			4
+#define SIRFSOC_DMA_XLEN_MAX_V1         0x800
+#define SIRFSOC_DMA_XLEN_MAX_V2         0x1000
+
+struct sirfsoc_dma_desc {
+	struct dma_async_tx_descriptor	desc;
+	struct list_head		node;
+
+	/* SiRFprimaII 2D-DMA parameters */
+
+	int             xlen;           /* DMA xlen */
+	int             ylen;           /* DMA ylen */
+	int             width;          /* DMA width */
+	int             dir;
+	bool            cyclic;         /* is loop DMA? */
+	bool            chain;          /* is chain DMA? */
+	u32             addr;		/* DMA buffer address */
+	u64 chain_table[SIRFSOC_DMA_TABLE_NUM]; /* chain tbl */
+};
+
+struct sirfsoc_dma_chan {
+	struct dma_chan			chan;
+	struct list_head		free;
+	struct list_head		prepared;
+	struct list_head		queued;
+	struct list_head		active;
+	struct list_head		completed;
+	unsigned long			happened_cyclic;
+	unsigned long			completed_cyclic;
+
+	/* Lock for this structure */
+	spinlock_t			lock;
+
+	int				mode;
+};
+
+struct sirfsoc_dma_regs {
+	u32				ctrl[SIRFSOC_DMA_CHANNELS];
+	u32				interrupt_en;
+};
+
+struct sirfsoc_dma {
+	struct dma_device		dma;
+	struct tasklet_struct		tasklet;
+	struct sirfsoc_dma_chan		channels[SIRFSOC_DMA_CHANNELS];
+	void __iomem			*base;
+	int				irq;
+	struct clk			*clk;
+	int				type;
+	void (*exec_desc)(struct sirfsoc_dma_desc *sdesc,
+		int cid, int burst_mode, void __iomem *base);
+	struct sirfsoc_dma_regs		regs_save;
+};
+
+struct sirfsoc_dmadata {
+	void (*exec)(struct sirfsoc_dma_desc *sdesc,
+		int cid, int burst_mode, void __iomem *base);
+	int type;
+};
+
+enum sirfsoc_dma_chain_flag {
+	SIRFSOC_DMA_CHAIN_NORMAL = 0x01,
+	SIRFSOC_DMA_CHAIN_PAUSE = 0x02,
+	SIRFSOC_DMA_CHAIN_LOOP = 0x03,
+	SIRFSOC_DMA_CHAIN_END = 0x04
+};
+
+#define DRV_NAME	"sirfsoc_dma"
+
+static int sirfsoc_dma_runtime_suspend(struct device *dev);
+
+/* Convert struct dma_chan to struct sirfsoc_dma_chan */
+static inline
+struct sirfsoc_dma_chan *dma_chan_to_sirfsoc_dma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct sirfsoc_dma_chan, chan);
+}
+
+/* Convert struct dma_chan to struct sirfsoc_dma */
+static inline struct sirfsoc_dma *dma_chan_to_sirfsoc_dma(struct dma_chan *c)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(c);
+	return container_of(schan, struct sirfsoc_dma, channels[c->chan_id]);
+}
+
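+/*
+ * Program and start one descriptor on the Atlas7 (A7 v2) engine, using
+ * either hardware chain mode or the legacy xlen/ylen 2D mode, then enable
+ * the matching completion interrupts.
+ */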
+static void sirfsoc_dma_execute_hw_a7v2(struct sirfsoc_dma_desc *sdesc,
+		int cid, int burst_mode, void __iomem *base)
+{
+	if (sdesc->chain) {
+		/* DMA v2 HW chain mode */
+		writel_relaxed((sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7) |
+			       (sdesc->chain <<
+				SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7) |
+			       (0x8 << SIRFSOC_DMA_TAB_NUM_ATLAS7) | 0x3,
+			       base + SIRFSOC_DMA_CH_CTRL);
+	} else {
+		/* DMA v2 legacy mode */
+		writel_relaxed(sdesc->xlen, base + SIRFSOC_DMA_CH_XLEN);
+		writel_relaxed(sdesc->ylen, base + SIRFSOC_DMA_CH_YLEN);
+		writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_ATLAS7);
+		writel_relaxed((sdesc->width*((sdesc->ylen+1)>>1)),
+				base + SIRFSOC_DMA_MUL_ATLAS7);
+		writel_relaxed((sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7) |
+			       (sdesc->chain <<
+				SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7) |
+			       0x3, base + SIRFSOC_DMA_CH_CTRL);
+	}
+	writel_relaxed(sdesc->chain ? SIRFSOC_DMA_INT_END_INT_ATLAS7 :
+		       (SIRFSOC_DMA_INT_FINI_INT_ATLAS7 |
+			SIRFSOC_DMA_INT_LOOP_INT_ATLAS7),
+		       base + SIRFSOC_DMA_INT_EN_ATLAS7);
+	writel(sdesc->addr, base + SIRFSOC_DMA_CH_ADDR);
+	if (sdesc->cyclic)
+		writel(0x10001, base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7);
+}
+
+static void sirfsoc_dma_execute_hw_a7v1(struct sirfsoc_dma_desc *sdesc,
+		int cid, int burst_mode, void __iomem *base)
+{
+	writel_relaxed(1, base + SIRFSOC_DMA_IOBG_SCMD_EN);
+	writel_relaxed((1 << cid), base + SIRFSOC_DMA_EARLY_RESP_SET);
+	writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_0 + cid * 4);
+	writel_relaxed(cid | (burst_mode << SIRFSOC_DMA_MODE_CTRL_BIT) |
+		       (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT),
+		       base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL);
+	writel_relaxed(sdesc->xlen, base + cid * 0x10 + SIRFSOC_DMA_CH_XLEN);
+	writel_relaxed(sdesc->ylen, base + cid * 0x10 + SIRFSOC_DMA_CH_YLEN);
+	writel_relaxed(readl_relaxed(base + SIRFSOC_DMA_INT_EN) |
+		       (1 << cid), base + SIRFSOC_DMA_INT_EN);
+	writel(sdesc->addr >> 2, base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR);
+	if (sdesc->cyclic) {
+		writel((1 << cid) | 1 << (cid + 16) |
+		       readl_relaxed(base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7),
+		       base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7);
+	}
+
+}
+
+static void sirfsoc_dma_execute_hw_a6(struct sirfsoc_dma_desc *sdesc,
+		int cid, int burst_mode, void __iomem *base)
+{
+	writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_0 + cid * 4);
+	writel_relaxed(cid | (burst_mode << SIRFSOC_DMA_MODE_CTRL_BIT) |
+		       (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT),
+		       base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL);
+	writel_relaxed(sdesc->xlen, base + cid * 0x10 + SIRFSOC_DMA_CH_XLEN);
+	writel_relaxed(sdesc->ylen, base + cid * 0x10 + SIRFSOC_DMA_CH_YLEN);
+	writel_relaxed(readl_relaxed(base + SIRFSOC_DMA_INT_EN) |
+		       (1 << cid), base + SIRFSOC_DMA_INT_EN);
+	writel(sdesc->addr >> 2, base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR);
+	if (sdesc->cyclic) {
+		writel((1 << cid) | 1 << (cid + 16) |
+		       readl_relaxed(base + SIRFSOC_DMA_CH_LOOP_CTRL),
+		       base + SIRFSOC_DMA_CH_LOOP_CTRL);
+	}
+
+}
+
+/* Start the first queued DMA descriptor */
+static void sirfsoc_dma_execute(struct sirfsoc_dma_chan *schan)
+{
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan);
+	int cid = schan->chan.chan_id;
+	struct sirfsoc_dma_desc *sdesc = NULL;
+	void __iomem *base;
+
+	/*
+	 * The lock is already held by the functions calling this, so we
+	 * don't take it again here.
+	 */
+	base = sdma->base;
+	sdesc = list_first_entry(&schan->queued, struct sirfsoc_dma_desc,
+				 node);
+	/* Move the first queued descriptor to active list */
+	list_move_tail(&sdesc->node, &schan->active);
+
+	if (sdma->type == SIRFSOC_DMA_VER_A7V2)
+		cid = 0;
+
+	/* Start the DMA transfer */
+	sdma->exec_desc(sdesc, cid, schan->mode, base);
+
+	if (sdesc->cyclic)
+		schan->happened_cyclic = schan->completed_cyclic = 0;
+}
+
+/* Interrupt handler */
+static irqreturn_t sirfsoc_dma_irq(int irq, void *data)
+{
+	struct sirfsoc_dma *sdma = data;
+	struct sirfsoc_dma_chan *schan;
+	struct sirfsoc_dma_desc *sdesc = NULL;
+	u32 is;
+	bool chain;
+	int ch;
+	void __iomem *reg;
+
+	switch (sdma->type) {
+	case SIRFSOC_DMA_VER_A6:
+	case SIRFSOC_DMA_VER_A7V1:
+		is = readl(sdma->base + SIRFSOC_DMA_CH_INT);
+		reg = sdma->base + SIRFSOC_DMA_CH_INT;
+		while ((ch = fls(is) - 1) >= 0) {
+			is &= ~(1 << ch);
+			writel_relaxed(1 << ch, reg);
+			schan = &sdma->channels[ch];
+			spin_lock(&schan->lock);
+			sdesc = list_first_entry(&schan->active,
+						 struct sirfsoc_dma_desc, node);
+			if (!sdesc->cyclic) {
+				/* Execute queued descriptors */
+				list_splice_tail_init(&schan->active,
+						      &schan->completed);
+				dma_cookie_complete(&sdesc->desc);
+				if (!list_empty(&schan->queued))
+					sirfsoc_dma_execute(schan);
+			} else
+				schan->happened_cyclic++;
+			spin_unlock(&schan->lock);
+		}
+		break;
+
+	case SIRFSOC_DMA_VER_A7V2:
+		is = readl(sdma->base + SIRFSOC_DMA_INT_ATLAS7);
+
+		reg = sdma->base + SIRFSOC_DMA_INT_ATLAS7;
+		writel_relaxed(SIRFSOC_DMA_INT_ALL_ATLAS7, reg);
+		schan = &sdma->channels[0];
+		spin_lock(&schan->lock);
+		sdesc = list_first_entry(&schan->active,
+					 struct sirfsoc_dma_desc, node);
+		if (!sdesc->cyclic) {
+			chain = sdesc->chain;
+			if ((chain && (is & SIRFSOC_DMA_INT_END_INT_ATLAS7)) ||
+				(!chain &&
+				(is & SIRFSOC_DMA_INT_FINI_INT_ATLAS7))) {
+				/* Execute queued descriptors */
+				list_splice_tail_init(&schan->active,
+						      &schan->completed);
+				dma_cookie_complete(&sdesc->desc);
+				if (!list_empty(&schan->queued))
+					sirfsoc_dma_execute(schan);
+			}
+		} else if (sdesc->cyclic && (is &
+					SIRFSOC_DMA_INT_LOOP_INT_ATLAS7))
+			schan->happened_cyclic++;
+
+		spin_unlock(&schan->lock);
+		break;
+
+	default:
+		break;
+	}
+
+	/* Schedule tasklet */
+	tasklet_schedule(&sdma->tasklet);
+
+	return IRQ_HANDLED;
+}
+
+/* process completed descriptors */
+static void sirfsoc_dma_process_completed(struct sirfsoc_dma *sdma)
+{
+	dma_cookie_t last_cookie = 0;
+	struct sirfsoc_dma_chan *schan;
+	struct sirfsoc_dma_desc *sdesc;
+	struct dma_async_tx_descriptor *desc;
+	unsigned long flags;
+	unsigned long happened_cyclic;
+	LIST_HEAD(list);
+	int i;
+
+	for (i = 0; i < sdma->dma.chancnt; i++) {
+		schan = &sdma->channels[i];
+
+		/* Get all completed descriptors */
+		spin_lock_irqsave(&schan->lock, flags);
+		if (!list_empty(&schan->completed)) {
+			list_splice_tail_init(&schan->completed, &list);
+			spin_unlock_irqrestore(&schan->lock, flags);
+
+			/* Execute callbacks and run dependencies */
+			list_for_each_entry(sdesc, &list, node) {
+				desc = &sdesc->desc;
+
+				dmaengine_desc_get_callback_invoke(desc, NULL);
+				last_cookie = desc->cookie;
+				dma_run_dependencies(desc);
+			}
+
+			/* Free descriptors */
+			spin_lock_irqsave(&schan->lock, flags);
+			list_splice_tail_init(&list, &schan->free);
+			schan->chan.completed_cookie = last_cookie;
+			spin_unlock_irqrestore(&schan->lock, flags);
+		} else {
+			if (list_empty(&schan->active)) {
+				spin_unlock_irqrestore(&schan->lock, flags);
+				continue;
+			}
+
+			/* for cyclic channel, desc is always in active list */
+			sdesc = list_first_entry(&schan->active,
+				struct sirfsoc_dma_desc, node);
+
+			/* cyclic DMA */
+			happened_cyclic = schan->happened_cyclic;
+			spin_unlock_irqrestore(&schan->lock, flags);
+
+			desc = &sdesc->desc;
+			while (happened_cyclic != schan->completed_cyclic) {
+				dmaengine_desc_get_callback_invoke(desc, NULL);
+				schan->completed_cyclic++;
+			}
+		}
+	}
+}
+
+/* DMA Tasklet */
+static void sirfsoc_dma_tasklet(unsigned long data)
+{
+	struct sirfsoc_dma *sdma = (void *)data;
+
+	sirfsoc_dma_process_completed(sdma);
+}
+
+/* Submit a descriptor: move it to the queued list and assign a cookie */
+static dma_cookie_t sirfsoc_dma_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(txd->chan);
+	struct sirfsoc_dma_desc *sdesc;
+	unsigned long flags;
+	dma_cookie_t cookie;
+
+	sdesc = container_of(txd, struct sirfsoc_dma_desc, desc);
+
+	spin_lock_irqsave(&schan->lock, flags);
+
+	/* Move descriptor to queue */
+	list_move_tail(&sdesc->node, &schan->queued);
+
+	cookie = dma_cookie_assign(txd);
+
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return cookie;
+}
+
+static int sirfsoc_dma_slave_config(struct dma_chan *chan,
+				    struct dma_slave_config *config)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	unsigned long flags;
+
+	if ((config->src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) ||
+		(config->dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES))
+		return -EINVAL;
+
+	spin_lock_irqsave(&schan->lock, flags);
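+	/* Record whether the client asked for a 4-word maximum burst */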
+	schan->mode = (config->src_maxburst == 4 ? 1 : 0);
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return 0;
+}
+
+static int sirfsoc_dma_terminate_all(struct dma_chan *chan)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan);
+	int cid = schan->chan.chan_id;
+	unsigned long flags;
+
+	spin_lock_irqsave(&schan->lock, flags);
+
+	switch (sdma->type) {
+	case SIRFSOC_DMA_VER_A7V1:
+		writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_INT_EN_CLR);
+		writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_INT);
+		writel_relaxed((1 << cid) | 1 << (cid + 16),
+			       sdma->base +
+			       SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7);
+		writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID);
+		break;
+	case SIRFSOC_DMA_VER_A7V2:
+		writel_relaxed(0, sdma->base + SIRFSOC_DMA_INT_EN_ATLAS7);
+		writel_relaxed(SIRFSOC_DMA_INT_ALL_ATLAS7,
+			       sdma->base + SIRFSOC_DMA_INT_ATLAS7);
+		writel_relaxed(0, sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7);
+		writel_relaxed(0, sdma->base + SIRFSOC_DMA_VALID_ATLAS7);
+		break;
+	case SIRFSOC_DMA_VER_A6:
+		writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) &
+			       ~(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN);
+		writel_relaxed(readl_relaxed(sdma->base +
+					     SIRFSOC_DMA_CH_LOOP_CTRL) &
+			       ~((1 << cid) | 1 << (cid + 16)),
+			       sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
+		writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID);
+		break;
+	default:
+		break;
+	}
+
+	list_splice_tail_init(&schan->active, &schan->free);
+	list_splice_tail_init(&schan->queued, &schan->free);
+
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return 0;
+}
+
+static int sirfsoc_dma_pause_chan(struct dma_chan *chan)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan);
+	int cid = schan->chan.chan_id;
+	unsigned long flags;
+
+	spin_lock_irqsave(&schan->lock, flags);
+
+	switch (sdma->type) {
+	case SIRFSOC_DMA_VER_A7V1:
+		writel_relaxed((1 << cid) | 1 << (cid + 16),
+			       sdma->base +
+			       SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7);
+		break;
+	case SIRFSOC_DMA_VER_A7V2:
+		writel_relaxed(0, sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7);
+		break;
+	case SIRFSOC_DMA_VER_A6:
+		writel_relaxed(readl_relaxed(sdma->base +
+					     SIRFSOC_DMA_CH_LOOP_CTRL) &
+			       ~((1 << cid) | 1 << (cid + 16)),
+			       sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
+		break;
+
+	default:
+		break;
+	}
+
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return 0;
+}
+
+static int sirfsoc_dma_resume_chan(struct dma_chan *chan)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan);
+	int cid = schan->chan.chan_id;
+	unsigned long flags;
+
+	spin_lock_irqsave(&schan->lock, flags);
+	switch (sdma->type) {
+	case SIRFSOC_DMA_VER_A7V1:
+		writel_relaxed((1 << cid) | 1 << (cid + 16),
+			       sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7);
+		break;
+	case SIRFSOC_DMA_VER_A7V2:
+		writel_relaxed(0x10001,
+			       sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7);
+		break;
+	case SIRFSOC_DMA_VER_A6:
+		writel_relaxed(readl_relaxed(sdma->base +
+					     SIRFSOC_DMA_CH_LOOP_CTRL) |
+			       ((1 << cid) | 1 << (cid + 16)),
+			       sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
+		break;
+
+	default:
+		break;
+	}
+
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return 0;
+}
+
+/* Alloc channel resources */
+static int sirfsoc_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan);
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma_desc *sdesc;
+	unsigned long flags;
+	LIST_HEAD(descs);
+	int i;
+
+	pm_runtime_get_sync(sdma->dma.dev);
+
+	/* Alloc descriptors for this channel */
+	for (i = 0; i < SIRFSOC_DMA_DESCRIPTORS; i++) {
+		sdesc = kzalloc(sizeof(*sdesc), GFP_KERNEL);
+		if (!sdesc) {
+			dev_notice(sdma->dma.dev, "Memory allocation error. "
+				"Allocated only %u descriptors\n", i);
+			break;
+		}
+
+		dma_async_tx_descriptor_init(&sdesc->desc, chan);
+		sdesc->desc.flags = DMA_CTRL_ACK;
+		sdesc->desc.tx_submit = sirfsoc_dma_tx_submit;
+
+		list_add_tail(&sdesc->node, &descs);
+	}
+
+	/* Return error only if no descriptors were allocated */
+	if (i == 0) {
+		pm_runtime_put(sdma->dma.dev);
+		return -ENOMEM;
+	}
+
+	spin_lock_irqsave(&schan->lock, flags);
+
+	list_splice_tail_init(&descs, &schan->free);
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return i;
+}
+
+/* Free channel resources */
+static void sirfsoc_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan);
+	struct sirfsoc_dma_desc *sdesc, *tmp;
+	unsigned long flags;
+	LIST_HEAD(descs);
+
+	spin_lock_irqsave(&schan->lock, flags);
+
+	/* Channel must be idle */
+	BUG_ON(!list_empty(&schan->prepared));
+	BUG_ON(!list_empty(&schan->queued));
+	BUG_ON(!list_empty(&schan->active));
+	BUG_ON(!list_empty(&schan->completed));
+
+	/* Move data */
+	list_splice_tail_init(&schan->free, &descs);
+
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	/* Free descriptors */
+	list_for_each_entry_safe(sdesc, tmp, &descs, node)
+		kfree(sdesc);
+
+	pm_runtime_put(sdma->dma.dev);
+}
+
+/* Send pending descriptor to hardware */
+static void sirfsoc_dma_issue_pending(struct dma_chan *chan)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&schan->lock, flags);
+
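+	/* Kick the hardware only when it is idle and work is queued */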
+	if (list_empty(&schan->active) && !list_empty(&schan->queued))
+		sirfsoc_dma_execute(schan);
+
+	spin_unlock_irqrestore(&schan->lock, flags);
+}
+
+/* Check request completion status */
+static enum dma_status
+sirfsoc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+	struct dma_tx_state *txstate)
+{
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan);
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	unsigned long flags;
+	enum dma_status ret;
+	struct sirfsoc_dma_desc *sdesc;
+	int cid = schan->chan.chan_id;
+	unsigned long dma_pos;
+	unsigned long dma_request_bytes;
+	unsigned long residue;
+
+	spin_lock_irqsave(&schan->lock, flags);
+
+	if (list_empty(&schan->active)) {
+		ret = dma_cookie_status(chan, cookie, txstate);
+		dma_set_residue(txstate, 0);
+		spin_unlock_irqrestore(&schan->lock, flags);
+		return ret;
+	}
+	sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, node);
+	if (sdesc->cyclic)
+		dma_request_bytes = (sdesc->xlen + 1) * (sdesc->ylen + 1) *
+			(sdesc->width * SIRFSOC_DMA_WORD_LEN);
+	else
+		dma_request_bytes = sdesc->xlen * SIRFSOC_DMA_WORD_LEN;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+
+	if (sdma->type == SIRFSOC_DMA_VER_A7V2) {
+		dma_pos = readl_relaxed(sdma->base + SIRFSOC_DMA_CUR_DATA_ADDR);
+	} else {
+		dma_pos = readl_relaxed(
+			sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR) << 2;
+	}
+
+	residue = dma_request_bytes - (dma_pos - sdesc->addr);
+	dma_set_residue(txstate, residue);
+
+	spin_unlock_irqrestore(&schan->lock, flags);
+
+	return ret;
+}
+
+static struct dma_async_tx_descriptor *sirfsoc_dma_prep_interleaved(
+	struct dma_chan *chan, struct dma_interleaved_template *xt,
+	unsigned long flags)
+{
+	struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan);
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma_desc *sdesc = NULL;
+	unsigned long iflags;
+	int ret;
+
+	if ((xt->dir != DMA_MEM_TO_DEV) && (xt->dir != DMA_DEV_TO_MEM)) {
+		ret = -EINVAL;
+		goto err_dir;
+	}
+
+	/* Get free descriptor */
+	spin_lock_irqsave(&schan->lock, iflags);
+	if (!list_empty(&schan->free)) {
+		sdesc = list_first_entry(&schan->free, struct sirfsoc_dma_desc,
+			node);
+		list_del(&sdesc->node);
+	}
+	spin_unlock_irqrestore(&schan->lock, iflags);
+
+	if (!sdesc) {
+		/* try to free completed descriptors */
+		sirfsoc_dma_process_completed(sdma);
+		ret = 0;
+		goto no_desc;
+	}
+
+	/* Place descriptor in prepared list */
+	spin_lock_irqsave(&schan->lock, iflags);
+
+	/*
+	 * The number of chunks in a frame can only be 1 for prima2,
+	 * and ylen (number of frames - 1) must be at least 0.
+	 */
+	if ((xt->frame_size == 1) && (xt->numf > 0)) {
+		sdesc->cyclic = 0;
+		sdesc->xlen = xt->sgl[0].size / SIRFSOC_DMA_WORD_LEN;
+		sdesc->width = (xt->sgl[0].size + xt->sgl[0].icg) /
+				SIRFSOC_DMA_WORD_LEN;
+		sdesc->ylen = xt->numf - 1;
+		if (xt->dir == DMA_MEM_TO_DEV) {
+			sdesc->addr = xt->src_start;
+			sdesc->dir = 1;
+		} else {
+			sdesc->addr = xt->dst_start;
+			sdesc->dir = 0;
+		}
+
+		list_add_tail(&sdesc->node, &schan->prepared);
+	} else {
+		pr_err("sirfsoc DMA Invalid xfer\n");
+		ret = -EINVAL;
+		goto err_xfer;
+	}
+	spin_unlock_irqrestore(&schan->lock, iflags);
+
+	return &sdesc->desc;
+err_xfer:
+	spin_unlock_irqrestore(&schan->lock, iflags);
+no_desc:
+err_dir:
+	return ERR_PTR(ret);
+}
+
+static struct dma_async_tx_descriptor *
+sirfsoc_dma_prep_cyclic(struct dma_chan *chan, dma_addr_t addr,
+	size_t buf_len, size_t period_len,
+	enum dma_transfer_direction direction, unsigned long flags)
+{
+	struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
+	struct sirfsoc_dma_desc *sdesc = NULL;
+	unsigned long iflags;
+
+	/*
+	 * We only support cyclic transfers with two periods.
+	 * If the X-length is set to 0, the controller runs in loop mode:
+	 * the DMA address keeps increasing until it reaches the end of a
+	 * loop area whose size is defined by (DMA_WIDTH x (Y_LENGTH + 1)),
+	 * then wraps back to the beginning of that area.
+	 * In loop mode, the DMA data region is divided into two halves,
+	 * BUFA and BUFB, and the controller raises an interrupt twice per
+	 * loop: once when the DMA address reaches the end of BUFA and once
+	 * when it reaches the end of BUFB.
+	 */
+	if (buf_len !=  2 * period_len)
+		return ERR_PTR(-EINVAL);
+
+	/* Get free descriptor */
+	spin_lock_irqsave(&schan->lock, iflags);
+	if (!list_empty(&schan->free)) {
+		sdesc = list_first_entry(&schan->free, struct sirfsoc_dma_desc,
+			node);
+		list_del(&sdesc->node);
+	}
+	spin_unlock_irqrestore(&schan->lock, iflags);
+
+	if (!sdesc)
+		return NULL;
+
+	/* Place descriptor in prepared list */
+	spin_lock_irqsave(&schan->lock, iflags);
+	sdesc->addr = addr;
+	sdesc->cyclic = 1;
+	sdesc->xlen = 0;
+	sdesc->ylen = buf_len / SIRFSOC_DMA_WORD_LEN - 1;
+	sdesc->width = 1;
+	list_add_tail(&sdesc->node, &schan->prepared);
+	spin_unlock_irqrestore(&schan->lock, iflags);
+
+	return &sdesc->desc;
+}
+
+/*
+ * The DMA controller consists of 16 independent DMA channels.
+ * Each channel is allocated to a different function
+ */
+bool sirfsoc_dma_filter_id(struct dma_chan *chan, void *chan_id)
+{
+	unsigned int ch_nr = (unsigned int) chan_id;
+
+	if (ch_nr == chan->chan_id +
+		chan->device->dev_id * SIRFSOC_DMA_CHANNELS)
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL(sirfsoc_dma_filter_id);
+
+#define SIRFSOC_DMA_BUSWIDTHS \
+	(BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \
+	BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+	BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+	BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
+	BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
+
+static struct dma_chan *of_dma_sirfsoc_xlate(struct of_phandle_args *dma_spec,
+	struct of_dma *ofdma)
+{
+	struct sirfsoc_dma *sdma = ofdma->of_dma_data;
+	unsigned int request = dma_spec->args[0];
+
+	if (request >= SIRFSOC_DMA_CHANNELS)
+		return NULL;
+
+	return dma_get_slave_channel(&sdma->channels[request].chan);
+}
+
+static int sirfsoc_dma_probe(struct platform_device *op)
+{
+	struct device_node *dn = op->dev.of_node;
+	struct device *dev = &op->dev;
+	struct dma_device *dma;
+	struct sirfsoc_dma *sdma;
+	struct sirfsoc_dma_chan *schan;
+	struct sirfsoc_dmadata *data;
+	struct resource res;
+	ulong regs_start, regs_size;
+	u32 id;
+	int ret, i;
+
+	sdma = devm_kzalloc(dev, sizeof(*sdma), GFP_KERNEL);
+	if (!sdma)
+		return -ENOMEM;
+
+	data = (struct sirfsoc_dmadata *)
+		(of_match_device(op->dev.driver->of_match_table,
+				 &op->dev)->data);
+	sdma->exec_desc = data->exec;
+	sdma->type = data->type;
+
+	if (of_property_read_u32(dn, "cell-index", &id)) {
+		dev_err(dev, "Failed to get DMAC index\n");
+		return -ENODEV;
+	}
+
+	sdma->irq = irq_of_parse_and_map(dn, 0);
+	if (!sdma->irq) {
+		dev_err(dev, "Error mapping IRQ!\n");
+		return -EINVAL;
+	}
+
+	sdma->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(sdma->clk)) {
+		dev_err(dev, "failed to get a clock.\n");
+		return PTR_ERR(sdma->clk);
+	}
+
+	ret = of_address_to_resource(dn, 0, &res);
+	if (ret) {
+		dev_err(dev, "Error parsing memory region!\n");
+		goto irq_dispose;
+	}
+
+	regs_start = res.start;
+	regs_size = resource_size(&res);
+
+	sdma->base = devm_ioremap(dev, regs_start, regs_size);
+	if (!sdma->base) {
+		dev_err(dev, "Error mapping memory region!\n");
+		ret = -ENOMEM;
+		goto irq_dispose;
+	}
+
+	ret = request_irq(sdma->irq, &sirfsoc_dma_irq, 0, DRV_NAME, sdma);
+	if (ret) {
+		dev_err(dev, "Error requesting IRQ!\n");
+		ret = -EINVAL;
+		goto irq_dispose;
+	}
+
+	dma = &sdma->dma;
+	dma->dev = dev;
+
+	dma->device_alloc_chan_resources = sirfsoc_dma_alloc_chan_resources;
+	dma->device_free_chan_resources = sirfsoc_dma_free_chan_resources;
+	dma->device_issue_pending = sirfsoc_dma_issue_pending;
+	dma->device_config = sirfsoc_dma_slave_config;
+	dma->device_pause = sirfsoc_dma_pause_chan;
+	dma->device_resume = sirfsoc_dma_resume_chan;
+	dma->device_terminate_all = sirfsoc_dma_terminate_all;
+	dma->device_tx_status = sirfsoc_dma_tx_status;
+	dma->device_prep_interleaved_dma = sirfsoc_dma_prep_interleaved;
+	dma->device_prep_dma_cyclic = sirfsoc_dma_prep_cyclic;
+	dma->src_addr_widths = SIRFSOC_DMA_BUSWIDTHS;
+	dma->dst_addr_widths = SIRFSOC_DMA_BUSWIDTHS;
+	dma->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+
+	INIT_LIST_HEAD(&dma->channels);
+	dma_cap_set(DMA_SLAVE, dma->cap_mask);
+	dma_cap_set(DMA_CYCLIC, dma->cap_mask);
+	dma_cap_set(DMA_INTERLEAVE, dma->cap_mask);
+	dma_cap_set(DMA_PRIVATE, dma->cap_mask);
+
+	for (i = 0; i < SIRFSOC_DMA_CHANNELS; i++) {
+		schan = &sdma->channels[i];
+
+		schan->chan.device = dma;
+		dma_cookie_init(&schan->chan);
+
+		INIT_LIST_HEAD(&schan->free);
+		INIT_LIST_HEAD(&schan->prepared);
+		INIT_LIST_HEAD(&schan->queued);
+		INIT_LIST_HEAD(&schan->active);
+		INIT_LIST_HEAD(&schan->completed);
+
+		spin_lock_init(&schan->lock);
+		list_add_tail(&schan->chan.device_node, &dma->channels);
+	}
+
+	tasklet_init(&sdma->tasklet, sirfsoc_dma_tasklet, (unsigned long)sdma);
+
+	/* Register DMA engine */
+	dev_set_drvdata(dev, sdma);
+
+	ret = dma_async_device_register(dma);
+	if (ret)
+		goto free_irq;
+
+	/* Device-tree DMA controller registration */
+	ret = of_dma_controller_register(dn, of_dma_sirfsoc_xlate, sdma);
+	if (ret) {
+		dev_err(dev, "failed to register DMA controller\n");
+		goto unreg_dma_dev;
+	}
+
+	pm_runtime_enable(&op->dev);
+	dev_info(dev, "initialized SIRFSOC DMAC driver\n");
+
+	return 0;
+
+unreg_dma_dev:
+	dma_async_device_unregister(dma);
+free_irq:
+	free_irq(sdma->irq, sdma);
+irq_dispose:
+	irq_dispose_mapping(sdma->irq);
+	return ret;
+}
+
+static int sirfsoc_dma_remove(struct platform_device *op)
+{
+	struct device *dev = &op->dev;
+	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
+
+	of_dma_controller_free(op->dev.of_node);
+	dma_async_device_unregister(&sdma->dma);
+	free_irq(sdma->irq, sdma);
+	tasklet_kill(&sdma->tasklet);
+	irq_dispose_mapping(sdma->irq);
+	pm_runtime_disable(&op->dev);
+	if (!pm_runtime_status_suspended(&op->dev))
+		sirfsoc_dma_runtime_suspend(&op->dev);
+
+	return 0;
+}
+
+static int __maybe_unused sirfsoc_dma_runtime_suspend(struct device *dev)
+{
+	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(sdma->clk);
+	return 0;
+}
+
+static int __maybe_unused sirfsoc_dma_runtime_resume(struct device *dev)
+{
+	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
+	int ret;
+
+	ret = clk_prepare_enable(sdma->clk);
+	if (ret < 0) {
+		dev_err(dev, "clk_enable failed: %d\n", ret);
+		return ret;
+	}
+	return 0;
+}
+
+static int __maybe_unused sirfsoc_dma_pm_suspend(struct device *dev)
+{
+	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
+	struct sirfsoc_dma_regs *save = &sdma->regs_save;
+	struct sirfsoc_dma_chan *schan;
+	int ch;
+	int ret;
+	int count;
+	u32 int_offset;
+
+	/*
+	 * If we were runtime-suspended, resume first so the clock is
+	 * enabled before touching any registers.
+	 */
+	if (pm_runtime_status_suspended(dev)) {
+		ret = sirfsoc_dma_runtime_resume(dev);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (sdma->type == SIRFSOC_DMA_VER_A7V2) {
+		count = 1;
+		int_offset = SIRFSOC_DMA_INT_EN_ATLAS7;
+	} else {
+		count = SIRFSOC_DMA_CHANNELS;
+		int_offset = SIRFSOC_DMA_INT_EN;
+	}
+
+	/*
+	 * The DMA controller loses its register contents while suspended,
+	 * so save the registers of every active channel.
+	 */
+	for (ch = 0; ch < count; ch++) {
+		schan = &sdma->channels[ch];
+		if (list_empty(&schan->active))
+			continue;
+		save->ctrl[ch] = readl_relaxed(sdma->base +
+			ch * 0x10 + SIRFSOC_DMA_CH_CTRL);
+	}
+	save->interrupt_en = readl_relaxed(sdma->base + int_offset);
+
+	/* Disable clock */
+	sirfsoc_dma_runtime_suspend(dev);
+
+	return 0;
+}
+
+static int __maybe_unused sirfsoc_dma_pm_resume(struct device *dev)
+{
+	struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
+	struct sirfsoc_dma_regs *save = &sdma->regs_save;
+	struct sirfsoc_dma_desc *sdesc;
+	struct sirfsoc_dma_chan *schan;
+	int ch;
+	int ret;
+	int count;
+	u32 int_offset;
+	u32 width_offset;
+
+	/* Enable clock before accessing register */
+	ret = sirfsoc_dma_runtime_resume(dev);
+	if (ret < 0)
+		return ret;
+
+	if (sdma->type == SIRFSOC_DMA_VER_A7V2) {
+		count = 1;
+		int_offset = SIRFSOC_DMA_INT_EN_ATLAS7;
+		width_offset = SIRFSOC_DMA_WIDTH_ATLAS7;
+	} else {
+		count = SIRFSOC_DMA_CHANNELS;
+		int_offset = SIRFSOC_DMA_INT_EN;
+		width_offset = SIRFSOC_DMA_WIDTH_0;
+	}
+
+	writel_relaxed(save->interrupt_en, sdma->base + int_offset);
+	for (ch = 0; ch < count; ch++) {
+		schan = &sdma->channels[ch];
+		if (list_empty(&schan->active))
+			continue;
+		sdesc = list_first_entry(&schan->active,
+			struct sirfsoc_dma_desc,
+			node);
+		writel_relaxed(sdesc->width,
+			sdma->base + width_offset + ch * 4);
+		writel_relaxed(sdesc->xlen,
+			sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_XLEN);
+		writel_relaxed(sdesc->ylen,
+			sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_YLEN);
+		writel_relaxed(save->ctrl[ch],
+			sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_CTRL);
+		if (sdma->type == SIRFSOC_DMA_VER_A7V2) {
+			writel_relaxed(sdesc->addr,
+				sdma->base + SIRFSOC_DMA_CH_ADDR);
+		} else {
+			writel_relaxed(sdesc->addr >> 2,
+				sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_ADDR);
+
+		}
+	}
+
+	/* if we were runtime-suspended before, suspend again */
+	if (pm_runtime_status_suspended(dev))
+		sirfsoc_dma_runtime_suspend(dev);
+
+	return 0;
+}
+
+static const struct dev_pm_ops sirfsoc_dma_pm_ops = {
+	SET_RUNTIME_PM_OPS(sirfsoc_dma_runtime_suspend, sirfsoc_dma_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(sirfsoc_dma_pm_suspend, sirfsoc_dma_pm_resume)
+};
+
+static struct sirfsoc_dmadata sirfsoc_dmadata_a6 = {
+	.exec = sirfsoc_dma_execute_hw_a6,
+	.type = SIRFSOC_DMA_VER_A6,
+};
+
+static struct sirfsoc_dmadata sirfsoc_dmadata_a7v1 = {
+	.exec = sirfsoc_dma_execute_hw_a7v1,
+	.type = SIRFSOC_DMA_VER_A7V1,
+};
+
+static struct sirfsoc_dmadata sirfsoc_dmadata_a7v2 = {
+	.exec = sirfsoc_dma_execute_hw_a7v2,
+	.type = SIRFSOC_DMA_VER_A7V2,
+};
+
+static const struct of_device_id sirfsoc_dma_match[] = {
+	{ .compatible = "sirf,prima2-dmac", .data = &sirfsoc_dmadata_a6,},
+	{ .compatible = "sirf,atlas7-dmac", .data = &sirfsoc_dmadata_a7v1,},
+	{ .compatible = "sirf,atlas7-dmac-v2", .data = &sirfsoc_dmadata_a7v2,},
+	{},
+};
+MODULE_DEVICE_TABLE(of, sirfsoc_dma_match);
+
+static struct platform_driver sirfsoc_dma_driver = {
+	.probe		= sirfsoc_dma_probe,
+	.remove		= sirfsoc_dma_remove,
+	.driver = {
+		.name = DRV_NAME,
+		.pm = &sirfsoc_dma_pm_ops,
+		.of_match_table	= sirfsoc_dma_match,
+	},
+};
+
+static __init int sirfsoc_dma_init(void)
+{
+	return platform_driver_register(&sirfsoc_dma_driver);
+}
+
+static void __exit sirfsoc_dma_exit(void)
+{
+	platform_driver_unregister(&sirfsoc_dma_driver);
+}
+
+subsys_initcall(sirfsoc_dma_init);
+module_exit(sirfsoc_dma_exit);
+
+MODULE_AUTHOR("Rongjun Ying <rongjun.ying@csr.com>");
+MODULE_AUTHOR("Barry Song <baohua.song@csr.com>");
+MODULE_DESCRIPTION("SIRFSOC DMA control driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/sprd-dma.c b/drivers/dma/sprd-dma.c
new file mode 100644
index 0000000..55df0d4
--- /dev/null
+++ b/drivers/dma/sprd-dma.c
@@ -0,0 +1,1045 @@
+/*
+ * Copyright (C) 2017 Spreadtrum Communications Inc.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */
+
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma/sprd-dma.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/of_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+
+#include "virt-dma.h"
+
+#define SPRD_DMA_CHN_REG_OFFSET		0x1000
+#define SPRD_DMA_CHN_REG_LENGTH		0x40
+#define SPRD_DMA_MEMCPY_MIN_SIZE	64
+
+/* DMA global registers definition */
+#define SPRD_DMA_GLB_PAUSE		0x0
+#define SPRD_DMA_GLB_FRAG_WAIT		0x4
+#define SPRD_DMA_GLB_REQ_PEND0_EN	0x8
+#define SPRD_DMA_GLB_REQ_PEND1_EN	0xc
+#define SPRD_DMA_GLB_INT_RAW_STS	0x10
+#define SPRD_DMA_GLB_INT_MSK_STS	0x14
+#define SPRD_DMA_GLB_REQ_STS		0x18
+#define SPRD_DMA_GLB_CHN_EN_STS		0x1c
+#define SPRD_DMA_GLB_DEBUG_STS		0x20
+#define SPRD_DMA_GLB_ARB_SEL_STS	0x24
+#define SPRD_DMA_GLB_REQ_UID(uid)	(0x4 * ((uid) - 1))
+#define SPRD_DMA_GLB_REQ_UID_OFFSET	0x2000
+
+/* DMA channel registers definition */
+#define SPRD_DMA_CHN_PAUSE		0x0
+#define SPRD_DMA_CHN_REQ		0x4
+#define SPRD_DMA_CHN_CFG		0x8
+#define SPRD_DMA_CHN_INTC		0xc
+#define SPRD_DMA_CHN_SRC_ADDR		0x10
+#define SPRD_DMA_CHN_DES_ADDR		0x14
+#define SPRD_DMA_CHN_FRG_LEN		0x18
+#define SPRD_DMA_CHN_BLK_LEN		0x1c
+#define SPRD_DMA_CHN_TRSC_LEN		0x20
+#define SPRD_DMA_CHN_TRSF_STEP		0x24
+#define SPRD_DMA_CHN_WARP_PTR		0x28
+#define SPRD_DMA_CHN_WARP_TO		0x2c
+#define SPRD_DMA_CHN_LLIST_PTR		0x30
+#define SPRD_DMA_CHN_FRAG_STEP		0x34
+#define SPRD_DMA_CHN_SRC_BLK_STEP	0x38
+#define SPRD_DMA_CHN_DES_BLK_STEP	0x3c
+
+/* SPRD_DMA_CHN_INTC register definition */
+#define SPRD_DMA_INT_MASK		GENMASK(4, 0)
+#define SPRD_DMA_INT_CLR_OFFSET		24
+#define SPRD_DMA_FRAG_INT_EN		BIT(0)
+#define SPRD_DMA_BLK_INT_EN		BIT(1)
+#define SPRD_DMA_TRANS_INT_EN		BIT(2)
+#define SPRD_DMA_LIST_INT_EN		BIT(3)
+#define SPRD_DMA_CFG_ERR_INT_EN		BIT(4)
+
+/* SPRD_DMA_CHN_CFG register definition */
+#define SPRD_DMA_CHN_EN			BIT(0)
+#define SPRD_DMA_WAIT_BDONE_OFFSET	24
+#define SPRD_DMA_DONOT_WAIT_BDONE	1
+
+/* SPRD_DMA_CHN_REQ register definition */
+#define SPRD_DMA_REQ_EN			BIT(0)
+
+/* SPRD_DMA_CHN_PAUSE register definition */
+#define SPRD_DMA_PAUSE_EN		BIT(0)
+#define SPRD_DMA_PAUSE_STS		BIT(2)
+#define SPRD_DMA_PAUSE_CNT		0x2000
+
+/* DMA_CHN_WARP_* register definition */
+#define SPRD_DMA_HIGH_ADDR_MASK		GENMASK(31, 28)
+#define SPRD_DMA_LOW_ADDR_MASK		GENMASK(31, 0)
+#define SPRD_DMA_HIGH_ADDR_OFFSET	4
+
+/* SPRD_DMA_CHN_INTC register definition */
+#define SPRD_DMA_FRAG_INT_STS		BIT(16)
+#define SPRD_DMA_BLK_INT_STS		BIT(17)
+#define SPRD_DMA_TRSC_INT_STS		BIT(18)
+#define SPRD_DMA_LIST_INT_STS		BIT(19)
+#define SPRD_DMA_CFGERR_INT_STS		BIT(20)
+#define SPRD_DMA_CHN_INT_STS					\
+	(SPRD_DMA_FRAG_INT_STS | SPRD_DMA_BLK_INT_STS |		\
+	 SPRD_DMA_TRSC_INT_STS | SPRD_DMA_LIST_INT_STS |	\
+	 SPRD_DMA_CFGERR_INT_STS)
+
+/* SPRD_DMA_CHN_FRG_LEN register definition */
+#define SPRD_DMA_SRC_DATAWIDTH_OFFSET	30
+#define SPRD_DMA_DES_DATAWIDTH_OFFSET	28
+#define SPRD_DMA_SWT_MODE_OFFSET	26
+#define SPRD_DMA_REQ_MODE_OFFSET	24
+#define SPRD_DMA_REQ_MODE_MASK		GENMASK(1, 0)
+#define SPRD_DMA_FIX_SEL_OFFSET		21
+#define SPRD_DMA_FIX_EN_OFFSET		20
+#define SPRD_DMA_LLIST_END_OFFSET	19
+#define SPRD_DMA_FRG_LEN_MASK		GENMASK(16, 0)
+
+/* SPRD_DMA_CHN_BLK_LEN register definition */
+#define SPRD_DMA_BLK_LEN_MASK		GENMASK(16, 0)
+
+/* SPRD_DMA_CHN_TRSC_LEN register definition */
+#define SPRD_DMA_TRSC_LEN_MASK		GENMASK(27, 0)
+
+/* SPRD_DMA_CHN_TRSF_STEP register definition */
+#define SPRD_DMA_DEST_TRSF_STEP_OFFSET	16
+#define SPRD_DMA_SRC_TRSF_STEP_OFFSET	0
+#define SPRD_DMA_TRSF_STEP_MASK		GENMASK(15, 0)
+
+/* define the DMA transfer step type */
+#define SPRD_DMA_NONE_STEP		0
+#define SPRD_DMA_BYTE_STEP		1
+#define SPRD_DMA_SHORT_STEP		2
+#define SPRD_DMA_WORD_STEP		4
+#define SPRD_DMA_DWORD_STEP		8
+
+#define SPRD_DMA_SOFTWARE_UID		0
+
+/* dma data width values */
+enum sprd_dma_datawidth {
+	SPRD_DMA_DATAWIDTH_1_BYTE,
+	SPRD_DMA_DATAWIDTH_2_BYTES,
+	SPRD_DMA_DATAWIDTH_4_BYTES,
+	SPRD_DMA_DATAWIDTH_8_BYTES,
+};
+
+/* dma channel hardware configuration */
+struct sprd_dma_chn_hw {
+	u32 pause;
+	u32 req;
+	u32 cfg;
+	u32 intc;
+	u32 src_addr;
+	u32 des_addr;
+	u32 frg_len;
+	u32 blk_len;
+	u32 trsc_len;
+	u32 trsf_step;
+	u32 wrap_ptr;
+	u32 wrap_to;
+	u32 llist_ptr;
+	u32 frg_step;
+	u32 src_blk_step;
+	u32 des_blk_step;
+};
+
+/* dma request description */
+struct sprd_dma_desc {
+	struct virt_dma_desc	vd;
+	struct sprd_dma_chn_hw	chn_hw;
+};
+
+/* dma channel description */
+struct sprd_dma_chn {
+	struct virt_dma_chan	vc;
+	void __iomem		*chn_base;
+	struct dma_slave_config	slave_cfg;
+	u32			chn_num;
+	u32			dev_id;
+	struct sprd_dma_desc	*cur_desc;
+};
+
+/* SPRD dma device */
+struct sprd_dma_dev {
+	struct dma_device	dma_dev;
+	void __iomem		*glb_base;
+	struct clk		*clk;
+	struct clk		*ashb_clk;
+	int			irq;
+	u32			total_chns;
+	struct sprd_dma_chn	channels[0];
+};
+
+static bool sprd_dma_filter_fn(struct dma_chan *chan, void *param);
+static struct of_dma_filter_info sprd_dma_info = {
+	.filter_fn = sprd_dma_filter_fn,
+};
+
+static inline struct sprd_dma_chn *to_sprd_dma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct sprd_dma_chn, vc.chan);
+}
+
+static inline struct sprd_dma_dev *to_sprd_dma_dev(struct dma_chan *c)
+{
+	struct sprd_dma_chn *schan = to_sprd_dma_chan(c);
+
+	return container_of(schan, struct sprd_dma_dev, channels[c->chan_id]);
+}
+
+static inline struct sprd_dma_desc *to_sprd_dma_desc(struct virt_dma_desc *vd)
+{
+	return container_of(vd, struct sprd_dma_desc, vd);
+}
+
+static void sprd_dma_chn_update(struct sprd_dma_chn *schan, u32 reg,
+				u32 mask, u32 val)
+{
+	u32 orig = readl(schan->chn_base + reg);
+	u32 tmp;
+
+	tmp = (orig & ~mask) | val;
+	writel(tmp, schan->chn_base + reg);
+}
+
+static int sprd_dma_enable(struct sprd_dma_dev *sdev)
+{
+	int ret;
+
+	ret = clk_prepare_enable(sdev->clk);
+	if (ret)
+		return ret;
+
+	/*
+	 * The ashb_clk is optional and only used by the AGCP DMA
+	 * controller, so only enable it when it was actually provided.
+	 */
+	if (!IS_ERR(sdev->ashb_clk))
+		ret = clk_prepare_enable(sdev->ashb_clk);
+
+	return ret;
+}
+
+static void sprd_dma_disable(struct sprd_dma_dev *sdev)
+{
+	clk_disable_unprepare(sdev->clk);
+
+	/* Also disable the optional ashb_clk if the AGCP DMA provided one. */
+	if (!IS_ERR(sdev->ashb_clk))
+		clk_disable_unprepare(sdev->ashb_clk);
+}
+
+static void sprd_dma_set_uid(struct sprd_dma_chn *schan)
+{
+	struct sprd_dma_dev *sdev = to_sprd_dma_dev(&schan->vc.chan);
+	u32 dev_id = schan->dev_id;
+
+	if (dev_id != SPRD_DMA_SOFTWARE_UID) {
+		u32 uid_offset = SPRD_DMA_GLB_REQ_UID_OFFSET +
+				 SPRD_DMA_GLB_REQ_UID(dev_id);
+
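+		/* Map this request UID onto the channel; 0 means unmapped */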
+		writel(schan->chn_num + 1, sdev->glb_base + uid_offset);
+	}
+}
+
+static void sprd_dma_unset_uid(struct sprd_dma_chn *schan)
+{
+	struct sprd_dma_dev *sdev = to_sprd_dma_dev(&schan->vc.chan);
+	u32 dev_id = schan->dev_id;
+
+	if (dev_id != SPRD_DMA_SOFTWARE_UID) {
+		u32 uid_offset = SPRD_DMA_GLB_REQ_UID_OFFSET +
+				 SPRD_DMA_GLB_REQ_UID(dev_id);
+
+		writel(0, sdev->glb_base + uid_offset);
+	}
+}
+
+static void sprd_dma_clear_int(struct sprd_dma_chn *schan)
+{
+	sprd_dma_chn_update(schan, SPRD_DMA_CHN_INTC,
+			    SPRD_DMA_INT_MASK << SPRD_DMA_INT_CLR_OFFSET,
+			    SPRD_DMA_INT_MASK << SPRD_DMA_INT_CLR_OFFSET);
+}
+
+static void sprd_dma_enable_chn(struct sprd_dma_chn *schan)
+{
+	sprd_dma_chn_update(schan, SPRD_DMA_CHN_CFG, SPRD_DMA_CHN_EN,
+			    SPRD_DMA_CHN_EN);
+}
+
+static void sprd_dma_disable_chn(struct sprd_dma_chn *schan)
+{
+	sprd_dma_chn_update(schan, SPRD_DMA_CHN_CFG, SPRD_DMA_CHN_EN, 0);
+}
+
+static void sprd_dma_soft_request(struct sprd_dma_chn *schan)
+{
+	sprd_dma_chn_update(schan, SPRD_DMA_CHN_REQ, SPRD_DMA_REQ_EN,
+			    SPRD_DMA_REQ_EN);
+}
+
+static void sprd_dma_pause_resume(struct sprd_dma_chn *schan, bool enable)
+{
+	struct sprd_dma_dev *sdev = to_sprd_dma_dev(&schan->vc.chan);
+	u32 pause, timeout = SPRD_DMA_PAUSE_CNT;
+
+	if (enable) {
+		sprd_dma_chn_update(schan, SPRD_DMA_CHN_PAUSE,
+				    SPRD_DMA_PAUSE_EN, SPRD_DMA_PAUSE_EN);
+
+		do {
+			pause = readl(schan->chn_base + SPRD_DMA_CHN_PAUSE);
+			if (pause & SPRD_DMA_PAUSE_STS)
+				break;
+
+			cpu_relax();
+		} while (--timeout > 0);
+
+		if (!timeout)
+			dev_warn(sdev->dma_dev.dev,
+				 "pause dma controller timeout\n");
+	} else {
+		sprd_dma_chn_update(schan, SPRD_DMA_CHN_PAUSE,
+				    SPRD_DMA_PAUSE_EN, 0);
+	}
+}
+
+static void sprd_dma_stop_and_disable(struct sprd_dma_chn *schan)
+{
+	u32 cfg = readl(schan->chn_base + SPRD_DMA_CHN_CFG);
+
+	if (!(cfg & SPRD_DMA_CHN_EN))
+		return;
+
+	sprd_dma_pause_resume(schan, true);
+	sprd_dma_disable_chn(schan);
+}
+
+static unsigned long sprd_dma_get_dst_addr(struct sprd_dma_chn *schan)
+{
+	unsigned long addr, addr_high;
+
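+	/* Bits [35:32] of the address sit in the top nibble of WARP_TO */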
+	addr = readl(schan->chn_base + SPRD_DMA_CHN_DES_ADDR);
+	addr_high = readl(schan->chn_base + SPRD_DMA_CHN_WARP_TO) &
+		    SPRD_DMA_HIGH_ADDR_MASK;
+
+	return addr | (addr_high << SPRD_DMA_HIGH_ADDR_OFFSET);
+}
+
+static enum sprd_dma_int_type sprd_dma_get_int_type(struct sprd_dma_chn *schan)
+{
+	struct sprd_dma_dev *sdev = to_sprd_dma_dev(&schan->vc.chan);
+	u32 intc_sts = readl(schan->chn_base + SPRD_DMA_CHN_INTC) &
+		       SPRD_DMA_CHN_INT_STS;
+
+	switch (intc_sts) {
+	case SPRD_DMA_CFGERR_INT_STS:
+		return SPRD_DMA_CFGERR_INT;
+
+	case SPRD_DMA_LIST_INT_STS:
+		return SPRD_DMA_LIST_INT;
+
+	case SPRD_DMA_TRSC_INT_STS:
+		return SPRD_DMA_TRANS_INT;
+
+	case SPRD_DMA_BLK_INT_STS:
+		return SPRD_DMA_BLK_INT;
+
+	case SPRD_DMA_FRAG_INT_STS:
+		return SPRD_DMA_FRAG_INT;
+
+	default:
+		dev_warn(sdev->dma_dev.dev, "incorrect dma interrupt type\n");
+		return SPRD_DMA_NO_INT;
+	}
+}
+
+static enum sprd_dma_req_mode sprd_dma_get_req_type(struct sprd_dma_chn *schan)
+{
+	u32 frag_reg = readl(schan->chn_base + SPRD_DMA_CHN_FRG_LEN);
+
+	return (frag_reg >> SPRD_DMA_REQ_MODE_OFFSET) & SPRD_DMA_REQ_MODE_MASK;
+}
+
+static void sprd_dma_set_chn_config(struct sprd_dma_chn *schan,
+				    struct sprd_dma_desc *sdesc)
+{
+	struct sprd_dma_chn_hw *cfg = &sdesc->chn_hw;
+
+	writel(cfg->pause, schan->chn_base + SPRD_DMA_CHN_PAUSE);
+	writel(cfg->cfg, schan->chn_base + SPRD_DMA_CHN_CFG);
+	writel(cfg->intc, schan->chn_base + SPRD_DMA_CHN_INTC);
+	writel(cfg->src_addr, schan->chn_base + SPRD_DMA_CHN_SRC_ADDR);
+	writel(cfg->des_addr, schan->chn_base + SPRD_DMA_CHN_DES_ADDR);
+	writel(cfg->frg_len, schan->chn_base + SPRD_DMA_CHN_FRG_LEN);
+	writel(cfg->blk_len, schan->chn_base + SPRD_DMA_CHN_BLK_LEN);
+	writel(cfg->trsc_len, schan->chn_base + SPRD_DMA_CHN_TRSC_LEN);
+	writel(cfg->trsf_step, schan->chn_base + SPRD_DMA_CHN_TRSF_STEP);
+	writel(cfg->wrap_ptr, schan->chn_base + SPRD_DMA_CHN_WARP_PTR);
+	writel(cfg->wrap_to, schan->chn_base + SPRD_DMA_CHN_WARP_TO);
+	writel(cfg->llist_ptr, schan->chn_base + SPRD_DMA_CHN_LLIST_PTR);
+	writel(cfg->frg_step, schan->chn_base + SPRD_DMA_CHN_FRAG_STEP);
+	writel(cfg->src_blk_step, schan->chn_base + SPRD_DMA_CHN_SRC_BLK_STEP);
+	writel(cfg->des_blk_step, schan->chn_base + SPRD_DMA_CHN_DES_BLK_STEP);
+	writel(cfg->req, schan->chn_base + SPRD_DMA_CHN_REQ);
+}
+
+static void sprd_dma_start(struct sprd_dma_chn *schan)
+{
+	struct virt_dma_desc *vd = vchan_next_desc(&schan->vc);
+
+	if (!vd)
+		return;
+
+	list_del(&vd->node);
+	schan->cur_desc = to_sprd_dma_desc(vd);
+
+	/*
+	 * Copy the DMA configuration from DMA descriptor to this hardware
+	 * channel.
+	 */
+	sprd_dma_set_chn_config(schan, schan->cur_desc);
+	sprd_dma_set_uid(schan);
+	sprd_dma_enable_chn(schan);
+
+	if (schan->dev_id == SPRD_DMA_SOFTWARE_UID)
+		sprd_dma_soft_request(schan);
+}
+
+static void sprd_dma_stop(struct sprd_dma_chn *schan)
+{
+	sprd_dma_stop_and_disable(schan);
+	sprd_dma_unset_uid(schan);
+	sprd_dma_clear_int(schan);
+}
+
+static bool sprd_dma_check_trans_done(struct sprd_dma_desc *sdesc,
+				      enum sprd_dma_int_type int_type,
+				      enum sprd_dma_req_mode req_mode)
+{
+	if (int_type == SPRD_DMA_NO_INT)
+		return false;
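+	/* Done once the interrupt type exceeds the configured request mode */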
+
+	if (int_type >= req_mode + 1)
+		return true;
+	else
+		return false;
+}
+
+static irqreturn_t dma_irq_handle(int irq, void *dev_id)
+{
+	struct sprd_dma_dev *sdev = (struct sprd_dma_dev *)dev_id;
+	u32 irq_status = readl(sdev->glb_base + SPRD_DMA_GLB_INT_MSK_STS);
+	struct sprd_dma_chn *schan;
+	struct sprd_dma_desc *sdesc;
+	enum sprd_dma_req_mode req_type;
+	enum sprd_dma_int_type int_type;
+	bool trans_done = false;
+	u32 i;
+
+	while (irq_status) {
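+		/* Take the lowest pending channel and clear its bit locally */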
+		i = __ffs(irq_status);
+		irq_status &= (irq_status - 1);
+		schan = &sdev->channels[i];
+
+		spin_lock(&schan->vc.lock);
+		int_type = sprd_dma_get_int_type(schan);
+		req_type = sprd_dma_get_req_type(schan);
+		sprd_dma_clear_int(schan);
+
+		sdesc = schan->cur_desc;
+
+		/* Check if the dma request descriptor is done. */
+		trans_done = sprd_dma_check_trans_done(sdesc, int_type,
+						       req_type);
+		if (trans_done) {
+			vchan_cookie_complete(&sdesc->vd);
+			schan->cur_desc = NULL;
+			sprd_dma_start(schan);
+		}
+		spin_unlock(&schan->vc.lock);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int sprd_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+	int ret;
+
+	ret = pm_runtime_get_sync(chan->device->dev);
+	if (ret < 0)
+		return ret;
+
+	schan->dev_id = SPRD_DMA_SOFTWARE_UID;
+	return 0;
+}
+
+static void sprd_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&schan->vc.lock, flags);
+	sprd_dma_stop(schan);
+	spin_unlock_irqrestore(&schan->vc.lock, flags);
+
+	vchan_free_chan_resources(&schan->vc);
+	pm_runtime_put(chan->device->dev);
+}
+
+static enum dma_status sprd_dma_tx_status(struct dma_chan *chan,
+					  dma_cookie_t cookie,
+					  struct dma_tx_state *txstate)
+{
+	struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+	enum dma_status ret;
+	u32 pos;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE || !txstate)
+		return ret;
+
+	spin_lock_irqsave(&schan->vc.lock, flags);
+	vd = vchan_find_desc(&schan->vc, cookie);
+	if (vd) {
+		struct sprd_dma_desc *sdesc = to_sprd_dma_desc(vd);
+		struct sprd_dma_chn_hw *hw = &sdesc->chn_hw;
+
+		if (hw->trsc_len > 0)
+			pos = hw->trsc_len;
+		else if (hw->blk_len > 0)
+			pos = hw->blk_len;
+		else if (hw->frg_len > 0)
+			pos = hw->frg_len;
+		else
+			pos = 0;
+	} else if (schan->cur_desc && schan->cur_desc->vd.tx.cookie == cookie) {
+		pos = sprd_dma_get_dst_addr(schan);
+	} else {
+		pos = 0;
+	}
+	spin_unlock_irqrestore(&schan->vc.lock, flags);
+
+	dma_set_residue(txstate, pos);
+	return ret;
+}
+
+static void sprd_dma_issue_pending(struct dma_chan *chan)
+{
+	struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&schan->vc.lock, flags);
+	if (vchan_issue_pending(&schan->vc) && !schan->cur_desc)
+		sprd_dma_start(schan);
+	spin_unlock_irqrestore(&schan->vc.lock, flags);
+}
+
+static int sprd_dma_get_datawidth(enum dma_slave_buswidth buswidth)
+{
+	switch (buswidth) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+	case DMA_SLAVE_BUSWIDTH_8_BYTES:
+		return ffs(buswidth) - 1;
+
+	default:
+		return -EINVAL;
+	}
+}
+
+static int sprd_dma_get_step(enum dma_slave_buswidth buswidth)
+{
+	switch (buswidth) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+	case DMA_SLAVE_BUSWIDTH_8_BYTES:
+		return buswidth;
+
+	default:
+		return -EINVAL;
+	}
+}
+
+static int sprd_dma_fill_desc(struct dma_chan *chan,
+			      struct sprd_dma_desc *sdesc,
+			      dma_addr_t src, dma_addr_t dst, u32 len,
+			      enum dma_transfer_direction dir,
+			      unsigned long flags,
+			      struct dma_slave_config *slave_cfg)
+{
+	struct sprd_dma_dev *sdev = to_sprd_dma_dev(chan);
+	struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+	struct sprd_dma_chn_hw *hw = &sdesc->chn_hw;
+	u32 req_mode = (flags >> SPRD_DMA_REQ_SHIFT) & SPRD_DMA_REQ_MODE_MASK;
+	u32 int_mode = flags & SPRD_DMA_INT_MASK;
+	int src_datawidth, dst_datawidth, src_step, dst_step;
+	u32 temp, fix_mode = 0, fix_en = 0;
+
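+	/* The request mode and interrupt type are encoded in the prep flags */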
+	if (dir == DMA_MEM_TO_DEV) {
+		src_step = sprd_dma_get_step(slave_cfg->src_addr_width);
+		if (src_step < 0) {
+			dev_err(sdev->dma_dev.dev, "invalid source step\n");
+			return src_step;
+		}
+		dst_step = SPRD_DMA_NONE_STEP;
+	} else {
+		dst_step = sprd_dma_get_step(slave_cfg->dst_addr_width);
+		if (dst_step < 0) {
+			dev_err(sdev->dma_dev.dev, "invalid destination step\n");
+			return dst_step;
+		}
+		src_step = SPRD_DMA_NONE_STEP;
+	}
+
+	src_datawidth = sprd_dma_get_datawidth(slave_cfg->src_addr_width);
+	if (src_datawidth < 0) {
+		dev_err(sdev->dma_dev.dev, "invalid source datawidth\n");
+		return src_datawidth;
+	}
+
+	dst_datawidth = sprd_dma_get_datawidth(slave_cfg->dst_addr_width);
+	if (dst_datawidth < 0) {
+		dev_err(sdev->dma_dev.dev, "invalid destination datawidth\n");
+		return dst_datawidth;
+	}
+
+	if (slave_cfg->slave_id)
+		schan->dev_id = slave_cfg->slave_id;
+
+	hw->cfg = SPRD_DMA_DONOT_WAIT_BDONE << SPRD_DMA_WAIT_BDONE_OFFSET;
+
+	/*
+	 * wrap_ptr and wrap_to hold the high 4 bits of the source and
+	 * destination addresses, respectively.
+	 */
+	hw->wrap_ptr = (src >> SPRD_DMA_HIGH_ADDR_OFFSET) & SPRD_DMA_HIGH_ADDR_MASK;
+	hw->wrap_to = (dst >> SPRD_DMA_HIGH_ADDR_OFFSET) & SPRD_DMA_HIGH_ADDR_MASK;
+	hw->src_addr = src & SPRD_DMA_LOW_ADDR_MASK;
+	hw->des_addr = dst & SPRD_DMA_LOW_ADDR_MASK;
+
+	/*
+	 * Fixed-address mode can only be enabled when exactly one of the
+	 * source and destination steps is 0; if both are 0 or both are
+	 * non-zero, it must stay disabled.
+	 */
+	if ((src_step != 0 && dst_step != 0) || (src_step | dst_step) == 0) {
+		fix_en = 0;
+	} else {
+		fix_en = 1;
+		if (src_step)
+			fix_mode = 1;
+		else
+			fix_mode = 0;
+	}
+
+	hw->intc = int_mode | SPRD_DMA_CFG_ERR_INT_EN;
+
+	temp = src_datawidth << SPRD_DMA_SRC_DATAWIDTH_OFFSET;
+	temp |= dst_datawidth << SPRD_DMA_DES_DATAWIDTH_OFFSET;
+	temp |= req_mode << SPRD_DMA_REQ_MODE_OFFSET;
+	temp |= fix_mode << SPRD_DMA_FIX_SEL_OFFSET;
+	temp |= fix_en << SPRD_DMA_FIX_EN_OFFSET;
+	temp |= slave_cfg->src_maxburst & SPRD_DMA_FRG_LEN_MASK;
+	hw->frg_len = temp;
+
+	hw->blk_len = len & SPRD_DMA_BLK_LEN_MASK;
+	hw->trsc_len = len & SPRD_DMA_TRSC_LEN_MASK;
+
+	temp = (dst_step & SPRD_DMA_TRSF_STEP_MASK) << SPRD_DMA_DEST_TRSF_STEP_OFFSET;
+	temp |= (src_step & SPRD_DMA_TRSF_STEP_MASK) << SPRD_DMA_SRC_TRSF_STEP_OFFSET;
+	hw->trsf_step = temp;
+
+	hw->frg_step = 0;
+	hw->src_blk_step = 0;
+	hw->des_blk_step = 0;
+	return 0;
+}
+
+static struct dma_async_tx_descriptor *
+sprd_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+			 size_t len, unsigned long flags)
+{
+	struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+	struct sprd_dma_desc *sdesc;
+	struct sprd_dma_chn_hw *hw;
+	enum sprd_dma_datawidth datawidth;
+	u32 step, temp;
+
+	sdesc = kzalloc(sizeof(*sdesc), GFP_NOWAIT);
+	if (!sdesc)
+		return NULL;
+
+	hw = &sdesc->chn_hw;
+
+	hw->cfg = SPRD_DMA_DONOT_WAIT_BDONE << SPRD_DMA_WAIT_BDONE_OFFSET;
+	hw->intc = SPRD_DMA_TRANS_INT | SPRD_DMA_CFG_ERR_INT_EN;
+	hw->src_addr = src & SPRD_DMA_LOW_ADDR_MASK;
+	hw->des_addr = dest & SPRD_DMA_LOW_ADDR_MASK;
+	hw->wrap_ptr = (src >> SPRD_DMA_HIGH_ADDR_OFFSET) &
+		SPRD_DMA_HIGH_ADDR_MASK;
+	hw->wrap_to = (dest >> SPRD_DMA_HIGH_ADDR_OFFSET) &
+		SPRD_DMA_HIGH_ADDR_MASK;
+
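+	/* Pick the widest data width and step the copy length is aligned to */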
+	if (IS_ALIGNED(len, 8)) {
+		datawidth = SPRD_DMA_DATAWIDTH_8_BYTES;
+		step = SPRD_DMA_DWORD_STEP;
+	} else if (IS_ALIGNED(len, 4)) {
+		datawidth = SPRD_DMA_DATAWIDTH_4_BYTES;
+		step = SPRD_DMA_WORD_STEP;
+	} else if (IS_ALIGNED(len, 2)) {
+		datawidth = SPRD_DMA_DATAWIDTH_2_BYTES;
+		step = SPRD_DMA_SHORT_STEP;
+	} else {
+		datawidth = SPRD_DMA_DATAWIDTH_1_BYTE;
+		step = SPRD_DMA_BYTE_STEP;
+	}
+
+	temp = datawidth << SPRD_DMA_SRC_DATAWIDTH_OFFSET;
+	temp |= datawidth << SPRD_DMA_DES_DATAWIDTH_OFFSET;
+	temp |= SPRD_DMA_TRANS_REQ << SPRD_DMA_REQ_MODE_OFFSET;
+	temp |= len & SPRD_DMA_FRG_LEN_MASK;
+	hw->frg_len = temp;
+
+	hw->blk_len = len & SPRD_DMA_BLK_LEN_MASK;
+	hw->trsc_len = len & SPRD_DMA_TRSC_LEN_MASK;
+
+	temp = (step & SPRD_DMA_TRSF_STEP_MASK) << SPRD_DMA_DEST_TRSF_STEP_OFFSET;
+	temp |= (step & SPRD_DMA_TRSF_STEP_MASK) << SPRD_DMA_SRC_TRSF_STEP_OFFSET;
+	hw->trsf_step = temp;
+
+	return vchan_tx_prep(&schan->vc, &sdesc->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+sprd_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		       unsigned int sglen, enum dma_transfer_direction dir,
+		       unsigned long flags, void *context)
+{
+	struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+	struct dma_slave_config *slave_cfg = &schan->slave_cfg;
+	dma_addr_t src = 0, dst = 0;
+	struct sprd_dma_desc *sdesc;
+	struct scatterlist *sg;
+	u32 len = 0;
+	int ret, i;
+
+	/* TODO: only one scatterlist entry per configuration is supported for now. */
+	if (!is_slave_direction(dir) || sglen > 1)
+		return NULL;
+
+	sdesc = kzalloc(sizeof(*sdesc), GFP_NOWAIT);
+	if (!sdesc)
+		return NULL;
+
+	for_each_sg(sgl, sg, sglen, i) {
+		len = sg_dma_len(sg);
+
+		if (dir == DMA_MEM_TO_DEV) {
+			src = sg_dma_address(sg);
+			dst = slave_cfg->dst_addr;
+		} else {
+			src = slave_cfg->src_addr;
+			dst = sg_dma_address(sg);
+		}
+	}
+
+	ret = sprd_dma_fill_desc(chan, sdesc, src, dst, len, dir, flags,
+				 slave_cfg);
+	if (ret) {
+		kfree(sdesc);
+		return NULL;
+	}
+
+	return vchan_tx_prep(&schan->vc, &sdesc->vd, flags);
+}
+
+static int sprd_dma_slave_config(struct dma_chan *chan,
+				 struct dma_slave_config *config)
+{
+	struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+	struct dma_slave_config *slave_cfg = &schan->slave_cfg;
+
+	if (!is_slave_direction(config->direction))
+		return -EINVAL;
+
+	memcpy(slave_cfg, config, sizeof(*config));
+	return 0;
+}
+
+static int sprd_dma_pause(struct dma_chan *chan)
+{
+	struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&schan->vc.lock, flags);
+	sprd_dma_pause_resume(schan, true);
+	spin_unlock_irqrestore(&schan->vc.lock, flags);
+
+	return 0;
+}
+
+static int sprd_dma_resume(struct dma_chan *chan)
+{
+	struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&schan->vc.lock, flags);
+	sprd_dma_pause_resume(schan, false);
+	spin_unlock_irqrestore(&schan->vc.lock, flags);
+
+	return 0;
+}
+
+static int sprd_dma_terminate_all(struct dma_chan *chan)
+{
+	struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&schan->vc.lock, flags);
+	sprd_dma_stop(schan);
+
+	vchan_get_all_descriptors(&schan->vc, &head);
+	spin_unlock_irqrestore(&schan->vc.lock, flags);
+
+	vchan_dma_desc_free_list(&schan->vc, &head);
+	return 0;
+}
+
+static void sprd_dma_free_desc(struct virt_dma_desc *vd)
+{
+	struct sprd_dma_desc *sdesc = to_sprd_dma_desc(vd);
+
+	kfree(sdesc);
+}
+
+static bool sprd_dma_filter_fn(struct dma_chan *chan, void *param)
+{
+	struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+	struct sprd_dma_dev *sdev = to_sprd_dma_dev(&schan->vc.chan);
+	u32 req = *(u32 *)param;
+
+	if (req < sdev->total_chns)
+		return req == schan->chn_num + 1;
+	else
+		return false;
+}
+
+static int sprd_dma_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct sprd_dma_dev *sdev;
+	struct sprd_dma_chn *dma_chn;
+	struct resource *res;
+	u32 chn_count;
+	int ret, i;
+
+	ret = device_property_read_u32(&pdev->dev, "#dma-channels", &chn_count);
+	if (ret) {
+		dev_err(&pdev->dev, "get dma channels count failed\n");
+		return ret;
+	}
+
+	sdev = devm_kzalloc(&pdev->dev,
+			    struct_size(sdev, channels, chn_count),
+			    GFP_KERNEL);
+	if (!sdev)
+		return -ENOMEM;
+
+	sdev->clk = devm_clk_get(&pdev->dev, "enable");
+	if (IS_ERR(sdev->clk)) {
+		dev_err(&pdev->dev, "get enable clock failed\n");
+		return PTR_ERR(sdev->clk);
+	}
+
+	/* ashb clock is optional for AGCP DMA */
+	sdev->ashb_clk = devm_clk_get(&pdev->dev, "ashb_eb");
+	if (IS_ERR(sdev->ashb_clk))
+		dev_warn(&pdev->dev, "no optional ashb eb clock\n");
+
+	/*
+	 * There are three DMA controllers: AP DMA, AON DMA and AGCP DMA.
+	 * The AGCP DMA controller may choose not to request its irq, so
+	 * that its interrupts cannot wake the system and power is saved.
+	 * The DMA "interrupts" property is therefore optional.
+	 */
+	sdev->irq = platform_get_irq(pdev, 0);
+	if (sdev->irq > 0) {
+		ret = devm_request_irq(&pdev->dev, sdev->irq, dma_irq_handle,
+				       0, "sprd_dma", (void *)sdev);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "request dma irq failed\n");
+			return ret;
+		}
+	} else {
+		dev_warn(&pdev->dev, "no interrupts for the dma controller\n");
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	sdev->glb_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(sdev->glb_base))
+		return PTR_ERR(sdev->glb_base);
+
+	dma_cap_set(DMA_MEMCPY, sdev->dma_dev.cap_mask);
+	sdev->total_chns = chn_count;
+	sdev->dma_dev.chancnt = chn_count;
+	INIT_LIST_HEAD(&sdev->dma_dev.channels);
+	INIT_LIST_HEAD(&sdev->dma_dev.global_node);
+	sdev->dma_dev.dev = &pdev->dev;
+	sdev->dma_dev.device_alloc_chan_resources = sprd_dma_alloc_chan_resources;
+	sdev->dma_dev.device_free_chan_resources = sprd_dma_free_chan_resources;
+	sdev->dma_dev.device_tx_status = sprd_dma_tx_status;
+	sdev->dma_dev.device_issue_pending = sprd_dma_issue_pending;
+	sdev->dma_dev.device_prep_dma_memcpy = sprd_dma_prep_dma_memcpy;
+	sdev->dma_dev.device_prep_slave_sg = sprd_dma_prep_slave_sg;
+	sdev->dma_dev.device_config = sprd_dma_slave_config;
+	sdev->dma_dev.device_pause = sprd_dma_pause;
+	sdev->dma_dev.device_resume = sprd_dma_resume;
+	sdev->dma_dev.device_terminate_all = sprd_dma_terminate_all;
+
+	for (i = 0; i < chn_count; i++) {
+		dma_chn = &sdev->channels[i];
+		dma_chn->chn_num = i;
+		dma_chn->cur_desc = NULL;
+		/* get each channel's registers base address. */
+		dma_chn->chn_base = sdev->glb_base + SPRD_DMA_CHN_REG_OFFSET +
+				    SPRD_DMA_CHN_REG_LENGTH * i;
+
+		dma_chn->vc.desc_free = sprd_dma_free_desc;
+		vchan_init(&dma_chn->vc, &sdev->dma_dev);
+	}
+
+	platform_set_drvdata(pdev, sdev);
+	ret = sprd_dma_enable(sdev);
+	if (ret)
+		return ret;
+
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+
+	ret = pm_runtime_get_sync(&pdev->dev);
+	if (ret < 0)
+		goto err_rpm;
+
+	ret = dma_async_device_register(&sdev->dma_dev);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "register dma device failed:%d\n", ret);
+		goto err_register;
+	}
+
+	sprd_dma_info.dma_cap = sdev->dma_dev.cap_mask;
+	ret = of_dma_controller_register(np, of_dma_simple_xlate,
+					 &sprd_dma_info);
+	if (ret)
+		goto err_of_register;
+
+	pm_runtime_put(&pdev->dev);
+	return 0;
+
+err_of_register:
+	dma_async_device_unregister(&sdev->dma_dev);
+err_register:
+	pm_runtime_put_noidle(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+err_rpm:
+	sprd_dma_disable(sdev);
+	return ret;
+}
+
+static int sprd_dma_remove(struct platform_device *pdev)
+{
+	struct sprd_dma_dev *sdev = platform_get_drvdata(pdev);
+	struct sprd_dma_chn *c, *cn;
+	int ret;
+
+	ret = pm_runtime_get_sync(&pdev->dev);
+	if (ret < 0)
+		return ret;
+
+	/* explicitly free the irq */
+	if (sdev->irq > 0)
+		devm_free_irq(&pdev->dev, sdev->irq, sdev);
+
+	list_for_each_entry_safe(c, cn, &sdev->dma_dev.channels,
+				 vc.chan.device_node) {
+		list_del(&c->vc.chan.device_node);
+		tasklet_kill(&c->vc.task);
+	}
+
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&sdev->dma_dev);
+	sprd_dma_disable(sdev);
+
+	pm_runtime_put_noidle(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+	return 0;
+}
+
+static const struct of_device_id sprd_dma_match[] = {
+	{ .compatible = "sprd,sc9860-dma", },
+	{},
+};
+
+static int __maybe_unused sprd_dma_runtime_suspend(struct device *dev)
+{
+	struct sprd_dma_dev *sdev = dev_get_drvdata(dev);
+
+	sprd_dma_disable(sdev);
+	return 0;
+}
+
+static int __maybe_unused sprd_dma_runtime_resume(struct device *dev)
+{
+	struct sprd_dma_dev *sdev = dev_get_drvdata(dev);
+	int ret;
+
+	ret = sprd_dma_enable(sdev);
+	if (ret)
+		dev_err(sdev->dma_dev.dev, "enable dma failed\n");
+
+	return ret;
+}
+
+static const struct dev_pm_ops sprd_dma_pm_ops = {
+	SET_RUNTIME_PM_OPS(sprd_dma_runtime_suspend,
+			   sprd_dma_runtime_resume,
+			   NULL)
+};
+
+static struct platform_driver sprd_dma_driver = {
+	.probe = sprd_dma_probe,
+	.remove = sprd_dma_remove,
+	.driver = {
+		.name = "sprd-dma",
+		.of_match_table = sprd_dma_match,
+		.pm = &sprd_dma_pm_ops,
+	},
+};
+module_platform_driver(sprd_dma_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("DMA driver for Spreadtrum");
+MODULE_AUTHOR("Baolin Wang <baolin.wang@spreadtrum.com>");
+MODULE_ALIAS("platform:sprd-dma");
diff --git a/drivers/dma/st_fdma.c b/drivers/dma/st_fdma.c
new file mode 100644
index 0000000..bfb79bd
--- /dev/null
+++ b/drivers/dma/st_fdma.c
@@ -0,0 +1,889 @@
+/*
+ * DMA driver for STMicroelectronics STi FDMA controller
+ *
+ * Copyright (C) 2014 STMicroelectronics
+ *
+ * Author: Ludovic Barre <Ludovic.barre@st.com>
+ *	   Peter Griffin <peter.griffin@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/remoteproc.h>
+
+#include "st_fdma.h"
+
+static inline struct st_fdma_chan *to_st_fdma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct st_fdma_chan, vchan.chan);
+}
+
+static struct st_fdma_desc *to_st_fdma_desc(struct virt_dma_desc *vd)
+{
+	return container_of(vd, struct st_fdma_desc, vdesc);
+}
+
+static int st_fdma_dreq_get(struct st_fdma_chan *fchan)
+{
+	struct st_fdma_dev *fdev = fchan->fdev;
+	u32 req_line_cfg = fchan->cfg.req_line;
+	u32 dreq_line;
+	int try = 0;
+
+	/*
+	 * dreq_mask is shared by all channels of the FDMA, so all accesses
+	 * must be atomic. If dreq_mask changes between the check and
+	 * test_and_set_bit(), we retry.
+	 */
+	do {
+		if (fdev->dreq_mask == ~0L) {
+			dev_err(fdev->dev, "No req lines available\n");
+			return -EINVAL;
+		}
+
+		if (try || req_line_cfg >= ST_FDMA_NR_DREQS) {
+			dev_err(fdev->dev, "Invalid or used req line\n");
+			return -EINVAL;
+		} else {
+			dreq_line = req_line_cfg;
+		}
+
+		try++;
+	} while (test_and_set_bit(dreq_line, &fdev->dreq_mask));
+
+	dev_dbg(fdev->dev, "get dreq_line:%d mask:%#lx\n",
+		dreq_line, fdev->dreq_mask);
+
+	return dreq_line;
+}
+
+static void st_fdma_dreq_put(struct st_fdma_chan *fchan)
+{
+	struct st_fdma_dev *fdev = fchan->fdev;
+
+	dev_dbg(fdev->dev, "put dreq_line:%#x\n", fchan->dreq_line);
+	clear_bit(fchan->dreq_line, &fdev->dreq_mask);
+}
+
+static void st_fdma_xfer_desc(struct st_fdma_chan *fchan)
+{
+	struct virt_dma_desc *vdesc;
+	unsigned long nbytes, ch_cmd, cmd;
+
+	vdesc = vchan_next_desc(&fchan->vchan);
+	if (!vdesc)
+		return;
+
+	fchan->fdesc = to_st_fdma_desc(vdesc);
+	nbytes = fchan->fdesc->node[0].desc->nbytes;
+	cmd = FDMA_CMD_START(fchan->vchan.chan.chan_id);
+	ch_cmd = fchan->fdesc->node[0].pdesc | FDMA_CH_CMD_STA_START;
+
+	/* start the channel for the descriptor */
+	fnode_write(fchan, nbytes, FDMA_CNTN_OFST);
+	fchan_write(fchan, ch_cmd, FDMA_CH_CMD_OFST);
+	writel(cmd,
+		fchan->fdev->slim_rproc->peri + FDMA_CMD_SET_OFST);
+
+	dev_dbg(fchan->fdev->dev, "start chan:%d\n", fchan->vchan.chan.chan_id);
+}
+
+static void st_fdma_ch_sta_update(struct st_fdma_chan *fchan,
+				  unsigned long int_sta)
+{
+	unsigned long ch_sta, ch_err;
+	int ch_id = fchan->vchan.chan.chan_id;
+	struct st_fdma_dev *fdev = fchan->fdev;
+
+	ch_sta = fchan_read(fchan, FDMA_CH_CMD_OFST);
+	ch_err = ch_sta & FDMA_CH_CMD_ERR_MASK;
+	ch_sta &= FDMA_CH_CMD_STA_MASK;
+
+	if (int_sta & FDMA_INT_STA_ERR) {
+		dev_warn(fdev->dev, "chan:%d, error:%ld\n", ch_id, ch_err);
+		fchan->status = DMA_ERROR;
+		return;
+	}
+
+	switch (ch_sta) {
+	case FDMA_CH_CMD_STA_PAUSED:
+		fchan->status = DMA_PAUSED;
+		break;
+
+	case FDMA_CH_CMD_STA_RUNNING:
+		fchan->status = DMA_IN_PROGRESS;
+		break;
+	}
+}
+
+static irqreturn_t st_fdma_irq_handler(int irq, void *dev_id)
+{
+	struct st_fdma_dev *fdev = dev_id;
+	irqreturn_t ret = IRQ_NONE;
+	struct st_fdma_chan *fchan = &fdev->chans[0];
+	unsigned long int_sta, clr;
+
+	int_sta = fdma_read(fdev, FDMA_INT_STA_OFST);
+	clr = int_sta;
+
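+	/* Two consecutive status bits per channel: completion and error */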
+	for (; int_sta != 0 ; int_sta >>= 2, fchan++) {
+		if (!(int_sta & (FDMA_INT_STA_CH | FDMA_INT_STA_ERR)))
+			continue;
+
+		spin_lock(&fchan->vchan.lock);
+		st_fdma_ch_sta_update(fchan, int_sta);
+
+		if (fchan->fdesc) {
+			if (!fchan->fdesc->iscyclic) {
+				list_del(&fchan->fdesc->vdesc.node);
+				vchan_cookie_complete(&fchan->fdesc->vdesc);
+				fchan->fdesc = NULL;
+				fchan->status = DMA_COMPLETE;
+			} else {
+				vchan_cyclic_callback(&fchan->fdesc->vdesc);
+			}
+
+			/* Start the next descriptor (if available) */
+			if (!fchan->fdesc)
+				st_fdma_xfer_desc(fchan);
+		}
+
+		spin_unlock(&fchan->vchan.lock);
+		ret = IRQ_HANDLED;
+	}
+
+	fdma_write(fdev, clr, FDMA_INT_CLR_OFST);
+
+	return ret;
+}
+
+static struct dma_chan *st_fdma_of_xlate(struct of_phandle_args *dma_spec,
+					 struct of_dma *ofdma)
+{
+	struct st_fdma_dev *fdev = ofdma->of_dma_data;
+	struct dma_chan *chan;
+	struct st_fdma_chan *fchan;
+	int ret;
+
+	if (dma_spec->args_count < 1)
+		return ERR_PTR(-EINVAL);
+
+	if (fdev->dma_device.dev->of_node != dma_spec->np)
+		return ERR_PTR(-EINVAL);
+
+	ret = rproc_boot(fdev->slim_rproc->rproc);
+	if (ret == -ENOENT)
+		return ERR_PTR(-EPROBE_DEFER);
+	else if (ret)
+		return ERR_PTR(ret);
+
+	chan = dma_get_any_slave_channel(&fdev->dma_device);
+	if (!chan)
+		goto err_chan;
+
+	fchan = to_st_fdma_chan(chan);
+
+	fchan->cfg.of_node = dma_spec->np;
+	fchan->cfg.req_line = dma_spec->args[0];
+	fchan->cfg.req_ctrl = 0;
+	fchan->cfg.type = ST_FDMA_TYPE_FREE_RUN;
+
+	if (dma_spec->args_count > 1)
+		fchan->cfg.req_ctrl = dma_spec->args[1]
+			& FDMA_REQ_CTRL_CFG_MASK;
+
+	if (dma_spec->args_count > 2)
+		fchan->cfg.type = dma_spec->args[2];
+
+	if (fchan->cfg.type == ST_FDMA_TYPE_FREE_RUN) {
+		fchan->dreq_line = 0;
+	} else {
+		fchan->dreq_line = st_fdma_dreq_get(fchan);
+		if (IS_ERR_VALUE(fchan->dreq_line)) {
+			chan = ERR_PTR(fchan->dreq_line);
+			goto err_chan;
+		}
+	}
+
+	dev_dbg(fdev->dev, "xlate req_line:%d type:%d req_ctrl:%#lx\n",
+		fchan->cfg.req_line, fchan->cfg.type, fchan->cfg.req_ctrl);
+
+	return chan;
+
+err_chan:
+	rproc_shutdown(fdev->slim_rproc->rproc);
+	return chan;
+
+}
+
+static void st_fdma_free_desc(struct virt_dma_desc *vdesc)
+{
+	struct st_fdma_desc *fdesc;
+	int i;
+
+	fdesc = to_st_fdma_desc(vdesc);
+	for (i = 0; i < fdesc->n_nodes; i++)
+		dma_pool_free(fdesc->fchan->node_pool, fdesc->node[i].desc,
+			      fdesc->node[i].pdesc);
+	kfree(fdesc);
+}
+
+static struct st_fdma_desc *st_fdma_alloc_desc(struct st_fdma_chan *fchan,
+					       int sg_len)
+{
+	struct st_fdma_desc *fdesc;
+	int i;
+
+	fdesc = kzalloc(sizeof(*fdesc) +
+			sizeof(struct st_fdma_sw_node) * sg_len, GFP_NOWAIT);
+	if (!fdesc)
+		return NULL;
+
+	fdesc->fchan = fchan;
+	fdesc->n_nodes = sg_len;
+	for (i = 0; i < sg_len; i++) {
+		fdesc->node[i].desc = dma_pool_alloc(fchan->node_pool,
+				GFP_NOWAIT, &fdesc->node[i].pdesc);
+		if (!fdesc->node[i].desc)
+			goto err;
+	}
+	return fdesc;
+
+err:
+	while (--i >= 0)
+		dma_pool_free(fchan->node_pool, fdesc->node[i].desc,
+			      fdesc->node[i].pdesc);
+	kfree(fdesc);
+	return NULL;
+}
+
+static int st_fdma_alloc_chan_res(struct dma_chan *chan)
+{
+	struct st_fdma_chan *fchan = to_st_fdma_chan(chan);
+
+	/* Create the dma pool for descriptor allocation */
+	fchan->node_pool = dma_pool_create(dev_name(&chan->dev->device),
+					    fchan->fdev->dev,
+					    sizeof(struct st_fdma_hw_node),
+					    __alignof__(struct st_fdma_hw_node),
+					    0);
+
+	if (!fchan->node_pool) {
+		dev_err(fchan->fdev->dev, "unable to allocate desc pool\n");
+		return -ENOMEM;
+	}
+
+	dev_dbg(fchan->fdev->dev, "alloc ch_id:%d type:%d\n",
+		fchan->vchan.chan.chan_id, fchan->cfg.type);
+
+	return 0;
+}
+
+static void st_fdma_free_chan_res(struct dma_chan *chan)
+{
+	struct st_fdma_chan *fchan = to_st_fdma_chan(chan);
+	struct rproc *rproc = fchan->fdev->slim_rproc->rproc;
+	unsigned long flags;
+
+	LIST_HEAD(head);
+
+	dev_dbg(fchan->fdev->dev, "%s: freeing chan:%d\n",
+		__func__, fchan->vchan.chan.chan_id);
+
+	if (fchan->cfg.type != ST_FDMA_TYPE_FREE_RUN)
+		st_fdma_dreq_put(fchan);
+
+	spin_lock_irqsave(&fchan->vchan.lock, flags);
+	fchan->fdesc = NULL;
+	spin_unlock_irqrestore(&fchan->vchan.lock, flags);
+
+	dma_pool_destroy(fchan->node_pool);
+	fchan->node_pool = NULL;
+	memset(&fchan->cfg, 0, sizeof(struct st_fdma_cfg));
+
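+	/* Balances the rproc_boot() done in st_fdma_of_xlate() */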
+	rproc_shutdown(rproc);
+}
+
+static struct dma_async_tx_descriptor *st_fdma_prep_dma_memcpy(
+	struct dma_chan *chan,	dma_addr_t dst, dma_addr_t src,
+	size_t len, unsigned long flags)
+{
+	struct st_fdma_chan *fchan;
+	struct st_fdma_desc *fdesc;
+	struct st_fdma_hw_node *hw_node;
+
+	if (!len)
+		return NULL;
+
+	fchan = to_st_fdma_chan(chan);
+
+	/* We only require a single descriptor */
+	fdesc = st_fdma_alloc_desc(fchan, 1);
+	if (!fdesc) {
+		dev_err(fchan->fdev->dev, "no memory for desc\n");
+		return NULL;
+	}
+
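+	/*
+	 * Single free-running node: increment both source and destination
+	 * addresses and interrupt at end of node.
+	 */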
+	hw_node = fdesc->node[0].desc;
+	hw_node->next = 0;
+	hw_node->control = FDMA_NODE_CTRL_REQ_MAP_FREE_RUN;
+	hw_node->control |= FDMA_NODE_CTRL_SRC_INCR;
+	hw_node->control |= FDMA_NODE_CTRL_DST_INCR;
+	hw_node->control |= FDMA_NODE_CTRL_INT_EON;
+	hw_node->nbytes = len;
+	hw_node->saddr = src;
+	hw_node->daddr = dst;
+	hw_node->generic.length = len;
+	hw_node->generic.sstride = 0;
+	hw_node->generic.dstride = 0;
+
+	return vchan_tx_prep(&fchan->vchan, &fdesc->vdesc, flags);
+}
+
+static int config_reqctrl(struct st_fdma_chan *fchan,
+			  enum dma_transfer_direction direction)
+{
+	u32 maxburst = 0, addr = 0;
+	enum dma_slave_buswidth width;
+	int ch_id = fchan->vchan.chan.chan_id;
+	struct st_fdma_dev *fdev = fchan->fdev;
+
+	switch (direction) {
+
+	case DMA_DEV_TO_MEM:
+		fchan->cfg.req_ctrl &= ~FDMA_REQ_CTRL_WNR;
+		maxburst = fchan->scfg.src_maxburst;
+		width = fchan->scfg.src_addr_width;
+		addr = fchan->scfg.src_addr;
+		break;
+
+	case DMA_MEM_TO_DEV:
+		fchan->cfg.req_ctrl |= FDMA_REQ_CTRL_WNR;
+		maxburst = fchan->scfg.dst_maxburst;
+		width = fchan->scfg.dst_addr_width;
+		addr = fchan->scfg.dst_addr;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	fchan->cfg.req_ctrl &= ~FDMA_REQ_CTRL_OPCODE_MASK;
+
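+	/* Encode the bus width as a load/store opcode in the request control */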
+	switch (width) {
+
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		fchan->cfg.req_ctrl |= FDMA_REQ_CTRL_OPCODE_LD_ST1;
+		break;
+
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		fchan->cfg.req_ctrl |= FDMA_REQ_CTRL_OPCODE_LD_ST2;
+		break;
+
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		fchan->cfg.req_ctrl |= FDMA_REQ_CTRL_OPCODE_LD_ST4;
+		break;
+
+	case DMA_SLAVE_BUSWIDTH_8_BYTES:
+		fchan->cfg.req_ctrl |= FDMA_REQ_CTRL_OPCODE_LD_ST8;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
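+	/* NUM_OPS is programmed with the requested burst length minus one */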
+	fchan->cfg.req_ctrl &= ~FDMA_REQ_CTRL_NUM_OPS_MASK;
+	fchan->cfg.req_ctrl |= FDMA_REQ_CTRL_NUM_OPS(maxburst-1);
+	dreq_write(fchan, fchan->cfg.req_ctrl, FDMA_REQ_CTRL_OFST);
+
+	fchan->cfg.dev_addr = addr;
+	fchan->cfg.dir = direction;
+
+	dev_dbg(fdev->dev, "chan:%d config_reqctrl:%#x req_ctrl:%#lx\n",
+		ch_id, addr, fchan->cfg.req_ctrl);
+
+	return 0;
+}
+
+static void fill_hw_node(struct st_fdma_hw_node *hw_node,
+			struct st_fdma_chan *fchan,
+			enum dma_transfer_direction direction)
+{
+	if (direction == DMA_MEM_TO_DEV) {
+		hw_node->control |= FDMA_NODE_CTRL_SRC_INCR;
+		hw_node->control |= FDMA_NODE_CTRL_DST_STATIC;
+		hw_node->daddr = fchan->cfg.dev_addr;
+	} else {
+		hw_node->control |= FDMA_NODE_CTRL_SRC_STATIC;
+		hw_node->control |= FDMA_NODE_CTRL_DST_INCR;
+		hw_node->saddr = fchan->cfg.dev_addr;
+	}
+
+	hw_node->generic.sstride = 0;
+	hw_node->generic.dstride = 0;
+}
+
+static inline struct st_fdma_chan *st_fdma_prep_common(struct dma_chan *chan,
+		size_t len, enum dma_transfer_direction direction)
+{
+	struct st_fdma_chan *fchan;
+
+	if (!chan || !len)
+		return NULL;
+
+	fchan = to_st_fdma_chan(chan);
+
+	if (!is_slave_direction(direction)) {
+		dev_err(fchan->fdev->dev, "bad direction?\n");
+		return NULL;
+	}
+
+	return fchan;
+}
+
+static struct dma_async_tx_descriptor *st_fdma_prep_dma_cyclic(
+		struct dma_chan *chan, dma_addr_t buf_addr, size_t len,
+		size_t period_len, enum dma_transfer_direction direction,
+		unsigned long flags)
+{
+	struct st_fdma_chan *fchan;
+	struct st_fdma_desc *fdesc;
+	int sg_len, i;
+
+	fchan = st_fdma_prep_common(chan, len, direction);
+	if (!fchan)
+		return NULL;
+
+	if (!period_len)
+		return NULL;
+
+	if (config_reqctrl(fchan, direction)) {
+		dev_err(fchan->fdev->dev, "bad width or direction\n");
+		return NULL;
+	}
+
+	/* the buffer length must be a multiple of period_len */
+	if (len % period_len != 0) {
+		dev_err(fchan->fdev->dev, "len is not multiple of period\n");
+		return NULL;
+	}
+
+	sg_len = len / period_len;
+	fdesc = st_fdma_alloc_desc(fchan, sg_len);
+	if (!fdesc) {
+		dev_err(fchan->fdev->dev, "no memory for desc\n");
+		return NULL;
+	}
+
+	fdesc->iscyclic = true;
+
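+	/*
+	 * Build one node per period and link the last node back to the first
+	 * so the transfer loops; each node interrupts at end of node so the
+	 * cyclic callback runs once per period.
+	 */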
+	for (i = 0; i < sg_len; i++) {
+		struct st_fdma_hw_node *hw_node = fdesc->node[i].desc;
+
+		hw_node->next = fdesc->node[(i + 1) % sg_len].pdesc;
+
+		hw_node->control =
+			FDMA_NODE_CTRL_REQ_MAP_DREQ(fchan->dreq_line);
+		hw_node->control |= FDMA_NODE_CTRL_INT_EON;
+
+		fill_hw_node(hw_node, fchan, direction);
+
+		if (direction == DMA_MEM_TO_DEV)
+			hw_node->saddr = buf_addr + (i * period_len);
+		else
+			hw_node->daddr = buf_addr + (i * period_len);
+
+		hw_node->nbytes = period_len;
+		hw_node->generic.length = period_len;
+	}
+
+	return vchan_tx_prep(&fchan->vchan, &fdesc->vdesc, flags);
+}
+
+static struct dma_async_tx_descriptor *st_fdma_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct st_fdma_chan *fchan;
+	struct st_fdma_desc *fdesc;
+	struct st_fdma_hw_node *hw_node;
+	struct scatterlist *sg;
+	int i;
+
+	fchan = st_fdma_prep_common(chan, sg_len, direction);
+	if (!fchan)
+		return NULL;
+
+	if (!sgl)
+		return NULL;
+
+	fdesc = st_fdma_alloc_desc(fchan, sg_len);
+	if (!fdesc) {
+		dev_err(fchan->fdev->dev, "no memory for desc\n");
+		return NULL;
+	}
+
+	fdesc->iscyclic = false;
+
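+	/*
+	 * One node per scatterlist entry; unlike the cyclic case only the
+	 * final node raises an interrupt (flag set after the loop).
+	 */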
+	for_each_sg(sgl, sg, sg_len, i) {
+		hw_node = fdesc->node[i].desc;
+
+		hw_node->next = fdesc->node[(i + 1) % sg_len].pdesc;
+		hw_node->control = FDMA_NODE_CTRL_REQ_MAP_DREQ(fchan->dreq_line);
+
+		fill_hw_node(hw_node, fchan, direction);
+
+		if (direction == DMA_MEM_TO_DEV)
+			hw_node->saddr = sg_dma_address(sg);
+		else
+			hw_node->daddr = sg_dma_address(sg);
+
+		hw_node->nbytes = sg_dma_len(sg);
+		hw_node->generic.length = sg_dma_len(sg);
+	}
+
+	/* interrupt at end of last node */
+	hw_node->control |= FDMA_NODE_CTRL_INT_EON;
+
+	return vchan_tx_prep(&fchan->vchan, &fdesc->vdesc, flags);
+}
+
+static size_t st_fdma_desc_residue(struct st_fdma_chan *fchan,
+				   struct virt_dma_desc *vdesc,
+				   bool in_progress)
+{
+	struct st_fdma_desc *fdesc = fchan->fdesc;
+	size_t residue = 0;
+	dma_addr_t cur_addr = 0;
+	int i;
+
+	if (in_progress) {
+		cur_addr = fchan_read(fchan, FDMA_CH_CMD_OFST);
+		cur_addr &= FDMA_CH_CMD_DATA_MASK;
+	}
+
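+	/*
+	 * Walk the nodes backwards, summing the size of nodes that have not
+	 * started yet; the node currently being transferred contributes the
+	 * remaining byte count read back from the hardware.
+	 */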
+	for (i = fchan->fdesc->n_nodes - 1 ; i >= 0; i--) {
+		if (cur_addr == fdesc->node[i].pdesc) {
+			residue += fnode_read(fchan, FDMA_CNTN_OFST);
+			break;
+		}
+		residue += fdesc->node[i].desc->nbytes;
+	}
+
+	return residue;
+}
+
+static enum dma_status st_fdma_tx_status(struct dma_chan *chan,
+					 dma_cookie_t cookie,
+					 struct dma_tx_state *txstate)
+{
+	struct st_fdma_chan *fchan = to_st_fdma_chan(chan);
+	struct virt_dma_desc *vd;
+	enum dma_status ret;
+	unsigned long flags;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE || !txstate)
+		return ret;
+
+	spin_lock_irqsave(&fchan->vchan.lock, flags);
+	vd = vchan_find_desc(&fchan->vchan, cookie);
+	if (fchan->fdesc && cookie == fchan->fdesc->vdesc.tx.cookie)
+		txstate->residue = st_fdma_desc_residue(fchan, vd, true);
+	else if (vd)
+		txstate->residue = st_fdma_desc_residue(fchan, vd, false);
+	else
+		txstate->residue = 0;
+
+	spin_unlock_irqrestore(&fchan->vchan.lock, flags);
+
+	return ret;
+}
+
+static void st_fdma_issue_pending(struct dma_chan *chan)
+{
+	struct st_fdma_chan *fchan = to_st_fdma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&fchan->vchan.lock, flags);
+
+	if (vchan_issue_pending(&fchan->vchan) && !fchan->fdesc)
+		st_fdma_xfer_desc(fchan);
+
+	spin_unlock_irqrestore(&fchan->vchan.lock, flags);
+}
+
+static int st_fdma_pause(struct dma_chan *chan)
+{
+	unsigned long flags;
+	LIST_HEAD(head);
+	struct st_fdma_chan *fchan = to_st_fdma_chan(chan);
+	int ch_id = fchan->vchan.chan.chan_id;
+	unsigned long cmd = FDMA_CMD_PAUSE(ch_id);
+
+	dev_dbg(fchan->fdev->dev, "pause chan:%d\n", ch_id);
+
+	spin_lock_irqsave(&fchan->vchan.lock, flags);
+	if (fchan->fdesc)
+		fdma_write(fchan->fdev, cmd, FDMA_CMD_SET_OFST);
+	spin_unlock_irqrestore(&fchan->vchan.lock, flags);
+
+	return 0;
+}
+
+static int st_fdma_resume(struct dma_chan *chan)
+{
+	unsigned long flags;
+	unsigned long val;
+	struct st_fdma_chan *fchan = to_st_fdma_chan(chan);
+	int ch_id = fchan->vchan.chan.chan_id;
+
+	dev_dbg(fchan->fdev->dev, "resume chan:%d\n", ch_id);
+
+	spin_lock_irqsave(&fchan->vchan.lock, flags);
+	if (fchan->fdesc) {
+		val = fchan_read(fchan, FDMA_CH_CMD_OFST);
+		val &= FDMA_CH_CMD_DATA_MASK;
+		fchan_write(fchan, val, FDMA_CH_CMD_OFST);
+	}
+	spin_unlock_irqrestore(&fchan->vchan.lock, flags);
+
+	return 0;
+}
+
+static int st_fdma_terminate_all(struct dma_chan *chan)
+{
+	unsigned long flags;
+	LIST_HEAD(head);
+	struct st_fdma_chan *fchan = to_st_fdma_chan(chan);
+	int ch_id = fchan->vchan.chan.chan_id;
+	unsigned long cmd = FDMA_CMD_PAUSE(ch_id);
+
+	dev_dbg(fchan->fdev->dev, "terminate chan:%d\n", ch_id);
+
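+	/* Pause the channel, drop the current descriptor and free all queued ones */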
+	spin_lock_irqsave(&fchan->vchan.lock, flags);
+	fdma_write(fchan->fdev, cmd, FDMA_CMD_SET_OFST);
+	fchan->fdesc = NULL;
+	vchan_get_all_descriptors(&fchan->vchan, &head);
+	spin_unlock_irqrestore(&fchan->vchan.lock, flags);
+	vchan_dma_desc_free_list(&fchan->vchan, &head);
+
+	return 0;
+}
+
+static int st_fdma_slave_config(struct dma_chan *chan,
+				struct dma_slave_config *slave_cfg)
+{
+	struct st_fdma_chan *fchan = to_st_fdma_chan(chan);
+
+	memcpy(&fchan->scfg, slave_cfg, sizeof(fchan->scfg));
+	return 0;
+}
+
+static const struct st_fdma_driverdata fdma_mpe31_stih407_11 = {
+	.name = "STiH407",
+	.id = 0,
+};
+
+static const struct st_fdma_driverdata fdma_mpe31_stih407_12 = {
+	.name = "STiH407",
+	.id = 1,
+};
+
+static const struct st_fdma_driverdata fdma_mpe31_stih407_13 = {
+	.name = "STiH407",
+	.id = 2,
+};
+
+static const struct of_device_id st_fdma_match[] = {
+	{ .compatible = "st,stih407-fdma-mpe31-11",
+	  .data = &fdma_mpe31_stih407_11 },
+	{ .compatible = "st,stih407-fdma-mpe31-12",
+	  .data = &fdma_mpe31_stih407_12 },
+	{ .compatible = "st,stih407-fdma-mpe31-13",
+	  .data = &fdma_mpe31_stih407_13 },
+	{},
+};
+MODULE_DEVICE_TABLE(of, st_fdma_match);
+
+static int st_fdma_parse_dt(struct platform_device *pdev,
+			const struct st_fdma_driverdata *drvdata,
+			struct st_fdma_dev *fdev)
+{
+	snprintf(fdev->fw_name, FW_NAME_SIZE, "fdma_%s_%d.elf",
+		drvdata->name, drvdata->id);
+
+	return of_property_read_u32(pdev->dev.of_node, "dma-channels",
+				    &fdev->nr_channels);
+}
+#define FDMA_DMA_BUSWIDTHS	(BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
+static void st_fdma_free(struct st_fdma_dev *fdev)
+{
+	struct st_fdma_chan *fchan;
+	int i;
+
+	for (i = 0; i < fdev->nr_channels; i++) {
+		fchan = &fdev->chans[i];
+		list_del(&fchan->vchan.chan.device_node);
+		tasklet_kill(&fchan->vchan.task);
+	}
+}
+
+static int st_fdma_probe(struct platform_device *pdev)
+{
+	struct st_fdma_dev *fdev;
+	const struct of_device_id *match;
+	struct device_node *np = pdev->dev.of_node;
+	const struct st_fdma_driverdata *drvdata;
+	int ret, i;
+
+	match = of_match_device(st_fdma_match, &pdev->dev);
+	if (!match || !match->data) {
+		dev_err(&pdev->dev, "No device match found\n");
+		return -ENODEV;
+	}
+
+	drvdata = match->data;
+
+	fdev = devm_kzalloc(&pdev->dev, sizeof(*fdev), GFP_KERNEL);
+	if (!fdev)
+		return -ENOMEM;
+
+	ret = st_fdma_parse_dt(pdev, drvdata, fdev);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to find platform data\n");
+		goto err;
+	}
+
+	fdev->chans = devm_kcalloc(&pdev->dev, fdev->nr_channels,
+				   sizeof(struct st_fdma_chan), GFP_KERNEL);
+	if (!fdev->chans)
+		return -ENOMEM;
+
+	fdev->dev = &pdev->dev;
+	fdev->drvdata = drvdata;
+	platform_set_drvdata(pdev, fdev);
+
+	fdev->irq = platform_get_irq(pdev, 0);
+	if (fdev->irq < 0) {
+		dev_err(&pdev->dev, "Failed to get irq resource\n");
+		return -EINVAL;
+	}
+
+	ret = devm_request_irq(&pdev->dev, fdev->irq, st_fdma_irq_handler, 0,
+			       dev_name(&pdev->dev), fdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to request irq (%d)\n", ret);
+		goto err;
+	}
+
+	fdev->slim_rproc = st_slim_rproc_alloc(pdev, fdev->fw_name);
+	if (IS_ERR(fdev->slim_rproc)) {
+		ret = PTR_ERR(fdev->slim_rproc);
+		dev_err(&pdev->dev, "slim_rproc_alloc failed (%d)\n", ret);
+		goto err;
+	}
+
+	/* Initialise list of FDMA channels */
+	INIT_LIST_HEAD(&fdev->dma_device.channels);
+	for (i = 0; i < fdev->nr_channels; i++) {
+		struct st_fdma_chan *fchan = &fdev->chans[i];
+
+		fchan->fdev = fdev;
+		fchan->vchan.desc_free = st_fdma_free_desc;
+		vchan_init(&fchan->vchan, &fdev->dma_device);
+	}
+
+	/* Initialise the FDMA dreq (reserve 0 & 31 for FDMA use) */
+	fdev->dreq_mask = BIT(0) | BIT(31);
+
+	dma_cap_set(DMA_SLAVE, fdev->dma_device.cap_mask);
+	dma_cap_set(DMA_CYCLIC, fdev->dma_device.cap_mask);
+	dma_cap_set(DMA_MEMCPY, fdev->dma_device.cap_mask);
+
+	fdev->dma_device.dev = &pdev->dev;
+	fdev->dma_device.device_alloc_chan_resources = st_fdma_alloc_chan_res;
+	fdev->dma_device.device_free_chan_resources = st_fdma_free_chan_res;
+	fdev->dma_device.device_prep_dma_cyclic	= st_fdma_prep_dma_cyclic;
+	fdev->dma_device.device_prep_slave_sg = st_fdma_prep_slave_sg;
+	fdev->dma_device.device_prep_dma_memcpy = st_fdma_prep_dma_memcpy;
+	fdev->dma_device.device_tx_status = st_fdma_tx_status;
+	fdev->dma_device.device_issue_pending = st_fdma_issue_pending;
+	fdev->dma_device.device_terminate_all = st_fdma_terminate_all;
+	fdev->dma_device.device_config = st_fdma_slave_config;
+	fdev->dma_device.device_pause = st_fdma_pause;
+	fdev->dma_device.device_resume = st_fdma_resume;
+
+	fdev->dma_device.src_addr_widths = FDMA_DMA_BUSWIDTHS;
+	fdev->dma_device.dst_addr_widths = FDMA_DMA_BUSWIDTHS;
+	fdev->dma_device.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	fdev->dma_device.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+	ret = dma_async_device_register(&fdev->dma_device);
+	if (ret) {
+		dev_err(&pdev->dev,
+			"Failed to register DMA device (%d)\n", ret);
+		goto err_rproc;
+	}
+
+	ret = of_dma_controller_register(np, st_fdma_of_xlate, fdev);
+	if (ret) {
+		dev_err(&pdev->dev,
+			"Failed to register controller (%d)\n", ret);
+		goto err_dma_dev;
+	}
+
+	dev_info(&pdev->dev, "ST FDMA engine driver, irq:%d\n", fdev->irq);
+
+	return 0;
+
+err_dma_dev:
+	dma_async_device_unregister(&fdev->dma_device);
+err_rproc:
+	st_fdma_free(fdev);
+	st_slim_rproc_put(fdev->slim_rproc);
+err:
+	return ret;
+}
+
+static int st_fdma_remove(struct platform_device *pdev)
+{
+	struct st_fdma_dev *fdev = platform_get_drvdata(pdev);
+
+	devm_free_irq(&pdev->dev, fdev->irq, fdev);
+	st_slim_rproc_put(fdev->slim_rproc);
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&fdev->dma_device);
+
+	return 0;
+}
+
+static struct platform_driver st_fdma_platform_driver = {
+	.driver = {
+		.name = DRIVER_NAME,
+		.of_match_table = st_fdma_match,
+	},
+	.probe = st_fdma_probe,
+	.remove = st_fdma_remove,
+};
+module_platform_driver(st_fdma_platform_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("STMicroelectronics FDMA engine driver");
+MODULE_AUTHOR("Ludovic.barre <Ludovic.barre@st.com>");
+MODULE_AUTHOR("Peter Griffin <peter.griffin@linaro.org>");
+MODULE_ALIAS("platform:" DRIVER_NAME);
diff --git a/drivers/dma/st_fdma.h b/drivers/dma/st_fdma.h
new file mode 100644
index 0000000..c58e00d
--- /dev/null
+++ b/drivers/dma/st_fdma.h
@@ -0,0 +1,249 @@
+/*
+ * DMA driver header for STMicroelectronics STi FDMA controller
+ *
+ * Copyright (C) 2014 STMicroelectronics
+ *
+ * Author: Ludovic Barre <Ludovic.barre@st.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#ifndef __DMA_ST_FDMA_H
+#define __DMA_ST_FDMA_H
+
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/io.h>
+#include <linux/remoteproc/st_slim_rproc.h>
+#include "virt-dma.h"
+
+#define ST_FDMA_NR_DREQS 32
+#define FW_NAME_SIZE 30
+#define DRIVER_NAME "st-fdma"
+
+/**
+ * struct st_fdma_generic_node - Free running/paced generic node
+ *
+ * @length: Length in bytes of a line in a 2D mem to mem
+ * @sstride: Stride, in bytes, between source lines in a 2D data move
+ * @dstride: Stride, in bytes, between destination lines in a 2D data move
+ */
+struct st_fdma_generic_node {
+	u32 length;
+	u32 sstride;
+	u32 dstride;
+};
+
+/**
+ * struct st_fdma_hw_node - Node structure used by fdma hw
+ *
+ * @next: Pointer to next node
+ * @control: Transfer Control Parameters
+ * @nbytes: Number of Bytes to read
+ * @saddr: Source address
+ * @daddr: Destination address
+ *
+ * @generic: generic node for free running/paced transfer type
+ * Two other transfer types are possible, but not yet implemented
+ *
+ * The NODE structures must be aligned to a 32 byte boundary
+ */
+struct st_fdma_hw_node {
+	u32 next;
+	u32 control;
+	u32 nbytes;
+	u32 saddr;
+	u32 daddr;
+	union {
+		struct st_fdma_generic_node generic;
+	};
+} __aligned(32);
+
+/*
+ * node control parameters
+ */
+#define FDMA_NODE_CTRL_REQ_MAP_MASK	GENMASK(4, 0)
+#define FDMA_NODE_CTRL_REQ_MAP_FREE_RUN	0x0
+#define FDMA_NODE_CTRL_REQ_MAP_DREQ(n)	((n)&FDMA_NODE_CTRL_REQ_MAP_MASK)
+#define FDMA_NODE_CTRL_REQ_MAP_EXT		FDMA_NODE_CTRL_REQ_MAP_MASK
+#define FDMA_NODE_CTRL_SRC_MASK		GENMASK(6, 5)
+#define FDMA_NODE_CTRL_SRC_STATIC	BIT(5)
+#define FDMA_NODE_CTRL_SRC_INCR		BIT(6)
+#define FDMA_NODE_CTRL_DST_MASK		GENMASK(8, 7)
+#define FDMA_NODE_CTRL_DST_STATIC	BIT(7)
+#define FDMA_NODE_CTRL_DST_INCR		BIT(8)
+#define FDMA_NODE_CTRL_SECURE		BIT(15)
+#define FDMA_NODE_CTRL_PAUSE_EON	BIT(30)
+#define FDMA_NODE_CTRL_INT_EON		BIT(31)
+
+/**
+ * struct st_fdma_sw_node - descriptor structure for link list
+ *
+ * @pdesc: Physical address of desc
+ * @node: link used for putting this into a channel queue
+ */
+struct st_fdma_sw_node {
+	dma_addr_t pdesc;
+	struct st_fdma_hw_node *desc;
+};
+
+#define NAME_SZ 10
+
+struct st_fdma_driverdata {
+	u32 id;
+	char name[NAME_SZ];
+};
+
+struct st_fdma_desc {
+	struct virt_dma_desc vdesc;
+	struct st_fdma_chan *fchan;
+	bool iscyclic;
+	unsigned int n_nodes;
+	struct st_fdma_sw_node node[];
+};
+
+enum st_fdma_type {
+	ST_FDMA_TYPE_FREE_RUN,
+	ST_FDMA_TYPE_PACED,
+};
+
+struct st_fdma_cfg {
+	struct device_node *of_node;
+	enum st_fdma_type type;
+	dma_addr_t dev_addr;
+	enum dma_transfer_direction dir;
+	int req_line; /* request line */
+	long req_ctrl; /* Request control */
+};
+
+struct st_fdma_chan {
+	struct st_fdma_dev *fdev;
+	struct dma_pool *node_pool;
+	struct dma_slave_config scfg;
+	struct st_fdma_cfg cfg;
+
+	int dreq_line;
+
+	struct virt_dma_chan vchan;
+	struct st_fdma_desc *fdesc;
+	enum dma_status	status;
+};
+
+struct st_fdma_dev {
+	struct device *dev;
+	const struct st_fdma_driverdata *drvdata;
+	struct dma_device dma_device;
+
+	struct st_slim_rproc *slim_rproc;
+
+	int irq;
+
+	struct st_fdma_chan *chans;
+
+	spinlock_t dreq_lock;
+	unsigned long dreq_mask;
+
+	u32 nr_channels;
+	char fw_name[FW_NAME_SIZE];
+};
+
+/* Peripheral Registers */
+
+#define FDMA_CMD_STA_OFST	0xFC0
+#define FDMA_CMD_SET_OFST	0xFC4
+#define FDMA_CMD_CLR_OFST	0xFC8
+#define FDMA_CMD_MASK_OFST	0xFCC
+#define FDMA_CMD_START(ch)		(0x1 << (ch << 1))
+#define FDMA_CMD_PAUSE(ch)		(0x2 << (ch << 1))
+#define FDMA_CMD_FLUSH(ch)		(0x3 << (ch << 1))
+
+#define FDMA_INT_STA_OFST	0xFD0
+#define FDMA_INT_STA_CH			0x1
+#define FDMA_INT_STA_ERR		0x2
+
+#define FDMA_INT_SET_OFST	0xFD4
+#define FDMA_INT_CLR_OFST	0xFD8
+#define FDMA_INT_MASK_OFST	0xFDC
+
+#define fdma_read(fdev, name) \
+	readl((fdev)->slim_rproc->peri + name)
+
+#define fdma_write(fdev, val, name) \
+	writel((val), (fdev)->slim_rproc->peri + name)
+
+/* fchan interface (dmem) */
+#define FDMA_CH_CMD_OFST	0x200
+#define FDMA_CH_CMD_STA_MASK		GENMASK(1, 0)
+#define FDMA_CH_CMD_STA_IDLE		(0x0)
+#define FDMA_CH_CMD_STA_START		(0x1)
+#define FDMA_CH_CMD_STA_RUNNING		(0x2)
+#define FDMA_CH_CMD_STA_PAUSED		(0x3)
+#define FDMA_CH_CMD_ERR_MASK		GENMASK(4, 2)
+#define FDMA_CH_CMD_ERR_INT		(0x0 << 2)
+#define FDMA_CH_CMD_ERR_NAND		(0x1 << 2)
+#define FDMA_CH_CMD_ERR_MCHI		(0x2 << 2)
+#define FDMA_CH_CMD_DATA_MASK		GENMASK(31, 5)
+#define fchan_read(fchan, name) \
+	readl((fchan)->fdev->slim_rproc->mem[ST_SLIM_DMEM].cpu_addr \
+			+ (fchan)->vchan.chan.chan_id * 0x4 \
+			+ name)
+
+#define fchan_write(fchan, val, name) \
+	writel((val), (fchan)->fdev->slim_rproc->mem[ST_SLIM_DMEM].cpu_addr \
+			+ (fchan)->vchan.chan.chan_id * 0x4 \
+			+ name)
+
+/* req interface */
+#define FDMA_REQ_CTRL_OFST	0x240
+#define dreq_write(fchan, val, name) \
+	writel((val), (fchan)->fdev->slim_rproc->mem[ST_SLIM_DMEM].cpu_addr \
+			+ fchan->dreq_line * 0x04 \
+			+ name)
+/* node interface */
+#define FDMA_NODE_SZ 128
+#define FDMA_PTRN_OFST		0x800
+#define FDMA_CNTN_OFST		0x808
+#define FDMA_SADDRN_OFST	0x80c
+#define FDMA_DADDRN_OFST	0x810
+#define fnode_read(fchan, name) \
+	readl((fchan)->fdev->slim_rproc->mem[ST_SLIM_DMEM].cpu_addr \
+			+ (fchan)->vchan.chan.chan_id * FDMA_NODE_SZ \
+			+ name)
+
+#define fnode_write(fchan, val, name) \
+	writel((val), (fchan)->fdev->slim_rproc->mem[ST_SLIM_DMEM].cpu_addr \
+			+ (fchan)->vchan.chan.chan_id * FDMA_NODE_SZ \
+			+ name)
+
+/*
+ * request control bits
+ */
+#define FDMA_REQ_CTRL_NUM_OPS_MASK	GENMASK(31, 24)
+#define FDMA_REQ_CTRL_NUM_OPS(n)	(FDMA_REQ_CTRL_NUM_OPS_MASK & \
+					((n) << 24))
+#define FDMA_REQ_CTRL_INITIATOR_MASK	BIT(22)
+#define FDMA_REQ_CTRL_INIT0		(0x0 << 22)
+#define FDMA_REQ_CTRL_INIT1		(0x1 << 22)
+#define FDMA_REQ_CTRL_INC_ADDR_ON	BIT(21)
+#define FDMA_REQ_CTRL_DATA_SWAP_ON	BIT(17)
+#define FDMA_REQ_CTRL_WNR		BIT(14)
+#define FDMA_REQ_CTRL_OPCODE_MASK	GENMASK(7, 4)
+#define FDMA_REQ_CTRL_OPCODE_LD_ST1	(0x0 << 4)
+#define FDMA_REQ_CTRL_OPCODE_LD_ST2	(0x1 << 4)
+#define FDMA_REQ_CTRL_OPCODE_LD_ST4	(0x2 << 4)
+#define FDMA_REQ_CTRL_OPCODE_LD_ST8	(0x3 << 4)
+#define FDMA_REQ_CTRL_OPCODE_LD_ST16	(0x4 << 4)
+#define FDMA_REQ_CTRL_OPCODE_LD_ST32	(0x5 << 4)
+#define FDMA_REQ_CTRL_OPCODE_LD_ST64	(0x6 << 4)
+#define FDMA_REQ_CTRL_HOLDOFF_MASK	GENMASK(2, 0)
+#define FDMA_REQ_CTRL_HOLDOFF(n)	((n) & FDMA_REQ_CTRL_HOLDOFF_MASK)
+
+/* bits used by client to configure request control */
+#define FDMA_REQ_CTRL_CFG_MASK (FDMA_REQ_CTRL_HOLDOFF_MASK | \
+				FDMA_REQ_CTRL_DATA_SWAP_ON | \
+				FDMA_REQ_CTRL_INC_ADDR_ON | \
+				FDMA_REQ_CTRL_INITIATOR_MASK)
+
+#endif	/* __DMA_ST_FDMA_H */
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
new file mode 100644
index 0000000..f4edfc5
--- /dev/null
+++ b/drivers/dma/ste_dma40.c
@@ -0,0 +1,3709 @@
+/*
+ * Copyright (C) Ericsson AB 2007-2008
+ * Copyright (C) ST-Ericsson SA 2008-2010
+ * Author: Per Forlin <per.forlin@stericsson.com> for ST-Ericsson
+ * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/log2.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+#include <linux/err.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/amba/bus.h>
+#include <linux/regulator/consumer.h>
+#include <linux/platform_data/dma-ste-dma40.h>
+
+#include "dmaengine.h"
+#include "ste_dma40_ll.h"
+
+#define D40_NAME "dma40"
+
+#define D40_PHY_CHAN -1
+
+/* For masking out/in 2 bit channel positions */
+#define D40_CHAN_POS(chan)  (2 * (chan / 2))
+#define D40_CHAN_POS_MASK(chan) (0x3 << D40_CHAN_POS(chan))
+
+/* Maximum iterations taken before giving up suspending a channel */
+#define D40_SUSPEND_MAX_IT 500
+
+/* Milliseconds */
+#define DMA40_AUTOSUSPEND_DELAY	100
+
+/* Hardware requirement on LCLA alignment */
+#define LCLA_ALIGNMENT 0x40000
+
+/* Max number of links per event group */
+#define D40_LCLA_LINK_PER_EVENT_GRP 128
+#define D40_LCLA_END D40_LCLA_LINK_PER_EVENT_GRP
+
+/* Max number of logical channels per physical channel */
+#define D40_MAX_LOG_CHAN_PER_PHY 32
+
+/* Attempts before giving up to trying to get pages that are aligned */
+#define MAX_LCLA_ALLOC_ATTEMPTS 256
+
+/* Bit markings for allocation map */
+#define D40_ALLOC_FREE		BIT(31)
+#define D40_ALLOC_PHY		BIT(30)
+#define D40_ALLOC_LOG_FREE	0
+
+#define D40_MEMCPY_MAX_CHANS	8
+
+/* Reserved event lines for memcpy only. */
+#define DB8500_DMA_MEMCPY_EV_0	51
+#define DB8500_DMA_MEMCPY_EV_1	56
+#define DB8500_DMA_MEMCPY_EV_2	57
+#define DB8500_DMA_MEMCPY_EV_3	58
+#define DB8500_DMA_MEMCPY_EV_4	59
+#define DB8500_DMA_MEMCPY_EV_5	60
+
+static int dma40_memcpy_channels[] = {
+	DB8500_DMA_MEMCPY_EV_0,
+	DB8500_DMA_MEMCPY_EV_1,
+	DB8500_DMA_MEMCPY_EV_2,
+	DB8500_DMA_MEMCPY_EV_3,
+	DB8500_DMA_MEMCPY_EV_4,
+	DB8500_DMA_MEMCPY_EV_5,
+};
+
+/* Default configuration for physical memcpy */
+static const struct stedma40_chan_cfg dma40_memcpy_conf_phy = {
+	.mode = STEDMA40_MODE_PHYSICAL,
+	.dir = DMA_MEM_TO_MEM,
+
+	.src_info.data_width = DMA_SLAVE_BUSWIDTH_1_BYTE,
+	.src_info.psize = STEDMA40_PSIZE_PHY_1,
+	.src_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL,
+
+	.dst_info.data_width = DMA_SLAVE_BUSWIDTH_1_BYTE,
+	.dst_info.psize = STEDMA40_PSIZE_PHY_1,
+	.dst_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL,
+};
+
+/* Default configuration for logical memcpy */
+static const struct stedma40_chan_cfg dma40_memcpy_conf_log = {
+	.mode = STEDMA40_MODE_LOGICAL,
+	.dir = DMA_MEM_TO_MEM,
+
+	.src_info.data_width = DMA_SLAVE_BUSWIDTH_1_BYTE,
+	.src_info.psize = STEDMA40_PSIZE_LOG_1,
+	.src_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL,
+
+	.dst_info.data_width = DMA_SLAVE_BUSWIDTH_1_BYTE,
+	.dst_info.psize = STEDMA40_PSIZE_LOG_1,
+	.dst_info.flow_ctrl = STEDMA40_NO_FLOW_CTRL,
+};
+
+/**
+ * enum d40_command - The different commands and/or statuses.
+ *
+ * @D40_DMA_STOP: DMA channel command STOP or status STOPPED.
+ * @D40_DMA_RUN: DMA channel command RUN or status RUNNING.
+ * @D40_DMA_SUSPEND_REQ: Request the DMA to SUSPEND as soon as possible.
+ * @D40_DMA_SUSPENDED: The DMA channel is SUSPENDED.
+ */
+enum d40_command {
+	D40_DMA_STOP		= 0,
+	D40_DMA_RUN		= 1,
+	D40_DMA_SUSPEND_REQ	= 2,
+	D40_DMA_SUSPENDED	= 3
+};
+
+/*
+ * enum d40_events - The different Event Enables for the event lines.
+ *
+ * @D40_DEACTIVATE_EVENTLINE: De-activate Event line, stopping the logical chan.
+ * @D40_ACTIVATE_EVENTLINE: Activate the Event line, to start a logical chan.
+ * @D40_SUSPEND_REQ_EVENTLINE: Request to suspend an event line.
+ * @D40_ROUND_EVENTLINE: Status check for event line.
+ */
+
+enum d40_events {
+	D40_DEACTIVATE_EVENTLINE	= 0,
+	D40_ACTIVATE_EVENTLINE		= 1,
+	D40_SUSPEND_REQ_EVENTLINE	= 2,
+	D40_ROUND_EVENTLINE		= 3
+};
+
+/*
+ * These are the registers that have to be saved and later restored
+ * when the DMA hw is powered off.
+ * TODO: Add save/restore of D40_DREG_GCC on dma40 v3 or later, if that works.
+ */
+static u32 d40_backup_regs[] = {
+	D40_DREG_LCPA,
+	D40_DREG_LCLA,
+	D40_DREG_PRMSE,
+	D40_DREG_PRMSO,
+	D40_DREG_PRMOE,
+	D40_DREG_PRMOO,
+};
+
+#define BACKUP_REGS_SZ ARRAY_SIZE(d40_backup_regs)
+
+/*
+ * Since 9540 and 8540 have the same HW revision:
+ * use v4a for 9540 or earlier
+ * use v4b for 8540 or later
+ * HW revision:
+ * DB8500ed has revision 0
+ * DB8500v1 has revision 2
+ * DB8500v2 has revision 3
+ * AP9540v1 has revision 4
+ * DB8540v1 has revision 4
+ * TODO: Check if all these registers have to be saved/restored on dma40 v4a
+ */
+static u32 d40_backup_regs_v4a[] = {
+	D40_DREG_PSEG1,
+	D40_DREG_PSEG2,
+	D40_DREG_PSEG3,
+	D40_DREG_PSEG4,
+	D40_DREG_PCEG1,
+	D40_DREG_PCEG2,
+	D40_DREG_PCEG3,
+	D40_DREG_PCEG4,
+	D40_DREG_RSEG1,
+	D40_DREG_RSEG2,
+	D40_DREG_RSEG3,
+	D40_DREG_RSEG4,
+	D40_DREG_RCEG1,
+	D40_DREG_RCEG2,
+	D40_DREG_RCEG3,
+	D40_DREG_RCEG4,
+};
+
+#define BACKUP_REGS_SZ_V4A ARRAY_SIZE(d40_backup_regs_v4a)
+
+static u32 d40_backup_regs_v4b[] = {
+	D40_DREG_CPSEG1,
+	D40_DREG_CPSEG2,
+	D40_DREG_CPSEG3,
+	D40_DREG_CPSEG4,
+	D40_DREG_CPSEG5,
+	D40_DREG_CPCEG1,
+	D40_DREG_CPCEG2,
+	D40_DREG_CPCEG3,
+	D40_DREG_CPCEG4,
+	D40_DREG_CPCEG5,
+	D40_DREG_CRSEG1,
+	D40_DREG_CRSEG2,
+	D40_DREG_CRSEG3,
+	D40_DREG_CRSEG4,
+	D40_DREG_CRSEG5,
+	D40_DREG_CRCEG1,
+	D40_DREG_CRCEG2,
+	D40_DREG_CRCEG3,
+	D40_DREG_CRCEG4,
+	D40_DREG_CRCEG5,
+};
+
+#define BACKUP_REGS_SZ_V4B ARRAY_SIZE(d40_backup_regs_v4b)
+
+static u32 d40_backup_regs_chan[] = {
+	D40_CHAN_REG_SSCFG,
+	D40_CHAN_REG_SSELT,
+	D40_CHAN_REG_SSPTR,
+	D40_CHAN_REG_SSLNK,
+	D40_CHAN_REG_SDCFG,
+	D40_CHAN_REG_SDELT,
+	D40_CHAN_REG_SDPTR,
+	D40_CHAN_REG_SDLNK,
+};
+
+#define BACKUP_REGS_SZ_MAX ((BACKUP_REGS_SZ_V4A > BACKUP_REGS_SZ_V4B) ? \
+			     BACKUP_REGS_SZ_V4A : BACKUP_REGS_SZ_V4B)
+
+/**
+ * struct d40_interrupt_lookup - lookup table for interrupt handler
+ *
+ * @src: Interrupt mask register.
+ * @clr: Interrupt clear register.
+ * @is_error: true if this is an error interrupt.
+ * @offset: start delta in the lookup_log_chans in d40_base. If equal to
+ * D40_PHY_CHAN, the lookup_phy_chans shall be used instead.
+ */
+struct d40_interrupt_lookup {
+	u32 src;
+	u32 clr;
+	bool is_error;
+	int offset;
+};
+
+
+static struct d40_interrupt_lookup il_v4a[] = {
+	{D40_DREG_LCTIS0, D40_DREG_LCICR0, false,  0},
+	{D40_DREG_LCTIS1, D40_DREG_LCICR1, false, 32},
+	{D40_DREG_LCTIS2, D40_DREG_LCICR2, false, 64},
+	{D40_DREG_LCTIS3, D40_DREG_LCICR3, false, 96},
+	{D40_DREG_LCEIS0, D40_DREG_LCICR0, true,   0},
+	{D40_DREG_LCEIS1, D40_DREG_LCICR1, true,  32},
+	{D40_DREG_LCEIS2, D40_DREG_LCICR2, true,  64},
+	{D40_DREG_LCEIS3, D40_DREG_LCICR3, true,  96},
+	{D40_DREG_PCTIS,  D40_DREG_PCICR,  false, D40_PHY_CHAN},
+	{D40_DREG_PCEIS,  D40_DREG_PCICR,  true,  D40_PHY_CHAN},
+};
+
+static struct d40_interrupt_lookup il_v4b[] = {
+	{D40_DREG_CLCTIS1, D40_DREG_CLCICR1, false,  0},
+	{D40_DREG_CLCTIS2, D40_DREG_CLCICR2, false, 32},
+	{D40_DREG_CLCTIS3, D40_DREG_CLCICR3, false, 64},
+	{D40_DREG_CLCTIS4, D40_DREG_CLCICR4, false, 96},
+	{D40_DREG_CLCTIS5, D40_DREG_CLCICR5, false, 128},
+	{D40_DREG_CLCEIS1, D40_DREG_CLCICR1, true,   0},
+	{D40_DREG_CLCEIS2, D40_DREG_CLCICR2, true,  32},
+	{D40_DREG_CLCEIS3, D40_DREG_CLCICR3, true,  64},
+	{D40_DREG_CLCEIS4, D40_DREG_CLCICR4, true,  96},
+	{D40_DREG_CLCEIS5, D40_DREG_CLCICR5, true,  128},
+	{D40_DREG_CPCTIS,  D40_DREG_CPCICR,  false, D40_PHY_CHAN},
+	{D40_DREG_CPCEIS,  D40_DREG_CPCICR,  true,  D40_PHY_CHAN},
+};
+
+/**
+ * struct d40_reg_val - simple lookup struct
+ *
+ * @reg: The register.
+ * @val: The value that belongs to the register in reg.
+ */
+struct d40_reg_val {
+	unsigned int reg;
+	unsigned int val;
+};
+
+static __initdata struct d40_reg_val dma_init_reg_v4a[] = {
+	/* Clock every part of the DMA block from start */
+	{ .reg = D40_DREG_GCC,    .val = D40_DREG_GCC_ENABLE_ALL},
+
+	/* Interrupts on all logical channels */
+	{ .reg = D40_DREG_LCMIS0, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_LCMIS1, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_LCMIS2, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_LCMIS3, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_LCICR0, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_LCICR1, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_LCICR2, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_LCICR3, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_LCTIS0, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_LCTIS1, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_LCTIS2, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_LCTIS3, .val = 0xFFFFFFFF}
+};
+static __initdata struct d40_reg_val dma_init_reg_v4b[] = {
+	/* Clock every part of the DMA block from start */
+	{ .reg = D40_DREG_GCC,    .val = D40_DREG_GCC_ENABLE_ALL},
+
+	/* Interrupts on all logical channels */
+	{ .reg = D40_DREG_CLCMIS1, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_CLCMIS2, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_CLCMIS3, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_CLCMIS4, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_CLCMIS5, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_CLCICR1, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_CLCICR2, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_CLCICR3, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_CLCICR4, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_CLCICR5, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_CLCTIS1, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_CLCTIS2, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_CLCTIS3, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_CLCTIS4, .val = 0xFFFFFFFF},
+	{ .reg = D40_DREG_CLCTIS5, .val = 0xFFFFFFFF}
+};
+
+/**
+ * struct d40_lli_pool - Structure for keeping LLIs in memory
+ *
+ * @base: Pointer to memory area when the pre_alloc_lli's are not large
+ * enough, i.e. bigger than the most common case, 1 dst and 1 src. NULL if
+ * pre_alloc_lli is used.
+ * @dma_addr: DMA address, if mapped
+ * @size: The size in bytes of the memory at base or the size of pre_alloc_lli.
+ * @pre_alloc_lli: Pre allocated area for the most common case of transfers,
+ * one buffer to one buffer.
+ */
+struct d40_lli_pool {
+	void	*base;
+	int	 size;
+	dma_addr_t	dma_addr;
+	/* Space for dst and src, plus an extra for padding */
+	u8	 pre_alloc_lli[3 * sizeof(struct d40_phy_lli)];
+};
+
+/**
+ * struct d40_desc - A descriptor is one DMA job.
+ *
+ * @lli_phy: LLI settings for physical channel. Both src and dst
+ * point into the lli_pool, to base if lli_len > 1 or to pre_alloc_lli if
+ * lli_len equals one.
+ * @lli_log: Same as above but for logical channels.
+ * @lli_pool: The pool with two entries pre-allocated.
+ * @lli_len: Number of llis of current descriptor.
+ * @lli_current: Number of transferred llis.
+ * @lcla_alloc: Number of LCLA entries allocated.
+ * @txd: DMA engine struct. Used among other things for communication
+ * during a transfer.
+ * @node: List entry.
+ * @is_in_client_list: true if the client owns this descriptor.
+ * @cyclic: true if this is a cyclic job
+ *
+ * This descriptor is used for both logical and physical transfers.
+ */
+struct d40_desc {
+	/* LLI physical */
+	struct d40_phy_lli_bidir	 lli_phy;
+	/* LLI logical */
+	struct d40_log_lli_bidir	 lli_log;
+
+	struct d40_lli_pool		 lli_pool;
+	int				 lli_len;
+	int				 lli_current;
+	int				 lcla_alloc;
+
+	struct dma_async_tx_descriptor	 txd;
+	struct list_head		 node;
+
+	bool				 is_in_client_list;
+	bool				 cyclic;
+};
+
+/**
+ * struct d40_lcla_pool - LCLA pool settings and data.
+ *
+ * @base: The virtual address of LCLA. 18 bit aligned.
+ * @base_unaligned: The original kmalloc pointer, if kmalloc is used.
+ * This pointer is only there for clean-up on error.
+ * @pages: The number of pages needed for all physical channels.
+ * Only used later for clean-up on error
+ * @lock: Lock to protect the content in this struct.
+ * @alloc_map: Big map of which LCLA entry is owned by which job.
+ */
+struct d40_lcla_pool {
+	void		*base;
+	dma_addr_t	dma_addr;
+	void		*base_unaligned;
+	int		 pages;
+	spinlock_t	 lock;
+	struct d40_desc	**alloc_map;
+};
+
+/**
+ * struct d40_phy_res - struct for handling eventlines mapped to physical
+ * channels.
+ *
+ * @lock: A lock protecting this entity.
+ * @reserved: True if used by secure world or otherwise.
+ * @num: The physical channel number of this entity.
+ * @allocated_src: Bit mapped to show which src event lines are mapped to
+ * this physical channel. Can also be free or physically allocated.
+ * @allocated_dst: Same as for src but is dst.
+ * allocated_dst and allocated_src use the D40_ALLOC* defines as well as
+ * the event line number.
+ * @use_soft_lli: To mark if the channel's linked lists are managed by SW.
+ */
+struct d40_phy_res {
+	spinlock_t lock;
+	bool	   reserved;
+	int	   num;
+	u32	   allocated_src;
+	u32	   allocated_dst;
+	bool	   use_soft_lli;
+};
+
+struct d40_base;
+
+/**
+ * struct d40_chan - Struct that describes a channel.
+ *
+ * @lock: A spinlock to protect this struct.
+ * @log_num: The logical number, if any, of this channel.
+ * @pending_tx: The number of pending transfers. Used between interrupt handler
+ * and tasklet.
+ * @busy: Set to true when transfer is ongoing on this channel.
+ * @phy_chan: Pointer to physical channel which this instance runs on. If this
+ * pointer is NULL, then the channel is not allocated.
+ * @chan: DMA engine handle.
+ * @tasklet: Tasklet that gets scheduled from interrupt context to complete a
+ * transfer and call client callback.
+ * @client: Client owned descriptor list.
+ * @pending_queue: Submitted jobs, to be issued by issue_pending()
+ * @active: Active descriptor.
+ * @done: Completed jobs
+ * @queue: Queued jobs.
+ * @prepare_queue: Prepared jobs.
+ * @dma_cfg: The client configuration of this dma channel.
+ * @configured: whether the dma_cfg configuration is valid
+ * @base: Pointer to the device instance struct.
+ * @src_def_cfg: Default cfg register setting for src.
+ * @dst_def_cfg: Default cfg register setting for dst.
+ * @log_def: Default logical channel settings.
+ * @lcpa: Pointer to dst and src lcpa settings.
+ * @runtime_addr: runtime configured address.
+ * @runtime_direction: runtime configured direction.
+ *
+ * This struct can either "be" a logical or a physical channel.
+ */
+struct d40_chan {
+	spinlock_t			 lock;
+	int				 log_num;
+	int				 pending_tx;
+	bool				 busy;
+	struct d40_phy_res		*phy_chan;
+	struct dma_chan			 chan;
+	struct tasklet_struct		 tasklet;
+	struct list_head		 client;
+	struct list_head		 pending_queue;
+	struct list_head		 active;
+	struct list_head		 done;
+	struct list_head		 queue;
+	struct list_head		 prepare_queue;
+	struct stedma40_chan_cfg	 dma_cfg;
+	bool				 configured;
+	struct d40_base			*base;
+	/* Default register configurations */
+	u32				 src_def_cfg;
+	u32				 dst_def_cfg;
+	struct d40_def_lcsp		 log_def;
+	struct d40_log_lli_full		*lcpa;
+	/* Runtime reconfiguration */
+	dma_addr_t			runtime_addr;
+	enum dma_transfer_direction	runtime_direction;
+};
+
+/**
+ * struct d40_gen_dmac - generic values to represent u8500/u8540 DMA
+ * controller
+ *
+ * @backup: the pointer to the registers address array for backup
+ * @backup_size: the size of the registers address array for backup
+ * @realtime_en: the realtime enable register
+ * @realtime_clear: the realtime clear register
+ * @high_prio_en: the high priority enable register
+ * @high_prio_clear: the high priority clear register
+ * @interrupt_en: the interrupt enable register
+ * @interrupt_clear: the interrupt clear register
+ * @il: the pointer to struct d40_interrupt_lookup
+ * @il_size: the size of d40_interrupt_lookup array
+ * @init_reg: the pointer to the struct d40_reg_val
+ * @init_reg_size: the size of d40_reg_val array
+ */
+struct d40_gen_dmac {
+	u32				*backup;
+	u32				 backup_size;
+	u32				 realtime_en;
+	u32				 realtime_clear;
+	u32				 high_prio_en;
+	u32				 high_prio_clear;
+	u32				 interrupt_en;
+	u32				 interrupt_clear;
+	struct d40_interrupt_lookup	*il;
+	u32				 il_size;
+	struct d40_reg_val		*init_reg;
+	u32				 init_reg_size;
+};
+
+/**
+ * struct d40_base - The big global struct, one for each probe'd instance.
+ *
+ * @interrupt_lock: Lock used to make sure one interrupt is handled at a time.
+ * @execmd_lock: Lock for execute command usage since several channels share
+ * the same physical register.
+ * @dev: The device structure.
+ * @virtbase: The virtual base address of the DMA's register.
+ * @rev: silicon revision detected.
+ * @clk: Pointer to the DMA clock structure.
+ * @phy_start: Physical memory start of the DMA registers.
+ * @phy_size: Size of the DMA register map.
+ * @irq: The IRQ number.
+ * @num_memcpy_chans: The number of channels used for memcpy (mem-to-mem
+ * transfers).
+ * @num_phy_chans: The number of physical channels. Read from HW. This
+ * is the number of available channels for this driver, not counting "Secure
+ * mode" allocated physical channels.
+ * @num_log_chans: The number of logical channels. Calculated from
+ * num_phy_chans.
+ * @dma_both: dma_device channels that can do both memcpy and slave transfers.
+ * @dma_slave: dma_device channels that can only do slave transfers.
+ * @dma_memcpy: dma_device channels that can only do memcpy transfers.
+ * @phy_chans: Room for all possible physical channels in system.
+ * @log_chans: Room for all possible logical channels in system.
+ * @lookup_log_chans: Used to map interrupt number to logical channel. Points
+ * to log_chans entries.
+ * @lookup_phy_chans: Used to map interrupt number to physical channel. Points
+ * to phy_chans entries.
+ * @plat_data: Pointer to provided platform_data which is the driver
+ * configuration.
+ * @lcpa_regulator: Pointer to hold the regulator for the esram bank for lcla.
+ * @phy_res: Vector containing all physical channels.
+ * @lcla_pool: lcla pool settings and data.
+ * @lcpa_base: The virtual mapped address of LCPA.
+ * @phy_lcpa: The physical address of the LCPA.
+ * @lcpa_size: The size of the LCPA area.
+ * @desc_slab: cache for descriptors.
+ * @reg_val_backup: Here the values of some hardware registers are stored
+ * before the DMA is powered off. They are restored when the power is back on.
+ * @reg_val_backup_v4: Backup of registers that only exist on dma40 v3 and
+ * later
+ * @reg_val_backup_chan: Backup data for standard channel parameter registers.
+ * @regs_interrupt: Scratch space for registers during interrupt.
+ * @gcc_pwr_off_mask: Mask to maintain the channels that can be turned off.
+ * @gen_dmac: the struct for generic registers values to represent u8500/8540
+ * DMA controller
+ */
+struct d40_base {
+	spinlock_t			 interrupt_lock;
+	spinlock_t			 execmd_lock;
+	struct device			 *dev;
+	void __iomem			 *virtbase;
+	u8				  rev:4;
+	struct clk			 *clk;
+	phys_addr_t			  phy_start;
+	resource_size_t			  phy_size;
+	int				  irq;
+	int				  num_memcpy_chans;
+	int				  num_phy_chans;
+	int				  num_log_chans;
+	struct device_dma_parameters	  dma_parms;
+	struct dma_device		  dma_both;
+	struct dma_device		  dma_slave;
+	struct dma_device		  dma_memcpy;
+	struct d40_chan			 *phy_chans;
+	struct d40_chan			 *log_chans;
+	struct d40_chan			**lookup_log_chans;
+	struct d40_chan			**lookup_phy_chans;
+	struct stedma40_platform_data	 *plat_data;
+	struct regulator		 *lcpa_regulator;
+	/* Physical half channels */
+	struct d40_phy_res		 *phy_res;
+	struct d40_lcla_pool		  lcla_pool;
+	void				 *lcpa_base;
+	dma_addr_t			  phy_lcpa;
+	resource_size_t			  lcpa_size;
+	struct kmem_cache		 *desc_slab;
+	u32				  reg_val_backup[BACKUP_REGS_SZ];
+	u32				  reg_val_backup_v4[BACKUP_REGS_SZ_MAX];
+	u32				 *reg_val_backup_chan;
+	u32				 *regs_interrupt;
+	u16				  gcc_pwr_off_mask;
+	struct d40_gen_dmac		  gen_dmac;
+};
+
+static struct device *chan2dev(struct d40_chan *d40c)
+{
+	return &d40c->chan.dev->device;
+}
+
+static bool chan_is_physical(struct d40_chan *chan)
+{
+	return chan->log_num == D40_PHY_CHAN;
+}
+
+static bool chan_is_logical(struct d40_chan *chan)
+{
+	return !chan_is_physical(chan);
+}
+
+static void __iomem *chan_base(struct d40_chan *chan)
+{
+	return chan->base->virtbase + D40_DREG_PCBASE +
+	       chan->phy_chan->num * D40_DREG_PCDELTA;
+}
+
+#define d40_err(dev, format, arg...)		\
+	dev_err(dev, "[%s] " format, __func__, ## arg)
+
+#define chan_err(d40c, format, arg...)		\
+	d40_err(chan2dev(d40c), format, ## arg)
+
+static int d40_pool_lli_alloc(struct d40_chan *d40c, struct d40_desc *d40d,
+			      int lli_len)
+{
+	bool is_log = chan_is_logical(d40c);
+	u32 align;
+	void *base;
+
+	if (is_log)
+		align = sizeof(struct d40_log_lli);
+	else
+		align = sizeof(struct d40_phy_lli);
+
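+	/*
+	 * A single-link job fits in the area pre-allocated inside the
+	 * descriptor; longer chains get a kmalloc'd buffer sized for both
+	 * the src and dst LLI lists plus alignment slack.
+	 */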
+	if (lli_len == 1) {
+		base = d40d->lli_pool.pre_alloc_lli;
+		d40d->lli_pool.size = sizeof(d40d->lli_pool.pre_alloc_lli);
+		d40d->lli_pool.base = NULL;
+	} else {
+		d40d->lli_pool.size = lli_len * 2 * align;
+
+		base = kmalloc(d40d->lli_pool.size + align, GFP_NOWAIT);
+		d40d->lli_pool.base = base;
+
+		if (d40d->lli_pool.base == NULL)
+			return -ENOMEM;
+	}
+
+	if (is_log) {
+		d40d->lli_log.src = PTR_ALIGN(base, align);
+		d40d->lli_log.dst = d40d->lli_log.src + lli_len;
+
+		d40d->lli_pool.dma_addr = 0;
+	} else {
+		d40d->lli_phy.src = PTR_ALIGN(base, align);
+		d40d->lli_phy.dst = d40d->lli_phy.src + lli_len;
+
+		d40d->lli_pool.dma_addr = dma_map_single(d40c->base->dev,
+							 d40d->lli_phy.src,
+							 d40d->lli_pool.size,
+							 DMA_TO_DEVICE);
+
+		if (dma_mapping_error(d40c->base->dev,
+				      d40d->lli_pool.dma_addr)) {
+			kfree(d40d->lli_pool.base);
+			d40d->lli_pool.base = NULL;
+			d40d->lli_pool.dma_addr = 0;
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+static void d40_pool_lli_free(struct d40_chan *d40c, struct d40_desc *d40d)
+{
+	if (d40d->lli_pool.dma_addr)
+		dma_unmap_single(d40c->base->dev, d40d->lli_pool.dma_addr,
+				 d40d->lli_pool.size, DMA_TO_DEVICE);
+
+	kfree(d40d->lli_pool.base);
+	d40d->lli_pool.base = NULL;
+	d40d->lli_pool.size = 0;
+	d40d->lli_log.src = NULL;
+	d40d->lli_log.dst = NULL;
+	d40d->lli_phy.src = NULL;
+	d40d->lli_phy.dst = NULL;
+}
+
+static int d40_lcla_alloc_one(struct d40_chan *d40c,
+			      struct d40_desc *d40d)
+{
+	unsigned long flags;
+	int i;
+	int ret = -EINVAL;
+
+	spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags);
+
+	/*
+	 * Allocate both src and dst at the same time, so only half of the
+	 * LCLA entries are iterated; start at 1, since 0 can't be used
+	 * (zero is reserved as the end marker).
+	 */
+	for (i = 1 ; i < D40_LCLA_LINK_PER_EVENT_GRP / 2; i++) {
+		int idx = d40c->phy_chan->num * D40_LCLA_LINK_PER_EVENT_GRP + i;
+
+		if (!d40c->base->lcla_pool.alloc_map[idx]) {
+			d40c->base->lcla_pool.alloc_map[idx] = d40d;
+			d40d->lcla_alloc++;
+			ret = i;
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags);
+
+	return ret;
+}
+
+static int d40_lcla_free_all(struct d40_chan *d40c,
+			     struct d40_desc *d40d)
+{
+	unsigned long flags;
+	int i;
+	int ret = -EINVAL;
+
+	if (chan_is_physical(d40c))
+		return 0;
+
+	spin_lock_irqsave(&d40c->base->lcla_pool.lock, flags);
+
+	for (i = 1 ; i < D40_LCLA_LINK_PER_EVENT_GRP / 2; i++) {
+		int idx = d40c->phy_chan->num * D40_LCLA_LINK_PER_EVENT_GRP + i;
+
+		if (d40c->base->lcla_pool.alloc_map[idx] == d40d) {
+			d40c->base->lcla_pool.alloc_map[idx] = NULL;
+			d40d->lcla_alloc--;
+			if (d40d->lcla_alloc == 0) {
+				ret = 0;
+				break;
+			}
+		}
+	}
+
+	spin_unlock_irqrestore(&d40c->base->lcla_pool.lock, flags);
+
+	return ret;
+
+}
+
+static void d40_desc_remove(struct d40_desc *d40d)
+{
+	list_del(&d40d->node);
+}
+
+static struct d40_desc *d40_desc_get(struct d40_chan *d40c)
+{
+	struct d40_desc *desc = NULL;
+
+	if (!list_empty(&d40c->client)) {
+		struct d40_desc *d;
+		struct d40_desc *_d;
+
+		list_for_each_entry_safe(d, _d, &d40c->client, node) {
+			if (async_tx_test_ack(&d->txd)) {
+				d40_desc_remove(d);
+				desc = d;
+				memset(desc, 0, sizeof(*desc));
+				break;
+			}
+		}
+	}
+
+	if (!desc)
+		desc = kmem_cache_zalloc(d40c->base->desc_slab, GFP_NOWAIT);
+
+	if (desc)
+		INIT_LIST_HEAD(&desc->node);
+
+	return desc;
+}
+
+static void d40_desc_free(struct d40_chan *d40c, struct d40_desc *d40d)
+{
+
+	d40_pool_lli_free(d40c, d40d);
+	d40_lcla_free_all(d40c, d40d);
+	kmem_cache_free(d40c->base->desc_slab, d40d);
+}
+
+static void d40_desc_submit(struct d40_chan *d40c, struct d40_desc *desc)
+{
+	list_add_tail(&desc->node, &d40c->active);
+}
+
+static void d40_phy_lli_load(struct d40_chan *chan, struct d40_desc *desc)
+{
+	struct d40_phy_lli *lli_dst = desc->lli_phy.dst;
+	struct d40_phy_lli *lli_src = desc->lli_phy.src;
+	void __iomem *base = chan_base(chan);
+
+	writel(lli_src->reg_cfg, base + D40_CHAN_REG_SSCFG);
+	writel(lli_src->reg_elt, base + D40_CHAN_REG_SSELT);
+	writel(lli_src->reg_ptr, base + D40_CHAN_REG_SSPTR);
+	writel(lli_src->reg_lnk, base + D40_CHAN_REG_SSLNK);
+
+	writel(lli_dst->reg_cfg, base + D40_CHAN_REG_SDCFG);
+	writel(lli_dst->reg_elt, base + D40_CHAN_REG_SDELT);
+	writel(lli_dst->reg_ptr, base + D40_CHAN_REG_SDPTR);
+	writel(lli_dst->reg_lnk, base + D40_CHAN_REG_SDLNK);
+}
+
+static void d40_desc_done(struct d40_chan *d40c, struct d40_desc *desc)
+{
+	list_add_tail(&desc->node, &d40c->done);
+}
+
+static void d40_log_lli_to_lcxa(struct d40_chan *chan, struct d40_desc *desc)
+{
+	struct d40_lcla_pool *pool = &chan->base->lcla_pool;
+	struct d40_log_lli_bidir *lli = &desc->lli_log;
+	int lli_current = desc->lli_current;
+	int lli_len = desc->lli_len;
+	bool cyclic = desc->cyclic;
+	int curr_lcla = -EINVAL;
+	int first_lcla = 0;
+	bool use_esram_lcla = chan->base->plat_data->use_esram_lcla;
+	bool linkback;
+
+	/*
+	 * We may have partially running cyclic transfers, in case we didn't get
+	 * enough LCLA entries.
+	 */
+	linkback = cyclic && lli_current == 0;
+
+	/*
+	 * For linkback, we need one LCLA even with only one link, because we
+	 * can't link back to the one in LCPA space
+	 */
+	if (linkback || (lli_len - lli_current > 1)) {
+		/*
+		 * If the channel is expected to use only soft_lli don't
+		 * allocate a lcla. This is to avoid a HW issue that exists
+		 * in some controller during a peripheral to memory transfer
+		 * that uses linked lists.
+		 */
+		if (!(chan->phy_chan->use_soft_lli &&
+			chan->dma_cfg.dir == DMA_DEV_TO_MEM))
+			curr_lcla = d40_lcla_alloc_one(chan, desc);
+
+		first_lcla = curr_lcla;
+	}
+
+	/*
+	 * For linkback, we normally load the LCPA in the loop since we need to
+	 * link it to the second LCLA and not the first.  However, if we
+	 * couldn't even get a first LCLA, then we have to run in LCPA and
+	 * reload manually.
+	 */
+	if (!linkback || curr_lcla == -EINVAL) {
+		unsigned int flags = 0;
+
+		if (curr_lcla == -EINVAL)
+			flags |= LLI_TERM_INT;
+
+		d40_log_lli_lcpa_write(chan->lcpa,
+				       &lli->dst[lli_current],
+				       &lli->src[lli_current],
+				       curr_lcla,
+				       flags);
+		lli_current++;
+	}
+
+	if (curr_lcla < 0)
+		goto set_current;
+
+	for (; lli_current < lli_len; lli_current++) {
+		unsigned int lcla_offset = chan->phy_chan->num * 1024 +
+					   8 * curr_lcla * 2;
+		struct d40_log_lli *lcla = pool->base + lcla_offset;
+		unsigned int flags = 0;
+		int next_lcla;
+
+		if (lli_current + 1 < lli_len)
+			next_lcla = d40_lcla_alloc_one(chan, desc);
+		else
+			next_lcla = linkback ? first_lcla : -EINVAL;
+
+		if (cyclic || next_lcla == -EINVAL)
+			flags |= LLI_TERM_INT;
+
+		if (linkback && curr_lcla == first_lcla) {
+			/* First link goes in both LCPA and LCLA */
+			d40_log_lli_lcpa_write(chan->lcpa,
+					       &lli->dst[lli_current],
+					       &lli->src[lli_current],
+					       next_lcla, flags);
+		}
+
+		/*
+		 * One unused LCLA in the cyclic case if the very first
+		 * next_lcla fails...
+		 */
+		d40_log_lli_lcla_write(lcla,
+				       &lli->dst[lli_current],
+				       &lli->src[lli_current],
+				       next_lcla, flags);
+
+		/*
+		 * Cache maintenance is not needed if lcla is
+		 * mapped in esram
+		 */
+		if (!use_esram_lcla) {
+			dma_sync_single_range_for_device(chan->base->dev,
+						pool->dma_addr, lcla_offset,
+						2 * sizeof(struct d40_log_lli),
+						DMA_TO_DEVICE);
+		}
+		curr_lcla = next_lcla;
+
+		if (curr_lcla == -EINVAL || curr_lcla == first_lcla) {
+			lli_current++;
+			break;
+		}
+	}
+ set_current:
+	desc->lli_current = lli_current;
+}
+
+static void d40_desc_load(struct d40_chan *d40c, struct d40_desc *d40d)
+{
+	if (chan_is_physical(d40c)) {
+		d40_phy_lli_load(d40c, d40d);
+		d40d->lli_current = d40d->lli_len;
+	} else
+		d40_log_lli_to_lcxa(d40c, d40d);
+}
+
+static struct d40_desc *d40_first_active_get(struct d40_chan *d40c)
+{
+	return list_first_entry_or_null(&d40c->active, struct d40_desc, node);
+}
+
+/* remove desc from current queue and add it to the pending_queue */
+static void d40_desc_queue(struct d40_chan *d40c, struct d40_desc *desc)
+{
+	d40_desc_remove(desc);
+	desc->is_in_client_list = false;
+	list_add_tail(&desc->node, &d40c->pending_queue);
+}
+
+static struct d40_desc *d40_first_pending(struct d40_chan *d40c)
+{
+	return list_first_entry_or_null(&d40c->pending_queue, struct d40_desc,
+					node);
+}
+
+static struct d40_desc *d40_first_queued(struct d40_chan *d40c)
+{
+	return list_first_entry_or_null(&d40c->queue, struct d40_desc, node);
+}
+
+static struct d40_desc *d40_first_done(struct d40_chan *d40c)
+{
+	return list_first_entry_or_null(&d40c->done, struct d40_desc, node);
+}
+
+static int d40_psize_2_burst_size(bool is_log, int psize)
+{
+	if (is_log) {
+		if (psize == STEDMA40_PSIZE_LOG_1)
+			return 1;
+	} else {
+		if (psize == STEDMA40_PSIZE_PHY_1)
+			return 1;
+	}
+
+	return 2 << psize;
+}
+
+/*
+ * The DMA only supports transmitting segments of up to
+ * STEDMA40_MAX_SEG_SIZE * data_width bytes, where data_width is given in bytes.
+ *
+ * Calculate the total number of dma elements required to send the entire sg list.
+ */
+static int d40_size_2_dmalen(int size, u32 data_width1, u32 data_width2)
+{
+	int dmalen;
+	u32 max_w = max(data_width1, data_width2);
+	u32 min_w = min(data_width1, data_width2);
+	u32 seg_max = ALIGN(STEDMA40_MAX_SEG_SIZE * min_w, max_w);
+
+	if (seg_max > STEDMA40_MAX_SEG_SIZE)
+		seg_max -= max_w;
+
+	if (!IS_ALIGNED(size, max_w))
+		return -EINVAL;
+
+	if (size <= seg_max)
+		dmalen = 1;
+	else {
+		dmalen = size / seg_max;
+		if (dmalen * seg_max < size)
+			dmalen++;
+	}
+	return dmalen;
+}
+
+static int d40_sg_2_dmalen(struct scatterlist *sgl, int sg_len,
+			   u32 data_width1, u32 data_width2)
+{
+	struct scatterlist *sg;
+	int i;
+	int len = 0;
+	int ret;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		ret = d40_size_2_dmalen(sg_dma_len(sg),
+					data_width1, data_width2);
+		if (ret < 0)
+			return ret;
+		len += ret;
+	}
+	return len;
+}
+
+static int __d40_execute_command_phy(struct d40_chan *d40c,
+				     enum d40_command command)
+{
+	u32 status;
+	int i;
+	void __iomem *active_reg;
+	int ret = 0;
+	unsigned long flags;
+	u32 wmask;
+
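+	/* A STOP must be preceded by a successful suspend request */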
+	if (command == D40_DMA_STOP) {
+		ret = __d40_execute_command_phy(d40c, D40_DMA_SUSPEND_REQ);
+		if (ret)
+			return ret;
+	}
+
+	spin_lock_irqsave(&d40c->base->execmd_lock, flags);
+
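+	/* Even-numbered physical channels are controlled via ACTIVE, odd ones via ACTIVO */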
+	if (d40c->phy_chan->num % 2 == 0)
+		active_reg = d40c->base->virtbase + D40_DREG_ACTIVE;
+	else
+		active_reg = d40c->base->virtbase + D40_DREG_ACTIVO;
+
+	if (command == D40_DMA_SUSPEND_REQ) {
+		status = (readl(active_reg) &
+			  D40_CHAN_POS_MASK(d40c->phy_chan->num)) >>
+			D40_CHAN_POS(d40c->phy_chan->num);
+
+		if (status == D40_DMA_SUSPENDED || status == D40_DMA_STOP)
+			goto unlock;
+	}
+
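+	/*
+	 * Write the command into this channel's 2-bit field only; all other
+	 * fields are written as ones so that the other channels' state is
+	 * left unchanged.
+	 */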
+	wmask = 0xffffffff & ~(D40_CHAN_POS_MASK(d40c->phy_chan->num));
+	writel(wmask | (command << D40_CHAN_POS(d40c->phy_chan->num)),
+	       active_reg);
+
+	if (command == D40_DMA_SUSPEND_REQ) {
+
+		for (i = 0 ; i < D40_SUSPEND_MAX_IT; i++) {
+			status = (readl(active_reg) &
+				  D40_CHAN_POS_MASK(d40c->phy_chan->num)) >>
+				D40_CHAN_POS(d40c->phy_chan->num);
+
+			cpu_relax();
+			/*
+			 * Reduce the number of bus accesses while
+			 * waiting for the DMA to suspend.
+			 */
+			udelay(3);
+
+			if (status == D40_DMA_STOP ||
+			    status == D40_DMA_SUSPENDED)
+				break;
+		}
+
+		if (i == D40_SUSPEND_MAX_IT) {
+			chan_err(d40c,
+				"unable to suspend the chl %d (log: %d) status %x\n",
+				d40c->phy_chan->num, d40c->log_num,
+				status);
+			dump_stack();
+			ret = -EBUSY;
+		}
+
+	}
+ unlock:
+	spin_unlock_irqrestore(&d40c->base->execmd_lock, flags);
+	return ret;
+}
+
+static void d40_term_all(struct d40_chan *d40c)
+{
+	struct d40_desc *d40d;
+	struct d40_desc *_d;
+
+	/* Release completed descriptors */
+	while ((d40d = d40_first_done(d40c))) {
+		d40_desc_remove(d40d);
+		d40_desc_free(d40c, d40d);
+	}
+
+	/* Release active descriptors */
+	while ((d40d = d40_first_active_get(d40c))) {
+		d40_desc_remove(d40d);
+		d40_desc_free(d40c, d40d);
+	}
+
+	/* Release queued descriptors waiting for transfer */
+	while ((d40d = d40_first_queued(d40c))) {
+		d40_desc_remove(d40d);
+		d40_desc_free(d40c, d40d);
+	}
+
+	/* Release pending descriptors */
+	while ((d40d = d40_first_pending(d40c))) {
+		d40_desc_remove(d40d);
+		d40_desc_free(d40c, d40d);
+	}
+
+	/* Release client owned descriptors */
+	if (!list_empty(&d40c->client))
+		list_for_each_entry_safe(d40d, _d, &d40c->client, node) {
+			d40_desc_remove(d40d);
+			d40_desc_free(d40c, d40d);
+		}
+
+	/* Release descriptors in prepare queue */
+	if (!list_empty(&d40c->prepare_queue))
+		list_for_each_entry_safe(d40d, _d,
+					 &d40c->prepare_queue, node) {
+			d40_desc_remove(d40d);
+			d40_desc_free(d40c, d40d);
+		}
+
+	d40c->pending_tx = 0;
+}
+
+static void __d40_config_set_event(struct d40_chan *d40c,
+				   enum d40_events event_type, u32 event,
+				   int reg)
+{
+	void __iomem *addr = chan_base(d40c) + reg;
+	int tries;
+	u32 status;
+
+	switch (event_type) {
+
+	case D40_DEACTIVATE_EVENTLINE:
+
+		writel((D40_DEACTIVATE_EVENTLINE << D40_EVENTLINE_POS(event))
+		       | ~D40_EVENTLINE_MASK(event), addr);
+		break;
+
+	case D40_SUSPEND_REQ_EVENTLINE:
+		status = (readl(addr) & D40_EVENTLINE_MASK(event)) >>
+			  D40_EVENTLINE_POS(event);
+
+		if (status == D40_DEACTIVATE_EVENTLINE ||
+		    status == D40_SUSPEND_REQ_EVENTLINE)
+			break;
+
+		writel((D40_SUSPEND_REQ_EVENTLINE << D40_EVENTLINE_POS(event))
+		       | ~D40_EVENTLINE_MASK(event), addr);
+
+		for (tries = 0 ; tries < D40_SUSPEND_MAX_IT; tries++) {
+
+			status = (readl(addr) & D40_EVENTLINE_MASK(event)) >>
+				  D40_EVENTLINE_POS(event);
+
+			cpu_relax();
+			/*
+			 * Reduce the number of bus accesses while
+			 * waiting for the DMA to suspend.
+			 */
+			udelay(3);
+
+			if (status == D40_DEACTIVATE_EVENTLINE)
+				break;
+		}
+
+		if (tries == D40_SUSPEND_MAX_IT) {
+			chan_err(d40c,
+				"unable to stop the event_line chl %d (log: %d) "
+				"status %x\n", d40c->phy_chan->num,
+				d40c->log_num, status);
+		}
+		break;
+
+	case D40_ACTIVATE_EVENTLINE:
+	/*
+	 * The hardware sometimes doesn't register the enable when src and dst
+	 * event lines are active on the same logical channel.  Retry to ensure
+	 * it does.  Usually only one retry is sufficient.
+	 */
+		tries = 100;
+		while (--tries) {
+			writel((D40_ACTIVATE_EVENTLINE <<
+				D40_EVENTLINE_POS(event)) |
+				~D40_EVENTLINE_MASK(event), addr);
+
+			if (readl(addr) & D40_EVENTLINE_MASK(event))
+				break;
+		}
+
+		if (tries != 99)
+			dev_dbg(chan2dev(d40c),
+				"[%s] workaround enable S%cLNK (%d tries)\n",
+				__func__, reg == D40_CHAN_REG_SSLNK ? 'S' : 'D',
+				100 - tries);
+
+		WARN_ON(!tries);
+		break;
+
+	case D40_ROUND_EVENTLINE:
+		BUG();
+		break;
+
+	}
+}
+
+static void d40_config_set_event(struct d40_chan *d40c,
+				 enum d40_events event_type)
+{
+	u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dev_type);
+
+	/* Enable event line connected to device (or memcpy) */
+	if ((d40c->dma_cfg.dir == DMA_DEV_TO_MEM) ||
+	    (d40c->dma_cfg.dir == DMA_DEV_TO_DEV))
+		__d40_config_set_event(d40c, event_type, event,
+				       D40_CHAN_REG_SSLNK);
+
+	if (d40c->dma_cfg.dir != DMA_DEV_TO_MEM)
+		__d40_config_set_event(d40c, event_type, event,
+				       D40_CHAN_REG_SDLNK);
+}
+
+static u32 d40_chan_has_events(struct d40_chan *d40c)
+{
+	void __iomem *chanbase = chan_base(d40c);
+	u32 val;
+
+	val = readl(chanbase + D40_CHAN_REG_SSLNK);
+	val |= readl(chanbase + D40_CHAN_REG_SDLNK);
+
+	return val;
+}
+
+static int
+__d40_execute_command_log(struct d40_chan *d40c, enum d40_command command)
+{
+	unsigned long flags;
+	int ret = 0;
+	u32 active_status;
+	void __iomem *active_reg;
+
+	if (d40c->phy_chan->num % 2 == 0)
+		active_reg = d40c->base->virtbase + D40_DREG_ACTIVE;
+	else
+		active_reg = d40c->base->virtbase + D40_DREG_ACTIVO;
+
+
+	spin_lock_irqsave(&d40c->phy_chan->lock, flags);
+
+	switch (command) {
+	case D40_DMA_STOP:
+	case D40_DMA_SUSPEND_REQ:
+
+		active_status = (readl(active_reg) &
+				 D40_CHAN_POS_MASK(d40c->phy_chan->num)) >>
+				 D40_CHAN_POS(d40c->phy_chan->num);
+
+		if (active_status == D40_DMA_RUN)
+			d40_config_set_event(d40c, D40_SUSPEND_REQ_EVENTLINE);
+		else
+			d40_config_set_event(d40c, D40_DEACTIVATE_EVENTLINE);
+
+		if (!d40_chan_has_events(d40c) && (command == D40_DMA_STOP))
+			ret = __d40_execute_command_phy(d40c, command);
+
+		break;
+
+	case D40_DMA_RUN:
+
+		d40_config_set_event(d40c, D40_ACTIVATE_EVENTLINE);
+		ret = __d40_execute_command_phy(d40c, command);
+		break;
+
+	case D40_DMA_SUSPENDED:
+		BUG();
+		break;
+	}
+
+	spin_unlock_irqrestore(&d40c->phy_chan->lock, flags);
+	return ret;
+}
+
+static int d40_channel_execute_command(struct d40_chan *d40c,
+				       enum d40_command command)
+{
+	if (chan_is_logical(d40c))
+		return __d40_execute_command_log(d40c, command);
+	else
+		return __d40_execute_command_phy(d40c, command);
+}
+
+static u32 d40_get_prmo(struct d40_chan *d40c)
+{
+	static const unsigned int phy_map[] = {
+		[STEDMA40_PCHAN_BASIC_MODE]
+			= D40_DREG_PRMO_PCHAN_BASIC,
+		[STEDMA40_PCHAN_MODULO_MODE]
+			= D40_DREG_PRMO_PCHAN_MODULO,
+		[STEDMA40_PCHAN_DOUBLE_DST_MODE]
+			= D40_DREG_PRMO_PCHAN_DOUBLE_DST,
+	};
+	static const unsigned int log_map[] = {
+		[STEDMA40_LCHAN_SRC_PHY_DST_LOG]
+			= D40_DREG_PRMO_LCHAN_SRC_PHY_DST_LOG,
+		[STEDMA40_LCHAN_SRC_LOG_DST_PHY]
+			= D40_DREG_PRMO_LCHAN_SRC_LOG_DST_PHY,
+		[STEDMA40_LCHAN_SRC_LOG_DST_LOG]
+			= D40_DREG_PRMO_LCHAN_SRC_LOG_DST_LOG,
+	};
+
+	if (chan_is_physical(d40c))
+		return phy_map[d40c->dma_cfg.mode_opt];
+	else
+		return log_map[d40c->dma_cfg.mode_opt];
+}
+
+static void d40_config_write(struct d40_chan *d40c)
+{
+	u32 addr_base;
+	u32 var;
+
+	/* Odd addresses are even addresses + 4 */
+	addr_base = (d40c->phy_chan->num % 2) * 4;
+	/* Setup channel mode to logical or physical */
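+	/* The 2-bit mode field takes the value 1 for physical and 2 for logical. */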
+	var = ((u32)(chan_is_logical(d40c)) + 1) <<
+		D40_CHAN_POS(d40c->phy_chan->num);
+	writel(var, d40c->base->virtbase + D40_DREG_PRMSE + addr_base);
+
+	/* Setup operational mode option register */
+	var = d40_get_prmo(d40c) << D40_CHAN_POS(d40c->phy_chan->num);
+
+	writel(var, d40c->base->virtbase + D40_DREG_PRMOE + addr_base);
+
+	if (chan_is_logical(d40c)) {
+		int lidx = (d40c->phy_chan->num << D40_SREG_ELEM_LOG_LIDX_POS)
+			   & D40_SREG_ELEM_LOG_LIDX_MASK;
+		void __iomem *chanbase = chan_base(d40c);
+
+		/* Set default config for CFG reg */
+		writel(d40c->src_def_cfg, chanbase + D40_CHAN_REG_SSCFG);
+		writel(d40c->dst_def_cfg, chanbase + D40_CHAN_REG_SDCFG);
+
+		/* Set LIDX for lcla */
+		writel(lidx, chanbase + D40_CHAN_REG_SSELT);
+		writel(lidx, chanbase + D40_CHAN_REG_SDELT);
+
+		/* Clear LNK which will be used by d40_chan_has_events() */
+		writel(0, chanbase + D40_CHAN_REG_SSLNK);
+		writel(0, chanbase + D40_CHAN_REG_SDLNK);
+	}
+}
+
+static u32 d40_residue(struct d40_chan *d40c)
+{
+	u32 num_elt;
+
+	if (chan_is_logical(d40c))
+		num_elt = (readl(&d40c->lcpa->lcsp2) & D40_MEM_LCSP2_ECNT_MASK)
+			>> D40_MEM_LCSP2_ECNT_POS;
+	else {
+		u32 val = readl(chan_base(d40c) + D40_CHAN_REG_SDELT);
+		num_elt = (val & D40_SREG_ELEM_PHY_ECNT_MASK)
+			  >> D40_SREG_ELEM_PHY_ECNT_POS;
+	}
+
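+	/* Remaining elements times the destination element width gives bytes left. */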
+	return num_elt * d40c->dma_cfg.dst_info.data_width;
+}
+
+static bool d40_tx_is_linked(struct d40_chan *d40c)
+{
+	bool is_link;
+
+	if (chan_is_logical(d40c))
+		is_link = readl(&d40c->lcpa->lcsp3) &  D40_MEM_LCSP3_DLOS_MASK;
+	else
+		is_link = readl(chan_base(d40c) + D40_CHAN_REG_SDLNK)
+			  & D40_SREG_LNK_PHYS_LNK_MASK;
+
+	return is_link;
+}
+
+static int d40_pause(struct dma_chan *chan)
+{
+	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
+	int res = 0;
+	unsigned long flags;
+
+	if (d40c->phy_chan == NULL) {
+		chan_err(d40c, "Channel is not allocated!\n");
+		return -EINVAL;
+	}
+
+	if (!d40c->busy)
+		return 0;
+
+	spin_lock_irqsave(&d40c->lock, flags);
+	pm_runtime_get_sync(d40c->base->dev);
+
+	res = d40_channel_execute_command(d40c, D40_DMA_SUSPEND_REQ);
+
+	pm_runtime_mark_last_busy(d40c->base->dev);
+	pm_runtime_put_autosuspend(d40c->base->dev);
+	spin_unlock_irqrestore(&d40c->lock, flags);
+	return res;
+}
+
+static int d40_resume(struct dma_chan *chan)
+{
+	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
+	int res = 0;
+	unsigned long flags;
+
+	if (d40c->phy_chan == NULL) {
+		chan_err(d40c, "Channel is not allocated!\n");
+		return -EINVAL;
+	}
+
+	if (!d40c->busy)
+		return 0;
+
+	spin_lock_irqsave(&d40c->lock, flags);
+	pm_runtime_get_sync(d40c->base->dev);
+
+	/* If there are bytes left to transfer or a linked tx, resume the job */
+	if (d40_residue(d40c) || d40_tx_is_linked(d40c))
+		res = d40_channel_execute_command(d40c, D40_DMA_RUN);
+
+	pm_runtime_mark_last_busy(d40c->base->dev);
+	pm_runtime_put_autosuspend(d40c->base->dev);
+	spin_unlock_irqrestore(&d40c->lock, flags);
+	return res;
+}
+
+static dma_cookie_t d40_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct d40_chan *d40c = container_of(tx->chan,
+					     struct d40_chan,
+					     chan);
+	struct d40_desc *d40d = container_of(tx, struct d40_desc, txd);
+	unsigned long flags;
+	dma_cookie_t cookie;
+
+	spin_lock_irqsave(&d40c->lock, flags);
+	cookie = dma_cookie_assign(tx);
+	d40_desc_queue(d40c, d40d);
+	spin_unlock_irqrestore(&d40c->lock, flags);
+
+	return cookie;
+}
+
+static int d40_start(struct d40_chan *d40c)
+{
+	return d40_channel_execute_command(d40c, D40_DMA_RUN);
+}
+
+static struct d40_desc *d40_queue_start(struct d40_chan *d40c)
+{
+	struct d40_desc *d40d;
+	int err;
+
+	/* Start queued jobs, if any */
+	d40d = d40_first_queued(d40c);
+
+	if (d40d != NULL) {
+		if (!d40c->busy) {
+			d40c->busy = true;
+			pm_runtime_get_sync(d40c->base->dev);
+		}
+
+		/* Remove from queue */
+		d40_desc_remove(d40d);
+
+		/* Add to active queue */
+		d40_desc_submit(d40c, d40d);
+
+		/* Initiate DMA job */
+		d40_desc_load(d40c, d40d);
+
+		/* Start dma job */
+		err = d40_start(d40c);
+
+		if (err)
+			return NULL;
+	}
+
+	return d40d;
+}
+
+/* called from interrupt context */
+static void dma_tc_handle(struct d40_chan *d40c)
+{
+	struct d40_desc *d40d;
+
+	/* Get first active entry from list */
+	d40d = d40_first_active_get(d40c);
+
+	if (d40d == NULL)
+		return;
+
+	if (d40d->cyclic) {
+		/*
+		 * If this was a partially loaded list, we need to reload it,
+		 * but only once the loaded part has completed.  We check for
+		 * completion because the interrupt fires for every link, and
+		 * not just the last one.
+		 */
+		if (d40d->lli_current < d40d->lli_len
+		    && !d40_tx_is_linked(d40c)
+		    && !d40_residue(d40c)) {
+			d40_lcla_free_all(d40c, d40d);
+			d40_desc_load(d40c, d40d);
+			(void) d40_start(d40c);
+
+			if (d40d->lli_current == d40d->lli_len)
+				d40d->lli_current = 0;
+		}
+	} else {
+		d40_lcla_free_all(d40c, d40d);
+
+		if (d40d->lli_current < d40d->lli_len) {
+			d40_desc_load(d40c, d40d);
+			/* Start dma job */
+			(void) d40_start(d40c);
+			return;
+		}
+
+		if (d40_queue_start(d40c) == NULL) {
+			d40c->busy = false;
+
+			pm_runtime_mark_last_busy(d40c->base->dev);
+			pm_runtime_put_autosuspend(d40c->base->dev);
+		}
+
+		d40_desc_remove(d40d);
+		d40_desc_done(d40c, d40d);
+	}
+
+	d40c->pending_tx++;
+	tasklet_schedule(&d40c->tasklet);
+
+}
+
+static void dma_tasklet(unsigned long data)
+{
+	struct d40_chan *d40c = (struct d40_chan *) data;
+	struct d40_desc *d40d;
+	unsigned long flags;
+	bool callback_active;
+	struct dmaengine_desc_callback cb;
+
+	spin_lock_irqsave(&d40c->lock, flags);
+
+	/* Get first entry from the done list */
+	d40d = d40_first_done(d40c);
+	if (d40d == NULL) {
+		/* Check if we have reached here for cyclic job */
+		d40d = d40_first_active_get(d40c);
+		if (d40d == NULL || !d40d->cyclic)
+			goto check_pending_tx;
+	}
+
+	if (!d40d->cyclic)
+		dma_cookie_complete(&d40d->txd);
+
+	/*
+	 * If a channel is being terminated, pending_tx is set to zero.
+	 * This prevents any finished active jobs from being returned to
+	 * the client.
+	 */
+	if (d40c->pending_tx == 0) {
+		spin_unlock_irqrestore(&d40c->lock, flags);
+		return;
+	}
+
+	/* Callback to client */
+	callback_active = !!(d40d->txd.flags & DMA_PREP_INTERRUPT);
+	dmaengine_desc_get_callback(&d40d->txd, &cb);
+
+	if (!d40d->cyclic) {
+		if (async_tx_test_ack(&d40d->txd)) {
+			d40_desc_remove(d40d);
+			d40_desc_free(d40c, d40d);
+		} else if (!d40d->is_in_client_list) {
+			d40_desc_remove(d40d);
+			d40_lcla_free_all(d40c, d40d);
+			list_add_tail(&d40d->node, &d40c->client);
+			d40d->is_in_client_list = true;
+		}
+	}
+
+	d40c->pending_tx--;
+
+	if (d40c->pending_tx)
+		tasklet_schedule(&d40c->tasklet);
+
+	spin_unlock_irqrestore(&d40c->lock, flags);
+
+	if (callback_active)
+		dmaengine_desc_callback_invoke(&cb, NULL);
+
+	return;
+ check_pending_tx:
+	/* Rescue manoeuvre if we are receiving double interrupts */
+	if (d40c->pending_tx > 0)
+		d40c->pending_tx--;
+	spin_unlock_irqrestore(&d40c->lock, flags);
+}
+
+static irqreturn_t d40_handle_interrupt(int irq, void *data)
+{
+	int i;
+	u32 idx;
+	u32 row;
+	long chan = -1;
+	struct d40_chan *d40c;
+	unsigned long flags;
+	struct d40_base *base = data;
+	u32 *regs = base->regs_interrupt;
+	struct d40_interrupt_lookup *il = base->gen_dmac.il;
+	u32 il_size = base->gen_dmac.il_size;
+
+	spin_lock_irqsave(&base->interrupt_lock, flags);
+
+	/* Read interrupt status of both logical and physical channels */
+	for (i = 0; i < il_size; i++)
+		regs[i] = readl(base->virtbase + il[i].src);
+
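+	/*
+	 * Walk the cached status words as a single bitmap; every set bit
+	 * identifies a channel with a pending interrupt.
+	 */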
+	for (;;) {
+
+		chan = find_next_bit((unsigned long *)regs,
+				     BITS_PER_LONG * il_size, chan + 1);
+
+		/* No more set bits found? */
+		if (chan == BITS_PER_LONG * il_size)
+			break;
+
+		row = chan / BITS_PER_LONG;
+		idx = chan & (BITS_PER_LONG - 1);
+
+		if (il[row].offset == D40_PHY_CHAN)
+			d40c = base->lookup_phy_chans[idx];
+		else
+			d40c = base->lookup_log_chans[il[row].offset + idx];
+
+		if (!d40c) {
+			/*
+			 * No error because this can happen if something else
+			 * in the system is using the channel.
+			 */
+			continue;
+		}
+
+		/* ACK interrupt */
+		writel(BIT(idx), base->virtbase + il[row].clr);
+
+		spin_lock(&d40c->lock);
+
+		if (!il[row].is_error)
+			dma_tc_handle(d40c);
+		else
+			d40_err(base->dev, "IRQ chan: %ld offset %d idx %d\n",
+				chan, il[row].offset, idx);
+
+		spin_unlock(&d40c->lock);
+	}
+
+	spin_unlock_irqrestore(&base->interrupt_lock, flags);
+
+	return IRQ_HANDLED;
+}
+
+static int d40_validate_conf(struct d40_chan *d40c,
+			     struct stedma40_chan_cfg *conf)
+{
+	int res = 0;
+	bool is_log = conf->mode == STEDMA40_MODE_LOGICAL;
+
+	if (!conf->dir) {
+		chan_err(d40c, "Invalid direction.\n");
+		res = -EINVAL;
+	}
+
+	if ((is_log && conf->dev_type > d40c->base->num_log_chans)  ||
+	    (!is_log && conf->dev_type > d40c->base->num_phy_chans) ||
+	    (conf->dev_type < 0)) {
+		chan_err(d40c, "Invalid device type (%d)\n", conf->dev_type);
+		res = -EINVAL;
+	}
+
+	if (conf->dir == DMA_DEV_TO_DEV) {
+		/*
+		 * DMAC HW supports it. Will be added to this driver,
+		 * in case any dma client requires it.
+		 */
+		chan_err(d40c, "periph to periph not supported\n");
+		res = -EINVAL;
+	}
+
+	if (d40_psize_2_burst_size(is_log, conf->src_info.psize) *
+	    conf->src_info.data_width !=
+	    d40_psize_2_burst_size(is_log, conf->dst_info.psize) *
+	    conf->dst_info.data_width) {
+		/*
+		 * The DMAC hardware only supports
+		 * src (burst x width) == dst (burst x width)
+		 */
+
+		chan_err(d40c, "src (burst x width) != dst (burst x width)\n");
+		res = -EINVAL;
+	}
+
+	return res;
+}
+
+static bool d40_alloc_mask_set(struct d40_phy_res *phy,
+			       bool is_src, int log_event_line, bool is_log,
+			       bool *first_user)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&phy->lock, flags);
+
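+	/*
+	 * The caller is the first user of this physical channel if neither
+	 * the src nor the dst side has been allocated yet.
+	 */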
+	*first_user = ((phy->allocated_src | phy->allocated_dst)
+			== D40_ALLOC_FREE);
+
+	if (!is_log) {
+		/* Physical interrupts are masked per physical full channel */
+		if (phy->allocated_src == D40_ALLOC_FREE &&
+		    phy->allocated_dst == D40_ALLOC_FREE) {
+			phy->allocated_dst = D40_ALLOC_PHY;
+			phy->allocated_src = D40_ALLOC_PHY;
+			goto found_unlock;
+		} else
+			goto not_found_unlock;
+	}
+
+	/* Logical channel */
+	if (is_src) {
+		if (phy->allocated_src == D40_ALLOC_PHY)
+			goto not_found_unlock;
+
+		if (phy->allocated_src == D40_ALLOC_FREE)
+			phy->allocated_src = D40_ALLOC_LOG_FREE;
+
+		if (!(phy->allocated_src & BIT(log_event_line))) {
+			phy->allocated_src |= BIT(log_event_line);
+			goto found_unlock;
+		} else
+			goto not_found_unlock;
+	} else {
+		if (phy->allocated_dst == D40_ALLOC_PHY)
+			goto not_found_unlock;
+
+		if (phy->allocated_dst == D40_ALLOC_FREE)
+			phy->allocated_dst = D40_ALLOC_LOG_FREE;
+
+		if (!(phy->allocated_dst & BIT(log_event_line))) {
+			phy->allocated_dst |= BIT(log_event_line);
+			goto found_unlock;
+		}
+	}
+ not_found_unlock:
+	spin_unlock_irqrestore(&phy->lock, flags);
+	return false;
+ found_unlock:
+	spin_unlock_irqrestore(&phy->lock, flags);
+	return true;
+}
+
+static bool d40_alloc_mask_free(struct d40_phy_res *phy, bool is_src,
+			       int log_event_line)
+{
+	unsigned long flags;
+	bool is_free = false;
+
+	spin_lock_irqsave(&phy->lock, flags);
+	if (!log_event_line) {
+		phy->allocated_dst = D40_ALLOC_FREE;
+		phy->allocated_src = D40_ALLOC_FREE;
+		is_free = true;
+		goto unlock;
+	}
+
+	/* Logical channel */
+	if (is_src) {
+		phy->allocated_src &= ~BIT(log_event_line);
+		if (phy->allocated_src == D40_ALLOC_LOG_FREE)
+			phy->allocated_src = D40_ALLOC_FREE;
+	} else {
+		phy->allocated_dst &= ~BIT(log_event_line);
+		if (phy->allocated_dst == D40_ALLOC_LOG_FREE)
+			phy->allocated_dst = D40_ALLOC_FREE;
+	}
+
+	is_free = ((phy->allocated_src | phy->allocated_dst) ==
+		   D40_ALLOC_FREE);
+ unlock:
+	spin_unlock_irqrestore(&phy->lock, flags);
+
+	return is_free;
+}
+
+static int d40_allocate_channel(struct d40_chan *d40c, bool *first_phy_user)
+{
+	int dev_type = d40c->dma_cfg.dev_type;
+	int event_group;
+	int event_line;
+	struct d40_phy_res *phys;
+	int i;
+	int j;
+	int log_num;
+	int num_phy_chans;
+	bool is_src;
+	bool is_log = d40c->dma_cfg.mode == STEDMA40_MODE_LOGICAL;
+
+	phys = d40c->base->phy_res;
+	num_phy_chans = d40c->base->num_phy_chans;
+
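+	/*
+	 * Each device type owns a pair of logical channel numbers:
+	 * 2 * dev_type for the source (dev-to-mem) event line and
+	 * 2 * dev_type + 1 for the destination event line.
+	 */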
+	if (d40c->dma_cfg.dir == DMA_DEV_TO_MEM) {
+		log_num = 2 * dev_type;
+		is_src = true;
+	} else if (d40c->dma_cfg.dir == DMA_MEM_TO_DEV ||
+		   d40c->dma_cfg.dir == DMA_MEM_TO_MEM) {
+		/* dst event lines are used for logical memcpy */
+		log_num = 2 * dev_type + 1;
+		is_src = false;
+	} else
+		return -EINVAL;
+
+	event_group = D40_TYPE_TO_GROUP(dev_type);
+	event_line = D40_TYPE_TO_EVENT(dev_type);
+
+	if (!is_log) {
+		if (d40c->dma_cfg.dir == DMA_MEM_TO_MEM) {
+			/* Find physical half channel */
+			if (d40c->dma_cfg.use_fixed_channel) {
+				i = d40c->dma_cfg.phy_channel;
+				if (d40_alloc_mask_set(&phys[i], is_src,
+						       0, is_log,
+						       first_phy_user))
+					goto found_phy;
+			} else {
+				for (i = 0; i < num_phy_chans; i++) {
+					if (d40_alloc_mask_set(&phys[i], is_src,
+						       0, is_log,
+						       first_phy_user))
+						goto found_phy;
+				}
+			}
+		} else
+			for (j = 0; j < d40c->base->num_phy_chans; j += 8) {
+				int phy_num = j  + event_group * 2;
+				for (i = phy_num; i < phy_num + 2; i++) {
+					if (d40_alloc_mask_set(&phys[i],
+							       is_src,
+							       0,
+							       is_log,
+							       first_phy_user))
+						goto found_phy;
+				}
+			}
+		return -EINVAL;
+found_phy:
+		d40c->phy_chan = &phys[i];
+		d40c->log_num = D40_PHY_CHAN;
+		goto out;
+	}
+	if (dev_type == -1)
+		return -EINVAL;
+
+	/* Find logical channel */
+	for (j = 0; j < d40c->base->num_phy_chans; j += 8) {
+		int phy_num = j + event_group * 2;
+
+		if (d40c->dma_cfg.use_fixed_channel) {
+			i = d40c->dma_cfg.phy_channel;
+
+			if ((i != phy_num) && (i != phy_num + 1)) {
+				dev_err(chan2dev(d40c),
+					"invalid fixed phy channel %d\n", i);
+				return -EINVAL;
+			}
+
+			if (d40_alloc_mask_set(&phys[i], is_src, event_line,
+					       is_log, first_phy_user))
+				goto found_log;
+
+			dev_err(chan2dev(d40c),
+				"could not allocate fixed phy channel %d\n", i);
+			return -EINVAL;
+		}
+
+		/*
+		 * Spread logical channels across all available physical
+		 * channels rather than packing every logical channel onto
+		 * the first available physical channel.
+		 */
+		if (is_src) {
+			for (i = phy_num; i < phy_num + 2; i++) {
+				if (d40_alloc_mask_set(&phys[i], is_src,
+						       event_line, is_log,
+						       first_phy_user))
+					goto found_log;
+			}
+		} else {
+			for (i = phy_num + 1; i >= phy_num; i--) {
+				if (d40_alloc_mask_set(&phys[i], is_src,
+						       event_line, is_log,
+						       first_phy_user))
+					goto found_log;
+			}
+		}
+	}
+	return -EINVAL;
+
+found_log:
+	d40c->phy_chan = &phys[i];
+	d40c->log_num = log_num;
+out:
+
+	if (is_log)
+		d40c->base->lookup_log_chans[d40c->log_num] = d40c;
+	else
+		d40c->base->lookup_phy_chans[d40c->phy_chan->num] = d40c;
+
+	return 0;
+
+}
+
+static int d40_config_memcpy(struct d40_chan *d40c)
+{
+	dma_cap_mask_t cap = d40c->chan.device->cap_mask;
+
+	if (dma_has_cap(DMA_MEMCPY, cap) && !dma_has_cap(DMA_SLAVE, cap)) {
+		d40c->dma_cfg = dma40_memcpy_conf_log;
+		d40c->dma_cfg.dev_type = dma40_memcpy_channels[d40c->chan.chan_id];
+
+		d40_log_cfg(&d40c->dma_cfg,
+			    &d40c->log_def.lcsp1, &d40c->log_def.lcsp3);
+
+	} else if (dma_has_cap(DMA_MEMCPY, cap) &&
+		   dma_has_cap(DMA_SLAVE, cap)) {
+		d40c->dma_cfg = dma40_memcpy_conf_phy;
+
+		/* Generate interrupt at end of transfer or relink. */
+		d40c->dst_def_cfg |= BIT(D40_SREG_CFG_TIM_POS);
+
+		/* Generate interrupt on error. */
+		d40c->src_def_cfg |= BIT(D40_SREG_CFG_EIM_POS);
+		d40c->dst_def_cfg |= BIT(D40_SREG_CFG_EIM_POS);
+
+	} else {
+		chan_err(d40c, "No memcpy\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int d40_free_dma(struct d40_chan *d40c)
+{
+
+	int res = 0;
+	u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dev_type);
+	struct d40_phy_res *phy = d40c->phy_chan;
+	bool is_src;
+
+	/* Terminate all queued and active transfers */
+	d40_term_all(d40c);
+
+	if (phy == NULL) {
+		chan_err(d40c, "phy == null\n");
+		return -EINVAL;
+	}
+
+	if (phy->allocated_src == D40_ALLOC_FREE &&
+	    phy->allocated_dst == D40_ALLOC_FREE) {
+		chan_err(d40c, "channel already free\n");
+		return -EINVAL;
+	}
+
+	if (d40c->dma_cfg.dir == DMA_MEM_TO_DEV ||
+	    d40c->dma_cfg.dir == DMA_MEM_TO_MEM)
+		is_src = false;
+	else if (d40c->dma_cfg.dir == DMA_DEV_TO_MEM)
+		is_src = true;
+	else {
+		chan_err(d40c, "Unknown direction\n");
+		return -EINVAL;
+	}
+
+	pm_runtime_get_sync(d40c->base->dev);
+	res = d40_channel_execute_command(d40c, D40_DMA_STOP);
+	if (res) {
+		chan_err(d40c, "stop failed\n");
+		goto mark_last_busy;
+	}
+
+	d40_alloc_mask_free(phy, is_src, chan_is_logical(d40c) ? event : 0);
+
+	if (chan_is_logical(d40c))
+		d40c->base->lookup_log_chans[d40c->log_num] = NULL;
+	else
+		d40c->base->lookup_phy_chans[phy->num] = NULL;
+
+	if (d40c->busy) {
+		pm_runtime_mark_last_busy(d40c->base->dev);
+		pm_runtime_put_autosuspend(d40c->base->dev);
+	}
+
+	d40c->busy = false;
+	d40c->phy_chan = NULL;
+	d40c->configured = false;
+ mark_last_busy:
+	pm_runtime_mark_last_busy(d40c->base->dev);
+	pm_runtime_put_autosuspend(d40c->base->dev);
+	return res;
+}
+
+static bool d40_is_paused(struct d40_chan *d40c)
+{
+	void __iomem *chanbase = chan_base(d40c);
+	bool is_paused = false;
+	unsigned long flags;
+	void __iomem *active_reg;
+	u32 status;
+	u32 event = D40_TYPE_TO_EVENT(d40c->dma_cfg.dev_type);
+
+	spin_lock_irqsave(&d40c->lock, flags);
+
+	if (chan_is_physical(d40c)) {
+		if (d40c->phy_chan->num % 2 == 0)
+			active_reg = d40c->base->virtbase + D40_DREG_ACTIVE;
+		else
+			active_reg = d40c->base->virtbase + D40_DREG_ACTIVO;
+
+		status = (readl(active_reg) &
+			  D40_CHAN_POS_MASK(d40c->phy_chan->num)) >>
+			D40_CHAN_POS(d40c->phy_chan->num);
+		if (status == D40_DMA_SUSPENDED || status == D40_DMA_STOP)
+			is_paused = true;
+		goto unlock;
+	}
+
+	if (d40c->dma_cfg.dir == DMA_MEM_TO_DEV ||
+	    d40c->dma_cfg.dir == DMA_MEM_TO_MEM) {
+		status = readl(chanbase + D40_CHAN_REG_SDLNK);
+	} else if (d40c->dma_cfg.dir == DMA_DEV_TO_MEM) {
+		status = readl(chanbase + D40_CHAN_REG_SSLNK);
+	} else {
+		chan_err(d40c, "Unknown direction\n");
+		goto unlock;
+	}
+
+	status = (status & D40_EVENTLINE_MASK(event)) >>
+		D40_EVENTLINE_POS(event);
+
+	if (status != D40_DMA_RUN)
+		is_paused = true;
+ unlock:
+	spin_unlock_irqrestore(&d40c->lock, flags);
+	return is_paused;
+
+}
+
+static u32 stedma40_residue(struct dma_chan *chan)
+{
+	struct d40_chan *d40c =
+		container_of(chan, struct d40_chan, chan);
+	u32 bytes_left;
+	unsigned long flags;
+
+	spin_lock_irqsave(&d40c->lock, flags);
+	bytes_left = d40_residue(d40c);
+	spin_unlock_irqrestore(&d40c->lock, flags);
+
+	return bytes_left;
+}
+
+static int
+d40_prep_sg_log(struct d40_chan *chan, struct d40_desc *desc,
+		struct scatterlist *sg_src, struct scatterlist *sg_dst,
+		unsigned int sg_len, dma_addr_t src_dev_addr,
+		dma_addr_t dst_dev_addr)
+{
+	struct stedma40_chan_cfg *cfg = &chan->dma_cfg;
+	struct stedma40_half_channel_info *src_info = &cfg->src_info;
+	struct stedma40_half_channel_info *dst_info = &cfg->dst_info;
+	int ret;
+
+	ret = d40_log_sg_to_lli(sg_src, sg_len,
+				src_dev_addr,
+				desc->lli_log.src,
+				chan->log_def.lcsp1,
+				src_info->data_width,
+				dst_info->data_width);
+
+	ret = d40_log_sg_to_lli(sg_dst, sg_len,
+				dst_dev_addr,
+				desc->lli_log.dst,
+				chan->log_def.lcsp3,
+				dst_info->data_width,
+				src_info->data_width);
+
+	return ret < 0 ? ret : 0;
+}
+
+static int
+d40_prep_sg_phy(struct d40_chan *chan, struct d40_desc *desc,
+		struct scatterlist *sg_src, struct scatterlist *sg_dst,
+		unsigned int sg_len, dma_addr_t src_dev_addr,
+		dma_addr_t dst_dev_addr)
+{
+	struct stedma40_chan_cfg *cfg = &chan->dma_cfg;
+	struct stedma40_half_channel_info *src_info = &cfg->src_info;
+	struct stedma40_half_channel_info *dst_info = &cfg->dst_info;
+	unsigned long flags = 0;
+	int ret;
+
+	if (desc->cyclic)
+		flags |= LLI_CYCLIC | LLI_TERM_INT;
+
+	ret = d40_phy_sg_to_lli(sg_src, sg_len, src_dev_addr,
+				desc->lli_phy.src,
+				virt_to_phys(desc->lli_phy.src),
+				chan->src_def_cfg,
+				src_info, dst_info, flags);
+
+	ret = d40_phy_sg_to_lli(sg_dst, sg_len, dst_dev_addr,
+				desc->lli_phy.dst,
+				virt_to_phys(desc->lli_phy.dst),
+				chan->dst_def_cfg,
+				dst_info, src_info, flags);
+
+	dma_sync_single_for_device(chan->base->dev, desc->lli_pool.dma_addr,
+				   desc->lli_pool.size, DMA_TO_DEVICE);
+
+	return ret < 0 ? ret : 0;
+}
+
+static struct d40_desc *
+d40_prep_desc(struct d40_chan *chan, struct scatterlist *sg,
+	      unsigned int sg_len, unsigned long dma_flags)
+{
+	struct stedma40_chan_cfg *cfg;
+	struct d40_desc *desc;
+	int ret;
+
+	desc = d40_desc_get(chan);
+	if (!desc)
+		return NULL;
+
+	cfg = &chan->dma_cfg;
+	desc->lli_len = d40_sg_2_dmalen(sg, sg_len, cfg->src_info.data_width,
+					cfg->dst_info.data_width);
+	if (desc->lli_len < 0) {
+		chan_err(chan, "Unaligned size\n");
+		goto free_desc;
+	}
+
+	ret = d40_pool_lli_alloc(chan, desc, desc->lli_len);
+	if (ret < 0) {
+		chan_err(chan, "Could not allocate lli\n");
+		goto free_desc;
+	}
+
+	desc->lli_current = 0;
+	desc->txd.flags = dma_flags;
+	desc->txd.tx_submit = d40_tx_submit;
+
+	dma_async_tx_descriptor_init(&desc->txd, &chan->chan);
+
+	return desc;
+ free_desc:
+	d40_desc_free(chan, desc);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+d40_prep_sg(struct dma_chan *dchan, struct scatterlist *sg_src,
+	    struct scatterlist *sg_dst, unsigned int sg_len,
+	    enum dma_transfer_direction direction, unsigned long dma_flags)
+{
+	struct d40_chan *chan = container_of(dchan, struct d40_chan, chan);
+	dma_addr_t src_dev_addr;
+	dma_addr_t dst_dev_addr;
+	struct d40_desc *desc;
+	unsigned long flags;
+	int ret;
+
+	if (!chan->phy_chan) {
+		chan_err(chan, "Cannot prepare unallocated channel\n");
+		return NULL;
+	}
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	desc = d40_prep_desc(chan, sg_src, sg_len, dma_flags);
+	if (desc == NULL)
+		goto unlock;
+
+	if (sg_next(&sg_src[sg_len - 1]) == sg_src)
+		desc->cyclic = true;
+
+	src_dev_addr = 0;
+	dst_dev_addr = 0;
+	if (direction == DMA_DEV_TO_MEM)
+		src_dev_addr = chan->runtime_addr;
+	else if (direction == DMA_MEM_TO_DEV)
+		dst_dev_addr = chan->runtime_addr;
+
+	if (chan_is_logical(chan))
+		ret = d40_prep_sg_log(chan, desc, sg_src, sg_dst,
+				      sg_len, src_dev_addr, dst_dev_addr);
+	else
+		ret = d40_prep_sg_phy(chan, desc, sg_src, sg_dst,
+				      sg_len, src_dev_addr, dst_dev_addr);
+
+	if (ret) {
+		chan_err(chan, "Failed to prepare %s sg job: %d\n",
+			 chan_is_logical(chan) ? "log" : "phy", ret);
+		goto free_desc;
+	}
+
+	/*
+	 * Add the descriptor to the prepare queue so that it can be freed
+	 * later in terminate_all.
+	 */
+	list_add_tail(&desc->node, &chan->prepare_queue);
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	return &desc->txd;
+ free_desc:
+	d40_desc_free(chan, desc);
+ unlock:
+	spin_unlock_irqrestore(&chan->lock, flags);
+	return NULL;
+}
+
+bool stedma40_filter(struct dma_chan *chan, void *data)
+{
+	struct stedma40_chan_cfg *info = data;
+	struct d40_chan *d40c =
+		container_of(chan, struct d40_chan, chan);
+	int err;
+
+	if (data) {
+		err = d40_validate_conf(d40c, info);
+		if (!err)
+			d40c->dma_cfg = *info;
+	} else
+		err = d40_config_memcpy(d40c);
+
+	if (!err)
+		d40c->configured = true;
+
+	return err == 0;
+}
+EXPORT_SYMBOL(stedma40_filter);
+
+static void __d40_set_prio_rt(struct d40_chan *d40c, int dev_type, bool src)
+{
+	bool realtime = d40c->dma_cfg.realtime;
+	bool highprio = d40c->dma_cfg.high_priority;
+	u32 rtreg;
+	u32 event = D40_TYPE_TO_EVENT(dev_type);
+	u32 group = D40_TYPE_TO_GROUP(dev_type);
+	u32 bit = BIT(event);
+	u32 prioreg;
+	struct d40_gen_dmac *dmac = &d40c->base->gen_dmac;
+
+	rtreg = realtime ? dmac->realtime_en : dmac->realtime_clear;
+	/*
+	 * Due to a hardware bug, in some cases a logical channel triggered by
+	 * a high priority destination event line can generate extra packet
+	 * transactions.
+	 *
+	 * The workaround is to not set the high priority level for the
+	 * destination event lines that trigger logical channels.
+	 */
+	if (!src && chan_is_logical(d40c))
+		highprio = false;
+
+	prioreg = highprio ? dmac->high_prio_en : dmac->high_prio_clear;
+
+	/* Destination event lines are stored in the upper halfword */
+	if (!src)
+		bit <<= 16;
+
+	writel(bit, d40c->base->virtbase + prioreg + group * 4);
+	writel(bit, d40c->base->virtbase + rtreg + group * 4);
+}
+
+static void d40_set_prio_realtime(struct d40_chan *d40c)
+{
+	if (d40c->base->rev < 3)
+		return;
+
+	if ((d40c->dma_cfg.dir ==  DMA_DEV_TO_MEM) ||
+	    (d40c->dma_cfg.dir == DMA_DEV_TO_DEV))
+		__d40_set_prio_rt(d40c, d40c->dma_cfg.dev_type, true);
+
+	if ((d40c->dma_cfg.dir ==  DMA_MEM_TO_DEV) ||
+	    (d40c->dma_cfg.dir == DMA_DEV_TO_DEV))
+		__d40_set_prio_rt(d40c, d40c->dma_cfg.dev_type, false);
+}
+
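+/*
+ * Layout of the flags cell (args[2]) in the device tree dma specifier:
+ *   bit 0: mode, 0 = logical channel, 1 = physical channel
+ *   bit 1: direction, 0 = memory to device, 1 = device to memory
+ *   bit 2: the peripheral is accessed in big endian
+ *   bit 3: use the fixed physical channel given in args[1]
+ *   bit 4: high priority channel
+ */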
+#define D40_DT_FLAGS_MODE(flags)       ((flags >> 0) & 0x1)
+#define D40_DT_FLAGS_DIR(flags)        ((flags >> 1) & 0x1)
+#define D40_DT_FLAGS_BIG_ENDIAN(flags) ((flags >> 2) & 0x1)
+#define D40_DT_FLAGS_FIXED_CHAN(flags) ((flags >> 3) & 0x1)
+#define D40_DT_FLAGS_HIGH_PRIO(flags)  ((flags >> 4) & 0x1)
+
+static struct dma_chan *d40_xlate(struct of_phandle_args *dma_spec,
+				  struct of_dma *ofdma)
+{
+	struct stedma40_chan_cfg cfg;
+	dma_cap_mask_t cap;
+	u32 flags;
+
+	memset(&cfg, 0, sizeof(struct stedma40_chan_cfg));
+
+	dma_cap_zero(cap);
+	dma_cap_set(DMA_SLAVE, cap);
+
+	cfg.dev_type = dma_spec->args[0];
+	flags = dma_spec->args[2];
+
+	switch (D40_DT_FLAGS_MODE(flags)) {
+	case 0: cfg.mode = STEDMA40_MODE_LOGICAL; break;
+	case 1: cfg.mode = STEDMA40_MODE_PHYSICAL; break;
+	}
+
+	switch (D40_DT_FLAGS_DIR(flags)) {
+	case 0:
+		cfg.dir = DMA_MEM_TO_DEV;
+		cfg.dst_info.big_endian = D40_DT_FLAGS_BIG_ENDIAN(flags);
+		break;
+	case 1:
+		cfg.dir = DMA_DEV_TO_MEM;
+		cfg.src_info.big_endian = D40_DT_FLAGS_BIG_ENDIAN(flags);
+		break;
+	}
+
+	if (D40_DT_FLAGS_FIXED_CHAN(flags)) {
+		cfg.phy_channel = dma_spec->args[1];
+		cfg.use_fixed_channel = true;
+	}
+
+	if (D40_DT_FLAGS_HIGH_PRIO(flags))
+		cfg.high_priority = true;
+
+	return dma_request_channel(cap, stedma40_filter, &cfg);
+}
+
+/* DMA ENGINE functions */
+static int d40_alloc_chan_resources(struct dma_chan *chan)
+{
+	int err;
+	unsigned long flags;
+	struct d40_chan *d40c =
+		container_of(chan, struct d40_chan, chan);
+	bool is_free_phy;
+	spin_lock_irqsave(&d40c->lock, flags);
+
+	dma_cookie_init(chan);
+
+	/* If no dma configuration is set use default configuration (memcpy) */
+	if (!d40c->configured) {
+		err = d40_config_memcpy(d40c);
+		if (err) {
+			chan_err(d40c, "Failed to configure memcpy channel\n");
+			goto mark_last_busy;
+		}
+	}
+
+	err = d40_allocate_channel(d40c, &is_free_phy);
+	if (err) {
+		chan_err(d40c, "Failed to allocate channel\n");
+		d40c->configured = false;
+		goto mark_last_busy;
+	}
+
+	pm_runtime_get_sync(d40c->base->dev);
+
+	d40_set_prio_realtime(d40c);
+
+	if (chan_is_logical(d40c)) {
+		if (d40c->dma_cfg.dir == DMA_DEV_TO_MEM)
+			d40c->lcpa = d40c->base->lcpa_base +
+				d40c->dma_cfg.dev_type * D40_LCPA_CHAN_SIZE;
+		else
+			d40c->lcpa = d40c->base->lcpa_base +
+				d40c->dma_cfg.dev_type *
+				D40_LCPA_CHAN_SIZE + D40_LCPA_CHAN_DST_DELTA;
+
+		/* Unmask the Global Interrupt Mask. */
+		d40c->src_def_cfg |= BIT(D40_SREG_CFG_LOG_GIM_POS);
+		d40c->dst_def_cfg |= BIT(D40_SREG_CFG_LOG_GIM_POS);
+	}
+
+	dev_dbg(chan2dev(d40c), "allocated %s channel (phy %d%s)\n",
+		 chan_is_logical(d40c) ? "logical" : "physical",
+		 d40c->phy_chan->num,
+		 d40c->dma_cfg.use_fixed_channel ? ", fixed" : "");
+
+
+	/*
+	 * Only write channel configuration to the DMA if the physical
+	 * resource is free. In case of multiple logical channels
+	 * on the same physical resource, only the first write is necessary.
+	 */
+	if (is_free_phy)
+		d40_config_write(d40c);
+ mark_last_busy:
+	pm_runtime_mark_last_busy(d40c->base->dev);
+	pm_runtime_put_autosuspend(d40c->base->dev);
+	spin_unlock_irqrestore(&d40c->lock, flags);
+	return err;
+}
+
+static void d40_free_chan_resources(struct dma_chan *chan)
+{
+	struct d40_chan *d40c =
+		container_of(chan, struct d40_chan, chan);
+	int err;
+	unsigned long flags;
+
+	if (d40c->phy_chan == NULL) {
+		chan_err(d40c, "Cannot free unallocated channel\n");
+		return;
+	}
+
+	spin_lock_irqsave(&d40c->lock, flags);
+
+	err = d40_free_dma(d40c);
+
+	if (err)
+		chan_err(d40c, "Failed to free channel\n");
+	spin_unlock_irqrestore(&d40c->lock, flags);
+}
+
+static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan,
+						       dma_addr_t dst,
+						       dma_addr_t src,
+						       size_t size,
+						       unsigned long dma_flags)
+{
+	struct scatterlist dst_sg;
+	struct scatterlist src_sg;
+
+	sg_init_table(&dst_sg, 1);
+	sg_init_table(&src_sg, 1);
+
+	sg_dma_address(&dst_sg) = dst;
+	sg_dma_address(&src_sg) = src;
+
+	sg_dma_len(&dst_sg) = size;
+	sg_dma_len(&src_sg) = size;
+
+	return d40_prep_sg(chan, &src_sg, &dst_sg, 1,
+			   DMA_MEM_TO_MEM, dma_flags);
+}
+
+static struct dma_async_tx_descriptor *
+d40_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		  unsigned int sg_len, enum dma_transfer_direction direction,
+		  unsigned long dma_flags, void *context)
+{
+	if (!is_slave_direction(direction))
+		return NULL;
+
+	return d40_prep_sg(chan, sgl, sgl, sg_len, direction, dma_flags);
+}
+
+static struct dma_async_tx_descriptor *
+dma40_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t dma_addr,
+		     size_t buf_len, size_t period_len,
+		     enum dma_transfer_direction direction, unsigned long flags)
+{
+	unsigned int periods = buf_len / period_len;
+	struct dma_async_tx_descriptor *txd;
+	struct scatterlist *sg;
+	int i;
+
+	sg = kcalloc(periods + 1, sizeof(struct scatterlist), GFP_NOWAIT);
+	if (!sg)
+		return NULL;
+
+	for (i = 0; i < periods; i++) {
+		sg_dma_address(&sg[i]) = dma_addr;
+		sg_dma_len(&sg[i]) = period_len;
+		dma_addr += period_len;
+	}
+
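+	/*
+	 * Chain the list back onto itself to make it cyclic; d40_prep_sg()
+	 * detects the loop and marks the descriptor as cyclic.
+	 */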
+	sg_chain(sg, periods + 1, sg);
+
+	txd = d40_prep_sg(chan, sg, sg, periods, direction,
+			  DMA_PREP_INTERRUPT);
+
+	kfree(sg);
+
+	return txd;
+}
+
+static enum dma_status d40_tx_status(struct dma_chan *chan,
+				     dma_cookie_t cookie,
+				     struct dma_tx_state *txstate)
+{
+	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
+	enum dma_status ret;
+
+	if (d40c->phy_chan == NULL) {
+		chan_err(d40c, "Cannot read status of unallocated channel\n");
+		return -EINVAL;
+	}
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret != DMA_COMPLETE && txstate)
+		dma_set_residue(txstate, stedma40_residue(chan));
+
+	if (d40_is_paused(d40c))
+		ret = DMA_PAUSED;
+
+	return ret;
+}
+
+static void d40_issue_pending(struct dma_chan *chan)
+{
+	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
+	unsigned long flags;
+
+	if (d40c->phy_chan == NULL) {
+		chan_err(d40c, "Channel is not allocated!\n");
+		return;
+	}
+
+	spin_lock_irqsave(&d40c->lock, flags);
+
+	list_splice_tail_init(&d40c->pending_queue, &d40c->queue);
+
+	/* Busy means that queued jobs are already being processed */
+	if (!d40c->busy)
+		(void) d40_queue_start(d40c);
+
+	spin_unlock_irqrestore(&d40c->lock, flags);
+}
+
+static int d40_terminate_all(struct dma_chan *chan)
+{
+	unsigned long flags;
+	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
+	int ret;
+
+	if (d40c->phy_chan == NULL) {
+		chan_err(d40c, "Channel is not allocated!\n");
+		return -EINVAL;
+	}
+
+	spin_lock_irqsave(&d40c->lock, flags);
+
+	pm_runtime_get_sync(d40c->base->dev);
+	ret = d40_channel_execute_command(d40c, D40_DMA_STOP);
+	if (ret)
+		chan_err(d40c, "Failed to stop channel\n");
+
+	d40_term_all(d40c);
+	pm_runtime_mark_last_busy(d40c->base->dev);
+	pm_runtime_put_autosuspend(d40c->base->dev);
+	if (d40c->busy) {
+		pm_runtime_mark_last_busy(d40c->base->dev);
+		pm_runtime_put_autosuspend(d40c->base->dev);
+	}
+	d40c->busy = false;
+
+	spin_unlock_irqrestore(&d40c->lock, flags);
+	return 0;
+}
+
+static int
+dma40_config_to_halfchannel(struct d40_chan *d40c,
+			    struct stedma40_half_channel_info *info,
+			    u32 maxburst)
+{
+	int psize;
+
+	if (chan_is_logical(d40c)) {
+		if (maxburst >= 16)
+			psize = STEDMA40_PSIZE_LOG_16;
+		else if (maxburst >= 8)
+			psize = STEDMA40_PSIZE_LOG_8;
+		else if (maxburst >= 4)
+			psize = STEDMA40_PSIZE_LOG_4;
+		else
+			psize = STEDMA40_PSIZE_LOG_1;
+	} else {
+		if (maxburst >= 16)
+			psize = STEDMA40_PSIZE_PHY_16;
+		else if (maxburst >= 8)
+			psize = STEDMA40_PSIZE_PHY_8;
+		else if (maxburst >= 4)
+			psize = STEDMA40_PSIZE_PHY_4;
+		else
+			psize = STEDMA40_PSIZE_PHY_1;
+	}
+
+	info->psize = psize;
+	info->flow_ctrl = STEDMA40_NO_FLOW_CTRL;
+
+	return 0;
+}
+
+/* Runtime reconfiguration extension */
+static int d40_set_runtime_config(struct dma_chan *chan,
+				  struct dma_slave_config *config)
+{
+	struct d40_chan *d40c = container_of(chan, struct d40_chan, chan);
+	struct stedma40_chan_cfg *cfg = &d40c->dma_cfg;
+	enum dma_slave_buswidth src_addr_width, dst_addr_width;
+	dma_addr_t config_addr;
+	u32 src_maxburst, dst_maxburst;
+	int ret;
+
+	if (d40c->phy_chan == NULL) {
+		chan_err(d40c, "Channel is not allocated!\n");
+		return -EINVAL;
+	}
+
+	src_addr_width = config->src_addr_width;
+	src_maxburst = config->src_maxburst;
+	dst_addr_width = config->dst_addr_width;
+	dst_maxburst = config->dst_maxburst;
+
+	if (config->direction == DMA_DEV_TO_MEM) {
+		config_addr = config->src_addr;
+
+		if (cfg->dir != DMA_DEV_TO_MEM)
+			dev_dbg(d40c->base->dev,
+				"channel was not configured for peripheral "
+				"to memory transfer (%d) overriding\n",
+				cfg->dir);
+		cfg->dir = DMA_DEV_TO_MEM;
+
+		/* Configure the memory side */
+		if (dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED)
+			dst_addr_width = src_addr_width;
+		if (dst_maxburst == 0)
+			dst_maxburst = src_maxburst;
+
+	} else if (config->direction == DMA_MEM_TO_DEV) {
+		config_addr = config->dst_addr;
+
+		if (cfg->dir != DMA_MEM_TO_DEV)
+			dev_dbg(d40c->base->dev,
+				"channel was not configured for memory "
+				"to peripheral transfer (%d) overriding\n",
+				cfg->dir);
+		cfg->dir = DMA_MEM_TO_DEV;
+
+		/* Configure the memory side */
+		if (src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED)
+			src_addr_width = dst_addr_width;
+		if (src_maxburst == 0)
+			src_maxburst = dst_maxburst;
+	} else {
+		dev_err(d40c->base->dev,
+			"unrecognized channel direction %d\n",
+			config->direction);
+		return -EINVAL;
+	}
+
+	if (config_addr <= 0) {
+		dev_err(d40c->base->dev, "no address supplied\n");
+		return -EINVAL;
+	}
+
+	if (src_maxburst * src_addr_width != dst_maxburst * dst_addr_width) {
+		dev_err(d40c->base->dev,
+			"src/dst width/maxburst mismatch: %d*%d != %d*%d\n",
+			src_maxburst,
+			src_addr_width,
+			dst_maxburst,
+			dst_addr_width);
+		return -EINVAL;
+	}
+
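+	/*
+	 * The hardware supports bursts of at most 16 elements; clamp the
+	 * larger side and rescale the other so the number of bytes per
+	 * burst stays equal on the src and dst sides.
+	 */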
+	if (src_maxburst > 16) {
+		src_maxburst = 16;
+		dst_maxburst = src_maxburst * src_addr_width / dst_addr_width;
+	} else if (dst_maxburst > 16) {
+		dst_maxburst = 16;
+		src_maxburst = dst_maxburst * dst_addr_width / src_addr_width;
+	}
+
+	/* The only valid widths are: 1, 2, 4 and 8. */
+	if (src_addr_width <= DMA_SLAVE_BUSWIDTH_UNDEFINED ||
+	    src_addr_width >  DMA_SLAVE_BUSWIDTH_8_BYTES   ||
+	    dst_addr_width <= DMA_SLAVE_BUSWIDTH_UNDEFINED ||
+	    dst_addr_width >  DMA_SLAVE_BUSWIDTH_8_BYTES   ||
+	    !is_power_of_2(src_addr_width) ||
+	    !is_power_of_2(dst_addr_width))
+		return -EINVAL;
+
+	cfg->src_info.data_width = src_addr_width;
+	cfg->dst_info.data_width = dst_addr_width;
+
+	ret = dma40_config_to_halfchannel(d40c, &cfg->src_info,
+					  src_maxburst);
+	if (ret)
+		return ret;
+
+	ret = dma40_config_to_halfchannel(d40c, &cfg->dst_info,
+					  dst_maxburst);
+	if (ret)
+		return ret;
+
+	/* Fill in register values */
+	if (chan_is_logical(d40c))
+		d40_log_cfg(cfg, &d40c->log_def.lcsp1, &d40c->log_def.lcsp3);
+	else
+		d40_phy_cfg(cfg, &d40c->src_def_cfg, &d40c->dst_def_cfg);
+
+	/* These settings will take precedence later */
+	d40c->runtime_addr = config_addr;
+	d40c->runtime_direction = config->direction;
+	dev_dbg(d40c->base->dev,
+		"configured channel %s for %s, data width %d/%d, "
+		"maxburst %d/%d elements, LE, no flow control\n",
+		dma_chan_name(chan),
+		(config->direction == DMA_DEV_TO_MEM) ? "RX" : "TX",
+		src_addr_width, dst_addr_width,
+		src_maxburst, dst_maxburst);
+
+	return 0;
+}
+
+/* Initialization functions */
+
+static void __init d40_chan_init(struct d40_base *base, struct dma_device *dma,
+				 struct d40_chan *chans, int offset,
+				 int num_chans)
+{
+	int i = 0;
+	struct d40_chan *d40c;
+
+	INIT_LIST_HEAD(&dma->channels);
+
+	for (i = offset; i < offset + num_chans; i++) {
+		d40c = &chans[i];
+		d40c->base = base;
+		d40c->chan.device = dma;
+
+		spin_lock_init(&d40c->lock);
+
+		d40c->log_num = D40_PHY_CHAN;
+
+		INIT_LIST_HEAD(&d40c->done);
+		INIT_LIST_HEAD(&d40c->active);
+		INIT_LIST_HEAD(&d40c->queue);
+		INIT_LIST_HEAD(&d40c->pending_queue);
+		INIT_LIST_HEAD(&d40c->client);
+		INIT_LIST_HEAD(&d40c->prepare_queue);
+
+		tasklet_init(&d40c->tasklet, dma_tasklet,
+			     (unsigned long) d40c);
+
+		list_add_tail(&d40c->chan.device_node,
+			      &dma->channels);
+	}
+}
+
+static void d40_ops_init(struct d40_base *base, struct dma_device *dev)
+{
+	if (dma_has_cap(DMA_SLAVE, dev->cap_mask)) {
+		dev->device_prep_slave_sg = d40_prep_slave_sg;
+		dev->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	}
+
+	if (dma_has_cap(DMA_MEMCPY, dev->cap_mask)) {
+		dev->device_prep_dma_memcpy = d40_prep_memcpy;
+		dev->directions = BIT(DMA_MEM_TO_MEM);
+		/*
+		 * This controller can only access addresses at even
+		 * 32-bit boundaries, i.e. 2^2.
+		 */
+		dev->copy_align = DMAENGINE_ALIGN_4_BYTES;
+	}
+
+	if (dma_has_cap(DMA_CYCLIC, dev->cap_mask))
+		dev->device_prep_dma_cyclic = dma40_prep_dma_cyclic;
+
+	dev->device_alloc_chan_resources = d40_alloc_chan_resources;
+	dev->device_free_chan_resources = d40_free_chan_resources;
+	dev->device_issue_pending = d40_issue_pending;
+	dev->device_tx_status = d40_tx_status;
+	dev->device_config = d40_set_runtime_config;
+	dev->device_pause = d40_pause;
+	dev->device_resume = d40_resume;
+	dev->device_terminate_all = d40_terminate_all;
+	dev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+	dev->dev = base->dev;
+}
+
+static int __init d40_dmaengine_init(struct d40_base *base,
+				     int num_reserved_chans)
+{
+	int err;
+
+	d40_chan_init(base, &base->dma_slave, base->log_chans,
+		      0, base->num_log_chans);
+
+	dma_cap_zero(base->dma_slave.cap_mask);
+	dma_cap_set(DMA_SLAVE, base->dma_slave.cap_mask);
+	dma_cap_set(DMA_CYCLIC, base->dma_slave.cap_mask);
+
+	d40_ops_init(base, &base->dma_slave);
+
+	err = dma_async_device_register(&base->dma_slave);
+
+	if (err) {
+		d40_err(base->dev, "Failed to register slave channels\n");
+		goto exit;
+	}
+
+	d40_chan_init(base, &base->dma_memcpy, base->log_chans,
+		      base->num_log_chans, base->num_memcpy_chans);
+
+	dma_cap_zero(base->dma_memcpy.cap_mask);
+	dma_cap_set(DMA_MEMCPY, base->dma_memcpy.cap_mask);
+
+	d40_ops_init(base, &base->dma_memcpy);
+
+	err = dma_async_device_register(&base->dma_memcpy);
+
+	if (err) {
+		d40_err(base->dev,
+			"Failed to register memcpy only channels\n");
+		goto unregister_slave;
+	}
+
+	d40_chan_init(base, &base->dma_both, base->phy_chans,
+		      0, num_reserved_chans);
+
+	dma_cap_zero(base->dma_both.cap_mask);
+	dma_cap_set(DMA_SLAVE, base->dma_both.cap_mask);
+	dma_cap_set(DMA_MEMCPY, base->dma_both.cap_mask);
+	dma_cap_set(DMA_CYCLIC, base->dma_slave.cap_mask);
+
+	d40_ops_init(base, &base->dma_both);
+	err = dma_async_device_register(&base->dma_both);
+
+	if (err) {
+		d40_err(base->dev,
+			"Failed to register logical and physical capable channels\n");
+		goto unregister_memcpy;
+	}
+	return 0;
+ unregister_memcpy:
+	dma_async_device_unregister(&base->dma_memcpy);
+ unregister_slave:
+	dma_async_device_unregister(&base->dma_slave);
+ exit:
+	return err;
+}
+
+/* Suspend resume functionality */
+#ifdef CONFIG_PM_SLEEP
+static int dma40_suspend(struct device *dev)
+{
+	struct d40_base *base = dev_get_drvdata(dev);
+	int ret;
+
+	ret = pm_runtime_force_suspend(dev);
+	if (ret)
+		return ret;
+
+	if (base->lcpa_regulator)
+		ret = regulator_disable(base->lcpa_regulator);
+	return ret;
+}
+
+static int dma40_resume(struct device *dev)
+{
+	struct d40_base *base = dev_get_drvdata(dev);
+	int ret = 0;
+
+	if (base->lcpa_regulator) {
+		ret = regulator_enable(base->lcpa_regulator);
+		if (ret)
+			return ret;
+	}
+
+	return pm_runtime_force_resume(dev);
+}
+#endif
+
+#ifdef CONFIG_PM
+static void dma40_backup(void __iomem *baseaddr, u32 *backup,
+			 u32 *regaddr, int num, bool save)
+{
+	int i;
+
+	for (i = 0; i < num; i++) {
+		void __iomem *addr = baseaddr + regaddr[i];
+
+		if (save)
+			backup[i] = readl_relaxed(addr);
+		else
+			writel_relaxed(backup[i], addr);
+	}
+}
+
+static void d40_save_restore_registers(struct d40_base *base, bool save)
+{
+	int i;
+
+	/* Save/Restore channel specific registers */
+	for (i = 0; i < base->num_phy_chans; i++) {
+		void __iomem *addr;
+		int idx;
+
+		if (base->phy_res[i].reserved)
+			continue;
+
+		addr = base->virtbase + D40_DREG_PCBASE + i * D40_DREG_PCDELTA;
+		idx = i * ARRAY_SIZE(d40_backup_regs_chan);
+
+		dma40_backup(addr, &base->reg_val_backup_chan[idx],
+			     d40_backup_regs_chan,
+			     ARRAY_SIZE(d40_backup_regs_chan),
+			     save);
+	}
+
+	/* Save/Restore global registers */
+	dma40_backup(base->virtbase, base->reg_val_backup,
+		     d40_backup_regs, ARRAY_SIZE(d40_backup_regs),
+		     save);
+
+	/* Save/Restore registers that only exist on dma40 v3 and later */
+	if (base->gen_dmac.backup)
+		dma40_backup(base->virtbase, base->reg_val_backup_v4,
+			     base->gen_dmac.backup,
+			base->gen_dmac.backup_size,
+			save);
+}
+
+static int dma40_runtime_suspend(struct device *dev)
+{
+	struct d40_base *base = dev_get_drvdata(dev);
+
+	d40_save_restore_registers(base, true);
+
+	/* Don't disable/enable clocks for v1 due to HW bugs */
+	if (base->rev != 1)
+		writel_relaxed(base->gcc_pwr_off_mask,
+			       base->virtbase + D40_DREG_GCC);
+
+	return 0;
+}
+
+static int dma40_runtime_resume(struct device *dev)
+{
+	struct d40_base *base = dev_get_drvdata(dev);
+
+	d40_save_restore_registers(base, false);
+
+	writel_relaxed(D40_DREG_GCC_ENABLE_ALL,
+		       base->virtbase + D40_DREG_GCC);
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops dma40_pm_ops = {
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(dma40_suspend, dma40_resume)
+	SET_RUNTIME_PM_OPS(dma40_runtime_suspend,
+				dma40_runtime_resume,
+				NULL)
+};
+
+/* Initialization functions. */
+
+static int __init d40_phy_res_init(struct d40_base *base)
+{
+	int i;
+	int num_phy_chans_avail = 0;
+	u32 val[2];
+	int odd_even_bit = -2;
+	int gcc = D40_DREG_GCC_ENA;
+
+	val[0] = readl(base->virtbase + D40_DREG_PRSME);
+	val[1] = readl(base->virtbase + D40_DREG_PRSMO);
+
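+	/*
+	 * PRSME holds the 2-bit security mode fields for the even physical
+	 * channels, PRSMO the fields for the odd ones.
+	 */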
+	for (i = 0; i < base->num_phy_chans; i++) {
+		base->phy_res[i].num = i;
+		odd_even_bit += 2 * ((i % 2) == 0);
+		if (((val[i % 2] >> odd_even_bit) & 3) == 1) {
+			/* Mark security-only channels as occupied */
+			base->phy_res[i].allocated_src = D40_ALLOC_PHY;
+			base->phy_res[i].allocated_dst = D40_ALLOC_PHY;
+			base->phy_res[i].reserved = true;
+			gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(i),
+						       D40_DREG_GCC_SRC);
+			gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(i),
+						       D40_DREG_GCC_DST);
+
+
+		} else {
+			base->phy_res[i].allocated_src = D40_ALLOC_FREE;
+			base->phy_res[i].allocated_dst = D40_ALLOC_FREE;
+			base->phy_res[i].reserved = false;
+			num_phy_chans_avail++;
+		}
+		spin_lock_init(&base->phy_res[i].lock);
+	}
+
+	/* Mark disabled channels as occupied */
+	for (i = 0; base->plat_data->disabled_channels[i] != -1; i++) {
+		int chan = base->plat_data->disabled_channels[i];
+
+		base->phy_res[chan].allocated_src = D40_ALLOC_PHY;
+		base->phy_res[chan].allocated_dst = D40_ALLOC_PHY;
+		base->phy_res[chan].reserved = true;
+		gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(chan),
+					       D40_DREG_GCC_SRC);
+		gcc |= D40_DREG_GCC_EVTGRP_ENA(D40_PHYS_TO_GROUP(chan),
+					       D40_DREG_GCC_DST);
+		num_phy_chans_avail--;
+	}
+
+	/* Mark soft_lli channels */
+	for (i = 0; i < base->plat_data->num_of_soft_lli_chans; i++) {
+		int chan = base->plat_data->soft_lli_chans[i];
+
+		base->phy_res[chan].use_soft_lli = true;
+	}
+
+	dev_info(base->dev, "%d of %d physical DMA channels available\n",
+		 num_phy_chans_avail, base->num_phy_chans);
+
+	/* Verify settings extended vs standard */
+	val[0] = readl(base->virtbase + D40_DREG_PRTYP);
+
+	for (i = 0; i < base->num_phy_chans; i++) {
+
+		if (base->phy_res[i].allocated_src == D40_ALLOC_FREE &&
+		    (val[0] & 0x3) != 1)
+			dev_info(base->dev,
+				 "[%s] INFO: channel %d is misconfigured (%d)\n",
+				 __func__, i, val[0] & 0x3);
+
+		val[0] = val[0] >> 2;
+	}
+
+	/*
+	 * To keep things simple, enable all clocks initially.  The clocks
+	 * will be managed later, after channel allocation.  The clocks for
+	 * the event lines on which reserved channels exist are not managed
+	 * here.
+	 */
+	writel(D40_DREG_GCC_ENABLE_ALL, base->virtbase + D40_DREG_GCC);
+	base->gcc_pwr_off_mask = gcc;
+
+	return num_phy_chans_avail;
+}
+
+static struct d40_base * __init d40_hw_detect_init(struct platform_device *pdev)
+{
+	struct stedma40_platform_data *plat_data = dev_get_platdata(&pdev->dev);
+	struct clk *clk;
+	void __iomem *virtbase;
+	struct resource *res;
+	struct d40_base *base;
+	int num_log_chans;
+	int num_phy_chans;
+	int num_memcpy_chans;
+	int clk_ret = -EINVAL;
+	int i;
+	u32 pid;
+	u32 cid;
+	u8 rev;
+
+	clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(clk)) {
+		d40_err(&pdev->dev, "No matching clock found\n");
+		goto check_prepare_enabled;
+	}
+
+	clk_ret = clk_prepare_enable(clk);
+	if (clk_ret) {
+		d40_err(&pdev->dev, "Failed to prepare/enable clock\n");
+		goto disable_unprepare;
+	}
+
+	/* Get IO for DMAC base address */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "base");
+	if (!res)
+		goto disable_unprepare;
+
+	if (request_mem_region(res->start, resource_size(res),
+			       D40_NAME " I/O base") == NULL)
+		goto release_region;
+
+	virtbase = ioremap(res->start, resource_size(res));
+	if (!virtbase)
+		goto release_region;
+
+	/* This is just a regular AMBA PrimeCell ID actually */
+	for (pid = 0, i = 0; i < 4; i++)
+		pid |= (readl(virtbase + resource_size(res) - 0x20 + 4 * i)
+			& 255) << (i * 8);
+	for (cid = 0, i = 0; i < 4; i++)
+		cid |= (readl(virtbase + resource_size(res) - 0x10 + 4 * i)
+			& 255) << (i * 8);
+
+	if (cid != AMBA_CID) {
+		d40_err(&pdev->dev, "Unknown hardware! No PrimeCell ID\n");
+		goto unmap_io;
+	}
+	if (AMBA_MANF_BITS(pid) != AMBA_VENDOR_ST) {
+		d40_err(&pdev->dev, "Unknown designer! Got %x wanted %x\n",
+			AMBA_MANF_BITS(pid),
+			AMBA_VENDOR_ST);
+		goto unmap_io;
+	}
+	/*
+	 * HW revision:
+	 * DB8500ed has revision 0
+	 * ? has revision 1
+	 * DB8500v1 has revision 2
+	 * DB8500v2 has revision 3
+	 * AP9540v1 has revision 4
+	 * DB8540v1 has revision 4
+	 */
+	rev = AMBA_REV_BITS(pid);
+	if (rev < 2) {
+		d40_err(&pdev->dev, "hardware revision: %d is not supported", rev);
+		goto unmap_io;
+	}
+
+	/* The number of physical channels on this HW */
+	if (plat_data->num_of_phy_chans)
+		num_phy_chans = plat_data->num_of_phy_chans;
+	else
+		num_phy_chans = 4 * (readl(virtbase + D40_DREG_ICFG) & 0x7) + 4;
+
+	/* The number of channels used for memcpy */
+	if (plat_data->num_of_memcpy_chans)
+		num_memcpy_chans = plat_data->num_of_memcpy_chans;
+	else
+		num_memcpy_chans = ARRAY_SIZE(dma40_memcpy_channels);
+
+	num_log_chans = num_phy_chans * D40_MAX_LOG_CHAN_PER_PHY;
+
+	dev_info(&pdev->dev,
+		 "hardware rev: %d @ %pa with %d physical and %d logical channels\n",
+		 rev, &res->start, num_phy_chans, num_log_chans);
+
+	base = kzalloc(ALIGN(sizeof(struct d40_base), 4) +
+		       (num_phy_chans + num_log_chans + num_memcpy_chans) *
+		       sizeof(struct d40_chan), GFP_KERNEL);
+
+	if (base == NULL)
+		goto unmap_io;
+
+	base->rev = rev;
+	base->clk = clk;
+	base->num_memcpy_chans = num_memcpy_chans;
+	base->num_phy_chans = num_phy_chans;
+	base->num_log_chans = num_log_chans;
+	base->phy_start = res->start;
+	base->phy_size = resource_size(res);
+	base->virtbase = virtbase;
+	base->plat_data = plat_data;
+	base->dev = &pdev->dev;
+	base->phy_chans = ((void *)base) + ALIGN(sizeof(struct d40_base), 4);
+	base->log_chans = &base->phy_chans[num_phy_chans];
+
+	if (base->plat_data->num_of_phy_chans == 14) {
+		base->gen_dmac.backup = d40_backup_regs_v4b;
+		base->gen_dmac.backup_size = BACKUP_REGS_SZ_V4B;
+		base->gen_dmac.interrupt_en = D40_DREG_CPCMIS;
+		base->gen_dmac.interrupt_clear = D40_DREG_CPCICR;
+		base->gen_dmac.realtime_en = D40_DREG_CRSEG1;
+		base->gen_dmac.realtime_clear = D40_DREG_CRCEG1;
+		base->gen_dmac.high_prio_en = D40_DREG_CPSEG1;
+		base->gen_dmac.high_prio_clear = D40_DREG_CPCEG1;
+		base->gen_dmac.il = il_v4b;
+		base->gen_dmac.il_size = ARRAY_SIZE(il_v4b);
+		base->gen_dmac.init_reg = dma_init_reg_v4b;
+		base->gen_dmac.init_reg_size = ARRAY_SIZE(dma_init_reg_v4b);
+	} else {
+		if (base->rev >= 3) {
+			base->gen_dmac.backup = d40_backup_regs_v4a;
+			base->gen_dmac.backup_size = BACKUP_REGS_SZ_V4A;
+		}
+		base->gen_dmac.interrupt_en = D40_DREG_PCMIS;
+		base->gen_dmac.interrupt_clear = D40_DREG_PCICR;
+		base->gen_dmac.realtime_en = D40_DREG_RSEG1;
+		base->gen_dmac.realtime_clear = D40_DREG_RCEG1;
+		base->gen_dmac.high_prio_en = D40_DREG_PSEG1;
+		base->gen_dmac.high_prio_clear = D40_DREG_PCEG1;
+		base->gen_dmac.il = il_v4a;
+		base->gen_dmac.il_size = ARRAY_SIZE(il_v4a);
+		base->gen_dmac.init_reg = dma_init_reg_v4a;
+		base->gen_dmac.init_reg_size = ARRAY_SIZE(dma_init_reg_v4a);
+	}
+
+	base->phy_res = kcalloc(num_phy_chans,
+				sizeof(*base->phy_res),
+				GFP_KERNEL);
+	if (!base->phy_res)
+		goto free_base;
+
+	base->lookup_phy_chans = kcalloc(num_phy_chans,
+					 sizeof(*base->lookup_phy_chans),
+					 GFP_KERNEL);
+	if (!base->lookup_phy_chans)
+		goto free_phy_res;
+
+	base->lookup_log_chans = kcalloc(num_log_chans,
+					 sizeof(*base->lookup_log_chans),
+					 GFP_KERNEL);
+	if (!base->lookup_log_chans)
+		goto free_phy_chans;
+
+	base->reg_val_backup_chan = kmalloc_array(base->num_phy_chans,
+						  sizeof(d40_backup_regs_chan),
+						  GFP_KERNEL);
+	if (!base->reg_val_backup_chan)
+		goto free_log_chans;
+
+	base->lcla_pool.alloc_map = kcalloc(num_phy_chans
+					    * D40_LCLA_LINK_PER_EVENT_GRP,
+					    sizeof(*base->lcla_pool.alloc_map),
+					    GFP_KERNEL);
+	if (!base->lcla_pool.alloc_map)
+		goto free_backup_chan;
+
+	base->regs_interrupt = kmalloc_array(base->gen_dmac.il_size,
+					     sizeof(*base->regs_interrupt),
+					     GFP_KERNEL);
+	if (!base->regs_interrupt)
+		goto free_map;
+
+	base->desc_slab = kmem_cache_create(D40_NAME, sizeof(struct d40_desc),
+					    0, SLAB_HWCACHE_ALIGN,
+					    NULL);
+	if (base->desc_slab == NULL)
+		goto free_regs;
+
+	return base;
+ free_regs:
+	kfree(base->regs_interrupt);
+ free_map:
+	kfree(base->lcla_pool.alloc_map);
+ free_backup_chan:
+	kfree(base->reg_val_backup_chan);
+ free_log_chans:
+	kfree(base->lookup_log_chans);
+ free_phy_chans:
+	kfree(base->lookup_phy_chans);
+ free_phy_res:
+	kfree(base->phy_res);
+ free_base:
+	kfree(base);
+ unmap_io:
+	iounmap(virtbase);
+ release_region:
+	release_mem_region(res->start, resource_size(res));
+ check_prepare_enabled:
+	if (!clk_ret)
+ disable_unprepare:
+		clk_disable_unprepare(clk);
+	if (!IS_ERR(clk))
+		clk_put(clk);
+	return NULL;
+}
+
+static void __init d40_hw_init(struct d40_base *base)
+{
+
+	int i;
+	u32 prmseo[2] = {0, 0};
+	u32 activeo[2] = {0xFFFFFFFF, 0xFFFFFFFF};
+	u32 pcmis = 0;
+	u32 pcicr = 0;
+	struct d40_reg_val *dma_init_reg = base->gen_dmac.init_reg;
+	u32 reg_size = base->gen_dmac.init_reg_size;
+
+	for (i = 0; i < reg_size; i++)
+		writel(dma_init_reg[i].val,
+		       base->virtbase + dma_init_reg[i].reg);
+
+	/* Configure all our dma channels to default settings */
+	for (i = 0; i < base->num_phy_chans; i++) {
+
+		activeo[i % 2] = activeo[i % 2] << 2;
+
+		if (base->phy_res[base->num_phy_chans - i - 1].allocated_src
+		    == D40_ALLOC_PHY) {
+			activeo[i % 2] |= 3;
+			continue;
+		}
+
+		/* Enable the interrupt for this physical channel */
+		pcmis = (pcmis << 1) | 1;
+
+		/* Clear any pending interrupt for this physical channel */
+		pcicr = (pcicr << 1) | 1;
+
+		/* Set channel to physical mode */
+		prmseo[i % 2] = prmseo[i % 2] << 2;
+		prmseo[i % 2] |= 1;
+
+	}
+
+	writel(prmseo[1], base->virtbase + D40_DREG_PRMSE);
+	writel(prmseo[0], base->virtbase + D40_DREG_PRMSO);
+	writel(activeo[1], base->virtbase + D40_DREG_ACTIVE);
+	writel(activeo[0], base->virtbase + D40_DREG_ACTIVO);
+
+	/* Write which interrupt to enable */
+	writel(pcmis, base->virtbase + base->gen_dmac.interrupt_en);
+
+	/* Write which interrupt to clear */
+	writel(pcicr, base->virtbase + base->gen_dmac.interrupt_clear);
+
+	/* These are __initdata and cannot be accessed after init */
+	base->gen_dmac.init_reg = NULL;
+	base->gen_dmac.init_reg_size = 0;
+}
+
+static int __init d40_lcla_allocate(struct d40_base *base)
+{
+	struct d40_lcla_pool *pool = &base->lcla_pool;
+	unsigned long *page_list;
+	int i, j;
+	int ret;
+
+	/*
+	 * This is somewhat ugly. We need 8192 bytes that are 18-bit aligned.
+	 * To fulfill this hardware requirement without wasting 256 KiB,
+	 * we allocate pages until we get an aligned one.
+	 */
+	page_list = kmalloc_array(MAX_LCLA_ALLOC_ATTEMPTS,
+				  sizeof(*page_list),
+				  GFP_KERNEL);
+	if (!page_list)
+		return -ENOMEM;
+
+	/* Calculate how many pages are required */
+	base->lcla_pool.pages = SZ_1K * base->num_phy_chans / PAGE_SIZE;
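+	/*
+	 * Note that this value is passed to __get_free_pages()/free_pages()
+	 * below as an allocation order: e.g. 8 physical channels with 4 KiB
+	 * pages give order 2, a 16 KiB allocation covering the 8 KiB LCLA area.
+	 */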
+
+	for (i = 0; i < MAX_LCLA_ALLOC_ATTEMPTS; i++) {
+		page_list[i] = __get_free_pages(GFP_KERNEL,
+						base->lcla_pool.pages);
+		if (!page_list[i]) {
+
+			d40_err(base->dev, "Failed to allocate %d pages.\n",
+				base->lcla_pool.pages);
+			ret = -ENOMEM;
+
+			for (j = 0; j < i; j++)
+				free_pages(page_list[j], base->lcla_pool.pages);
+			goto free_page_list;
+		}
+
+		if ((virt_to_phys((void *)page_list[i]) &
+		     (LCLA_ALIGNMENT - 1)) == 0)
+			break;
+	}
+
+	for (j = 0; j < i; j++)
+		free_pages(page_list[j], base->lcla_pool.pages);
+
+	if (i < MAX_LCLA_ALLOC_ATTEMPTS) {
+		base->lcla_pool.base = (void *)page_list[i];
+	} else {
+		/*
+		 * After many attempts with no success finding the correct
+		 * alignment, fall back to allocating one big buffer.
+		 */
+		dev_warn(base->dev,
+			 "[%s] Failed to get %d pages @ 18 bit align.\n",
+			 __func__, base->lcla_pool.pages);
+		base->lcla_pool.base_unaligned = kmalloc(SZ_1K *
+							 base->num_phy_chans +
+							 LCLA_ALIGNMENT,
+							 GFP_KERNEL);
+		if (!base->lcla_pool.base_unaligned) {
+			ret = -ENOMEM;
+			goto free_page_list;
+		}
+
+		base->lcla_pool.base = PTR_ALIGN(base->lcla_pool.base_unaligned,
+						 LCLA_ALIGNMENT);
+	}
+
+	pool->dma_addr = dma_map_single(base->dev, pool->base,
+					SZ_1K * base->num_phy_chans,
+					DMA_TO_DEVICE);
+	if (dma_mapping_error(base->dev, pool->dma_addr)) {
+		pool->dma_addr = 0;
+		ret = -ENOMEM;
+		goto free_page_list;
+	}
+
+	writel(virt_to_phys(base->lcla_pool.base),
+	       base->virtbase + D40_DREG_LCLA);
+	ret = 0;
+ free_page_list:
+	kfree(page_list);
+	return ret;
+}
+
+static int __init d40_of_probe(struct platform_device *pdev,
+			       struct device_node *np)
+{
+	struct stedma40_platform_data *pdata;
+	int num_phy = 0, num_memcpy = 0, num_disabled = 0;
+	const __be32 *list;
+
+	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
+
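+	/*
+	 * The properties parsed below follow the "stericsson,dma40" binding.
+	 * An illustrative (hypothetical) node might contain:
+	 *	dma-channels = <8>;
+	 *	memcpy-channels = <56 57 58 59 60>;
+	 *	disabled-channels = <12>;
+	 */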
+	/* If absent, this value will be obtained from the hardware. */
+	of_property_read_u32(np, "dma-channels", &num_phy);
+	if (num_phy > 0)
+		pdata->num_of_phy_chans = num_phy;
+
+	list = of_get_property(np, "memcpy-channels", &num_memcpy);
+	num_memcpy /= sizeof(*list);
+
+	if (num_memcpy > D40_MEMCPY_MAX_CHANS || num_memcpy <= 0) {
+		d40_err(&pdev->dev,
+			"Invalid number of memcpy channels specified (%d)\n",
+			num_memcpy);
+		return -EINVAL;
+	}
+	pdata->num_of_memcpy_chans = num_memcpy;
+
+	of_property_read_u32_array(np, "memcpy-channels",
+				   dma40_memcpy_channels,
+				   num_memcpy);
+
+	list = of_get_property(np, "disabled-channels", &num_disabled);
+	num_disabled /= sizeof(*list);
+
+	if (num_disabled >= STEDMA40_MAX_PHYS || num_disabled < 0) {
+		d40_err(&pdev->dev,
+			"Invalid number of disabled channels specified (%d)\n",
+			num_disabled);
+		return -EINVAL;
+	}
+
+	of_property_read_u32_array(np, "disabled-channels",
+				   pdata->disabled_channels,
+				   num_disabled);
+	pdata->disabled_channels[num_disabled] = -1;
+
+	pdev->dev.platform_data = pdata;
+
+	return 0;
+}
+
+static int __init d40_probe(struct platform_device *pdev)
+{
+	struct stedma40_platform_data *plat_data = dev_get_platdata(&pdev->dev);
+	struct device_node *np = pdev->dev.of_node;
+	int ret = -ENOENT;
+	struct d40_base *base;
+	struct resource *res;
+	int num_reserved_chans;
+	u32 val;
+
+	if (!plat_data) {
+		if (np) {
+			if (d40_of_probe(pdev, np)) {
+				ret = -ENOMEM;
+				goto report_failure;
+			}
+		} else {
+			d40_err(&pdev->dev, "No pdata or Device Tree provided\n");
+			goto report_failure;
+		}
+	}
+
+	base = d40_hw_detect_init(pdev);
+	if (!base)
+		goto report_failure;
+
+	num_reserved_chans = d40_phy_res_init(base);
+
+	platform_set_drvdata(pdev, base);
+
+	spin_lock_init(&base->interrupt_lock);
+	spin_lock_init(&base->execmd_lock);
+
+	/* Get IO for logical channel parameter address */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "lcpa");
+	if (!res) {
+		ret = -ENOENT;
+		d40_err(&pdev->dev, "No \"lcpa\" memory resource\n");
+		goto destroy_cache;
+	}
+	base->lcpa_size = resource_size(res);
+	base->phy_lcpa = res->start;
+
+	if (request_mem_region(res->start, resource_size(res),
+			       D40_NAME " I/O lcpa") == NULL) {
+		ret = -EBUSY;
+		d40_err(&pdev->dev, "Failed to request LCPA region %pR\n", res);
+		goto destroy_cache;
+	}
+
+	/* We make use of ESRAM memory for this. */
+	val = readl(base->virtbase + D40_DREG_LCPA);
+	if (res->start != val && val != 0) {
+		dev_warn(&pdev->dev,
+			 "[%s] Mismatch LCPA dma 0x%x, def %pa\n",
+			 __func__, val, &res->start);
+	} else {
+		writel(res->start, base->virtbase + D40_DREG_LCPA);
+	}
+
+	base->lcpa_base = ioremap(res->start, resource_size(res));
+	if (!base->lcpa_base) {
+		ret = -ENOMEM;
+		d40_err(&pdev->dev, "Failed to ioremap LCPA region\n");
+		goto destroy_cache;
+	}
+	/* If lcla has to be located in ESRAM we don't need to allocate */
+	if (base->plat_data->use_esram_lcla) {
+		res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+							"lcla_esram");
+		if (!res) {
+			ret = -ENOENT;
+			d40_err(&pdev->dev,
+				"No \"lcla_esram\" memory resource\n");
+			goto destroy_cache;
+		}
+		base->lcla_pool.base = ioremap(res->start,
+						resource_size(res));
+		if (!base->lcla_pool.base) {
+			ret = -ENOMEM;
+			d40_err(&pdev->dev, "Failed to ioremap LCLA region\n");
+			goto destroy_cache;
+		}
+		writel(res->start, base->virtbase + D40_DREG_LCLA);
+
+	} else {
+		ret = d40_lcla_allocate(base);
+		if (ret) {
+			d40_err(&pdev->dev, "Failed to allocate LCLA area\n");
+			goto destroy_cache;
+		}
+	}
+
+	spin_lock_init(&base->lcla_pool.lock);
+
+	base->irq = platform_get_irq(pdev, 0);
+
+	ret = request_irq(base->irq, d40_handle_interrupt, 0, D40_NAME, base);
+	if (ret) {
+		d40_err(&pdev->dev, "No IRQ defined\n");
+		goto destroy_cache;
+	}
+
+	if (base->plat_data->use_esram_lcla) {
+
+		base->lcpa_regulator = regulator_get(base->dev, "lcla_esram");
+		if (IS_ERR(base->lcpa_regulator)) {
+			d40_err(&pdev->dev, "Failed to get lcpa_regulator\n");
+			ret = PTR_ERR(base->lcpa_regulator);
+			base->lcpa_regulator = NULL;
+			goto destroy_cache;
+		}
+
+		ret = regulator_enable(base->lcpa_regulator);
+		if (ret) {
+			d40_err(&pdev->dev,
+				"Failed to enable lcpa_regulator\n");
+			regulator_put(base->lcpa_regulator);
+			base->lcpa_regulator = NULL;
+			goto destroy_cache;
+		}
+	}
+
+	writel_relaxed(D40_DREG_GCC_ENABLE_ALL, base->virtbase + D40_DREG_GCC);
+
+	pm_runtime_irq_safe(base->dev);
+	pm_runtime_set_autosuspend_delay(base->dev, DMA40_AUTOSUSPEND_DELAY);
+	pm_runtime_use_autosuspend(base->dev);
+	pm_runtime_mark_last_busy(base->dev);
+	pm_runtime_set_active(base->dev);
+	pm_runtime_enable(base->dev);
+
+	ret = d40_dmaengine_init(base, num_reserved_chans);
+	if (ret)
+		goto destroy_cache;
+
+	base->dev->dma_parms = &base->dma_parms;
+	ret = dma_set_max_seg_size(base->dev, STEDMA40_MAX_SEG_SIZE);
+	if (ret) {
+		d40_err(&pdev->dev, "Failed to set dma max seg size\n");
+		goto destroy_cache;
+	}
+
+	d40_hw_init(base);
+
+	if (np) {
+		ret = of_dma_controller_register(np, d40_xlate, NULL);
+		if (ret)
+			dev_err(&pdev->dev,
+				"could not register of_dma_controller\n");
+	}
+
+	dev_info(base->dev, "initialized\n");
+	return 0;
+ destroy_cache:
+	kmem_cache_destroy(base->desc_slab);
+	if (base->virtbase)
+		iounmap(base->virtbase);
+
+	if (base->lcla_pool.base && base->plat_data->use_esram_lcla) {
+		iounmap(base->lcla_pool.base);
+		base->lcla_pool.base = NULL;
+	}
+
+	if (base->lcla_pool.dma_addr)
+		dma_unmap_single(base->dev, base->lcla_pool.dma_addr,
+				 SZ_1K * base->num_phy_chans,
+				 DMA_TO_DEVICE);
+
+	if (!base->lcla_pool.base_unaligned && base->lcla_pool.base)
+		free_pages((unsigned long)base->lcla_pool.base,
+			   base->lcla_pool.pages);
+
+	kfree(base->lcla_pool.base_unaligned);
+
+	if (base->phy_lcpa)
+		release_mem_region(base->phy_lcpa,
+				   base->lcpa_size);
+	if (base->phy_start)
+		release_mem_region(base->phy_start,
+				   base->phy_size);
+	if (base->clk) {
+		clk_disable_unprepare(base->clk);
+		clk_put(base->clk);
+	}
+
+	if (base->lcpa_regulator) {
+		regulator_disable(base->lcpa_regulator);
+		regulator_put(base->lcpa_regulator);
+	}
+
+	kfree(base->lcla_pool.alloc_map);
+	kfree(base->lookup_log_chans);
+	kfree(base->lookup_phy_chans);
+	kfree(base->phy_res);
+	kfree(base);
+ report_failure:
+	d40_err(&pdev->dev, "probe failed\n");
+	return ret;
+}
+
+static const struct of_device_id d40_match[] = {
+        { .compatible = "stericsson,dma40", },
+        {}
+};
+
+static struct platform_driver d40_driver = {
+	.driver = {
+		.name  = D40_NAME,
+		.pm = &dma40_pm_ops,
+		.of_match_table = d40_match,
+	},
+};
+
+static int __init stedma40_init(void)
+{
+	return platform_driver_probe(&d40_driver, d40_probe);
+}
+subsys_initcall(stedma40_init);
diff --git a/drivers/dma/ste_dma40_ll.c b/drivers/dma/ste_dma40_ll.c
new file mode 100644
index 0000000..13b42dd
--- /dev/null
+++ b/drivers/dma/ste_dma40_ll.c
@@ -0,0 +1,448 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2007-2010
+ * Author: Per Forlin <per.forlin@stericsson.com> for ST-Ericsson
+ * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/kernel.h>
+#include <linux/platform_data/dma-ste-dma40.h>
+
+#include "ste_dma40_ll.h"
+
+static u8 d40_width_to_bits(enum dma_slave_buswidth width)
+{
+	if (width == DMA_SLAVE_BUSWIDTH_1_BYTE)
+		return STEDMA40_ESIZE_8_BIT;
+	else if (width == DMA_SLAVE_BUSWIDTH_2_BYTES)
+		return STEDMA40_ESIZE_16_BIT;
+	else if (width == DMA_SLAVE_BUSWIDTH_8_BYTES)
+		return STEDMA40_ESIZE_64_BIT;
+	else
+		return STEDMA40_ESIZE_32_BIT;
+}
+
+/* Sets up the proper LCSP1 and LCSP3 registers for a logical channel */
+void d40_log_cfg(struct stedma40_chan_cfg *cfg,
+		 u32 *lcsp1, u32 *lcsp3)
+{
+	u32 l3 = 0; /* dst */
+	u32 l1 = 0; /* src */
+
+	/* src is mem? -> increase address pos */
+	if (cfg->dir ==  DMA_MEM_TO_DEV ||
+	    cfg->dir ==  DMA_MEM_TO_MEM)
+		l1 |= BIT(D40_MEM_LCSP1_SCFG_INCR_POS);
+
+	/* dst is mem? -> increase address pos */
+	if (cfg->dir ==  DMA_DEV_TO_MEM ||
+	    cfg->dir ==  DMA_MEM_TO_MEM)
+		l3 |= BIT(D40_MEM_LCSP3_DCFG_INCR_POS);
+
+	/* src is hw? -> master port 1 */
+	if (cfg->dir ==  DMA_DEV_TO_MEM ||
+	    cfg->dir ==  DMA_DEV_TO_DEV)
+		l1 |= BIT(D40_MEM_LCSP1_SCFG_MST_POS);
+
+	/* dst is hw? -> master port 1 */
+	if (cfg->dir ==  DMA_MEM_TO_DEV ||
+	    cfg->dir ==  DMA_DEV_TO_DEV)
+		l3 |= BIT(D40_MEM_LCSP3_DCFG_MST_POS);
+
+	l3 |= BIT(D40_MEM_LCSP3_DCFG_EIM_POS);
+	l3 |= cfg->dst_info.psize << D40_MEM_LCSP3_DCFG_PSIZE_POS;
+	l3 |= d40_width_to_bits(cfg->dst_info.data_width)
+		<< D40_MEM_LCSP3_DCFG_ESIZE_POS;
+
+	l1 |= BIT(D40_MEM_LCSP1_SCFG_EIM_POS);
+	l1 |= cfg->src_info.psize << D40_MEM_LCSP1_SCFG_PSIZE_POS;
+	l1 |= d40_width_to_bits(cfg->src_info.data_width)
+		<< D40_MEM_LCSP1_SCFG_ESIZE_POS;
+
+	*lcsp1 = l1;
+	*lcsp3 = l3;
+}
+
+void d40_phy_cfg(struct stedma40_chan_cfg *cfg, u32 *src_cfg, u32 *dst_cfg)
+{
+	u32 src = 0;
+	u32 dst = 0;
+
+	if ((cfg->dir == DMA_DEV_TO_MEM) ||
+	    (cfg->dir == DMA_DEV_TO_DEV)) {
+		/* Set master port to 1 */
+		src |= BIT(D40_SREG_CFG_MST_POS);
+		src |= D40_TYPE_TO_EVENT(cfg->dev_type);
+
+		if (cfg->src_info.flow_ctrl == STEDMA40_NO_FLOW_CTRL)
+			src |= BIT(D40_SREG_CFG_PHY_TM_POS);
+		else
+			src |= 3 << D40_SREG_CFG_PHY_TM_POS;
+	}
+	if ((cfg->dir == DMA_MEM_TO_DEV) ||
+	    (cfg->dir == DMA_DEV_TO_DEV)) {
+		/* Set master port to 1 */
+		dst |= BIT(D40_SREG_CFG_MST_POS);
+		dst |= D40_TYPE_TO_EVENT(cfg->dev_type);
+
+		if (cfg->dst_info.flow_ctrl == STEDMA40_NO_FLOW_CTRL)
+			dst |= BIT(D40_SREG_CFG_PHY_TM_POS);
+		else
+			dst |= 3 << D40_SREG_CFG_PHY_TM_POS;
+	}
+	/* Interrupt on end of transfer for destination */
+	dst |= BIT(D40_SREG_CFG_TIM_POS);
+
+	/* Generate interrupt on error */
+	src |= BIT(D40_SREG_CFG_EIM_POS);
+	dst |= BIT(D40_SREG_CFG_EIM_POS);
+
+	/* PSIZE */
+	if (cfg->src_info.psize != STEDMA40_PSIZE_PHY_1) {
+		src |= BIT(D40_SREG_CFG_PHY_PEN_POS);
+		src |= cfg->src_info.psize << D40_SREG_CFG_PSIZE_POS;
+	}
+	if (cfg->dst_info.psize != STEDMA40_PSIZE_PHY_1) {
+		dst |= BIT(D40_SREG_CFG_PHY_PEN_POS);
+		dst |= cfg->dst_info.psize << D40_SREG_CFG_PSIZE_POS;
+	}
+
+	/* Element size */
+	src |= d40_width_to_bits(cfg->src_info.data_width)
+		<< D40_SREG_CFG_ESIZE_POS;
+	dst |= d40_width_to_bits(cfg->dst_info.data_width)
+		<< D40_SREG_CFG_ESIZE_POS;
+
+	/* Set the priority bit to high for the physical channel */
+	if (cfg->high_priority) {
+		src |= BIT(D40_SREG_CFG_PRI_POS);
+		dst |= BIT(D40_SREG_CFG_PRI_POS);
+	}
+
+	if (cfg->src_info.big_endian)
+		src |= BIT(D40_SREG_CFG_LBE_POS);
+	if (cfg->dst_info.big_endian)
+		dst |= BIT(D40_SREG_CFG_LBE_POS);
+
+	*src_cfg = src;
+	*dst_cfg = dst;
+}
+
+static int d40_phy_fill_lli(struct d40_phy_lli *lli,
+			    dma_addr_t data,
+			    u32 data_size,
+			    dma_addr_t next_lli,
+			    u32 reg_cfg,
+			    struct stedma40_half_channel_info *info,
+			    unsigned int flags)
+{
+	bool addr_inc = flags & LLI_ADDR_INC;
+	bool term_int = flags & LLI_TERM_INT;
+	unsigned int data_width = info->data_width;
+	int psize = info->psize;
+	int num_elems;
+
+	if (psize == STEDMA40_PSIZE_PHY_1)
+		num_elems = 1;
+	else
+		num_elems = 2 << psize;
+
+	/* Must be aligned */
+	if (!IS_ALIGNED(data, data_width))
+		return -EINVAL;
+
+	/* Transfer size can't be smaller than (num_elems * elem_size) */
+	if (data_size < num_elems * data_width)
+		return -EINVAL;
+
+	/* The number of elements, i.e. how many chunks */
+	lli->reg_elt = (data_size / data_width) << D40_SREG_ELEM_PHY_ECNT_POS;
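+	/* e.g. a 4096-byte chunk of 4-byte elements yields an ECNT of 1024 */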
+
+	/*
+	 * Distance to next element sized entry.
+	 * Usually the size of the element unless you want gaps.
+	 */
+	if (addr_inc)
+		lli->reg_elt |= data_width << D40_SREG_ELEM_PHY_EIDX_POS;
+
+	/* Where the data is */
+	lli->reg_ptr = data;
+	lli->reg_cfg = reg_cfg;
+
+	/* If this scatter list entry is the last one, no next link */
+	if (next_lli == 0)
+		lli->reg_lnk = BIT(D40_SREG_LNK_PHY_TCP_POS);
+	else
+		lli->reg_lnk = next_lli;
+
+	/* Set/clear interrupt generation on this link item. */
+	if (term_int)
+		lli->reg_cfg |= BIT(D40_SREG_CFG_TIM_POS);
+	else
+		lli->reg_cfg &= ~BIT(D40_SREG_CFG_TIM_POS);
+
+	/*
+	 * Post link - D40_SREG_LNK_PHY_PRE_POS = 0
+	 * Relink happens after transfer completion.
+	 */
+
+	return 0;
+}
+
+static int d40_seg_size(int size, int data_width1, int data_width2)
+{
+	u32 max_w = max(data_width1, data_width2);
+	u32 min_w = min(data_width1, data_width2);
+	u32 seg_max = ALIGN(STEDMA40_MAX_SEG_SIZE * min_w, max_w);
+
+	if (seg_max > STEDMA40_MAX_SEG_SIZE)
+		seg_max -= max_w;
+
+	if (size <= seg_max)
+		return size;
+
+	if (size <= 2 * seg_max)
+		return ALIGN(size / 2, max_w);
+
+	return seg_max;
+}
+
+static struct d40_phy_lli *
+d40_phy_buf_to_lli(struct d40_phy_lli *lli, dma_addr_t addr, u32 size,
+		   dma_addr_t lli_phys, dma_addr_t first_phys, u32 reg_cfg,
+		   struct stedma40_half_channel_info *info,
+		   struct stedma40_half_channel_info *otherinfo,
+		   unsigned long flags)
+{
+	bool lastlink = flags & LLI_LAST_LINK;
+	bool addr_inc = flags & LLI_ADDR_INC;
+	bool term_int = flags & LLI_TERM_INT;
+	bool cyclic = flags & LLI_CYCLIC;
+	int err;
+	dma_addr_t next = lli_phys;
+	int size_rest = size;
+	int size_seg = 0;
+
+	/*
+	 * This piece may be split up based on d40_seg_size(); we only want the
+	 * term int on the last part.
+	 */
+	if (term_int)
+		flags &= ~LLI_TERM_INT;
+
+	do {
+		size_seg = d40_seg_size(size_rest, info->data_width,
+					otherinfo->data_width);
+		size_rest -= size_seg;
+
+		if (size_rest == 0 && term_int)
+			flags |= LLI_TERM_INT;
+
+		if (size_rest == 0 && lastlink)
+			next = cyclic ? first_phys : 0;
+		else
+			next = ALIGN(next + sizeof(struct d40_phy_lli),
+				     D40_LLI_ALIGN);
+
+		err = d40_phy_fill_lli(lli, addr, size_seg, next,
+				       reg_cfg, info, flags);
+
+		if (err)
+			goto err;
+
+		lli++;
+		if (addr_inc)
+			addr += size_seg;
+	} while (size_rest);
+
+	return lli;
+
+err:
+	return NULL;
+}
+
+int d40_phy_sg_to_lli(struct scatterlist *sg,
+		      int sg_len,
+		      dma_addr_t target,
+		      struct d40_phy_lli *lli_sg,
+		      dma_addr_t lli_phys,
+		      u32 reg_cfg,
+		      struct stedma40_half_channel_info *info,
+		      struct stedma40_half_channel_info *otherinfo,
+		      unsigned long flags)
+{
+	int total_size = 0;
+	int i;
+	struct scatterlist *current_sg = sg;
+	struct d40_phy_lli *lli = lli_sg;
+	dma_addr_t l_phys = lli_phys;
+
+	if (!target)
+		flags |= LLI_ADDR_INC;
+
+	for_each_sg(sg, current_sg, sg_len, i) {
+		dma_addr_t sg_addr = sg_dma_address(current_sg);
+		unsigned int len = sg_dma_len(current_sg);
+		dma_addr_t dst = target ?: sg_addr;
+
+		total_size += sg_dma_len(current_sg);
+
+		if (i == sg_len - 1)
+			flags |= LLI_TERM_INT | LLI_LAST_LINK;
+
+		l_phys = ALIGN(lli_phys + (lli - lli_sg) *
+			       sizeof(struct d40_phy_lli), D40_LLI_ALIGN);
+
+		lli = d40_phy_buf_to_lli(lli, dst, len, l_phys, lli_phys,
+					 reg_cfg, info, otherinfo, flags);
+
+		if (lli == NULL)
+			return -EINVAL;
+	}
+
+	return total_size;
+}
+
+/* DMA logical lli operations */
+
+static void d40_log_lli_link(struct d40_log_lli *lli_dst,
+			     struct d40_log_lli *lli_src,
+			     int next, unsigned int flags)
+{
+	bool interrupt = flags & LLI_TERM_INT;
+	u32 slos = 0;
+	u32 dlos = 0;
+
+	if (next != -EINVAL) {
+		slos = next * 2;
+		dlos = next * 2 + 1;
+	}
+
+	if (interrupt) {
+		lli_dst->lcsp13 |= D40_MEM_LCSP1_SCFG_TIM_MASK;
+		lli_dst->lcsp13 |= D40_MEM_LCSP3_DTCP_MASK;
+	}
+
+	lli_src->lcsp13 = (lli_src->lcsp13 & ~D40_MEM_LCSP1_SLOS_MASK) |
+		(slos << D40_MEM_LCSP1_SLOS_POS);
+
+	lli_dst->lcsp13 = (lli_dst->lcsp13 & ~D40_MEM_LCSP1_SLOS_MASK) |
+		(dlos << D40_MEM_LCSP1_SLOS_POS);
+}
+
+void d40_log_lli_lcpa_write(struct d40_log_lli_full *lcpa,
+			   struct d40_log_lli *lli_dst,
+			   struct d40_log_lli *lli_src,
+			   int next, unsigned int flags)
+{
+	d40_log_lli_link(lli_dst, lli_src, next, flags);
+
+	writel_relaxed(lli_src->lcsp02, &lcpa[0].lcsp0);
+	writel_relaxed(lli_src->lcsp13, &lcpa[0].lcsp1);
+	writel_relaxed(lli_dst->lcsp02, &lcpa[0].lcsp2);
+	writel_relaxed(lli_dst->lcsp13, &lcpa[0].lcsp3);
+}
+
+void d40_log_lli_lcla_write(struct d40_log_lli *lcla,
+			   struct d40_log_lli *lli_dst,
+			   struct d40_log_lli *lli_src,
+			   int next, unsigned int flags)
+{
+	d40_log_lli_link(lli_dst, lli_src, next, flags);
+
+	writel_relaxed(lli_src->lcsp02, &lcla[0].lcsp02);
+	writel_relaxed(lli_src->lcsp13, &lcla[0].lcsp13);
+	writel_relaxed(lli_dst->lcsp02, &lcla[1].lcsp02);
+	writel_relaxed(lli_dst->lcsp13, &lcla[1].lcsp13);
+}
+
+static void d40_log_fill_lli(struct d40_log_lli *lli,
+			     dma_addr_t data, u32 data_size,
+			     u32 reg_cfg,
+			     u32 data_width,
+			     unsigned int flags)
+{
+	bool addr_inc = flags & LLI_ADDR_INC;
+
+	lli->lcsp13 = reg_cfg;
+
+	/* The number of elements to transfer */
+	lli->lcsp02 = ((data_size / data_width) <<
+		       D40_MEM_LCSP0_ECNT_POS) & D40_MEM_LCSP0_ECNT_MASK;
+
+	BUG_ON((data_size / data_width) > STEDMA40_MAX_SEG_SIZE);
+
+	/* The 16 LSBs of the current element's address */
+	lli->lcsp02 |= data & D40_MEM_LCSP0_SPTR_MASK;
+	/* The 16 MSBs of the current element's address */
+	lli->lcsp13 |= data & D40_MEM_LCSP1_SPTR_MASK;
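+	/* e.g. data = 0x12345678 puts 0x5678 in lcsp02 and 0x12340000 in lcsp13 */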
+
+	if (addr_inc)
+		lli->lcsp13 |= D40_MEM_LCSP1_SCFG_INCR_MASK;
+}
+
+static struct d40_log_lli *d40_log_buf_to_lli(struct d40_log_lli *lli_sg,
+				       dma_addr_t addr,
+				       int size,
+				       u32 lcsp13, /* src or dst*/
+				       u32 data_width1,
+				       u32 data_width2,
+				       unsigned int flags)
+{
+	bool addr_inc = flags & LLI_ADDR_INC;
+	struct d40_log_lli *lli = lli_sg;
+	int size_rest = size;
+	int size_seg = 0;
+
+	do {
+		size_seg = d40_seg_size(size_rest, data_width1, data_width2);
+		size_rest -= size_seg;
+
+		d40_log_fill_lli(lli,
+				 addr,
+				 size_seg,
+				 lcsp13, data_width1,
+				 flags);
+		if (addr_inc)
+			addr += size_seg;
+		lli++;
+	} while (size_rest);
+
+	return lli;
+}
+
+int d40_log_sg_to_lli(struct scatterlist *sg,
+		      int sg_len,
+		      dma_addr_t dev_addr,
+		      struct d40_log_lli *lli_sg,
+		      u32 lcsp13, /* src or dst*/
+		      u32 data_width1, u32 data_width2)
+{
+	int total_size = 0;
+	struct scatterlist *current_sg = sg;
+	int i;
+	struct d40_log_lli *lli = lli_sg;
+	unsigned long flags = 0;
+
+	if (!dev_addr)
+		flags |= LLI_ADDR_INC;
+
+	for_each_sg(sg, current_sg, sg_len, i) {
+		dma_addr_t sg_addr = sg_dma_address(current_sg);
+		unsigned int len = sg_dma_len(current_sg);
+		dma_addr_t addr = dev_addr ?: sg_addr;
+
+		total_size += sg_dma_len(current_sg);
+
+		lli = d40_log_buf_to_lli(lli, addr, len,
+					 lcsp13,
+					 data_width1,
+					 data_width2,
+					 flags);
+	}
+
+	return total_size;
+}
diff --git a/drivers/dma/ste_dma40_ll.h b/drivers/dma/ste_dma40_ll.h
new file mode 100644
index 0000000..1b47312
--- /dev/null
+++ b/drivers/dma/ste_dma40_ll.h
@@ -0,0 +1,470 @@
+/*
+ * Copyright (C) ST-Ericsson SA 2007-2010
+ * Author: Per Friden <per.friden@stericsson.com> for ST-Ericsson SA
+ * Author: Jonas Aaberg <jonas.aberg@stericsson.com> for ST-Ericsson SA
+ * License terms: GNU General Public License (GPL) version 2
+ */
+#ifndef STE_DMA40_LL_H
+#define STE_DMA40_LL_H
+
+#define D40_DREG_PCBASE		0x400
+#define D40_DREG_PCDELTA	(8 * 4)
+#define D40_LLI_ALIGN		16 /* LLI alignment must be 16 bytes. */
+
+#define D40_LCPA_CHAN_SIZE 32
+#define D40_LCPA_CHAN_DST_DELTA 16
+
+#define D40_TYPE_TO_GROUP(type) (type / 16)
+#define D40_TYPE_TO_EVENT(type) (type % 16)
+#define D40_GROUP_SIZE 8
+#define D40_PHYS_TO_GROUP(phys) ((phys & (D40_GROUP_SIZE - 1)) / 2)
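+/*
+ * For example, dev_type 37 maps to event group 2, event line 5, and
+ * physical channel 5 likewise belongs to group 2.
+ */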
+
+/* Most bits of the CFG register are the same in log as in phy mode */
+#define D40_SREG_CFG_MST_POS		15
+#define D40_SREG_CFG_TIM_POS		14
+#define D40_SREG_CFG_EIM_POS		13
+#define D40_SREG_CFG_LOG_INCR_POS	12
+#define D40_SREG_CFG_PHY_PEN_POS	12
+#define D40_SREG_CFG_PSIZE_POS		10
+#define D40_SREG_CFG_ESIZE_POS		 8
+#define D40_SREG_CFG_PRI_POS		 7
+#define D40_SREG_CFG_LBE_POS		 6
+#define D40_SREG_CFG_LOG_GIM_POS	 5
+#define D40_SREG_CFG_LOG_MFU_POS	 4
+#define D40_SREG_CFG_PHY_TM_POS		 4
+#define D40_SREG_CFG_PHY_EVTL_POS	 0
+
+
+/* Standard channel parameters - basic mode (element register) */
+#define D40_SREG_ELEM_PHY_ECNT_POS	16
+#define D40_SREG_ELEM_PHY_EIDX_POS	 0
+
+#define D40_SREG_ELEM_PHY_ECNT_MASK	(0xFFFF << D40_SREG_ELEM_PHY_ECNT_POS)
+
+/* Standard channel parameters - basic mode (Link register) */
+#define D40_SREG_LNK_PHY_TCP_POS	0
+#define D40_SREG_LNK_PHY_LMP_POS	1
+#define D40_SREG_LNK_PHY_PRE_POS	2
+/*
+ * Source/destination link address. Contains the 29-bit,
+ * 8-byte-aligned byte address of the reload area.
+ */
+#define D40_SREG_LNK_PHYS_LNK_MASK	0xFFFFFFF8UL
+
+/* Standard basic channel logical mode */
+
+/* Element register */
+#define D40_SREG_ELEM_LOG_ECNT_POS	16
+#define D40_SREG_ELEM_LOG_LIDX_POS	 8
+#define D40_SREG_ELEM_LOG_LOS_POS	 1
+#define D40_SREG_ELEM_LOG_TCP_POS	 0
+
+#define D40_SREG_ELEM_LOG_LIDX_MASK	(0xFF << D40_SREG_ELEM_LOG_LIDX_POS)
+
+/* Link register */
+#define D40_EVENTLINE_POS(i)		(2 * i)
+#define D40_EVENTLINE_MASK(i)		(0x3 << D40_EVENTLINE_POS(i))
+
+/* Standard basic channel logical params in memory */
+
+/* LCSP0 */
+#define D40_MEM_LCSP0_ECNT_POS		16
+#define D40_MEM_LCSP0_SPTR_POS		 0
+
+#define D40_MEM_LCSP0_ECNT_MASK		(0xFFFF << D40_MEM_LCSP0_ECNT_POS)
+#define D40_MEM_LCSP0_SPTR_MASK		(0xFFFF << D40_MEM_LCSP0_SPTR_POS)
+
+/* LCSP1 */
+#define D40_MEM_LCSP1_SPTR_POS		16
+#define D40_MEM_LCSP1_SCFG_MST_POS	15
+#define D40_MEM_LCSP1_SCFG_TIM_POS	14
+#define D40_MEM_LCSP1_SCFG_EIM_POS	13
+#define D40_MEM_LCSP1_SCFG_INCR_POS	12
+#define D40_MEM_LCSP1_SCFG_PSIZE_POS	10
+#define D40_MEM_LCSP1_SCFG_ESIZE_POS	 8
+#define D40_MEM_LCSP1_SLOS_POS		 1
+#define D40_MEM_LCSP1_STCP_POS		 0
+
+#define D40_MEM_LCSP1_SPTR_MASK		(0xFFFF << D40_MEM_LCSP1_SPTR_POS)
+#define D40_MEM_LCSP1_SCFG_TIM_MASK	(0x1 << D40_MEM_LCSP1_SCFG_TIM_POS)
+#define D40_MEM_LCSP1_SCFG_INCR_MASK	(0x1 << D40_MEM_LCSP1_SCFG_INCR_POS)
+#define D40_MEM_LCSP1_SCFG_PSIZE_MASK	(0x3 << D40_MEM_LCSP1_SCFG_PSIZE_POS)
+#define D40_MEM_LCSP1_SLOS_MASK		(0x7F << D40_MEM_LCSP1_SLOS_POS)
+#define D40_MEM_LCSP1_STCP_MASK		(0x1 << D40_MEM_LCSP1_STCP_POS)
+
+/* LCSP2 */
+#define D40_MEM_LCSP2_ECNT_POS		16
+
+#define D40_MEM_LCSP2_ECNT_MASK		(0xFFFF << D40_MEM_LCSP2_ECNT_POS)
+
+/* LCSP3 */
+#define D40_MEM_LCSP3_DCFG_MST_POS	15
+#define D40_MEM_LCSP3_DCFG_TIM_POS	14
+#define D40_MEM_LCSP3_DCFG_EIM_POS	13
+#define D40_MEM_LCSP3_DCFG_INCR_POS	12
+#define D40_MEM_LCSP3_DCFG_PSIZE_POS	10
+#define D40_MEM_LCSP3_DCFG_ESIZE_POS	 8
+#define D40_MEM_LCSP3_DLOS_POS		 1
+#define D40_MEM_LCSP3_DTCP_POS		 0
+
+#define D40_MEM_LCSP3_DLOS_MASK		(0x7F << D40_MEM_LCSP3_DLOS_POS)
+#define D40_MEM_LCSP3_DTCP_MASK		(0x1 << D40_MEM_LCSP3_DTCP_POS)
+
+
+/* Standard channel parameter register offsets */
+#define D40_CHAN_REG_SSCFG	0x00
+#define D40_CHAN_REG_SSELT	0x04
+#define D40_CHAN_REG_SSPTR	0x08
+#define D40_CHAN_REG_SSLNK	0x0C
+#define D40_CHAN_REG_SDCFG	0x10
+#define D40_CHAN_REG_SDELT	0x14
+#define D40_CHAN_REG_SDPTR	0x18
+#define D40_CHAN_REG_SDLNK	0x1C
+
+/* DMA Register Offsets */
+#define D40_DREG_GCC		0x000
+#define D40_DREG_GCC_ENA	0x1
+/* This assumes that there are only 4 event groups */
+#define D40_DREG_GCC_ENABLE_ALL	0x3ff01
+#define D40_DREG_GCC_EVTGRP_POS 8
+#define D40_DREG_GCC_SRC 0
+#define D40_DREG_GCC_DST 1
+#define D40_DREG_GCC_EVTGRP_ENA(x, y) \
+	(1 << (D40_DREG_GCC_EVTGRP_POS + 2 * x + y))
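+/* e.g. D40_DREG_GCC_EVTGRP_ENA(2, D40_DREG_GCC_SRC) selects bit 12 (8 + 2*2) */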
+
+#define D40_DREG_PRTYP		0x004
+#define D40_DREG_PRSME		0x008
+#define D40_DREG_PRSMO		0x00C
+#define D40_DREG_PRMSE		0x010
+#define D40_DREG_PRMSO		0x014
+#define D40_DREG_PRMOE		0x018
+#define D40_DREG_PRMOO		0x01C
+#define D40_DREG_PRMO_PCHAN_BASIC		0x1
+#define D40_DREG_PRMO_PCHAN_MODULO		0x2
+#define D40_DREG_PRMO_PCHAN_DOUBLE_DST		0x3
+#define D40_DREG_PRMO_LCHAN_SRC_PHY_DST_LOG	0x1
+#define D40_DREG_PRMO_LCHAN_SRC_LOG_DST_PHY	0x2
+#define D40_DREG_PRMO_LCHAN_SRC_LOG_DST_LOG	0x3
+
+#define D40_DREG_LCPA		0x020
+#define D40_DREG_LCLA		0x024
+
+#define D40_DREG_SSEG1		0x030
+#define D40_DREG_SSEG2		0x034
+#define D40_DREG_SSEG3		0x038
+#define D40_DREG_SSEG4		0x03C
+
+#define D40_DREG_SCEG1		0x040
+#define D40_DREG_SCEG2		0x044
+#define D40_DREG_SCEG3		0x048
+#define D40_DREG_SCEG4		0x04C
+
+#define D40_DREG_ACTIVE		0x050
+#define D40_DREG_ACTIVO		0x054
+#define D40_DREG_CIDMOD		0x058
+#define D40_DREG_TCIDV		0x05C
+#define D40_DREG_PCMIS		0x060
+#define D40_DREG_PCICR		0x064
+#define D40_DREG_PCTIS		0x068
+#define D40_DREG_PCEIS		0x06C
+
+#define D40_DREG_SPCMIS		0x070
+#define D40_DREG_SPCICR		0x074
+#define D40_DREG_SPCTIS		0x078
+#define D40_DREG_SPCEIS		0x07C
+
+#define D40_DREG_LCMIS0		0x080
+#define D40_DREG_LCMIS1		0x084
+#define D40_DREG_LCMIS2		0x088
+#define D40_DREG_LCMIS3		0x08C
+#define D40_DREG_LCICR0		0x090
+#define D40_DREG_LCICR1		0x094
+#define D40_DREG_LCICR2		0x098
+#define D40_DREG_LCICR3		0x09C
+#define D40_DREG_LCTIS0		0x0A0
+#define D40_DREG_LCTIS1		0x0A4
+#define D40_DREG_LCTIS2		0x0A8
+#define D40_DREG_LCTIS3		0x0AC
+#define D40_DREG_LCEIS0		0x0B0
+#define D40_DREG_LCEIS1		0x0B4
+#define D40_DREG_LCEIS2		0x0B8
+#define D40_DREG_LCEIS3		0x0BC
+
+#define D40_DREG_SLCMIS1	0x0C0
+#define D40_DREG_SLCMIS2	0x0C4
+#define D40_DREG_SLCMIS3	0x0C8
+#define D40_DREG_SLCMIS4	0x0CC
+
+#define D40_DREG_SLCICR1	0x0D0
+#define D40_DREG_SLCICR2	0x0D4
+#define D40_DREG_SLCICR3	0x0D8
+#define D40_DREG_SLCICR4	0x0DC
+
+#define D40_DREG_SLCTIS1	0x0E0
+#define D40_DREG_SLCTIS2	0x0E4
+#define D40_DREG_SLCTIS3	0x0E8
+#define D40_DREG_SLCTIS4	0x0EC
+
+#define D40_DREG_SLCEIS1	0x0F0
+#define D40_DREG_SLCEIS2	0x0F4
+#define D40_DREG_SLCEIS3	0x0F8
+#define D40_DREG_SLCEIS4	0x0FC
+
+#define D40_DREG_FSESS1		0x100
+#define D40_DREG_FSESS2		0x104
+
+#define D40_DREG_FSEBS1		0x108
+#define D40_DREG_FSEBS2		0x10C
+
+#define D40_DREG_PSEG1		0x110
+#define D40_DREG_PSEG2		0x114
+#define D40_DREG_PSEG3		0x118
+#define D40_DREG_PSEG4		0x11C
+#define D40_DREG_PCEG1		0x120
+#define D40_DREG_PCEG2		0x124
+#define D40_DREG_PCEG3		0x128
+#define D40_DREG_PCEG4		0x12C
+#define D40_DREG_RSEG1		0x130
+#define D40_DREG_RSEG2		0x134
+#define D40_DREG_RSEG3		0x138
+#define D40_DREG_RSEG4		0x13C
+#define D40_DREG_RCEG1		0x140
+#define D40_DREG_RCEG2		0x144
+#define D40_DREG_RCEG3		0x148
+#define D40_DREG_RCEG4		0x14C
+
+#define D40_DREG_PREFOT		0x15C
+#define D40_DREG_EXTCFG		0x160
+
+#define D40_DREG_CPSEG1		0x200
+#define D40_DREG_CPSEG2		0x204
+#define D40_DREG_CPSEG3		0x208
+#define D40_DREG_CPSEG4		0x20C
+#define D40_DREG_CPSEG5		0x210
+
+#define D40_DREG_CPCEG1		0x220
+#define D40_DREG_CPCEG2		0x224
+#define D40_DREG_CPCEG3		0x228
+#define D40_DREG_CPCEG4		0x22C
+#define D40_DREG_CPCEG5		0x230
+
+#define D40_DREG_CRSEG1		0x240
+#define D40_DREG_CRSEG2		0x244
+#define D40_DREG_CRSEG3		0x248
+#define D40_DREG_CRSEG4		0x24C
+#define D40_DREG_CRSEG5		0x250
+
+#define D40_DREG_CRCEG1		0x260
+#define D40_DREG_CRCEG2		0x264
+#define D40_DREG_CRCEG3		0x268
+#define D40_DREG_CRCEG4		0x26C
+#define D40_DREG_CRCEG5		0x270
+
+#define D40_DREG_CFSESS1	0x280
+#define D40_DREG_CFSESS2	0x284
+#define D40_DREG_CFSESS3	0x288
+
+#define D40_DREG_CFSEBS1	0x290
+#define D40_DREG_CFSEBS2	0x294
+#define D40_DREG_CFSEBS3	0x298
+
+#define D40_DREG_CLCMIS1	0x300
+#define D40_DREG_CLCMIS2	0x304
+#define D40_DREG_CLCMIS3	0x308
+#define D40_DREG_CLCMIS4	0x30C
+#define D40_DREG_CLCMIS5	0x310
+
+#define D40_DREG_CLCICR1	0x320
+#define D40_DREG_CLCICR2	0x324
+#define D40_DREG_CLCICR3	0x328
+#define D40_DREG_CLCICR4	0x32C
+#define D40_DREG_CLCICR5	0x330
+
+#define D40_DREG_CLCTIS1	0x340
+#define D40_DREG_CLCTIS2	0x344
+#define D40_DREG_CLCTIS3	0x348
+#define D40_DREG_CLCTIS4	0x34C
+#define D40_DREG_CLCTIS5	0x350
+
+#define D40_DREG_CLCEIS1	0x360
+#define D40_DREG_CLCEIS2	0x364
+#define D40_DREG_CLCEIS3	0x368
+#define D40_DREG_CLCEIS4	0x36C
+#define D40_DREG_CLCEIS5	0x370
+
+#define D40_DREG_CPCMIS		0x380
+#define D40_DREG_CPCICR		0x384
+#define D40_DREG_CPCTIS		0x388
+#define D40_DREG_CPCEIS		0x38C
+
+#define D40_DREG_SCCIDA1	0xE80
+#define D40_DREG_SCCIDA2	0xE90
+#define D40_DREG_SCCIDA3	0xEA0
+#define D40_DREG_SCCIDA4	0xEB0
+#define D40_DREG_SCCIDA5	0xEC0
+
+#define D40_DREG_SCCIDB1	0xE84
+#define D40_DREG_SCCIDB2	0xE94
+#define D40_DREG_SCCIDB3	0xEA4
+#define D40_DREG_SCCIDB4	0xEB4
+#define D40_DREG_SCCIDB5	0xEC4
+
+#define D40_DREG_PRSCCIDA	0xF80
+#define D40_DREG_PRSCCIDB	0xF84
+
+#define D40_DREG_STFU		0xFC8
+#define D40_DREG_ICFG		0xFCC
+#define D40_DREG_PERIPHID0	0xFE0
+#define D40_DREG_PERIPHID1	0xFE4
+#define D40_DREG_PERIPHID2	0xFE8
+#define D40_DREG_PERIPHID3	0xFEC
+#define D40_DREG_CELLID0	0xFF0
+#define D40_DREG_CELLID1	0xFF4
+#define D40_DREG_CELLID2	0xFF8
+#define D40_DREG_CELLID3	0xFFC
+
+/* LLI related structures */
+
+/**
+ * struct d40_phy_lli - The basic configuration register for each physical
+ * channel.
+ *
+ * @reg_cfg: The configuration register.
+ * @reg_elt: The element register.
+ * @reg_ptr: The pointer register.
+ * @reg_lnk: The link register.
+ *
+ * These registers are set up for both physical and logical transfers.
+ * Note that the bits in each register have different meanings in logical
+ * and physical (standard) mode.
+ *
+ * This struct must be 16-byte aligned and contain only hardware-mapped
+ * registers, since it will be accessed directly by the DMA.
+ */
+struct d40_phy_lli {
+	u32 reg_cfg;
+	u32 reg_elt;
+	u32 reg_ptr;
+	u32 reg_lnk;
+};
+
+/**
+ * struct d40_phy_lli_bidir - struct for a transfer.
+ *
+ * @src: Register settings for src channel.
+ * @dst: Register settings for dst channel.
+ *
+ * All DMA transfers have a source and a destination.
+ */
+
+struct d40_phy_lli_bidir {
+	struct d40_phy_lli	*src;
+	struct d40_phy_lli	*dst;
+};
+
+
+/**
+ * struct d40_log_lli - logical lli configuration
+ *
+ * @lcsp02: Maps to register lcsp0 if src, or lcsp2 if dst.
+ * @lcsp13: Maps to register lcsp1 if src, or lcsp3 if dst.
+ *
+ * This struct must be 8-byte aligned since it will be accessed directly by
+ * the DMA. Never add anything that is not a hw-mapped register to this struct.
+ */
+
+struct d40_log_lli {
+	u32 lcsp02;
+	u32 lcsp13;
+};
+
+/**
+ * struct d40_log_lli_bidir - For both src and dst
+ *
+ * @src: pointer to src lli configuration.
+ * @dst: pointer to dst lli configuration.
+ *
+ * You always have a src and a dst when doing DMA transfers.
+ */
+
+struct d40_log_lli_bidir {
+	struct d40_log_lli *src;
+	struct d40_log_lli *dst;
+};
+
+/**
+ * struct d40_log_lli_full - LCPA layout
+ *
+ * @lcsp0: Logical Channel Standard Param 0 - Src.
+ * @lcsp1: Logical Channel Standard Param 1 - Src.
+ * @lcsp2: Logical Channel Standard Param 2 - Dst.
+ * @lcsp3: Logical Channel Standard Param 3 - Dst.
+ *
+ * This struct maps to LCPA physical memory layout. Must map to
+ * the hw.
+ */
+struct d40_log_lli_full {
+	u32 lcsp0;
+	u32 lcsp1;
+	u32 lcsp2;
+	u32 lcsp3;
+};
+
+/**
+ * struct d40_def_lcsp - Default LCSP1 and LCSP3 settings
+ *
+ * @lcsp3: The default configuration for dst.
+ * @lcsp1: The default configuration for src.
+ */
+struct d40_def_lcsp {
+	u32 lcsp3;
+	u32 lcsp1;
+};
+
+/* Physical channels */
+
+enum d40_lli_flags {
+	LLI_ADDR_INC	= 1 << 0,
+	LLI_TERM_INT	= 1 << 1,
+	LLI_CYCLIC	= 1 << 2,
+	LLI_LAST_LINK	= 1 << 3,
+};
+
+void d40_phy_cfg(struct stedma40_chan_cfg *cfg,
+		 u32 *src_cfg,
+		 u32 *dst_cfg);
+
+void d40_log_cfg(struct stedma40_chan_cfg *cfg,
+		 u32 *lcsp1,
+		 u32 *lcsp3);
+
+int d40_phy_sg_to_lli(struct scatterlist *sg,
+		      int sg_len,
+		      dma_addr_t target,
+		      struct d40_phy_lli *lli,
+		      dma_addr_t lli_phys,
+		      u32 reg_cfg,
+		      struct stedma40_half_channel_info *info,
+		      struct stedma40_half_channel_info *otherinfo,
+		      unsigned long flags);
+
+/* Logical channels */
+
+int d40_log_sg_to_lli(struct scatterlist *sg,
+		      int sg_len,
+		      dma_addr_t dev_addr,
+		      struct d40_log_lli *lli_sg,
+		      u32 lcsp13, /* src or dst*/
+		      u32 data_width1, u32 data_width2);
+
+void d40_log_lli_lcpa_write(struct d40_log_lli_full *lcpa,
+			    struct d40_log_lli *lli_dst,
+			    struct d40_log_lli *lli_src,
+			    int next, unsigned int flags);
+
+void d40_log_lli_lcla_write(struct d40_log_lli *lcla,
+			    struct d40_log_lli *lli_dst,
+			    struct d40_log_lli *lli_src,
+			    int next, unsigned int flags);
+
+#endif /* STE_DMA40_LL_H */
diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
new file mode 100644
index 0000000..379e8d5
--- /dev/null
+++ b/drivers/dma/stm32-dma.c
@@ -0,0 +1,1353 @@
+/*
+ * Driver for STM32 DMA controller
+ *
+ * Inspired by dma-jz4740.c and tegra20-apb-dma.c
+ *
+ * Copyright (C) M'boumba Cedric Madianga 2015
+ * Author: M'boumba Cedric Madianga <cedric.madianga@gmail.com>
+ *         Pierre-Yves Mordret <pierre-yves.mordret@st.com>
+ *
+ * License terms:  GNU General Public License (GPL), version 2
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include "virt-dma.h"
+
+#define STM32_DMA_LISR			0x0000 /* DMA Low Int Status Reg */
+#define STM32_DMA_HISR			0x0004 /* DMA High Int Status Reg */
+#define STM32_DMA_LIFCR			0x0008 /* DMA Low Int Flag Clear Reg */
+#define STM32_DMA_HIFCR			0x000c /* DMA High Int Flag Clear Reg */
+#define STM32_DMA_TCI			BIT(5) /* Transfer Complete Interrupt */
+#define STM32_DMA_HTI			BIT(4) /* Half Transfer Interrupt */
+#define STM32_DMA_TEI			BIT(3) /* Transfer Error Interrupt */
+#define STM32_DMA_DMEI			BIT(2) /* Direct Mode Error Interrupt */
+#define STM32_DMA_FEI			BIT(0) /* FIFO Error Interrupt */
+#define STM32_DMA_MASKI			(STM32_DMA_TCI \
+					 | STM32_DMA_TEI \
+					 | STM32_DMA_DMEI \
+					 | STM32_DMA_FEI)
+
+/* DMA Stream x Configuration Register */
+#define STM32_DMA_SCR(x)		(0x0010 + 0x18 * (x)) /* x = 0..7 */
+#define STM32_DMA_SCR_REQ(n)		((n & 0x7) << 25)
+#define STM32_DMA_SCR_MBURST_MASK	GENMASK(24, 23)
+#define STM32_DMA_SCR_MBURST(n)	        ((n & 0x3) << 23)
+#define STM32_DMA_SCR_PBURST_MASK	GENMASK(22, 21)
+#define STM32_DMA_SCR_PBURST(n)	        ((n & 0x3) << 21)
+#define STM32_DMA_SCR_PL_MASK		GENMASK(17, 16)
+#define STM32_DMA_SCR_PL(n)		((n & 0x3) << 16)
+#define STM32_DMA_SCR_MSIZE_MASK	GENMASK(14, 13)
+#define STM32_DMA_SCR_MSIZE(n)		((n & 0x3) << 13)
+#define STM32_DMA_SCR_PSIZE_MASK	GENMASK(12, 11)
+#define STM32_DMA_SCR_PSIZE(n)		((n & 0x3) << 11)
+#define STM32_DMA_SCR_PSIZE_GET(n)	((n & STM32_DMA_SCR_PSIZE_MASK) >> 11)
+#define STM32_DMA_SCR_DIR_MASK		GENMASK(7, 6)
+#define STM32_DMA_SCR_DIR(n)		((n & 0x3) << 6)
+#define STM32_DMA_SCR_CT		BIT(19) /* Target in double buffer */
+#define STM32_DMA_SCR_DBM		BIT(18) /* Double Buffer Mode */
+#define STM32_DMA_SCR_PINCOS		BIT(15) /* Peripheral inc offset size */
+#define STM32_DMA_SCR_MINC		BIT(10) /* Memory increment mode */
+#define STM32_DMA_SCR_PINC		BIT(9) /* Peripheral increment mode */
+#define STM32_DMA_SCR_CIRC		BIT(8) /* Circular mode */
+#define STM32_DMA_SCR_PFCTRL		BIT(5) /* Peripheral Flow Controller */
+#define STM32_DMA_SCR_TCIE		BIT(4) /* Transfer Complete Int Enable */
+#define STM32_DMA_SCR_TEIE		BIT(2) /* Transfer Error Int Enable */
+#define STM32_DMA_SCR_DMEIE		BIT(1) /* Direct Mode Err Int Enable */
+#define STM32_DMA_SCR_EN		BIT(0) /* Stream Enable */
+#define STM32_DMA_SCR_CFG_MASK		(STM32_DMA_SCR_PINC \
+					| STM32_DMA_SCR_MINC \
+					| STM32_DMA_SCR_PINCOS \
+					| STM32_DMA_SCR_PL_MASK)
+#define STM32_DMA_SCR_IRQ_MASK		(STM32_DMA_SCR_TCIE \
+					| STM32_DMA_SCR_TEIE \
+					| STM32_DMA_SCR_DMEIE)
+
+/* DMA Stream x number of data register */
+#define STM32_DMA_SNDTR(x)		(0x0014 + 0x18 * (x))
+
+/* DMA stream peripheral address register */
+#define STM32_DMA_SPAR(x)		(0x0018 + 0x18 * (x))
+
+/* DMA stream x memory 0 address register */
+#define STM32_DMA_SM0AR(x)		(0x001c + 0x18 * (x))
+
+/* DMA stream x memory 1 address register */
+#define STM32_DMA_SM1AR(x)		(0x0020 + 0x18 * (x))
+
+/* DMA stream x FIFO control register */
+#define STM32_DMA_SFCR(x)		(0x0024 + 0x18 * (x))
+#define STM32_DMA_SFCR_FTH_MASK		GENMASK(1, 0)
+#define STM32_DMA_SFCR_FTH(n)		(n & STM32_DMA_SFCR_FTH_MASK)
+#define STM32_DMA_SFCR_FEIE		BIT(7) /* FIFO error interrupt enable */
+#define STM32_DMA_SFCR_DMDIS		BIT(2) /* Direct mode disable */
+#define STM32_DMA_SFCR_MASK		(STM32_DMA_SFCR_FEIE \
+					| STM32_DMA_SFCR_DMDIS)
+
+/* DMA direction */
+#define STM32_DMA_DEV_TO_MEM		0x00
+#define	STM32_DMA_MEM_TO_DEV		0x01
+#define	STM32_DMA_MEM_TO_MEM		0x02
+
+/* DMA priority level */
+#define STM32_DMA_PRIORITY_LOW		0x00
+#define STM32_DMA_PRIORITY_MEDIUM	0x01
+#define STM32_DMA_PRIORITY_HIGH		0x02
+#define STM32_DMA_PRIORITY_VERY_HIGH	0x03
+
+/* DMA FIFO threshold selection */
+#define STM32_DMA_FIFO_THRESHOLD_1QUARTERFULL		0x00
+#define STM32_DMA_FIFO_THRESHOLD_HALFFULL		0x01
+#define STM32_DMA_FIFO_THRESHOLD_3QUARTERSFULL		0x02
+#define STM32_DMA_FIFO_THRESHOLD_FULL			0x03
+
+#define STM32_DMA_MAX_DATA_ITEMS	0xffff
+/*
+ * A maximum-length transfer covers items 0 to 0xFFFE, which leaves the next
+ * scatter/gather chunk unaligned. Thus it's safer to round this value down
+ * to the FIFO size (16 bytes).
+ */
+#define STM32_DMA_ALIGNED_MAX_DATA_ITEMS	\
+	ALIGN_DOWN(STM32_DMA_MAX_DATA_ITEMS, 16)
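+/* i.e. 0xFFF0 (65520) data items with the current 0xFFFF maximum */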
+#define STM32_DMA_MAX_CHANNELS		0x08
+#define STM32_DMA_MAX_REQUEST_ID	0x08
+#define STM32_DMA_MAX_DATA_PARAM	0x03
+#define STM32_DMA_FIFO_SIZE		16	/* FIFO is 16 bytes */
+#define STM32_DMA_MIN_BURST		4
+#define STM32_DMA_MAX_BURST		16
+
+/* DMA Features */
+#define STM32_DMA_THRESHOLD_FTR_MASK	GENMASK(1, 0)
+#define STM32_DMA_THRESHOLD_FTR_GET(n)	((n) & STM32_DMA_THRESHOLD_FTR_MASK)
+
+enum stm32_dma_width {
+	STM32_DMA_BYTE,
+	STM32_DMA_HALF_WORD,
+	STM32_DMA_WORD,
+};
+
+enum stm32_dma_burst_size {
+	STM32_DMA_BURST_SINGLE,
+	STM32_DMA_BURST_INCR4,
+	STM32_DMA_BURST_INCR8,
+	STM32_DMA_BURST_INCR16,
+};
+
+/**
+ * struct stm32_dma_cfg - STM32 DMA custom configuration
+ * @channel_id: channel ID
+ * @request_line: DMA request
+ * @stream_config: 32-bit mask specifying the DMA channel configuration
+ * @features: 32-bit mask specifying the DMA Feature list
+ */
+struct stm32_dma_cfg {
+	u32 channel_id;
+	u32 request_line;
+	u32 stream_config;
+	u32 features;
+};
+
+struct stm32_dma_chan_reg {
+	u32 dma_lisr;
+	u32 dma_hisr;
+	u32 dma_lifcr;
+	u32 dma_hifcr;
+	u32 dma_scr;
+	u32 dma_sndtr;
+	u32 dma_spar;
+	u32 dma_sm0ar;
+	u32 dma_sm1ar;
+	u32 dma_sfcr;
+};
+
+struct stm32_dma_sg_req {
+	u32 len;
+	struct stm32_dma_chan_reg chan_reg;
+};
+
+struct stm32_dma_desc {
+	struct virt_dma_desc vdesc;
+	bool cyclic;
+	u32 num_sgs;
+	struct stm32_dma_sg_req sg_req[];
+};
+
+struct stm32_dma_chan {
+	struct virt_dma_chan vchan;
+	bool config_init;
+	bool busy;
+	u32 id;
+	u32 irq;
+	struct stm32_dma_desc *desc;
+	u32 next_sg;
+	struct dma_slave_config	dma_sconfig;
+	struct stm32_dma_chan_reg chan_reg;
+	u32 threshold;
+	u32 mem_burst;
+	u32 mem_width;
+};
+
+struct stm32_dma_device {
+	struct dma_device ddev;
+	void __iomem *base;
+	struct clk *clk;
+	struct reset_control *rst;
+	bool mem2mem;
+	struct stm32_dma_chan chan[STM32_DMA_MAX_CHANNELS];
+};
+
+static struct stm32_dma_device *stm32_dma_get_dev(struct stm32_dma_chan *chan)
+{
+	return container_of(chan->vchan.chan.device, struct stm32_dma_device,
+			    ddev);
+}
+
+static struct stm32_dma_chan *to_stm32_dma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct stm32_dma_chan, vchan.chan);
+}
+
+static struct stm32_dma_desc *to_stm32_dma_desc(struct virt_dma_desc *vdesc)
+{
+	return container_of(vdesc, struct stm32_dma_desc, vdesc);
+}
+
+static struct device *chan2dev(struct stm32_dma_chan *chan)
+{
+	return &chan->vchan.chan.dev->device;
+}
+
+static u32 stm32_dma_read(struct stm32_dma_device *dmadev, u32 reg)
+{
+	return readl_relaxed(dmadev->base + reg);
+}
+
+static void stm32_dma_write(struct stm32_dma_device *dmadev, u32 reg, u32 val)
+{
+	writel_relaxed(val, dmadev->base + reg);
+}
+
+static struct stm32_dma_desc *stm32_dma_alloc_desc(u32 num_sgs)
+{
+	return kzalloc(sizeof(struct stm32_dma_desc) +
+		       sizeof(struct stm32_dma_sg_req) * num_sgs, GFP_NOWAIT);
+}
+
+static int stm32_dma_get_width(struct stm32_dma_chan *chan,
+			       enum dma_slave_buswidth width)
+{
+	switch (width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		return STM32_DMA_BYTE;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		return STM32_DMA_HALF_WORD;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		return STM32_DMA_WORD;
+	default:
+		dev_err(chan2dev(chan), "Dma bus width not supported\n");
+		return -EINVAL;
+	}
+}
+
+static enum dma_slave_buswidth stm32_dma_get_max_width(u32 buf_len,
+						       u32 threshold)
+{
+	enum dma_slave_buswidth max_width;
+
+	if (threshold == STM32_DMA_FIFO_THRESHOLD_FULL)
+		max_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	else
+		max_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
+
+	while ((buf_len < max_width  || buf_len % max_width) &&
+	       max_width > DMA_SLAVE_BUSWIDTH_1_BYTE)
+		max_width = max_width >> 1;
+
+	return max_width;
+}
+
+static bool stm32_dma_fifo_threshold_is_allowed(u32 burst, u32 threshold,
+						enum dma_slave_buswidth width)
+{
+	u32 remaining;
+
+	if (width != DMA_SLAVE_BUSWIDTH_UNDEFINED) {
+		if (burst != 0) {
+			/*
+			 * If the number of beats fits into a whole number
+			 * of bursts, this configuration is allowed.
+			 */
+			remaining = ((STM32_DMA_FIFO_SIZE / width) *
+				     (threshold + 1) / 4) % burst;
+
+			if (remaining == 0)
+				return true;
+		} else {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static bool stm32_dma_is_burst_possible(u32 buf_len, u32 threshold)
+{
+	switch (threshold) {
+	case STM32_DMA_FIFO_THRESHOLD_FULL:
+		if (buf_len >= STM32_DMA_MAX_BURST)
+			return true;
+		else
+			return false;
+	case STM32_DMA_FIFO_THRESHOLD_HALFFULL:
+		if (buf_len >= STM32_DMA_MAX_BURST / 2)
+			return true;
+		else
+			return false;
+	default:
+		return false;
+	}
+}
+
+static u32 stm32_dma_get_best_burst(u32 buf_len, u32 max_burst, u32 threshold,
+				    enum dma_slave_buswidth width)
+{
+	u32 best_burst = max_burst;
+
+	if (best_burst == 1 || !stm32_dma_is_burst_possible(buf_len, threshold))
+		return 0;
+
+	while ((buf_len < best_burst * width && best_burst > 1) ||
+	       !stm32_dma_fifo_threshold_is_allowed(best_burst, threshold,
+						    width)) {
+		if (best_burst > STM32_DMA_MIN_BURST)
+			best_burst = best_burst >> 1;
+		else
+			best_burst = 0;
+	}
+
+	return best_burst;
+}
+
+static int stm32_dma_get_burst(struct stm32_dma_chan *chan, u32 maxburst)
+{
+	switch (maxburst) {
+	case 0:
+	case 1:
+		return STM32_DMA_BURST_SINGLE;
+	case 4:
+		return STM32_DMA_BURST_INCR4;
+	case 8:
+		return STM32_DMA_BURST_INCR8;
+	case 16:
+		return STM32_DMA_BURST_INCR16;
+	default:
+		dev_err(chan2dev(chan), "Dma burst size not supported\n");
+		return -EINVAL;
+	}
+}
+
+static void stm32_dma_set_fifo_config(struct stm32_dma_chan *chan,
+				      u32 src_burst, u32 dst_burst)
+{
+	chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_MASK;
+	chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_DMEIE;
+
+	if (!src_burst && !dst_burst) {
+		/* Using direct mode */
+		chan->chan_reg.dma_scr |= STM32_DMA_SCR_DMEIE;
+	} else {
+		/* Using FIFO mode */
+		chan->chan_reg.dma_sfcr |= STM32_DMA_SFCR_MASK;
+	}
+}
+
+static int stm32_dma_slave_config(struct dma_chan *c,
+				  struct dma_slave_config *config)
+{
+	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+
+	memcpy(&chan->dma_sconfig, config, sizeof(*config));
+
+	chan->config_init = true;
+
+	return 0;
+}
+
+static u32 stm32_dma_irq_status(struct stm32_dma_chan *chan)
+{
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+	u32 flags, dma_isr;
+
+	/*
+	 * Read "flags" from DMA_xISR register corresponding to the selected
+	 * DMA channel at the correct bit offset inside that register.
+	 *
+	 * If (ch % 4) is 2 or 3, the channel's flags sit 16 bits higher.
+	 * If (ch % 4) is 1 or 3, they sit an additional 6 bits higher.
+	 */
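+	/*
+	 * For example, channel 5 reads DMA_HISR with its flags starting at
+	 * bit 6 (((5 & 2) << 3) | ((5 & 1) * 6) = 6), while channel 3 reads
+	 * DMA_LISR with its flags starting at bit 22.
+	 */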
+
+	if (chan->id & 4)
+		dma_isr = stm32_dma_read(dmadev, STM32_DMA_HISR);
+	else
+		dma_isr = stm32_dma_read(dmadev, STM32_DMA_LISR);
+
+	flags = dma_isr >> (((chan->id & 2) << 3) | ((chan->id & 1) * 6));
+
+	return flags & STM32_DMA_MASKI;
+}
+
+static void stm32_dma_irq_clear(struct stm32_dma_chan *chan, u32 flags)
+{
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+	u32 dma_ifcr;
+
+	/*
+	 * Write "flags" to the DMA_xIFCR register corresponding to the selected
+	 * DMA channel at the correct bit offset inside that register.
+	 *
+	 * If (ch % 4) is 2 or 3, left shift the mask by 16 bits.
+	 * If (ch % 4) is 1 or 3, additionally left shift the mask by 6 bits.
+	 */
+	flags &= STM32_DMA_MASKI;
+	dma_ifcr = flags << (((chan->id & 2) << 3) | ((chan->id & 1) * 6));
+
+	if (chan->id & 4)
+		stm32_dma_write(dmadev, STM32_DMA_HIFCR, dma_ifcr);
+	else
+		stm32_dma_write(dmadev, STM32_DMA_LIFCR, dma_ifcr);
+}
+
+static int stm32_dma_disable_chan(struct stm32_dma_chan *chan)
+{
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+	unsigned long timeout = jiffies + msecs_to_jiffies(5000);
+	u32 dma_scr, id;
+
+	id = chan->id;
+	dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(id));
+
+	if (dma_scr & STM32_DMA_SCR_EN) {
+		dma_scr &= ~STM32_DMA_SCR_EN;
+		stm32_dma_write(dmadev, STM32_DMA_SCR(id), dma_scr);
+
+		do {
+			dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(id));
+			dma_scr &= STM32_DMA_SCR_EN;
+			if (!dma_scr)
+				break;
+
+			if (time_after_eq(jiffies, timeout)) {
+				dev_err(chan2dev(chan), "%s: timeout!\n",
+					__func__);
+				return -EBUSY;
+			}
+			cond_resched();
+		} while (1);
+	}
+
+	return 0;
+}
+
+static void stm32_dma_stop(struct stm32_dma_chan *chan)
+{
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+	u32 dma_scr, dma_sfcr, status;
+	int ret;
+
+	/* Disable interrupts */
+	dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id));
+	dma_scr &= ~STM32_DMA_SCR_IRQ_MASK;
+	stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), dma_scr);
+	dma_sfcr = stm32_dma_read(dmadev, STM32_DMA_SFCR(chan->id));
+	dma_sfcr &= ~STM32_DMA_SFCR_FEIE;
+	stm32_dma_write(dmadev, STM32_DMA_SFCR(chan->id), dma_sfcr);
+
+	/* Disable DMA */
+	ret = stm32_dma_disable_chan(chan);
+	if (ret < 0)
+		return;
+
+	/* Clear interrupt status if it is there */
+	status = stm32_dma_irq_status(chan);
+	if (status) {
+		dev_dbg(chan2dev(chan), "%s(): clearing interrupt: 0x%08x\n",
+			__func__, status);
+		stm32_dma_irq_clear(chan, status);
+	}
+
+	chan->busy = false;
+}
+
+static int stm32_dma_terminate_all(struct dma_chan *c)
+{
+	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+
+	if (chan->busy) {
+		stm32_dma_stop(chan);
+		chan->desc = NULL;
+	}
+
+	vchan_get_all_descriptors(&chan->vchan, &head);
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+	vchan_dma_desc_free_list(&chan->vchan, &head);
+
+	return 0;
+}
+
+static void stm32_dma_synchronize(struct dma_chan *c)
+{
+	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+
+	vchan_synchronize(&chan->vchan);
+}
+
+static void stm32_dma_dump_reg(struct stm32_dma_chan *chan)
+{
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+	u32 scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id));
+	u32 ndtr = stm32_dma_read(dmadev, STM32_DMA_SNDTR(chan->id));
+	u32 spar = stm32_dma_read(dmadev, STM32_DMA_SPAR(chan->id));
+	u32 sm0ar = stm32_dma_read(dmadev, STM32_DMA_SM0AR(chan->id));
+	u32 sm1ar = stm32_dma_read(dmadev, STM32_DMA_SM1AR(chan->id));
+	u32 sfcr = stm32_dma_read(dmadev, STM32_DMA_SFCR(chan->id));
+
+	dev_dbg(chan2dev(chan), "SCR:   0x%08x\n", scr);
+	dev_dbg(chan2dev(chan), "NDTR:  0x%08x\n", ndtr);
+	dev_dbg(chan2dev(chan), "SPAR:  0x%08x\n", spar);
+	dev_dbg(chan2dev(chan), "SM0AR: 0x%08x\n", sm0ar);
+	dev_dbg(chan2dev(chan), "SM1AR: 0x%08x\n", sm1ar);
+	dev_dbg(chan2dev(chan), "SFCR:  0x%08x\n", sfcr);
+}
+
+static void stm32_dma_configure_next_sg(struct stm32_dma_chan *chan);
+
+static void stm32_dma_start_transfer(struct stm32_dma_chan *chan)
+{
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+	struct virt_dma_desc *vdesc;
+	struct stm32_dma_sg_req *sg_req;
+	struct stm32_dma_chan_reg *reg;
+	u32 status;
+	int ret;
+
+	ret = stm32_dma_disable_chan(chan);
+	if (ret < 0)
+		return;
+
+	if (!chan->desc) {
+		vdesc = vchan_next_desc(&chan->vchan);
+		if (!vdesc)
+			return;
+
+		chan->desc = to_stm32_dma_desc(vdesc);
+		chan->next_sg = 0;
+	}
+
+	if (chan->next_sg == chan->desc->num_sgs)
+		chan->next_sg = 0;
+
+	sg_req = &chan->desc->sg_req[chan->next_sg];
+	reg = &sg_req->chan_reg;
+
+	stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), reg->dma_scr);
+	stm32_dma_write(dmadev, STM32_DMA_SPAR(chan->id), reg->dma_spar);
+	stm32_dma_write(dmadev, STM32_DMA_SM0AR(chan->id), reg->dma_sm0ar);
+	stm32_dma_write(dmadev, STM32_DMA_SFCR(chan->id), reg->dma_sfcr);
+	stm32_dma_write(dmadev, STM32_DMA_SM1AR(chan->id), reg->dma_sm1ar);
+	stm32_dma_write(dmadev, STM32_DMA_SNDTR(chan->id), reg->dma_sndtr);
+
+	chan->next_sg++;
+
+	/* Clear interrupt status if it is there */
+	status = stm32_dma_irq_status(chan);
+	if (status)
+		stm32_dma_irq_clear(chan, status);
+
+	if (chan->desc->cyclic)
+		stm32_dma_configure_next_sg(chan);
+
+	stm32_dma_dump_reg(chan);
+
+	/* Start DMA */
+	reg->dma_scr |= STM32_DMA_SCR_EN;
+	stm32_dma_write(dmadev, STM32_DMA_SCR(chan->id), reg->dma_scr);
+
+	chan->busy = true;
+
+	dev_dbg(chan2dev(chan), "vchan %pK: started\n", &chan->vchan);
+}
+
+static void stm32_dma_configure_next_sg(struct stm32_dma_chan *chan)
+{
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+	struct stm32_dma_sg_req *sg_req;
+	u32 dma_scr, dma_sm0ar, dma_sm1ar, id;
+
+	id = chan->id;
+	dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(id));
+
+	if (dma_scr & STM32_DMA_SCR_DBM) {
+		if (chan->next_sg == chan->desc->num_sgs)
+			chan->next_sg = 0;
+
+		sg_req = &chan->desc->sg_req[chan->next_sg];
+
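+		/*
+		 * SCR.CT tells which memory target the hardware is currently
+		 * using; reprogram the other one with the next period address
+		 * so it is ready when the targets swap.
+		 */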
+		if (dma_scr & STM32_DMA_SCR_CT) {
+			dma_sm0ar = sg_req->chan_reg.dma_sm0ar;
+			stm32_dma_write(dmadev, STM32_DMA_SM0AR(id), dma_sm0ar);
+			dev_dbg(chan2dev(chan), "CT=1 <=> SM0AR: 0x%08x\n",
+				stm32_dma_read(dmadev, STM32_DMA_SM0AR(id)));
+		} else {
+			dma_sm1ar = sg_req->chan_reg.dma_sm1ar;
+			stm32_dma_write(dmadev, STM32_DMA_SM1AR(id), dma_sm1ar);
+			dev_dbg(chan2dev(chan), "CT=0 <=> SM1AR: 0x%08x\n",
+				stm32_dma_read(dmadev, STM32_DMA_SM1AR(id)));
+		}
+	}
+}
+
+static void stm32_dma_handle_chan_done(struct stm32_dma_chan *chan)
+{
+	if (chan->desc) {
+		if (chan->desc->cyclic) {
+			vchan_cyclic_callback(&chan->desc->vdesc);
+			chan->next_sg++;
+			stm32_dma_configure_next_sg(chan);
+		} else {
+			chan->busy = false;
+			if (chan->next_sg == chan->desc->num_sgs) {
+				list_del(&chan->desc->vdesc.node);
+				vchan_cookie_complete(&chan->desc->vdesc);
+				chan->desc = NULL;
+			}
+			stm32_dma_start_transfer(chan);
+		}
+	}
+}
+
+static irqreturn_t stm32_dma_chan_irq(int irq, void *devid)
+{
+	struct stm32_dma_chan *chan = devid;
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+	u32 status, scr;
+
+	spin_lock(&chan->vchan.lock);
+
+	status = stm32_dma_irq_status(chan);
+	scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id));
+
+	if (status & STM32_DMA_TCI) {
+		stm32_dma_irq_clear(chan, STM32_DMA_TCI);
+		if (scr & STM32_DMA_SCR_TCIE)
+			stm32_dma_handle_chan_done(chan);
+		status &= ~STM32_DMA_TCI;
+	}
+	if (status & STM32_DMA_HTI) {
+		stm32_dma_irq_clear(chan, STM32_DMA_HTI);
+		status &= ~STM32_DMA_HTI;
+	}
+	if (status & STM32_DMA_FEI) {
+		stm32_dma_irq_clear(chan, STM32_DMA_FEI);
+		status &= ~STM32_DMA_FEI;
+		if (!(scr & STM32_DMA_SCR_EN))
+			dev_err(chan2dev(chan), "FIFO Error\n");
+		else
+			dev_dbg(chan2dev(chan), "FIFO over/underrun\n");
+	}
+	if (status) {
+		stm32_dma_irq_clear(chan, status);
+		dev_err(chan2dev(chan), "DMA error: status=0x%08x\n", status);
+		if (!(scr & STM32_DMA_SCR_EN))
+			dev_err(chan2dev(chan), "chan disabled by HW\n");
+	}
+
+	spin_unlock(&chan->vchan.lock);
+
+	return IRQ_HANDLED;
+}
+
+static void stm32_dma_issue_pending(struct dma_chan *c)
+{
+	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+	if (vchan_issue_pending(&chan->vchan) && !chan->desc && !chan->busy) {
+		dev_dbg(chan2dev(chan), "vchan %pK: issued\n", &chan->vchan);
+		stm32_dma_start_transfer(chan);
+
+	}
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+}
+
+static int stm32_dma_set_xfer_param(struct stm32_dma_chan *chan,
+				    enum dma_transfer_direction direction,
+				    enum dma_slave_buswidth *buswidth,
+				    u32 buf_len)
+{
+	enum dma_slave_buswidth src_addr_width, dst_addr_width;
+	int src_bus_width, dst_bus_width;
+	int src_burst_size, dst_burst_size;
+	u32 src_maxburst, dst_maxburst, src_best_burst, dst_best_burst;
+	u32 dma_scr, threshold;
+
+	src_addr_width = chan->dma_sconfig.src_addr_width;
+	dst_addr_width = chan->dma_sconfig.dst_addr_width;
+	src_maxburst = chan->dma_sconfig.src_maxburst;
+	dst_maxburst = chan->dma_sconfig.dst_maxburst;
+	threshold = chan->threshold;
+
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		/* Set device data size */
+		dst_bus_width = stm32_dma_get_width(chan, dst_addr_width);
+		if (dst_bus_width < 0)
+			return dst_bus_width;
+
+		/* Set device burst size */
+		dst_best_burst = stm32_dma_get_best_burst(buf_len,
+							  dst_maxburst,
+							  threshold,
+							  dst_addr_width);
+
+		dst_burst_size = stm32_dma_get_burst(chan, dst_best_burst);
+		if (dst_burst_size < 0)
+			return dst_burst_size;
+
+		/* Set memory data size */
+		src_addr_width = stm32_dma_get_max_width(buf_len, threshold);
+		chan->mem_width = src_addr_width;
+		src_bus_width = stm32_dma_get_width(chan, src_addr_width);
+		if (src_bus_width < 0)
+			return src_bus_width;
+
+		/* Set memory burst size */
+		src_maxburst = STM32_DMA_MAX_BURST;
+		src_best_burst = stm32_dma_get_best_burst(buf_len,
+							  src_maxburst,
+							  threshold,
+							  src_addr_width);
+		src_burst_size = stm32_dma_get_burst(chan, src_best_burst);
+		if (src_burst_size < 0)
+			return src_burst_size;
+
+		dma_scr = STM32_DMA_SCR_DIR(STM32_DMA_MEM_TO_DEV) |
+			STM32_DMA_SCR_PSIZE(dst_bus_width) |
+			STM32_DMA_SCR_MSIZE(src_bus_width) |
+			STM32_DMA_SCR_PBURST(dst_burst_size) |
+			STM32_DMA_SCR_MBURST(src_burst_size);
+
+		/* Set FIFO threshold */
+		chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_FTH_MASK;
+		chan->chan_reg.dma_sfcr |= STM32_DMA_SFCR_FTH(threshold);
+
+		/* Set peripheral address */
+		chan->chan_reg.dma_spar = chan->dma_sconfig.dst_addr;
+		*buswidth = dst_addr_width;
+		break;
+
+	case DMA_DEV_TO_MEM:
+		/* Set device data size */
+		src_bus_width = stm32_dma_get_width(chan, src_addr_width);
+		if (src_bus_width < 0)
+			return src_bus_width;
+
+		/* Set device burst size */
+		src_best_burst = stm32_dma_get_best_burst(buf_len,
+							  src_maxburst,
+							  threshold,
+							  src_addr_width);
+		chan->mem_burst = src_best_burst;
+		src_burst_size = stm32_dma_get_burst(chan, src_best_burst);
+		if (src_burst_size < 0)
+			return src_burst_size;
+
+		/* Set memory data size */
+		dst_addr_width = stm32_dma_get_max_width(buf_len, threshold);
+		chan->mem_width = dst_addr_width;
+		dst_bus_width = stm32_dma_get_width(chan, dst_addr_width);
+		if (dst_bus_width < 0)
+			return dst_bus_width;
+
+		/* Set memory burst size */
+		dst_maxburst = STM32_DMA_MAX_BURST;
+		dst_best_burst = stm32_dma_get_best_burst(buf_len,
+							  dst_maxburst,
+							  threshold,
+							  dst_addr_width);
+		chan->mem_burst = dst_best_burst;
+		dst_burst_size = stm32_dma_get_burst(chan, dst_best_burst);
+		if (dst_burst_size < 0)
+			return dst_burst_size;
+
+		dma_scr = STM32_DMA_SCR_DIR(STM32_DMA_DEV_TO_MEM) |
+			STM32_DMA_SCR_PSIZE(src_bus_width) |
+			STM32_DMA_SCR_MSIZE(dst_bus_width) |
+			STM32_DMA_SCR_PBURST(src_burst_size) |
+			STM32_DMA_SCR_MBURST(dst_burst_size);
+
+		/* Set FIFO threshold */
+		chan->chan_reg.dma_sfcr &= ~STM32_DMA_SFCR_FTH_MASK;
+		chan->chan_reg.dma_sfcr |= STM32_DMA_SFCR_FTH(threshold);
+
+		/* Set peripheral address */
+		chan->chan_reg.dma_spar = chan->dma_sconfig.src_addr;
+		*buswidth = chan->dma_sconfig.src_addr_width;
+		break;
+
+	default:
+		dev_err(chan2dev(chan), "Dma direction is not supported\n");
+		return -EINVAL;
+	}
+
+	stm32_dma_set_fifo_config(chan, src_best_burst, dst_best_burst);
+
+	/* Set DMA control register */
+	chan->chan_reg.dma_scr &= ~(STM32_DMA_SCR_DIR_MASK |
+			STM32_DMA_SCR_PSIZE_MASK | STM32_DMA_SCR_MSIZE_MASK |
+			STM32_DMA_SCR_PBURST_MASK | STM32_DMA_SCR_MBURST_MASK);
+	chan->chan_reg.dma_scr |= dma_scr;
+
+	return 0;
+}
+
+static void stm32_dma_clear_reg(struct stm32_dma_chan_reg *regs)
+{
+	memset(regs, 0, sizeof(struct stm32_dma_chan_reg));
+}
+
+static struct dma_async_tx_descriptor *stm32_dma_prep_slave_sg(
+	struct dma_chan *c, struct scatterlist *sgl,
+	u32 sg_len, enum dma_transfer_direction direction,
+	unsigned long flags, void *context)
+{
+	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+	struct stm32_dma_desc *desc;
+	struct scatterlist *sg;
+	enum dma_slave_buswidth buswidth;
+	u32 nb_data_items;
+	int i, ret;
+
+	if (!chan->config_init) {
+		dev_err(chan2dev(chan), "dma channel is not configured\n");
+		return NULL;
+	}
+
+	if (sg_len < 1) {
+		dev_err(chan2dev(chan), "Invalid segment length %d\n", sg_len);
+		return NULL;
+	}
+
+	desc = stm32_dma_alloc_desc(sg_len);
+	if (!desc)
+		return NULL;
+
+	/* Set peripheral flow controller */
+	if (chan->dma_sconfig.device_fc)
+		chan->chan_reg.dma_scr |= STM32_DMA_SCR_PFCTRL;
+	else
+		chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_PFCTRL;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		ret = stm32_dma_set_xfer_param(chan, direction, &buswidth,
+					       sg_dma_len(sg));
+		if (ret < 0)
+			goto err;
+
+		desc->sg_req[i].len = sg_dma_len(sg);
+
+		nb_data_items = desc->sg_req[i].len / buswidth;
+		if (nb_data_items > STM32_DMA_ALIGNED_MAX_DATA_ITEMS) {
+			dev_err(chan2dev(chan), "nb items not supported\n");
+			goto err;
+		}
+
+		stm32_dma_clear_reg(&desc->sg_req[i].chan_reg);
+		desc->sg_req[i].chan_reg.dma_scr = chan->chan_reg.dma_scr;
+		desc->sg_req[i].chan_reg.dma_sfcr = chan->chan_reg.dma_sfcr;
+		desc->sg_req[i].chan_reg.dma_spar = chan->chan_reg.dma_spar;
+		desc->sg_req[i].chan_reg.dma_sm0ar = sg_dma_address(sg);
+		desc->sg_req[i].chan_reg.dma_sm1ar = sg_dma_address(sg);
+		desc->sg_req[i].chan_reg.dma_sndtr = nb_data_items;
+	}
+
+	desc->num_sgs = sg_len;
+	desc->cyclic = false;
+
+	return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+
+err:
+	kfree(desc);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *stm32_dma_prep_dma_cyclic(
+	struct dma_chan *c, dma_addr_t buf_addr, size_t buf_len,
+	size_t period_len, enum dma_transfer_direction direction,
+	unsigned long flags)
+{
+	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+	struct stm32_dma_desc *desc;
+	enum dma_slave_buswidth buswidth;
+	u32 num_periods, nb_data_items;
+	int i, ret;
+
+	if (!buf_len || !period_len) {
+		dev_err(chan2dev(chan), "Invalid buffer/period len\n");
+		return NULL;
+	}
+
+	if (!chan->config_init) {
+		dev_err(chan2dev(chan), "dma channel is not configured\n");
+		return NULL;
+	}
+
+	if (buf_len % period_len) {
+		dev_err(chan2dev(chan), "buf_len not multiple of period_len\n");
+		return NULL;
+	}
+
+	/*
+	 * More requests can be queued as long as the DMA has not been
+	 * started; the driver will loop over all of them. Once the DMA
+	 * is started, new requests can only be queued after terminating
+	 * it.
+	 */
+	if (chan->busy) {
+		dev_err(chan2dev(chan), "Request not allowed when dma busy\n");
+		return NULL;
+	}
+
+	ret = stm32_dma_set_xfer_param(chan, direction, &buswidth, period_len);
+	if (ret < 0)
+		return NULL;
+
+	nb_data_items = period_len / buswidth;
+	if (nb_data_items > STM32_DMA_ALIGNED_MAX_DATA_ITEMS) {
+		dev_err(chan2dev(chan), "number of items not supported\n");
+		return NULL;
+	}
+
+	/* Enable circular mode or double buffer mode */
+	if (buf_len == period_len)
+		chan->chan_reg.dma_scr |= STM32_DMA_SCR_CIRC;
+	else
+		chan->chan_reg.dma_scr |= STM32_DMA_SCR_DBM;
+
+	/* Clear periph ctrl if client set it */
+	chan->chan_reg.dma_scr &= ~STM32_DMA_SCR_PFCTRL;
+
+	num_periods = buf_len / period_len;
+
+	desc = stm32_dma_alloc_desc(num_periods);
+	if (!desc)
+		return NULL;
+
+	for (i = 0; i < num_periods; i++) {
+		desc->sg_req[i].len = period_len;
+
+		stm32_dma_clear_reg(&desc->sg_req[i].chan_reg);
+		desc->sg_req[i].chan_reg.dma_scr = chan->chan_reg.dma_scr;
+		desc->sg_req[i].chan_reg.dma_sfcr = chan->chan_reg.dma_sfcr;
+		desc->sg_req[i].chan_reg.dma_spar = chan->chan_reg.dma_spar;
+		desc->sg_req[i].chan_reg.dma_sm0ar = buf_addr;
+		desc->sg_req[i].chan_reg.dma_sm1ar = buf_addr;
+		desc->sg_req[i].chan_reg.dma_sndtr = nb_data_items;
+		buf_addr += period_len;
+	}
+
+	desc->num_sgs = num_periods;
+	desc->cyclic = true;
+
+	return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+}
+
+static struct dma_async_tx_descriptor *stm32_dma_prep_dma_memcpy(
+	struct dma_chan *c, dma_addr_t dest,
+	dma_addr_t src, size_t len, unsigned long flags)
+{
+	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+	enum dma_slave_buswidth max_width;
+	struct stm32_dma_desc *desc;
+	size_t xfer_count, offset;
+	u32 num_sgs, best_burst, dma_burst, threshold;
+	int i;
+
+	num_sgs = DIV_ROUND_UP(len, STM32_DMA_ALIGNED_MAX_DATA_ITEMS);
+	desc = stm32_dma_alloc_desc(num_sgs);
+	if (!desc)
+		return NULL;
+
+	threshold = chan->threshold;
+
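+	/*
+	 * Split the copy into chunks of at most
+	 * STM32_DMA_ALIGNED_MAX_DATA_ITEMS items, one sg request per chunk.
+	 */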
+	for (offset = 0, i = 0; offset < len; offset += xfer_count, i++) {
+		xfer_count = min_t(size_t, len - offset,
+				   STM32_DMA_ALIGNED_MAX_DATA_ITEMS);
+
+		/* Compute best burst size */
+		max_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+		best_burst = stm32_dma_get_best_burst(len, STM32_DMA_MAX_BURST,
+						      threshold, max_width);
+		dma_burst = stm32_dma_get_burst(chan, best_burst);
+
+		stm32_dma_clear_reg(&desc->sg_req[i].chan_reg);
+		desc->sg_req[i].chan_reg.dma_scr =
+			STM32_DMA_SCR_DIR(STM32_DMA_MEM_TO_MEM) |
+			STM32_DMA_SCR_PBURST(dma_burst) |
+			STM32_DMA_SCR_MBURST(dma_burst) |
+			STM32_DMA_SCR_MINC |
+			STM32_DMA_SCR_PINC |
+			STM32_DMA_SCR_TCIE |
+			STM32_DMA_SCR_TEIE;
+		desc->sg_req[i].chan_reg.dma_sfcr |= STM32_DMA_SFCR_MASK;
+		desc->sg_req[i].chan_reg.dma_sfcr |=
+			STM32_DMA_SFCR_FTH(threshold);
+		desc->sg_req[i].chan_reg.dma_spar = src + offset;
+		desc->sg_req[i].chan_reg.dma_sm0ar = dest + offset;
+		desc->sg_req[i].chan_reg.dma_sndtr = xfer_count;
+		desc->sg_req[i].len = xfer_count;
+	}
+
+	desc->num_sgs = num_sgs;
+	desc->cyclic = false;
+
+	return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+}
+
+static u32 stm32_dma_get_remaining_bytes(struct stm32_dma_chan *chan)
+{
+	u32 dma_scr, width, ndtr;
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+
+	dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id));
+	width = STM32_DMA_SCR_PSIZE_GET(dma_scr);
+	ndtr = stm32_dma_read(dmadev, STM32_DMA_SNDTR(chan->id));
+
+	return ndtr << width;
+}
+
+static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan,
+				     struct stm32_dma_desc *desc,
+				     u32 next_sg)
+{
+	u32 modulo, burst_size;
+	u32 residue = 0;
+	int i;
+
+	/*
+	 * In cyclic mode, for the last period, residue = remaining bytes from
+	 * NDTR
+	 */
+	if (chan->desc->cyclic && next_sg == 0) {
+		residue = stm32_dma_get_remaining_bytes(chan);
+		goto end;
+	}
+
+	/*
+	 * For all other periods in cyclic mode, and in sg mode,
+	 * residue = remaining bytes from NDTR + remaining periods/sg to be
+	 * transferred
+	 */
+	for (i = next_sg; i < desc->num_sgs; i++)
+		residue += desc->sg_req[i].len;
+	residue += stm32_dma_get_remaining_bytes(chan);
+
+end:
+	if (!chan->mem_burst)
+		return residue;
+
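+	/*
+	 * Residue is reported with burst granularity, so round it up to
+	 * the next full memory burst (in bytes).
+	 */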
+	burst_size = chan->mem_burst * chan->mem_width;
+	modulo = residue % burst_size;
+	if (modulo)
+		residue = residue - modulo + burst_size;
+
+	return residue;
+}
+
+static enum dma_status stm32_dma_tx_status(struct dma_chan *c,
+					   dma_cookie_t cookie,
+					   struct dma_tx_state *state)
+{
+	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+	struct virt_dma_desc *vdesc;
+	enum dma_status status;
+	unsigned long flags;
+	u32 residue = 0;
+
+	status = dma_cookie_status(c, cookie, state);
+	if (status == DMA_COMPLETE || !state)
+		return status;
+
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+	vdesc = vchan_find_desc(&chan->vchan, cookie);
+	if (chan->desc && cookie == chan->desc->vdesc.tx.cookie)
+		residue = stm32_dma_desc_residue(chan, chan->desc,
+						 chan->next_sg);
+	else if (vdesc)
+		residue = stm32_dma_desc_residue(chan,
+						 to_stm32_dma_desc(vdesc), 0);
+	dma_set_residue(state, residue);
+
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+	return status;
+}
+
+static int stm32_dma_alloc_chan_resources(struct dma_chan *c)
+{
+	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+	int ret;
+
+	chan->config_init = false;
+	ret = clk_prepare_enable(dmadev->clk);
+	if (ret < 0) {
+		dev_err(chan2dev(chan), "clk_prepare_enable failed: %d\n", ret);
+		return ret;
+	}
+
+	ret = stm32_dma_disable_chan(chan);
+	if (ret < 0)
+		clk_disable_unprepare(dmadev->clk);
+
+	return ret;
+}
+
+static void stm32_dma_free_chan_resources(struct dma_chan *c)
+{
+	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+	unsigned long flags;
+
+	dev_dbg(chan2dev(chan), "Freeing channel %d\n", chan->id);
+
+	if (chan->busy) {
+		spin_lock_irqsave(&chan->vchan.lock, flags);
+		stm32_dma_stop(chan);
+		chan->desc = NULL;
+		spin_unlock_irqrestore(&chan->vchan.lock, flags);
+	}
+
+	clk_disable_unprepare(dmadev->clk);
+
+	vchan_free_chan_resources(to_virt_chan(c));
+}
+
+static void stm32_dma_desc_free(struct virt_dma_desc *vdesc)
+{
+	kfree(container_of(vdesc, struct stm32_dma_desc, vdesc));
+}
+
+static void stm32_dma_set_config(struct stm32_dma_chan *chan,
+				 struct stm32_dma_cfg *cfg)
+{
+	stm32_dma_clear_reg(&chan->chan_reg);
+
+	chan->chan_reg.dma_scr = cfg->stream_config & STM32_DMA_SCR_CFG_MASK;
+	chan->chan_reg.dma_scr |= STM32_DMA_SCR_REQ(cfg->request_line);
+
+	/* Enable Interrupts  */
+	chan->chan_reg.dma_scr |= STM32_DMA_SCR_TEIE | STM32_DMA_SCR_TCIE;
+
+	chan->threshold = STM32_DMA_THRESHOLD_FTR_GET(cfg->features);
+}
+
+static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec,
+					   struct of_dma *ofdma)
+{
+	struct stm32_dma_device *dmadev = ofdma->of_dma_data;
+	struct device *dev = dmadev->ddev.dev;
+	struct stm32_dma_cfg cfg;
+	struct stm32_dma_chan *chan;
+	struct dma_chan *c;
+
+	if (dma_spec->args_count < 4) {
+		dev_err(dev, "Bad number of cells\n");
+		return NULL;
+	}
+
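+	/* Four cells: channel id, request line, stream config, features */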
+	cfg.channel_id = dma_spec->args[0];
+	cfg.request_line = dma_spec->args[1];
+	cfg.stream_config = dma_spec->args[2];
+	cfg.features = dma_spec->args[3];
+
+	if (cfg.channel_id >= STM32_DMA_MAX_CHANNELS ||
+	    cfg.request_line >= STM32_DMA_MAX_REQUEST_ID) {
+		dev_err(dev, "Bad channel and/or request id\n");
+		return NULL;
+	}
+
+	chan = &dmadev->chan[cfg.channel_id];
+
+	c = dma_get_slave_channel(&chan->vchan.chan);
+	if (!c) {
+		dev_err(dev, "No more channels available\n");
+		return NULL;
+	}
+
+	stm32_dma_set_config(chan, &cfg);
+
+	return c;
+}
+
+static const struct of_device_id stm32_dma_of_match[] = {
+	{ .compatible = "st,stm32-dma", },
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, stm32_dma_of_match);
+
+static int stm32_dma_probe(struct platform_device *pdev)
+{
+	struct stm32_dma_chan *chan;
+	struct stm32_dma_device *dmadev;
+	struct dma_device *dd;
+	const struct of_device_id *match;
+	struct resource *res;
+	int i, ret;
+
+	match = of_match_device(stm32_dma_of_match, &pdev->dev);
+	if (!match) {
+		dev_err(&pdev->dev, "Error: No device match found\n");
+		return -ENODEV;
+	}
+
+	dmadev = devm_kzalloc(&pdev->dev, sizeof(*dmadev), GFP_KERNEL);
+	if (!dmadev)
+		return -ENOMEM;
+
+	dd = &dmadev->ddev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	dmadev->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(dmadev->base))
+		return PTR_ERR(dmadev->base);
+
+	dmadev->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(dmadev->clk)) {
+		dev_err(&pdev->dev, "Error: Missing controller clock\n");
+		return PTR_ERR(dmadev->clk);
+	}
+
+	dmadev->mem2mem = of_property_read_bool(pdev->dev.of_node,
+						"st,mem2mem");
+
+	dmadev->rst = devm_reset_control_get(&pdev->dev, NULL);
+	if (!IS_ERR(dmadev->rst)) {
+		reset_control_assert(dmadev->rst);
+		udelay(2);
+		reset_control_deassert(dmadev->rst);
+	}
+
+	dma_cap_set(DMA_SLAVE, dd->cap_mask);
+	dma_cap_set(DMA_PRIVATE, dd->cap_mask);
+	dma_cap_set(DMA_CYCLIC, dd->cap_mask);
+	dd->device_alloc_chan_resources = stm32_dma_alloc_chan_resources;
+	dd->device_free_chan_resources = stm32_dma_free_chan_resources;
+	dd->device_tx_status = stm32_dma_tx_status;
+	dd->device_issue_pending = stm32_dma_issue_pending;
+	dd->device_prep_slave_sg = stm32_dma_prep_slave_sg;
+	dd->device_prep_dma_cyclic = stm32_dma_prep_dma_cyclic;
+	dd->device_config = stm32_dma_slave_config;
+	dd->device_terminate_all = stm32_dma_terminate_all;
+	dd->device_synchronize = stm32_dma_synchronize;
+	dd->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+		BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+		BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	dd->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+		BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+		BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+	dd->max_burst = STM32_DMA_MAX_BURST;
+	dd->dev = &pdev->dev;
+	INIT_LIST_HEAD(&dd->channels);
+
+	if (dmadev->mem2mem) {
+		dma_cap_set(DMA_MEMCPY, dd->cap_mask);
+		dd->device_prep_dma_memcpy = stm32_dma_prep_dma_memcpy;
+		dd->directions |= BIT(DMA_MEM_TO_MEM);
+	}
+
+	for (i = 0; i < STM32_DMA_MAX_CHANNELS; i++) {
+		chan = &dmadev->chan[i];
+		chan->id = i;
+		chan->vchan.desc_free = stm32_dma_desc_free;
+		vchan_init(&chan->vchan, dd);
+	}
+
+	ret = dma_async_device_register(dd);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < STM32_DMA_MAX_CHANNELS; i++) {
+		chan = &dmadev->chan[i];
+		res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+		if (!res) {
+			ret = -EINVAL;
+			dev_err(&pdev->dev, "No irq resource for chan %d\n", i);
+			goto err_unregister;
+		}
+		chan->irq = res->start;
+		ret = devm_request_irq(&pdev->dev, chan->irq,
+				       stm32_dma_chan_irq, 0,
+				       dev_name(chan2dev(chan)), chan);
+		if (ret) {
+			dev_err(&pdev->dev,
+				"request_irq failed with err %d channel %d\n",
+				ret, i);
+			goto err_unregister;
+		}
+	}
+
+	ret = of_dma_controller_register(pdev->dev.of_node,
+					 stm32_dma_of_xlate, dmadev);
+	if (ret < 0) {
+		dev_err(&pdev->dev,
+			"STM32 DMA DMA OF registration failed %d\n", ret);
+		goto err_unregister;
+	}
+
+	platform_set_drvdata(pdev, dmadev);
+
+	dev_info(&pdev->dev, "STM32 DMA driver registered\n");
+
+	return 0;
+
+err_unregister:
+	dma_async_device_unregister(dd);
+
+	return ret;
+}
+
+static struct platform_driver stm32_dma_driver = {
+	.driver = {
+		.name = "stm32-dma",
+		.of_match_table = stm32_dma_of_match,
+	},
+};
+
+static int __init stm32_dma_init(void)
+{
+	return platform_driver_probe(&stm32_dma_driver, stm32_dma_probe);
+}
+subsys_initcall(stm32_dma_init);
diff --git a/drivers/dma/stm32-dmamux.c b/drivers/dma/stm32-dmamux.c
new file mode 100644
index 0000000..b922db9
--- /dev/null
+++ b/drivers/dma/stm32-dmamux.c
@@ -0,0 +1,327 @@
+/*
+ *
+ * Copyright (C) STMicroelectronics SA 2017
+ * Author(s): M'boumba Cedric Madianga <cedric.madianga@gmail.com>
+ *            Pierre-Yves Mordret <pierre-yves.mordret@st.com>
+ *
+ * License terms: GPL V2.0.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * DMA Router driver for STM32 DMA MUX
+ *
+ * Based on TI DMA Crossbar driver
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/reset.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#define STM32_DMAMUX_CCR(x)		(0x4 * (x))
+#define STM32_DMAMUX_MAX_DMA_REQUESTS	32
+#define STM32_DMAMUX_MAX_REQUESTS	255
+
+struct stm32_dmamux {
+	u32 master;
+	u32 request;
+	u32 chan_id;
+};
+
+struct stm32_dmamux_data {
+	struct dma_router dmarouter;
+	struct clk *clk;
+	struct reset_control *rst;
+	void __iomem *iomem;
+	u32 dma_requests; /* Number of DMA requests connected to DMAMUX */
+	u32 dmamux_requests; /* Number of DMA requests routed toward DMAs */
+	spinlock_t lock; /* Protects register access */
+	unsigned long *dma_inuse; /* Used DMA channel */
+	u32 dma_reqs[]; /* Number of DMA requests per DMA master.
+			 *  [0] holds the number of DMA masters.
+			 *  To be kept at the very end of this structure.
+			 */
+};
+
+static inline u32 stm32_dmamux_read(void __iomem *iomem, u32 reg)
+{
+	return readl_relaxed(iomem + reg);
+}
+
+static inline void stm32_dmamux_write(void __iomem *iomem, u32 reg, u32 val)
+{
+	writel_relaxed(val, iomem + reg);
+}
+
+static void stm32_dmamux_free(struct device *dev, void *route_data)
+{
+	struct stm32_dmamux_data *dmamux = dev_get_drvdata(dev);
+	struct stm32_dmamux *mux = route_data;
+	unsigned long flags;
+
+	/* Clear dma request */
+	spin_lock_irqsave(&dmamux->lock, flags);
+
+	stm32_dmamux_write(dmamux->iomem, STM32_DMAMUX_CCR(mux->chan_id), 0);
+	clear_bit(mux->chan_id, dmamux->dma_inuse);
+
+	if (!IS_ERR(dmamux->clk))
+		clk_disable(dmamux->clk);
+
+	spin_unlock_irqrestore(&dmamux->lock, flags);
+
+	dev_dbg(dev, "Unmapping DMAMUX(%u) to DMA%u(%u)\n",
+		mux->request, mux->master, mux->chan_id);
+
+	kfree(mux);
+}
+
+static void *stm32_dmamux_route_allocate(struct of_phandle_args *dma_spec,
+					 struct of_dma *ofdma)
+{
+	struct platform_device *pdev = of_find_device_by_node(ofdma->of_node);
+	struct stm32_dmamux_data *dmamux = platform_get_drvdata(pdev);
+	struct stm32_dmamux *mux;
+	u32 i, min, max;
+	int ret;
+	unsigned long flags;
+
+	if (dma_spec->args_count != 3) {
+		dev_err(&pdev->dev, "invalid number of dma mux args\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (dma_spec->args[0] > dmamux->dmamux_requests) {
+		dev_err(&pdev->dev, "invalid mux request number: %d\n",
+			dma_spec->args[0]);
+		return ERR_PTR(-EINVAL);
+	}
+
+	mux = kzalloc(sizeof(*mux), GFP_KERNEL);
+	if (!mux)
+		return ERR_PTR(-ENOMEM);
+
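+	/* Reserve the first free DMAMUX output channel */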
+	spin_lock_irqsave(&dmamux->lock, flags);
+	mux->chan_id = find_first_zero_bit(dmamux->dma_inuse,
+					   dmamux->dma_requests);
+
+	if (mux->chan_id == dmamux->dma_requests) {
+		spin_unlock_irqrestore(&dmamux->lock, flags);
+		dev_err(&pdev->dev, "Ran out of free DMA requests\n");
+		ret = -ENOMEM;
+		goto error_chan_id;
+	}
+	set_bit(mux->chan_id, dmamux->dma_inuse);
+	spin_unlock_irqrestore(&dmamux->lock, flags);
+
+	/* Look for DMA Master */
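+	/*
+	 * dma_reqs[1..n] hold the request count of each master; walk them
+	 * until the range [min, max) containing chan_id is found. min is
+	 * then the base of that master's channel range.
+	 */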
+	for (i = 1, min = 0, max = dmamux->dma_reqs[i];
+	     i <= dmamux->dma_reqs[0];
+	     min += dmamux->dma_reqs[i], max += dmamux->dma_reqs[++i])
+		if (mux->chan_id < max)
+			break;
+	mux->master = i - 1;
+
+	/* The of_node_put() will be done in of_dma_router_xlate() */
+	dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", i - 1);
+	if (!dma_spec->np) {
+		dev_err(&pdev->dev, "can't get dma master\n");
+		ret = -EINVAL;
+		goto error;
+	}
+
+	/* Set dma request */
+	spin_lock_irqsave(&dmamux->lock, flags);
+	if (!IS_ERR(dmamux->clk)) {
+		ret = clk_enable(dmamux->clk);
+		if (ret < 0) {
+			spin_unlock_irqrestore(&dmamux->lock, flags);
+			dev_err(&pdev->dev, "clk_enable issue: %d\n", ret);
+			goto error;
+		}
+	}
+	spin_unlock_irqrestore(&dmamux->lock, flags);
+
+	mux->request = dma_spec->args[0];
+
+	/*
+	 * Craft the 4-cell spec consumed by the stm32-dma driver: local
+	 * channel id, request line 0, then the original stream config
+	 * and feature cells.
+	 */
+	dma_spec->args[3] = dma_spec->args[2];
+	dma_spec->args[2] = dma_spec->args[1];
+	dma_spec->args[1] = 0;
+	dma_spec->args[0] = mux->chan_id - min;
+	dma_spec->args_count = 4;
+
+	stm32_dmamux_write(dmamux->iomem, STM32_DMAMUX_CCR(mux->chan_id),
+			   mux->request);
+	dev_dbg(&pdev->dev, "Mapping DMAMUX(%u) to DMA%u(%u)\n",
+		mux->request, mux->master, mux->chan_id);
+
+	return mux;
+
+error:
+	clear_bit(mux->chan_id, dmamux->dma_inuse);
+
+error_chan_id:
+	kfree(mux);
+	return ERR_PTR(ret);
+}
+
+static const struct of_device_id stm32_stm32dma_master_match[] = {
+	{ .compatible = "st,stm32-dma", },
+	{},
+};
+
+static int stm32_dmamux_probe(struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	const struct of_device_id *match;
+	struct device_node *dma_node;
+	struct stm32_dmamux_data *stm32_dmamux;
+	struct resource *res;
+	void __iomem *iomem;
+	int i, count, ret;
+	u32 dma_req;
+
+	if (!node)
+		return -ENODEV;
+
+	count = device_property_read_u32_array(&pdev->dev, "dma-masters",
+					       NULL, 0);
+	if (count < 0) {
+		dev_err(&pdev->dev, "Can't get DMA master(s) node\n");
+		return -ENODEV;
+	}
+
+	stm32_dmamux = devm_kzalloc(&pdev->dev, sizeof(*stm32_dmamux) +
+				    sizeof(u32) * (count + 1), GFP_KERNEL);
+	if (!stm32_dmamux)
+		return -ENOMEM;
+
+	dma_req = 0;
+	for (i = 1; i <= count; i++) {
+		dma_node = of_parse_phandle(node, "dma-masters", i - 1);
+
+		match = of_match_node(stm32_stm32dma_master_match, dma_node);
+		if (!match) {
+			dev_err(&pdev->dev, "DMA master is not supported\n");
+			of_node_put(dma_node);
+			return -EINVAL;
+		}
+
+		if (of_property_read_u32(dma_node, "dma-requests",
+					 &stm32_dmamux->dma_reqs[i])) {
+			dev_info(&pdev->dev,
+				 "Missing MUX output information, using %u.\n",
+				 STM32_DMAMUX_MAX_DMA_REQUESTS);
+			stm32_dmamux->dma_reqs[i] =
+				STM32_DMAMUX_MAX_DMA_REQUESTS;
+		}
+		dma_req += stm32_dmamux->dma_reqs[i];
+		of_node_put(dma_node);
+	}
+
+	if (dma_req > STM32_DMAMUX_MAX_DMA_REQUESTS) {
+		dev_err(&pdev->dev, "Too many DMA Master Requests to manage\n");
+		return -ENODEV;
+	}
+
+	stm32_dmamux->dma_requests = dma_req;
+	stm32_dmamux->dma_reqs[0] = count;
+	stm32_dmamux->dma_inuse = devm_kcalloc(&pdev->dev,
+					       BITS_TO_LONGS(dma_req),
+					       sizeof(unsigned long),
+					       GFP_KERNEL);
+	if (!stm32_dmamux->dma_inuse)
+		return -ENOMEM;
+
+	if (device_property_read_u32(&pdev->dev, "dma-requests",
+				     &stm32_dmamux->dmamux_requests)) {
+		stm32_dmamux->dmamux_requests = STM32_DMAMUX_MAX_REQUESTS;
+		dev_warn(&pdev->dev, "DMAMUX defaulting on %u requests\n",
+			 stm32_dmamux->dmamux_requests);
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	iomem = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(iomem))
+		return PTR_ERR(iomem);
+
+	spin_lock_init(&stm32_dmamux->lock);
+
+	stm32_dmamux->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(stm32_dmamux->clk)) {
+		ret = PTR_ERR(stm32_dmamux->clk);
+		if (ret == -EPROBE_DEFER)
+			dev_info(&pdev->dev, "Missing controller clock\n");
+		return ret;
+	}
+
+	stm32_dmamux->rst = devm_reset_control_get(&pdev->dev, NULL);
+	if (!IS_ERR(stm32_dmamux->rst)) {
+		reset_control_assert(stm32_dmamux->rst);
+		udelay(2);
+		reset_control_deassert(stm32_dmamux->rst);
+	}
+
+	stm32_dmamux->iomem = iomem;
+	stm32_dmamux->dmarouter.dev = &pdev->dev;
+	stm32_dmamux->dmarouter.route_free = stm32_dmamux_free;
+
+	platform_set_drvdata(pdev, stm32_dmamux);
+
+	if (!IS_ERR(stm32_dmamux->clk)) {
+		ret = clk_prepare_enable(stm32_dmamux->clk);
+		if (ret < 0) {
+			dev_err(&pdev->dev, "clk_prep_enable error: %d\n", ret);
+			return ret;
+		}
+	}
+
+	/* Reset the dmamux */
+	for (i = 0; i < stm32_dmamux->dma_requests; i++)
+		stm32_dmamux_write(stm32_dmamux->iomem, STM32_DMAMUX_CCR(i), 0);
+
+	if (!IS_ERR(stm32_dmamux->clk))
+		clk_disable(stm32_dmamux->clk);
+
+	return of_dma_router_register(node, stm32_dmamux_route_allocate,
+				     &stm32_dmamux->dmarouter);
+}
+
+static const struct of_device_id stm32_dmamux_match[] = {
+	{ .compatible = "st,stm32h7-dmamux" },
+	{},
+};
+
+static struct platform_driver stm32_dmamux_driver = {
+	.probe	= stm32_dmamux_probe,
+	.driver = {
+		.name = "stm32-dmamux",
+		.of_match_table = stm32_dmamux_match,
+	},
+};
+
+static int __init stm32_dmamux_init(void)
+{
+	return platform_driver_register(&stm32_dmamux_driver);
+}
+arch_initcall(stm32_dmamux_init);
+
+MODULE_DESCRIPTION("DMA Router driver for STM32 DMA MUX");
+MODULE_AUTHOR("M'boumba Cedric Madianga <cedric.madianga@gmail.com>");
+MODULE_AUTHOR("Pierre-Yves Mordret <pierre-yves.mordret@st.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c
new file mode 100644
index 0000000..06dd172
--- /dev/null
+++ b/drivers/dma/stm32-mdma.c
@@ -0,0 +1,1700 @@
+/*
+ *
+ * Copyright (C) STMicroelectronics SA 2017
+ * Author(s): M'boumba Cedric Madianga <cedric.madianga@gmail.com>
+ *            Pierre-Yves Mordret <pierre-yves.mordret@st.com>
+ *
+ * License terms: GPL V2.0.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * Driver for STM32 MDMA controller
+ *
+ * Inspired by stm32-dma.c and dma-jz4780.c
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/iopoll.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+#include <linux/log2.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+#include <linux/slab.h>
+
+#include "virt-dma.h"
+
+/*  MDMA Generic getter/setter */
+#define STM32_MDMA_SHIFT(n)		(ffs(n) - 1)
+#define STM32_MDMA_SET(n, mask)		(((n) << STM32_MDMA_SHIFT(mask)) & \
+					 (mask))
+#define STM32_MDMA_GET(n, mask)		(((n) & (mask)) >> \
+					 STM32_MDMA_SHIFT(mask))
+
+#define STM32_MDMA_GISR0		0x0000 /* MDMA Int Status Reg 0 */
+#define STM32_MDMA_GISR1		0x0004 /* MDMA Int Status Reg 1 */
+
+/* MDMA Channel x interrupt/status register */
+#define STM32_MDMA_CISR(x)		(0x40 + 0x40 * (x)) /* x = 0..62 */
+#define STM32_MDMA_CISR_CRQA		BIT(16)
+#define STM32_MDMA_CISR_TCIF		BIT(4)
+#define STM32_MDMA_CISR_BTIF		BIT(3)
+#define STM32_MDMA_CISR_BRTIF		BIT(2)
+#define STM32_MDMA_CISR_CTCIF		BIT(1)
+#define STM32_MDMA_CISR_TEIF		BIT(0)
+
+/* MDMA Channel x interrupt flag clear register */
+#define STM32_MDMA_CIFCR(x)		(0x44 + 0x40 * (x))
+#define STM32_MDMA_CIFCR_CLTCIF		BIT(4)
+#define STM32_MDMA_CIFCR_CBTIF		BIT(3)
+#define STM32_MDMA_CIFCR_CBRTIF		BIT(2)
+#define STM32_MDMA_CIFCR_CCTCIF		BIT(1)
+#define STM32_MDMA_CIFCR_CTEIF		BIT(0)
+#define STM32_MDMA_CIFCR_CLEAR_ALL	(STM32_MDMA_CIFCR_CLTCIF \
+					| STM32_MDMA_CIFCR_CBTIF \
+					| STM32_MDMA_CIFCR_CBRTIF \
+					| STM32_MDMA_CIFCR_CCTCIF \
+					| STM32_MDMA_CIFCR_CTEIF)
+
+/* MDMA Channel x error status register */
+#define STM32_MDMA_CESR(x)		(0x48 + 0x40 * (x))
+#define STM32_MDMA_CESR_BSE		BIT(11)
+#define STM32_MDMA_CESR_ASR		BIT(10)
+#define STM32_MDMA_CESR_TEMD		BIT(9)
+#define STM32_MDMA_CESR_TELD		BIT(8)
+#define STM32_MDMA_CESR_TED		BIT(7)
+#define STM32_MDMA_CESR_TEA_MASK	GENMASK(6, 0)
+
+/* MDMA Channel x control register */
+#define STM32_MDMA_CCR(x)		(0x4C + 0x40 * (x))
+#define STM32_MDMA_CCR_SWRQ		BIT(16)
+#define STM32_MDMA_CCR_WEX		BIT(14)
+#define STM32_MDMA_CCR_HEX		BIT(13)
+#define STM32_MDMA_CCR_BEX		BIT(12)
+#define STM32_MDMA_CCR_PL_MASK		GENMASK(7, 6)
+#define STM32_MDMA_CCR_PL(n)		STM32_MDMA_SET(n, \
+						       STM32_MDMA_CCR_PL_MASK)
+#define STM32_MDMA_CCR_TCIE		BIT(5)
+#define STM32_MDMA_CCR_BTIE		BIT(4)
+#define STM32_MDMA_CCR_BRTIE		BIT(3)
+#define STM32_MDMA_CCR_CTCIE		BIT(2)
+#define STM32_MDMA_CCR_TEIE		BIT(1)
+#define STM32_MDMA_CCR_EN		BIT(0)
+#define STM32_MDMA_CCR_IRQ_MASK		(STM32_MDMA_CCR_TCIE \
+					| STM32_MDMA_CCR_BTIE \
+					| STM32_MDMA_CCR_BRTIE \
+					| STM32_MDMA_CCR_CTCIE \
+					| STM32_MDMA_CCR_TEIE)
+
+/* MDMA Channel x transfer configuration register */
+#define STM32_MDMA_CTCR(x)		(0x50 + 0x40 * (x))
+#define STM32_MDMA_CTCR_BWM		BIT(31)
+#define STM32_MDMA_CTCR_SWRM		BIT(30)
+#define STM32_MDMA_CTCR_TRGM_MSK	GENMASK(29, 28)
+#define STM32_MDMA_CTCR_TRGM(n)		STM32_MDMA_SET((n), \
+						       STM32_MDMA_CTCR_TRGM_MSK)
+#define STM32_MDMA_CTCR_TRGM_GET(n)	STM32_MDMA_GET((n), \
+						       STM32_MDMA_CTCR_TRGM_MSK)
+#define STM32_MDMA_CTCR_PAM_MASK	GENMASK(27, 26)
+#define STM32_MDMA_CTCR_PAM(n)		STM32_MDMA_SET(n, \
+						       STM32_MDMA_CTCR_PAM_MASK)
+#define STM32_MDMA_CTCR_PKE		BIT(25)
+#define STM32_MDMA_CTCR_TLEN_MSK	GENMASK(24, 18)
+#define STM32_MDMA_CTCR_TLEN(n)		STM32_MDMA_SET((n), \
+						       STM32_MDMA_CTCR_TLEN_MSK)
+#define STM32_MDMA_CTCR_TLEN_GET(n)	STM32_MDMA_GET((n), \
+						       STM32_MDMA_CTCR_TLEN_MSK)
+#define STM32_MDMA_CTCR_LEN2_MSK	GENMASK(25, 18)
+#define STM32_MDMA_CTCR_LEN2(n)		STM32_MDMA_SET((n), \
+						       STM32_MDMA_CTCR_LEN2_MSK)
+#define STM32_MDMA_CTCR_LEN2_GET(n)	STM32_MDMA_GET((n), \
+						       STM32_MDMA_CTCR_LEN2_MSK)
+#define STM32_MDMA_CTCR_DBURST_MASK	GENMASK(17, 15)
+#define STM32_MDMA_CTCR_DBURST(n)	STM32_MDMA_SET(n, \
+						    STM32_MDMA_CTCR_DBURST_MASK)
+#define STM32_MDMA_CTCR_SBURST_MASK	GENMASK(14, 12)
+#define STM32_MDMA_CTCR_SBURST(n)	STM32_MDMA_SET(n, \
+						    STM32_MDMA_CTCR_SBURST_MASK)
+#define STM32_MDMA_CTCR_DINCOS_MASK	GENMASK(11, 10)
+#define STM32_MDMA_CTCR_DINCOS(n)	STM32_MDMA_SET((n), \
+						    STM32_MDMA_CTCR_DINCOS_MASK)
+#define STM32_MDMA_CTCR_SINCOS_MASK	GENMASK(9, 8)
+#define STM32_MDMA_CTCR_SINCOS(n)	STM32_MDMA_SET((n), \
+						    STM32_MDMA_CTCR_SINCOS_MASK)
+#define STM32_MDMA_CTCR_DSIZE_MASK	GENMASK(7, 6)
+#define STM32_MDMA_CTCR_DSIZE(n)	STM32_MDMA_SET(n, \
+						     STM32_MDMA_CTCR_DSIZE_MASK)
+#define STM32_MDMA_CTCR_SSIZE_MASK	GENMASK(5, 4)
+#define STM32_MDMA_CTCR_SSIZE(n)	STM32_MDMA_SET(n, \
+						     STM32_MDMA_CTCR_SSIZE_MASK)
+#define STM32_MDMA_CTCR_DINC_MASK	GENMASK(3, 2)
+#define STM32_MDMA_CTCR_DINC(n)		STM32_MDMA_SET((n), \
+						      STM32_MDMA_CTCR_DINC_MASK)
+#define STM32_MDMA_CTCR_SINC_MASK	GENMASK(1, 0)
+#define STM32_MDMA_CTCR_SINC(n)		STM32_MDMA_SET((n), \
+						      STM32_MDMA_CTCR_SINC_MASK)
+#define STM32_MDMA_CTCR_CFG_MASK	(STM32_MDMA_CTCR_SINC_MASK \
+					| STM32_MDMA_CTCR_DINC_MASK \
+					| STM32_MDMA_CTCR_SINCOS_MASK \
+					| STM32_MDMA_CTCR_DINCOS_MASK \
+					| STM32_MDMA_CTCR_LEN2_MSK \
+					| STM32_MDMA_CTCR_TRGM_MSK)
+
+/* MDMA Channel x block number of data register */
+#define STM32_MDMA_CBNDTR(x)		(0x54 + 0x40 * (x))
+#define STM32_MDMA_CBNDTR_BRC_MK	GENMASK(31, 20)
+#define STM32_MDMA_CBNDTR_BRC(n)	STM32_MDMA_SET(n, \
+						       STM32_MDMA_CBNDTR_BRC_MK)
+#define STM32_MDMA_CBNDTR_BRC_GET(n)	STM32_MDMA_GET((n), \
+						       STM32_MDMA_CBNDTR_BRC_MK)
+
+#define STM32_MDMA_CBNDTR_BRDUM		BIT(19)
+#define STM32_MDMA_CBNDTR_BRSUM		BIT(18)
+#define STM32_MDMA_CBNDTR_BNDT_MASK	GENMASK(16, 0)
+#define STM32_MDMA_CBNDTR_BNDT(n)	STM32_MDMA_SET(n, \
+						    STM32_MDMA_CBNDTR_BNDT_MASK)
+
+/* MDMA Channel x source address register */
+#define STM32_MDMA_CSAR(x)		(0x58 + 0x40 * (x))
+
+/* MDMA Channel x destination address register */
+#define STM32_MDMA_CDAR(x)		(0x5C + 0x40 * (x))
+
+/* MDMA Channel x block repeat address update register */
+#define STM32_MDMA_CBRUR(x)		(0x60 + 0x40 * (x))
+#define STM32_MDMA_CBRUR_DUV_MASK	GENMASK(31, 16)
+#define STM32_MDMA_CBRUR_DUV(n)		STM32_MDMA_SET(n, \
+						      STM32_MDMA_CBRUR_DUV_MASK)
+#define STM32_MDMA_CBRUR_SUV_MASK	GENMASK(15, 0)
+#define STM32_MDMA_CBRUR_SUV(n)		STM32_MDMA_SET(n, \
+						      STM32_MDMA_CBRUR_SUV_MASK)
+
+/* MDMA Channel x link address register */
+#define STM32_MDMA_CLAR(x)		(0x64 + 0x40 * (x))
+
+/* MDMA Channel x trigger and bus selection register */
+#define STM32_MDMA_CTBR(x)		(0x68 + 0x40 * (x))
+#define STM32_MDMA_CTBR_DBUS		BIT(17)
+#define STM32_MDMA_CTBR_SBUS		BIT(16)
+#define STM32_MDMA_CTBR_TSEL_MASK	GENMASK(7, 0)
+#define STM32_MDMA_CTBR_TSEL(n)		STM32_MDMA_SET(n, \
+						      STM32_MDMA_CTBR_TSEL_MASK)
+
+/* MDMA Channel x mask address register */
+#define STM32_MDMA_CMAR(x)		(0x70 + 0x40 * (x))
+
+/* MDMA Channel x mask data register */
+#define STM32_MDMA_CMDR(x)		(0x74 + 0x40 * (x))
+
+#define STM32_MDMA_MAX_BUF_LEN		128
+#define STM32_MDMA_MAX_BLOCK_LEN	65536
+#define STM32_MDMA_MAX_CHANNELS		63
+#define STM32_MDMA_MAX_REQUESTS		256
+#define STM32_MDMA_MAX_BURST		128
+#define STM32_MDMA_VERY_HIGH_PRIORITY	0x3
+
+enum stm32_mdma_trigger_mode {
+	STM32_MDMA_BUFFER,
+	STM32_MDMA_BLOCK,
+	STM32_MDMA_BLOCK_REP,
+	STM32_MDMA_LINKED_LIST,
+};
+
+enum stm32_mdma_width {
+	STM32_MDMA_BYTE,
+	STM32_MDMA_HALF_WORD,
+	STM32_MDMA_WORD,
+	STM32_MDMA_DOUBLE_WORD,
+};
+
+enum stm32_mdma_inc_mode {
+	STM32_MDMA_FIXED = 0,
+	STM32_MDMA_INC = 2,
+	STM32_MDMA_DEC = 3,
+};
+
+struct stm32_mdma_chan_config {
+	u32 request;
+	u32 priority_level;
+	u32 transfer_config;
+	u32 mask_addr;
+	u32 mask_data;
+};
+
+struct stm32_mdma_hwdesc {
+	u32 ctcr;
+	u32 cbndtr;
+	u32 csar;
+	u32 cdar;
+	u32 cbrur;
+	u32 clar;
+	u32 ctbr;
+	u32 dummy;
+	u32 cmar;
+	u32 cmdr;
+} __aligned(64);
+
+struct stm32_mdma_desc_node {
+	struct stm32_mdma_hwdesc *hwdesc;
+	dma_addr_t hwdesc_phys;
+};
+
+struct stm32_mdma_desc {
+	struct virt_dma_desc vdesc;
+	u32 ccr;
+	bool cyclic;
+	u32 count;
+	struct stm32_mdma_desc_node node[];
+};
+
+struct stm32_mdma_chan {
+	struct virt_dma_chan vchan;
+	struct dma_pool *desc_pool;
+	u32 id;
+	struct stm32_mdma_desc *desc;
+	u32 curr_hwdesc;
+	struct dma_slave_config dma_config;
+	struct stm32_mdma_chan_config chan_config;
+	bool busy;
+	u32 mem_burst;
+	u32 mem_width;
+};
+
+struct stm32_mdma_device {
+	struct dma_device ddev;
+	void __iomem *base;
+	struct clk *clk;
+	int irq;
+	struct reset_control *rst;
+	u32 nr_channels;
+	u32 nr_requests;
+	u32 nr_ahb_addr_masks;
+	struct stm32_mdma_chan chan[STM32_MDMA_MAX_CHANNELS];
+	u32 ahb_addr_masks[];
+};
+
+static struct stm32_mdma_device *stm32_mdma_get_dev(
+	struct stm32_mdma_chan *chan)
+{
+	return container_of(chan->vchan.chan.device, struct stm32_mdma_device,
+			    ddev);
+}
+
+static struct stm32_mdma_chan *to_stm32_mdma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct stm32_mdma_chan, vchan.chan);
+}
+
+static struct stm32_mdma_desc *to_stm32_mdma_desc(struct virt_dma_desc *vdesc)
+{
+	return container_of(vdesc, struct stm32_mdma_desc, vdesc);
+}
+
+static struct device *chan2dev(struct stm32_mdma_chan *chan)
+{
+	return &chan->vchan.chan.dev->device;
+}
+
+static struct device *mdma2dev(struct stm32_mdma_device *mdma_dev)
+{
+	return mdma_dev->ddev.dev;
+}
+
+static u32 stm32_mdma_read(struct stm32_mdma_device *dmadev, u32 reg)
+{
+	return readl_relaxed(dmadev->base + reg);
+}
+
+static void stm32_mdma_write(struct stm32_mdma_device *dmadev, u32 reg, u32 val)
+{
+	writel_relaxed(val, dmadev->base + reg);
+}
+
+static void stm32_mdma_set_bits(struct stm32_mdma_device *dmadev, u32 reg,
+				u32 mask)
+{
+	void __iomem *addr = dmadev->base + reg;
+
+	writel_relaxed(readl_relaxed(addr) | mask, addr);
+}
+
+static void stm32_mdma_clr_bits(struct stm32_mdma_device *dmadev, u32 reg,
+				u32 mask)
+{
+	void __iomem *addr = dmadev->base + reg;
+
+	writel_relaxed(readl_relaxed(addr) & ~mask, addr);
+}
+
+static struct stm32_mdma_desc *stm32_mdma_alloc_desc(
+		struct stm32_mdma_chan *chan, u32 count)
+{
+	struct stm32_mdma_desc *desc;
+	int i;
+
+	desc = kzalloc(offsetof(typeof(*desc), node[count]), GFP_NOWAIT);
+	if (!desc)
+		return NULL;
+
+	for (i = 0; i < count; i++) {
+		desc->node[i].hwdesc =
+			dma_pool_alloc(chan->desc_pool, GFP_NOWAIT,
+				       &desc->node[i].hwdesc_phys);
+		if (!desc->node[i].hwdesc)
+			goto err;
+	}
+
+	desc->count = count;
+
+	return desc;
+
+err:
+	dev_err(chan2dev(chan), "Failed to allocate descriptor\n");
+	while (--i >= 0)
+		dma_pool_free(chan->desc_pool, desc->node[i].hwdesc,
+			      desc->node[i].hwdesc_phys);
+	kfree(desc);
+	return NULL;
+}
+
+static void stm32_mdma_desc_free(struct virt_dma_desc *vdesc)
+{
+	struct stm32_mdma_desc *desc = to_stm32_mdma_desc(vdesc);
+	struct stm32_mdma_chan *chan = to_stm32_mdma_chan(vdesc->tx.chan);
+	int i;
+
+	for (i = 0; i < desc->count; i++)
+		dma_pool_free(chan->desc_pool, desc->node[i].hwdesc,
+			      desc->node[i].hwdesc_phys);
+	kfree(desc);
+}
+
+static int stm32_mdma_get_width(struct stm32_mdma_chan *chan,
+				enum dma_slave_buswidth width)
+{
+	switch (width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+	case DMA_SLAVE_BUSWIDTH_8_BYTES:
+		return ffs(width) - 1;
+	default:
+		dev_err(chan2dev(chan), "Dma bus width %i not supported\n",
+			width);
+		return -EINVAL;
+	}
+}
+
+static enum dma_slave_buswidth stm32_mdma_get_max_width(dma_addr_t addr,
+							u32 buf_len, u32 tlen)
+{
+	enum dma_slave_buswidth max_width = DMA_SLAVE_BUSWIDTH_8_BYTES;
+
+	for (max_width = DMA_SLAVE_BUSWIDTH_8_BYTES;
+	     max_width > DMA_SLAVE_BUSWIDTH_1_BYTE;
+	     max_width >>= 1) {
+		/*
+		 * Address and buffer length both have to be aligned on
+		 * bus width
+		 */
+		if ((((buf_len | addr) & (max_width - 1)) == 0) &&
+		    tlen >= max_width)
+			break;
+	}
+
+	return max_width;
+}
+
+static u32 stm32_mdma_get_best_burst(u32 buf_len, u32 tlen, u32 max_burst,
+				     enum dma_slave_buswidth width)
+{
+	u32 best_burst;
+
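+	/*
+	 * The burst length (in beats) is limited by the largest power of
+	 * two dividing both the buffer and transfer lengths, and by
+	 * max_burst; fall back to a single beat.
+	 */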
+	best_burst = min((u32)1 << __ffs(tlen | buf_len),
+			 max_burst * width) / width;
+
+	return (best_burst > 0) ? best_burst : 1;
+}
+
+static int stm32_mdma_disable_chan(struct stm32_mdma_chan *chan)
+{
+	struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+	u32 ccr, cisr, id, reg;
+	int ret;
+
+	id = chan->id;
+	reg = STM32_MDMA_CCR(id);
+
+	/* Disable interrupts */
+	stm32_mdma_clr_bits(dmadev, reg, STM32_MDMA_CCR_IRQ_MASK);
+
+	ccr = stm32_mdma_read(dmadev, reg);
+	if (ccr & STM32_MDMA_CCR_EN) {
+		stm32_mdma_clr_bits(dmadev, reg, STM32_MDMA_CCR_EN);
+
+		/* Ensure that any ongoing transfer has been completed */
+		ret = readl_relaxed_poll_timeout_atomic(
+				dmadev->base + STM32_MDMA_CISR(id), cisr,
+				(cisr & STM32_MDMA_CISR_CTCIF), 10, 1000);
+		if (ret) {
+			dev_err(chan2dev(chan), "%s: timeout!\n", __func__);
+			return -EBUSY;
+		}
+	}
+
+	return 0;
+}
+
+static void stm32_mdma_stop(struct stm32_mdma_chan *chan)
+{
+	struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+	u32 status;
+	int ret;
+
+	/* Disable DMA */
+	ret = stm32_mdma_disable_chan(chan);
+	if (ret < 0)
+		return;
+
+	/* Clear interrupt status if it is there */
+	status = stm32_mdma_read(dmadev, STM32_MDMA_CISR(chan->id));
+	if (status) {
+		dev_dbg(chan2dev(chan), "%s(): clearing interrupt: 0x%08x\n",
+			__func__, status);
+		stm32_mdma_set_bits(dmadev, STM32_MDMA_CIFCR(chan->id), status);
+	}
+
+	chan->busy = false;
+}
+
+static void stm32_mdma_set_bus(struct stm32_mdma_device *dmadev, u32 *ctbr,
+			       u32 ctbr_mask, u32 src_addr)
+{
+	u32 mask;
+	int i;
+
+	/* Check if memory device is on AHB or AXI */
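+	/*
+	 * The bus bit is set when the address top nibble matches one of
+	 * the controller AHB address masks, and cleared otherwise.
+	 */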
+	*ctbr &= ~ctbr_mask;
+	mask = src_addr & 0xF0000000;
+	for (i = 0; i < dmadev->nr_ahb_addr_masks; i++) {
+		if (mask == dmadev->ahb_addr_masks[i]) {
+			*ctbr |= ctbr_mask;
+			break;
+		}
+	}
+}
+
+static int stm32_mdma_set_xfer_param(struct stm32_mdma_chan *chan,
+				     enum dma_transfer_direction direction,
+				     u32 *mdma_ccr, u32 *mdma_ctcr,
+				     u32 *mdma_ctbr, dma_addr_t addr,
+				     u32 buf_len)
+{
+	struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+	struct stm32_mdma_chan_config *chan_config = &chan->chan_config;
+	enum dma_slave_buswidth src_addr_width, dst_addr_width;
+	phys_addr_t src_addr, dst_addr;
+	int src_bus_width, dst_bus_width;
+	u32 src_maxburst, dst_maxburst, src_best_burst, dst_best_burst;
+	u32 ccr, ctcr, ctbr, tlen;
+
+	src_addr_width = chan->dma_config.src_addr_width;
+	dst_addr_width = chan->dma_config.dst_addr_width;
+	src_maxburst = chan->dma_config.src_maxburst;
+	dst_maxburst = chan->dma_config.dst_maxburst;
+
+	ccr = stm32_mdma_read(dmadev, STM32_MDMA_CCR(chan->id));
+	ctcr = stm32_mdma_read(dmadev, STM32_MDMA_CTCR(chan->id));
+	ctbr = stm32_mdma_read(dmadev, STM32_MDMA_CTBR(chan->id));
+
+	/* Enable HW request mode */
+	ctcr &= ~STM32_MDMA_CTCR_SWRM;
+
+	/* Set DINC, SINC, DINCOS, SINCOS, TRGM and TLEN retrieved from DT */
+	ctcr &= ~STM32_MDMA_CTCR_CFG_MASK;
+	ctcr |= chan_config->transfer_config & STM32_MDMA_CTCR_CFG_MASK;
+
+	/*
+	 * For buffer transfer length (TLEN) we have to set
+	 * the number of bytes - 1 in CTCR register
+	 */
+	tlen = STM32_MDMA_CTCR_LEN2_GET(ctcr);
+	ctcr &= ~STM32_MDMA_CTCR_LEN2_MSK;
+	ctcr |= STM32_MDMA_CTCR_TLEN((tlen - 1));
+
+	/* Disable Pack Enable */
+	ctcr &= ~STM32_MDMA_CTCR_PKE;
+
+	/* Check burst size constraints */
+	if (src_maxburst * src_addr_width > STM32_MDMA_MAX_BURST ||
+	    dst_maxburst * dst_addr_width > STM32_MDMA_MAX_BURST) {
+		dev_err(chan2dev(chan),
+			"burst size * bus width higher than %d bytes\n",
+			STM32_MDMA_MAX_BURST);
+		return -EINVAL;
+	}
+
+	if ((!is_power_of_2(src_maxburst) && src_maxburst > 0) ||
+	    (!is_power_of_2(dst_maxburst) && dst_maxburst > 0)) {
+		dev_err(chan2dev(chan), "burst size must be a power of 2\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Configure channel control:
+	 * - Clear SW request, since in this case it is a HW request
+	 * - Clear WEX, HEX and BEX bits
+	 * - Set priority level
+	 */
+	ccr &= ~(STM32_MDMA_CCR_SWRQ | STM32_MDMA_CCR_WEX | STM32_MDMA_CCR_HEX |
+		 STM32_MDMA_CCR_BEX | STM32_MDMA_CCR_PL_MASK);
+	ccr |= STM32_MDMA_CCR_PL(chan_config->priority_level);
+
+	/* Configure Trigger selection */
+	ctbr &= ~STM32_MDMA_CTBR_TSEL_MASK;
+	ctbr |= STM32_MDMA_CTBR_TSEL(chan_config->request);
+
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		dst_addr = chan->dma_config.dst_addr;
+
+		/* Set device data size */
+		dst_bus_width = stm32_mdma_get_width(chan, dst_addr_width);
+		if (dst_bus_width < 0)
+			return dst_bus_width;
+		ctcr &= ~STM32_MDMA_CTCR_DSIZE_MASK;
+		ctcr |= STM32_MDMA_CTCR_DSIZE(dst_bus_width);
+
+		/* Set device burst value */
+		dst_best_burst = stm32_mdma_get_best_burst(buf_len, tlen,
+							   dst_maxburst,
+							   dst_addr_width);
+		chan->mem_burst = dst_best_burst;
+		ctcr &= ~STM32_MDMA_CTCR_DBURST_MASK;
+		ctcr |= STM32_MDMA_CTCR_DBURST((ilog2(dst_best_burst)));
+
+		/* Set memory data size */
+		src_addr_width = stm32_mdma_get_max_width(addr, buf_len, tlen);
+		chan->mem_width = src_addr_width;
+		src_bus_width = stm32_mdma_get_width(chan, src_addr_width);
+		if (src_bus_width < 0)
+			return src_bus_width;
+		ctcr &= ~STM32_MDMA_CTCR_SSIZE_MASK |
+			STM32_MDMA_CTCR_SINCOS_MASK;
+		ctcr |= STM32_MDMA_CTCR_SSIZE(src_bus_width) |
+			STM32_MDMA_CTCR_SINCOS(src_bus_width);
+
+		/* Set memory burst value */
+		src_maxburst = STM32_MDMA_MAX_BUF_LEN / src_addr_width;
+		src_best_burst = stm32_mdma_get_best_burst(buf_len, tlen,
+							   src_maxburst,
+							   src_addr_width);
+		chan->mem_burst = src_best_burst;
+		ctcr &= ~STM32_MDMA_CTCR_SBURST_MASK;
+		ctcr |= STM32_MDMA_CTCR_SBURST((ilog2(src_best_burst)));
+
+		/* Select bus */
+		stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_DBUS,
+				   dst_addr);
+
+		if (dst_bus_width != src_bus_width)
+			ctcr |= STM32_MDMA_CTCR_PKE;
+
+		/* Set destination address */
+		stm32_mdma_write(dmadev, STM32_MDMA_CDAR(chan->id), dst_addr);
+		break;
+
+	case DMA_DEV_TO_MEM:
+		src_addr = chan->dma_config.src_addr;
+
+		/* Set device data size */
+		src_bus_width = stm32_mdma_get_width(chan, src_addr_width);
+		if (src_bus_width < 0)
+			return src_bus_width;
+		ctcr &= ~STM32_MDMA_CTCR_SSIZE_MASK;
+		ctcr |= STM32_MDMA_CTCR_SSIZE(src_bus_width);
+
+		/* Set device burst value */
+		src_best_burst = stm32_mdma_get_best_burst(buf_len, tlen,
+							   src_maxburst,
+							   src_addr_width);
+		ctcr &= ~STM32_MDMA_CTCR_SBURST_MASK;
+		ctcr |= STM32_MDMA_CTCR_SBURST((ilog2(src_best_burst)));
+
+		/* Set memory data size */
+		dst_addr_width = stm32_mdma_get_max_width(addr, buf_len, tlen);
+		chan->mem_width = dst_addr_width;
+		dst_bus_width = stm32_mdma_get_width(chan, dst_addr_width);
+		if (dst_bus_width < 0)
+			return dst_bus_width;
+		ctcr &= ~(STM32_MDMA_CTCR_DSIZE_MASK |
+			STM32_MDMA_CTCR_DINCOS_MASK);
+		ctcr |= STM32_MDMA_CTCR_DSIZE(dst_bus_width) |
+			STM32_MDMA_CTCR_DINCOS(dst_bus_width);
+
+		/* Set memory burst value */
+		dst_maxburst = STM32_MDMA_MAX_BUF_LEN / dst_addr_width;
+		dst_best_burst = stm32_mdma_get_best_burst(buf_len, tlen,
+							   dst_maxburst,
+							   dst_addr_width);
+		ctcr &= ~STM32_MDMA_CTCR_DBURST_MASK;
+		ctcr |= STM32_MDMA_CTCR_DBURST((ilog2(dst_best_burst)));
+
+		/* Select bus */
+		stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_SBUS,
+				   src_addr);
+
+		if (dst_bus_width != src_bus_width)
+			ctcr |= STM32_MDMA_CTCR_PKE;
+
+		/* Set source address */
+		stm32_mdma_write(dmadev, STM32_MDMA_CSAR(chan->id), src_addr);
+		break;
+
+	default:
+		dev_err(chan2dev(chan), "Dma direction is not supported\n");
+		return -EINVAL;
+	}
+
+	*mdma_ccr = ccr;
+	*mdma_ctcr = ctcr;
+	*mdma_ctbr = ctbr;
+
+	return 0;
+}
+
+static void stm32_mdma_dump_hwdesc(struct stm32_mdma_chan *chan,
+				   struct stm32_mdma_desc_node *node)
+{
+	dev_dbg(chan2dev(chan), "hwdesc:  %pad\n", &node->hwdesc_phys);
+	dev_dbg(chan2dev(chan), "CTCR:    0x%08x\n", node->hwdesc->ctcr);
+	dev_dbg(chan2dev(chan), "CBNDTR:  0x%08x\n", node->hwdesc->cbndtr);
+	dev_dbg(chan2dev(chan), "CSAR:    0x%08x\n", node->hwdesc->csar);
+	dev_dbg(chan2dev(chan), "CDAR:    0x%08x\n", node->hwdesc->cdar);
+	dev_dbg(chan2dev(chan), "CBRUR:   0x%08x\n", node->hwdesc->cbrur);
+	dev_dbg(chan2dev(chan), "CLAR:    0x%08x\n", node->hwdesc->clar);
+	dev_dbg(chan2dev(chan), "CTBR:    0x%08x\n", node->hwdesc->ctbr);
+	dev_dbg(chan2dev(chan), "CMAR:    0x%08x\n", node->hwdesc->cmar);
+	dev_dbg(chan2dev(chan), "CMDR:    0x%08x\n\n", node->hwdesc->cmdr);
+}
+
+static void stm32_mdma_setup_hwdesc(struct stm32_mdma_chan *chan,
+				    struct stm32_mdma_desc *desc,
+				    enum dma_transfer_direction dir, u32 count,
+				    dma_addr_t src_addr, dma_addr_t dst_addr,
+				    u32 len, u32 ctcr, u32 ctbr, bool is_last,
+				    bool is_first, bool is_cyclic)
+{
+	struct stm32_mdma_chan_config *config = &chan->chan_config;
+	struct stm32_mdma_hwdesc *hwdesc;
+	u32 next = count + 1;
+
+	hwdesc = desc->node[count].hwdesc;
+	hwdesc->ctcr = ctcr;
+	hwdesc->cbndtr &= ~(STM32_MDMA_CBNDTR_BRC_MK |
+			STM32_MDMA_CBNDTR_BRDUM |
+			STM32_MDMA_CBNDTR_BRSUM |
+			STM32_MDMA_CBNDTR_BNDT_MASK);
+	hwdesc->cbndtr |= STM32_MDMA_CBNDTR_BNDT(len);
+	hwdesc->csar = src_addr;
+	hwdesc->cdar = dst_addr;
+	hwdesc->cbrur = 0;
+	hwdesc->ctbr = ctbr;
+	hwdesc->cmar = config->mask_addr;
+	hwdesc->cmdr = config->mask_data;
+
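+	/*
+	 * Chain the descriptors through CLAR: link to the next node, loop
+	 * back to the first node for cyclic transfers, or end the list.
+	 */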
+	if (is_last) {
+		if (is_cyclic)
+			hwdesc->clar = desc->node[0].hwdesc_phys;
+		else
+			hwdesc->clar = 0;
+	} else {
+		hwdesc->clar = desc->node[next].hwdesc_phys;
+	}
+
+	stm32_mdma_dump_hwdesc(chan, &desc->node[count]);
+}
+
+static int stm32_mdma_setup_xfer(struct stm32_mdma_chan *chan,
+				 struct stm32_mdma_desc *desc,
+				 struct scatterlist *sgl, u32 sg_len,
+				 enum dma_transfer_direction direction)
+{
+	struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+	struct dma_slave_config *dma_config = &chan->dma_config;
+	struct scatterlist *sg;
+	dma_addr_t src_addr, dst_addr;
+	u32 ccr, ctcr, ctbr;
+	int i, ret = 0;
+
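+	/* Build one hardware descriptor per scatterlist entry */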
+	for_each_sg(sgl, sg, sg_len, i) {
+		if (sg_dma_len(sg) > STM32_MDMA_MAX_BLOCK_LEN) {
+			dev_err(chan2dev(chan), "Invalid block len\n");
+			return -EINVAL;
+		}
+
+		if (direction == DMA_MEM_TO_DEV) {
+			src_addr = sg_dma_address(sg);
+			dst_addr = dma_config->dst_addr;
+			ret = stm32_mdma_set_xfer_param(chan, direction, &ccr,
+							&ctcr, &ctbr, src_addr,
+							sg_dma_len(sg));
+			stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_SBUS,
+					   src_addr);
+		} else {
+			src_addr = dma_config->src_addr;
+			dst_addr = sg_dma_address(sg);
+			ret = stm32_mdma_set_xfer_param(chan, direction, &ccr,
+							&ctcr, &ctbr, dst_addr,
+							sg_dma_len(sg));
+			stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_DBUS,
+					   dst_addr);
+		}
+
+		if (ret < 0)
+			return ret;
+
+		stm32_mdma_setup_hwdesc(chan, desc, direction, i, src_addr,
+					dst_addr, sg_dma_len(sg), ctcr, ctbr,
+					i == sg_len - 1, i == 0, false);
+	}
+
+	/* Enable interrupts */
+	ccr &= ~STM32_MDMA_CCR_IRQ_MASK;
+	ccr |= STM32_MDMA_CCR_TEIE | STM32_MDMA_CCR_CTCIE;
+	if (sg_len > 1)
+		ccr |= STM32_MDMA_CCR_BTIE;
+	desc->ccr = ccr;
+
+	return 0;
+}
+
+static struct dma_async_tx_descriptor *
+stm32_mdma_prep_slave_sg(struct dma_chan *c, struct scatterlist *sgl,
+			 u32 sg_len, enum dma_transfer_direction direction,
+			 unsigned long flags, void *context)
+{
+	struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+	struct stm32_mdma_desc *desc;
+	int i, ret;
+
+	/*
+	 * Once the channel has been set up in cyclic mode, it cannot be
+	 * assigned to another request. The DMA channel needs to be
+	 * aborted or terminated before a new request can be accepted.
+	 */
+	if (chan->desc && chan->desc->cyclic) {
+		dev_err(chan2dev(chan),
+			"Request not allowed when dma in cyclic mode\n");
+		return NULL;
+	}
+
+	desc = stm32_mdma_alloc_desc(chan, sg_len);
+	if (!desc)
+		return NULL;
+
+	ret = stm32_mdma_setup_xfer(chan, desc, sgl, sg_len, direction);
+	if (ret < 0)
+		goto xfer_setup_err;
+
+	desc->cyclic = false;
+
+	return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+
+xfer_setup_err:
+	for (i = 0; i < desc->count; i++)
+		dma_pool_free(chan->desc_pool, desc->node[i].hwdesc,
+			      desc->node[i].hwdesc_phys);
+	kfree(desc);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+stm32_mdma_prep_dma_cyclic(struct dma_chan *c, dma_addr_t buf_addr,
+			   size_t buf_len, size_t period_len,
+			   enum dma_transfer_direction direction,
+			   unsigned long flags)
+{
+	struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+	struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+	struct dma_slave_config *dma_config = &chan->dma_config;
+	struct stm32_mdma_desc *desc;
+	dma_addr_t src_addr, dst_addr;
+	u32 ccr, ctcr, ctbr, count;
+	int i, ret;
+
+	/*
+	 * Once the channel has been set up in cyclic mode, it cannot be
+	 * reused for another request. The DMA channel needs to be aborted
+	 * or terminated before a new request can be issued.
+	 */
+	if (chan->desc && chan->desc->cyclic) {
+		dev_err(chan2dev(chan),
+			"Request not allowed when dma in cyclic mode\n");
+		return NULL;
+	}
+
+	if (!buf_len || !period_len || period_len > STM32_MDMA_MAX_BLOCK_LEN) {
+		dev_err(chan2dev(chan), "Invalid buffer/period len\n");
+		return NULL;
+	}
+
+	if (buf_len % period_len) {
+		dev_err(chan2dev(chan), "buf_len not multiple of period_len\n");
+		return NULL;
+	}
+
+	count = buf_len / period_len;
+
+	desc = stm32_mdma_alloc_desc(chan, count);
+	if (!desc)
+		return NULL;
+
+	/* Select bus */
+	if (direction == DMA_MEM_TO_DEV) {
+		src_addr = buf_addr;
+		ret = stm32_mdma_set_xfer_param(chan, direction, &ccr, &ctcr,
+						&ctbr, src_addr, period_len);
+		stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_SBUS,
+				   src_addr);
+	} else {
+		dst_addr = buf_addr;
+		ret = stm32_mdma_set_xfer_param(chan, direction, &ccr, &ctcr,
+						&ctbr, dst_addr, period_len);
+		stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_DBUS,
+				   dst_addr);
+	}
+
+	if (ret < 0)
+		goto xfer_setup_err;
+
+	/* Enable interrupts */
+	ccr &= ~STM32_MDMA_CCR_IRQ_MASK;
+	ccr |= STM32_MDMA_CCR_TEIE | STM32_MDMA_CCR_CTCIE | STM32_MDMA_CCR_BTIE;
+	desc->ccr = ccr;
+
+	/* Configure hwdesc list */
+	for (i = 0; i < count; i++) {
+		if (direction == DMA_MEM_TO_DEV) {
+			src_addr = buf_addr + i * period_len;
+			dst_addr = dma_config->dst_addr;
+		} else {
+			src_addr = dma_config->src_addr;
+			dst_addr = buf_addr + i * period_len;
+		}
+
+		stm32_mdma_setup_hwdesc(chan, desc, direction, i, src_addr,
+					dst_addr, period_len, ctcr, ctbr,
+					i == count - 1, i == 0, true);
+	}
+
+	desc->cyclic = true;
+
+	return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+
+xfer_setup_err:
+	for (i = 0; i < desc->count; i++)
+		dma_pool_free(chan->desc_pool, desc->node[i].hwdesc,
+			      desc->node[i].hwdesc_phys);
+	kfree(desc);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+stm32_mdma_prep_dma_memcpy(struct dma_chan *c, dma_addr_t dest, dma_addr_t src,
+			   size_t len, unsigned long flags)
+{
+	struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+	struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+	enum dma_slave_buswidth max_width;
+	struct stm32_mdma_desc *desc;
+	struct stm32_mdma_hwdesc *hwdesc;
+	u32 ccr, ctcr, ctbr, cbndtr, count, max_burst, mdma_burst;
+	u32 best_burst, tlen;
+	size_t xfer_count, offset;
+	int src_bus_width, dst_bus_width;
+	int i;
+
+	/*
+	 * Once the channel has been set up in cyclic mode, it cannot be
+	 * reused for another request. The DMA channel needs to be aborted
+	 * or terminated before a new request can be issued.
+	 */
+	if (chan->desc && chan->desc->cyclic) {
+		dev_err(chan2dev(chan),
+			"Request not allowed when dma in cyclic mode\n");
+		return NULL;
+	}
+
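+	/* One hwdesc per block of at most STM32_MDMA_MAX_BLOCK_LEN bytes */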
+	count = DIV_ROUND_UP(len, STM32_MDMA_MAX_BLOCK_LEN);
+	desc = stm32_mdma_alloc_desc(chan, count);
+	if (!desc)
+		return NULL;
+
+	ccr = stm32_mdma_read(dmadev, STM32_MDMA_CCR(chan->id));
+	ctcr = stm32_mdma_read(dmadev, STM32_MDMA_CTCR(chan->id));
+	ctbr = stm32_mdma_read(dmadev, STM32_MDMA_CTBR(chan->id));
+	cbndtr = stm32_mdma_read(dmadev, STM32_MDMA_CBNDTR(chan->id));
+
+	/* Enable the transfer error interrupt and clear the other CCR bits */
+	ccr &= ~(STM32_MDMA_CCR_WEX | STM32_MDMA_CCR_HEX |
+		 STM32_MDMA_CCR_BEX | STM32_MDMA_CCR_PL_MASK |
+		 STM32_MDMA_CCR_IRQ_MASK);
+	ccr |= STM32_MDMA_CCR_TEIE;
+
+	/* Enable SW request mode, dest/src inc and clear other bits */
+	ctcr &= ~(STM32_MDMA_CTCR_BWM | STM32_MDMA_CTCR_TRGM_MSK |
+		  STM32_MDMA_CTCR_PAM_MASK | STM32_MDMA_CTCR_PKE |
+		  STM32_MDMA_CTCR_TLEN_MSK | STM32_MDMA_CTCR_DBURST_MASK |
+		  STM32_MDMA_CTCR_SBURST_MASK | STM32_MDMA_CTCR_DINCOS_MASK |
+		  STM32_MDMA_CTCR_SINCOS_MASK | STM32_MDMA_CTCR_DSIZE_MASK |
+		  STM32_MDMA_CTCR_SSIZE_MASK | STM32_MDMA_CTCR_DINC_MASK |
+		  STM32_MDMA_CTCR_SINC_MASK);
+	ctcr |= STM32_MDMA_CTCR_SWRM | STM32_MDMA_CTCR_SINC(STM32_MDMA_INC) |
+		STM32_MDMA_CTCR_DINC(STM32_MDMA_INC);
+
+	/* Reset HW request */
+	ctbr &= ~STM32_MDMA_CTBR_TSEL_MASK;
+
+	/* Select bus */
+	stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_SBUS, src);
+	stm32_mdma_set_bus(dmadev, &ctbr, STM32_MDMA_CTBR_DBUS, dest);
+
+	/* Clear CBNDTR registers */
+	/* Clear CBNDTR register bits */
+			STM32_MDMA_CBNDTR_BRSUM | STM32_MDMA_CBNDTR_BNDT_MASK);
+
+	if (len <= STM32_MDMA_MAX_BLOCK_LEN) {
+		cbndtr |= STM32_MDMA_CBNDTR_BNDT(len);
+		if (len <= STM32_MDMA_MAX_BUF_LEN) {
+			/* Setup a buffer transfer */
+			ccr |= STM32_MDMA_CCR_TCIE | STM32_MDMA_CCR_CTCIE;
+			ctcr |= STM32_MDMA_CTCR_TRGM(STM32_MDMA_BUFFER);
+		} else {
+			/* Setup a block transfer */
+			ccr |= STM32_MDMA_CCR_BTIE | STM32_MDMA_CCR_CTCIE;
+			ctcr |= STM32_MDMA_CTCR_TRGM(STM32_MDMA_BLOCK);
+		}
+
+		tlen = STM32_MDMA_MAX_BUF_LEN;
+		ctcr |= STM32_MDMA_CTCR_TLEN((tlen - 1));
+
+		/* Set source best burst size */
+		max_width = stm32_mdma_get_max_width(src, len, tlen);
+		src_bus_width = stm32_mdma_get_width(chan, max_width);
+
+		max_burst = tlen / max_width;
+		best_burst = stm32_mdma_get_best_burst(len, tlen, max_burst,
+						       max_width);
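+		/* The burst fields take the log2 of the beat count */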
+		mdma_burst = ilog2(best_burst);
+
+		ctcr |= STM32_MDMA_CTCR_SBURST(mdma_burst) |
+			STM32_MDMA_CTCR_SSIZE(src_bus_width) |
+			STM32_MDMA_CTCR_SINCOS(src_bus_width);
+
+		/* Set destination best burst size */
+		max_width = stm32_mdma_get_max_width(dest, len, tlen);
+		dst_bus_width = stm32_mdma_get_width(chan, max_width);
+
+		max_burst = tlen / max_width;
+		best_burst = stm32_mdma_get_best_burst(len, tlen, max_burst,
+						       max_width);
+		mdma_burst = ilog2(best_burst);
+
+		ctcr |= STM32_MDMA_CTCR_DBURST(mdma_burst) |
+			STM32_MDMA_CTCR_DSIZE(dst_bus_width) |
+			STM32_MDMA_CTCR_DINCOS(dst_bus_width);
+
+		if (dst_bus_width != src_bus_width)
+			ctcr |= STM32_MDMA_CTCR_PKE;
+
+		/* Prepare hardware descriptor */
+		hwdesc = desc->node[0].hwdesc;
+		hwdesc->ctcr = ctcr;
+		hwdesc->cbndtr = cbndtr;
+		hwdesc->csar = src;
+		hwdesc->cdar = dest;
+		hwdesc->cbrur = 0;
+		hwdesc->clar = 0;
+		hwdesc->ctbr = ctbr;
+		hwdesc->cmar = 0;
+		hwdesc->cmdr = 0;
+
+		stm32_mdma_dump_hwdesc(chan, &desc->node[0]);
+	} else {
+		/* Setup a LLI transfer */
+		ctcr |= STM32_MDMA_CTCR_TRGM(STM32_MDMA_LINKED_LIST) |
+			STM32_MDMA_CTCR_TLEN((STM32_MDMA_MAX_BUF_LEN - 1));
+		ccr |= STM32_MDMA_CCR_BTIE | STM32_MDMA_CCR_CTCIE;
+		tlen = STM32_MDMA_MAX_BUF_LEN;
+
+		for (i = 0, offset = 0; offset < len;
+		     i++, offset += xfer_count) {
+			xfer_count = min_t(size_t, len - offset,
+					   STM32_MDMA_MAX_BLOCK_LEN);
+
+			/* Set source best burst size */
+			max_width = stm32_mdma_get_max_width(src, len, tlen);
+			src_bus_width = stm32_mdma_get_width(chan, max_width);
+
+			max_burst = tlen / max_width;
+			best_burst = stm32_mdma_get_best_burst(len, tlen,
+							       max_burst,
+							       max_width);
+			mdma_burst = ilog2(best_burst);
+
+			ctcr |= STM32_MDMA_CTCR_SBURST(mdma_burst) |
+				STM32_MDMA_CTCR_SSIZE(src_bus_width) |
+				STM32_MDMA_CTCR_SINCOS(src_bus_width);
+
+			/* Set destination best burst size */
+			max_width = stm32_mdma_get_max_width(dest, len, tlen);
+			dst_bus_width = stm32_mdma_get_width(chan, max_width);
+
+			max_burst = tlen / max_width;
+			best_burst = stm32_mdma_get_best_burst(len, tlen,
+							       max_burst,
+							       max_width);
+			mdma_burst = ilog2(best_burst);
+
+			ctcr |= STM32_MDMA_CTCR_DBURST(mdma_burst) |
+				STM32_MDMA_CTCR_DSIZE(dst_bus_width) |
+				STM32_MDMA_CTCR_DINCOS(dst_bus_width);
+
+			if (dst_bus_width != src_bus_width)
+				ctcr |= STM32_MDMA_CTCR_PKE;
+
+			/* Prepare hardware descriptor */
+			stm32_mdma_setup_hwdesc(chan, desc, DMA_MEM_TO_MEM, i,
+						src + offset, dest + offset,
+						xfer_count, ctcr, ctbr,
+						i == count - 1, i == 0, false);
+		}
+	}
+
+	desc->ccr = ccr;
+
+	desc->cyclic = false;
+
+	return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
+}
+
+static void stm32_mdma_dump_reg(struct stm32_mdma_chan *chan)
+{
+	struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+
+	dev_dbg(chan2dev(chan), "CCR:     0x%08x\n",
+		stm32_mdma_read(dmadev, STM32_MDMA_CCR(chan->id)));
+	dev_dbg(chan2dev(chan), "CTCR:    0x%08x\n",
+		stm32_mdma_read(dmadev, STM32_MDMA_CTCR(chan->id)));
+	dev_dbg(chan2dev(chan), "CBNDTR:  0x%08x\n",
+		stm32_mdma_read(dmadev, STM32_MDMA_CBNDTR(chan->id)));
+	dev_dbg(chan2dev(chan), "CSAR:    0x%08x\n",
+		stm32_mdma_read(dmadev, STM32_MDMA_CSAR(chan->id)));
+	dev_dbg(chan2dev(chan), "CDAR:    0x%08x\n",
+		stm32_mdma_read(dmadev, STM32_MDMA_CDAR(chan->id)));
+	dev_dbg(chan2dev(chan), "CBRUR:   0x%08x\n",
+		stm32_mdma_read(dmadev, STM32_MDMA_CBRUR(chan->id)));
+	dev_dbg(chan2dev(chan), "CLAR:    0x%08x\n",
+		stm32_mdma_read(dmadev, STM32_MDMA_CLAR(chan->id)));
+	dev_dbg(chan2dev(chan), "CTBR:    0x%08x\n",
+		stm32_mdma_read(dmadev, STM32_MDMA_CTBR(chan->id)));
+	dev_dbg(chan2dev(chan), "CMAR:    0x%08x\n",
+		stm32_mdma_read(dmadev, STM32_MDMA_CMAR(chan->id)));
+	dev_dbg(chan2dev(chan), "CMDR:    0x%08x\n",
+		stm32_mdma_read(dmadev, STM32_MDMA_CMDR(chan->id)));
+}
+
+static void stm32_mdma_start_transfer(struct stm32_mdma_chan *chan)
+{
+	struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+	struct virt_dma_desc *vdesc;
+	struct stm32_mdma_hwdesc *hwdesc;
+	u32 id = chan->id;
+	u32 status, reg;
+
+	vdesc = vchan_next_desc(&chan->vchan);
+	if (!vdesc) {
+		chan->desc = NULL;
+		return;
+	}
+
+	chan->desc = to_stm32_mdma_desc(vdesc);
+	hwdesc = chan->desc->node[0].hwdesc;
+	chan->curr_hwdesc = 0;
+
+	stm32_mdma_write(dmadev, STM32_MDMA_CCR(id), chan->desc->ccr);
+	stm32_mdma_write(dmadev, STM32_MDMA_CTCR(id), hwdesc->ctcr);
+	stm32_mdma_write(dmadev, STM32_MDMA_CBNDTR(id), hwdesc->cbndtr);
+	stm32_mdma_write(dmadev, STM32_MDMA_CSAR(id), hwdesc->csar);
+	stm32_mdma_write(dmadev, STM32_MDMA_CDAR(id), hwdesc->cdar);
+	stm32_mdma_write(dmadev, STM32_MDMA_CBRUR(id), hwdesc->cbrur);
+	stm32_mdma_write(dmadev, STM32_MDMA_CLAR(id), hwdesc->clar);
+	stm32_mdma_write(dmadev, STM32_MDMA_CTBR(id), hwdesc->ctbr);
+	stm32_mdma_write(dmadev, STM32_MDMA_CMAR(id), hwdesc->cmar);
+	stm32_mdma_write(dmadev, STM32_MDMA_CMDR(id), hwdesc->cmdr);
+
+	/* Clear interrupt status if it is there */
+	status = stm32_mdma_read(dmadev, STM32_MDMA_CISR(id));
+	if (status)
+		stm32_mdma_set_bits(dmadev, STM32_MDMA_CIFCR(id), status);
+
+	stm32_mdma_dump_reg(chan);
+
+	/* Start DMA */
+	stm32_mdma_set_bits(dmadev, STM32_MDMA_CCR(id), STM32_MDMA_CCR_EN);
+
+	/* Set SW request in case of MEM2MEM transfer */
+	if (hwdesc->ctcr & STM32_MDMA_CTCR_SWRM) {
+		reg = STM32_MDMA_CCR(id);
+		stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CCR_SWRQ);
+	}
+
+	chan->busy = true;
+
+	dev_dbg(chan2dev(chan), "vchan %pK: started\n", &chan->vchan);
+}
+
+static void stm32_mdma_issue_pending(struct dma_chan *c)
+{
+	struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+
+	if (!vchan_issue_pending(&chan->vchan))
+		goto end;
+
+	dev_dbg(chan2dev(chan), "vchan %pK: issued\n", &chan->vchan);
+
+	if (!chan->desc && !chan->busy)
+		stm32_mdma_start_transfer(chan);
+
+end:
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+}
+
+static int stm32_mdma_pause(struct dma_chan *c)
+{
+	struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+	ret = stm32_mdma_disable_chan(chan);
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+	if (!ret)
+		dev_dbg(chan2dev(chan), "vchan %pK: pause\n", &chan->vchan);
+
+	return ret;
+}
+
+static int stm32_mdma_resume(struct dma_chan *c)
+{
+	struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+	struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+	struct stm32_mdma_hwdesc *hwdesc;
+	unsigned long flags;
+	u32 status, reg;
+
+	hwdesc = chan->desc->node[chan->curr_hwdesc].hwdesc;
+
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+
+	/* Re-configure control register */
+	stm32_mdma_write(dmadev, STM32_MDMA_CCR(chan->id), chan->desc->ccr);
+
+	/* Clear interrupt status if it is there */
+	status = stm32_mdma_read(dmadev, STM32_MDMA_CISR(chan->id));
+	if (status)
+		stm32_mdma_set_bits(dmadev, STM32_MDMA_CIFCR(chan->id), status);
+
+	stm32_mdma_dump_reg(chan);
+
+	/* Re-start DMA */
+	reg = STM32_MDMA_CCR(chan->id);
+	stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CCR_EN);
+
+	/* Set SW request in case of MEM2MEM transfer */
+	if (hwdesc->ctcr & STM32_MDMA_CTCR_SWRM)
+		stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CCR_SWRQ);
+
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+	dev_dbg(chan2dev(chan), "vchan %pK: resume\n", &chan->vchan);
+
+	return 0;
+}
+
+static int stm32_mdma_terminate_all(struct dma_chan *c)
+{
+	struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+	if (chan->busy) {
+		stm32_mdma_stop(chan);
+		chan->desc = NULL;
+	}
+	vchan_get_all_descriptors(&chan->vchan, &head);
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+	vchan_dma_desc_free_list(&chan->vchan, &head);
+
+	return 0;
+}
+
+static void stm32_mdma_synchronize(struct dma_chan *c)
+{
+	struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+
+	vchan_synchronize(&chan->vchan);
+}
+
+static int stm32_mdma_slave_config(struct dma_chan *c,
+				   struct dma_slave_config *config)
+{
+	struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+
+	memcpy(&chan->dma_config, config, sizeof(*config));
+
+	return 0;
+}
+
+static size_t stm32_mdma_desc_residue(struct stm32_mdma_chan *chan,
+				      struct stm32_mdma_desc *desc,
+				      u32 curr_hwdesc)
+{
+	struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+	struct stm32_mdma_hwdesc *hwdesc = desc->node[0].hwdesc;
+	u32 cbndtr, residue, modulo, burst_size;
+	int i;
+
+	residue = 0;
+	for (i = curr_hwdesc + 1; i < desc->count; i++) {
+		hwdesc = desc->node[i].hwdesc;
+		residue += STM32_MDMA_CBNDTR_BNDT(hwdesc->cbndtr);
+	}
+	cbndtr = stm32_mdma_read(dmadev, STM32_MDMA_CBNDTR(chan->id));
+	residue += cbndtr & STM32_MDMA_CBNDTR_BNDT_MASK;
+
+	if (!chan->mem_burst)
+		return residue;
+
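+	/* Round the residue up to the next memory burst boundary */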
+	burst_size = chan->mem_burst * chan->mem_width;
+	modulo = residue % burst_size;
+	if (modulo)
+		residue = residue - modulo + burst_size;
+
+	return residue;
+}
+
+static enum dma_status stm32_mdma_tx_status(struct dma_chan *c,
+					    dma_cookie_t cookie,
+					    struct dma_tx_state *state)
+{
+	struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+	struct virt_dma_desc *vdesc;
+	enum dma_status status;
+	unsigned long flags;
+	u32 residue = 0;
+
+	status = dma_cookie_status(c, cookie, state);
+	if ((status == DMA_COMPLETE) || (!state))
+		return status;
+
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+
+	vdesc = vchan_find_desc(&chan->vchan, cookie);
+	if (chan->desc && cookie == chan->desc->vdesc.tx.cookie)
+		residue = stm32_mdma_desc_residue(chan, chan->desc,
+						  chan->curr_hwdesc);
+	else if (vdesc)
+		residue = stm32_mdma_desc_residue(chan,
+						  to_stm32_mdma_desc(vdesc), 0);
+	dma_set_residue(state, residue);
+
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+	return status;
+}
+
+static void stm32_mdma_xfer_end(struct stm32_mdma_chan *chan)
+{
+	list_del(&chan->desc->vdesc.node);
+	vchan_cookie_complete(&chan->desc->vdesc);
+	chan->desc = NULL;
+	chan->busy = false;
+
+	/* Start the next transfer if there is a pending descriptor */
+	stm32_mdma_start_transfer(chan);
+}
+
+static irqreturn_t stm32_mdma_irq_handler(int irq, void *devid)
+{
+	struct stm32_mdma_device *dmadev = devid;
+	struct stm32_mdma_chan *chan;
+	u32 reg, id, ien, status, flag;
+
+	/* Find out which channel generates the interrupt */
+	status = readl_relaxed(dmadev->base + STM32_MDMA_GISR0);
+	if (status) {
+		id = __ffs(status);
+	} else {
+		status = readl_relaxed(dmadev->base + STM32_MDMA_GISR1);
+		if (!status) {
+			dev_dbg(mdma2dev(dmadev), "spurious interrupt\n");
+			return IRQ_NONE;
+		}
+		id = __ffs(status);
+		/*
+		 * GISR0 reports status for channel ids 0 to 31, while GISR1
+		 * reports status for channel ids 32 to 62.
+		 */
+		id += 32;
+	}
+
+	if (id >= dmadev->nr_channels) {
+		dev_err(mdma2dev(dmadev), "MDMA channel not initialized\n");
+		goto exit;
+	}
+	chan = &dmadev->chan[id];
+
+	/* Handle interrupt for the channel */
+	spin_lock(&chan->vchan.lock);
+	status = stm32_mdma_read(dmadev, STM32_MDMA_CISR(chan->id));
+	ien = stm32_mdma_read(dmadev, STM32_MDMA_CCR(chan->id));
+	ien &= STM32_MDMA_CCR_IRQ_MASK;
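+	/* Align the CCR interrupt enable bits with the CISR status flags */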
+	ien >>= 1;
+
+	if (!(status & ien)) {
+		spin_unlock(&chan->vchan.lock);
+		dev_dbg(chan2dev(chan),
+			"spurious interrupt (status=0x%04x, ien=0x%04x)\n",
+			status, ien);
+		return IRQ_NONE;
+	}
+
+	flag = __ffs(status & ien);
+	reg = STM32_MDMA_CIFCR(chan->id);
+
+	switch (1 << flag) {
+	case STM32_MDMA_CISR_TEIF:
+		id = chan->id;
+		status = readl_relaxed(dmadev->base + STM32_MDMA_CESR(id));
+		dev_err(chan2dev(chan), "Transfer Err: stat=0x%08x\n", status);
+		stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CTEIF);
+		break;
+
+	case STM32_MDMA_CISR_CTCIF:
+		stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CCTCIF);
+		stm32_mdma_xfer_end(chan);
+		break;
+
+	case STM32_MDMA_CISR_BRTIF:
+		stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CBRTIF);
+		break;
+
+	case STM32_MDMA_CISR_BTIF:
+		stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CBTIF);
+		chan->curr_hwdesc++;
+		if (chan->desc && chan->desc->cyclic) {
+			if (chan->curr_hwdesc == chan->desc->count)
+				chan->curr_hwdesc = 0;
+			vchan_cyclic_callback(&chan->desc->vdesc);
+		}
+		break;
+
+	case STM32_MDMA_CISR_TCIF:
+		stm32_mdma_set_bits(dmadev, reg, STM32_MDMA_CIFCR_CLTCIF);
+		break;
+
+	default:
+		dev_err(chan2dev(chan), "interrupt %d unhandled (status=0x%04x)\n",
+			1 << flag, status);
+	}
+
+	spin_unlock(&chan->vchan.lock);
+
+exit:
+	return IRQ_HANDLED;
+}
+
+static int stm32_mdma_alloc_chan_resources(struct dma_chan *c)
+{
+	struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+	struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+	int ret;
+
+	chan->desc_pool = dmam_pool_create(dev_name(&c->dev->device),
+					   c->device->dev,
+					   sizeof(struct stm32_mdma_hwdesc),
+					  __alignof__(struct stm32_mdma_hwdesc),
+					   0);
+	if (!chan->desc_pool) {
+		dev_err(chan2dev(chan), "failed to allocate descriptor pool\n");
+		return -ENOMEM;
+	}
+
+	ret = clk_prepare_enable(dmadev->clk);
+	if (ret < 0) {
+		dev_err(chan2dev(chan), "clk_prepare_enable failed: %d\n", ret);
+		return ret;
+	}
+
+	ret = stm32_mdma_disable_chan(chan);
+	if (ret < 0)
+		clk_disable_unprepare(dmadev->clk);
+
+	return ret;
+}
+
+static void stm32_mdma_free_chan_resources(struct dma_chan *c)
+{
+	struct stm32_mdma_chan *chan = to_stm32_mdma_chan(c);
+	struct stm32_mdma_device *dmadev = stm32_mdma_get_dev(chan);
+	unsigned long flags;
+
+	dev_dbg(chan2dev(chan), "Freeing channel %d\n", chan->id);
+
+	if (chan->busy) {
+		spin_lock_irqsave(&chan->vchan.lock, flags);
+		stm32_mdma_stop(chan);
+		chan->desc = NULL;
+		spin_unlock_irqrestore(&chan->vchan.lock, flags);
+	}
+
+	clk_disable_unprepare(dmadev->clk);
+	vchan_free_chan_resources(to_virt_chan(c));
+	dmam_pool_destroy(chan->desc_pool);
+	chan->desc_pool = NULL;
+}
+
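+/*
+ * Translate a DT phandle plus five cells into a channel. The cells map to
+ * the fields filled in below: request line, priority level, CTCR transfer
+ * configuration, mask address and mask data, e.g. (illustrative values):
+ *
+ *	dmas = <&mdma1 17 0x0 0x40000000 0x0 0x0>;
+ */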
+static struct dma_chan *stm32_mdma_of_xlate(struct of_phandle_args *dma_spec,
+					    struct of_dma *ofdma)
+{
+	struct stm32_mdma_device *dmadev = ofdma->of_dma_data;
+	struct stm32_mdma_chan *chan;
+	struct dma_chan *c;
+	struct stm32_mdma_chan_config config;
+
+	if (dma_spec->args_count < 5) {
+		dev_err(mdma2dev(dmadev), "Bad number of args\n");
+		return NULL;
+	}
+
+	config.request = dma_spec->args[0];
+	config.priority_level = dma_spec->args[1];
+	config.transfer_config = dma_spec->args[2];
+	config.mask_addr = dma_spec->args[3];
+	config.mask_data = dma_spec->args[4];
+
+	if (config.request >= dmadev->nr_requests) {
+		dev_err(mdma2dev(dmadev), "Bad request line\n");
+		return NULL;
+	}
+
+	if (config.priority_level > STM32_MDMA_VERY_HIGH_PRIORITY) {
+		dev_err(mdma2dev(dmadev), "Priority level not supported\n");
+		return NULL;
+	}
+
+	c = dma_get_any_slave_channel(&dmadev->ddev);
+	if (!c) {
+		dev_err(mdma2dev(dmadev), "No more channels available\n");
+		return NULL;
+	}
+
+	chan = to_stm32_mdma_chan(c);
+	chan->chan_config = config;
+
+	return c;
+}
+
+static const struct of_device_id stm32_mdma_of_match[] = {
+	{ .compatible = "st,stm32h7-mdma", },
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, stm32_mdma_of_match);
+
+static int stm32_mdma_probe(struct platform_device *pdev)
+{
+	struct stm32_mdma_chan *chan;
+	struct stm32_mdma_device *dmadev;
+	struct dma_device *dd;
+	struct device_node *of_node;
+	struct resource *res;
+	u32 nr_channels, nr_requests;
+	int i, count, ret;
+
+	of_node = pdev->dev.of_node;
+	if (!of_node)
+		return -ENODEV;
+
+	ret = device_property_read_u32(&pdev->dev, "dma-channels",
+				       &nr_channels);
+	if (ret) {
+		nr_channels = STM32_MDMA_MAX_CHANNELS;
+		dev_warn(&pdev->dev, "MDMA defaulting to %i channels\n",
+			 nr_channels);
+	}
+
+	ret = device_property_read_u32(&pdev->dev, "dma-requests",
+				       &nr_requests);
+	if (ret) {
+		nr_requests = STM32_MDMA_MAX_REQUESTS;
+		dev_warn(&pdev->dev, "MDMA defaulting to %i request lines\n",
+			 nr_requests);
+	}
+
+	count = device_property_read_u32_array(&pdev->dev, "st,ahb-addr-masks",
+					       NULL, 0);
+	if (count < 0)
+		count = 0;
+
+	dmadev = devm_kzalloc(&pdev->dev, sizeof(*dmadev) + sizeof(u32) * count,
+			      GFP_KERNEL);
+	if (!dmadev)
+		return -ENOMEM;
+
+	dmadev->nr_channels = nr_channels;
+	dmadev->nr_requests = nr_requests;
+	device_property_read_u32_array(&pdev->dev, "st,ahb-addr-masks",
+				       dmadev->ahb_addr_masks,
+				       count);
+	dmadev->nr_ahb_addr_masks = count;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	dmadev->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(dmadev->base))
+		return PTR_ERR(dmadev->base);
+
+	dmadev->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(dmadev->clk)) {
+		ret = PTR_ERR(dmadev->clk);
+		if (ret == -EPROBE_DEFER)
+			dev_info(&pdev->dev, "Missing controller clock\n");
+		return ret;
+	}
+
+	dmadev->rst = devm_reset_control_get(&pdev->dev, NULL);
+	if (!IS_ERR(dmadev->rst)) {
+		reset_control_assert(dmadev->rst);
+		udelay(2);
+		reset_control_deassert(dmadev->rst);
+	}
+
+	dd = &dmadev->ddev;
+	dma_cap_set(DMA_SLAVE, dd->cap_mask);
+	dma_cap_set(DMA_PRIVATE, dd->cap_mask);
+	dma_cap_set(DMA_CYCLIC, dd->cap_mask);
+	dma_cap_set(DMA_MEMCPY, dd->cap_mask);
+	dd->device_alloc_chan_resources = stm32_mdma_alloc_chan_resources;
+	dd->device_free_chan_resources = stm32_mdma_free_chan_resources;
+	dd->device_tx_status = stm32_mdma_tx_status;
+	dd->device_issue_pending = stm32_mdma_issue_pending;
+	dd->device_prep_slave_sg = stm32_mdma_prep_slave_sg;
+	dd->device_prep_dma_cyclic = stm32_mdma_prep_dma_cyclic;
+	dd->device_prep_dma_memcpy = stm32_mdma_prep_dma_memcpy;
+	dd->device_config = stm32_mdma_slave_config;
+	dd->device_pause = stm32_mdma_pause;
+	dd->device_resume = stm32_mdma_resume;
+	dd->device_terminate_all = stm32_mdma_terminate_all;
+	dd->device_synchronize = stm32_mdma_synchronize;
+	dd->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+		BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+		BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |
+		BIT(DMA_SLAVE_BUSWIDTH_8_BYTES);
+	dd->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+		BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+		BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |
+		BIT(DMA_SLAVE_BUSWIDTH_8_BYTES);
+	dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV) |
+		BIT(DMA_MEM_TO_MEM);
+	dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+	dd->max_burst = STM32_MDMA_MAX_BURST;
+	dd->dev = &pdev->dev;
+	INIT_LIST_HEAD(&dd->channels);
+
+	for (i = 0; i < dmadev->nr_channels; i++) {
+		chan = &dmadev->chan[i];
+		chan->id = i;
+		chan->vchan.desc_free = stm32_mdma_desc_free;
+		vchan_init(&chan->vchan, dd);
+	}
+
+	dmadev->irq = platform_get_irq(pdev, 0);
+	if (dmadev->irq < 0) {
+		dev_err(&pdev->dev, "failed to get IRQ\n");
+		return dmadev->irq;
+	}
+
+	ret = devm_request_irq(&pdev->dev, dmadev->irq, stm32_mdma_irq_handler,
+			       0, dev_name(&pdev->dev), dmadev);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to request IRQ\n");
+		return ret;
+	}
+
+	ret = dma_async_device_register(dd);
+	if (ret)
+		return ret;
+
+	ret = of_dma_controller_register(of_node, stm32_mdma_of_xlate, dmadev);
+	if (ret < 0) {
+		dev_err(&pdev->dev,
+			"STM32 MDMA OF registration failed %d\n", ret);
+		goto err_unregister;
+	}
+
+	platform_set_drvdata(pdev, dmadev);
+
+	dev_info(&pdev->dev, "STM32 MDMA driver registered\n");
+
+	return 0;
+
+err_unregister:
+	dma_async_device_unregister(dd);
+
+	return ret;
+}
+
+static struct platform_driver stm32_mdma_driver = {
+	.probe = stm32_mdma_probe,
+	.driver = {
+		.name = "stm32-mdma",
+		.of_match_table = stm32_mdma_of_match,
+	},
+};
+
+static int __init stm32_mdma_init(void)
+{
+	return platform_driver_register(&stm32_mdma_driver);
+}
+
+subsys_initcall(stm32_mdma_init);
+
+MODULE_DESCRIPTION("Driver for STM32 MDMA controller");
+MODULE_AUTHOR("M'boumba Cedric Madianga <cedric.madianga@gmail.com>");
+MODULE_AUTHOR("Pierre-Yves Mordret <pierre-yves.mordret@st.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/sun4i-dma.c b/drivers/dma/sun4i-dma.c
new file mode 100644
index 0000000..f4ed3f1
--- /dev/null
+++ b/drivers/dma/sun4i-dma.c
@@ -0,0 +1,1289 @@
+/*
+ * Copyright (C) 2014 Emilio López
+ * Emilio López <emilio@elopez.com.ar>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "virt-dma.h"
+
+/** Common macros for normal and dedicated DMA registers **/
+
+#define SUN4I_DMA_CFG_LOADING			BIT(31)
+#define SUN4I_DMA_CFG_DST_DATA_WIDTH(width)	((width) << 25)
+#define SUN4I_DMA_CFG_DST_BURST_LENGTH(len)	((len) << 23)
+#define SUN4I_DMA_CFG_DST_ADDR_MODE(mode)	((mode) << 21)
+#define SUN4I_DMA_CFG_DST_DRQ_TYPE(type)	((type) << 16)
+#define SUN4I_DMA_CFG_SRC_DATA_WIDTH(width)	((width) << 9)
+#define SUN4I_DMA_CFG_SRC_BURST_LENGTH(len)	((len) << 7)
+#define SUN4I_DMA_CFG_SRC_ADDR_MODE(mode)	((mode) << 5)
+#define SUN4I_DMA_CFG_SRC_DRQ_TYPE(type)	(type)
+
+/** Normal DMA register values **/
+
+/* Normal DMA source/destination data request type values */
+#define SUN4I_NDMA_DRQ_TYPE_SDRAM		0x16
+#define SUN4I_NDMA_DRQ_TYPE_LIMIT		(0x1F + 1)
+
+/** Normal DMA register layout **/
+
+/* Normal DMA source/destination address mode values */
+#define SUN4I_NDMA_ADDR_MODE_LINEAR		0
+#define SUN4I_NDMA_ADDR_MODE_IO			1
+
+/* Normal DMA configuration register layout */
+#define SUN4I_NDMA_CFG_CONT_MODE		BIT(30)
+#define SUN4I_NDMA_CFG_WAIT_STATE(n)		((n) << 27)
+#define SUN4I_NDMA_CFG_DST_NON_SECURE		BIT(22)
+#define SUN4I_NDMA_CFG_BYTE_COUNT_MODE_REMAIN	BIT(15)
+#define SUN4I_NDMA_CFG_SRC_NON_SECURE		BIT(6)
+
+/** Dedicated DMA register values **/
+
+/* Dedicated DMA source/destination address mode values */
+#define SUN4I_DDMA_ADDR_MODE_LINEAR		0
+#define SUN4I_DDMA_ADDR_MODE_IO			1
+#define SUN4I_DDMA_ADDR_MODE_HORIZONTAL_PAGE	2
+#define SUN4I_DDMA_ADDR_MODE_VERTICAL_PAGE	3
+
+/* Dedicated DMA source/destination data request type values */
+#define SUN4I_DDMA_DRQ_TYPE_SDRAM		0x1
+#define SUN4I_DDMA_DRQ_TYPE_LIMIT		(0x1F + 1)
+
+/** Dedicated DMA register layout **/
+
+/* Dedicated DMA configuration register layout */
+#define SUN4I_DDMA_CFG_BUSY			BIT(30)
+#define SUN4I_DDMA_CFG_CONT_MODE		BIT(29)
+#define SUN4I_DDMA_CFG_DST_NON_SECURE		BIT(28)
+#define SUN4I_DDMA_CFG_BYTE_COUNT_MODE_REMAIN	BIT(15)
+#define SUN4I_DDMA_CFG_SRC_NON_SECURE		BIT(12)
+
+/* Dedicated DMA parameter register layout */
+#define SUN4I_DDMA_PARA_DST_DATA_BLK_SIZE(n)	(((n) - 1) << 24)
+#define SUN4I_DDMA_PARA_DST_WAIT_CYCLES(n)	(((n) - 1) << 16)
+#define SUN4I_DDMA_PARA_SRC_DATA_BLK_SIZE(n)	(((n) - 1) << 8)
+#define SUN4I_DDMA_PARA_SRC_WAIT_CYCLES(n)	(((n) - 1) << 0)
+
+/** DMA register offsets **/
+
+/* General register offsets */
+#define SUN4I_DMA_IRQ_ENABLE_REG		0x0
+#define SUN4I_DMA_IRQ_PENDING_STATUS_REG	0x4
+
+/* Normal DMA register offsets */
+#define SUN4I_NDMA_CHANNEL_REG_BASE(n)		(0x100 + (n) * 0x20)
+#define SUN4I_NDMA_CFG_REG			0x0
+#define SUN4I_NDMA_SRC_ADDR_REG			0x4
+#define SUN4I_NDMA_DST_ADDR_REG		0x8
+#define SUN4I_NDMA_BYTE_COUNT_REG		0xC
+
+/* Dedicated DMA register offsets */
+#define SUN4I_DDMA_CHANNEL_REG_BASE(n)		(0x300 + (n) * 0x20)
+#define SUN4I_DDMA_CFG_REG			0x0
+#define SUN4I_DDMA_SRC_ADDR_REG			0x4
+#define SUN4I_DDMA_DST_ADDR_REG		0x8
+#define SUN4I_DDMA_BYTE_COUNT_REG		0xC
+#define SUN4I_DDMA_PARA_REG			0x18
+
+/** DMA Driver **/
+
+/*
+ * Normal DMA has 8 channels, and Dedicated DMA has another 8, so
+ * that's 16 channels. As for endpoints, there are 29 and 21
+ * respectively. Given that the Normal DMA endpoints (other than
+ * SDRAM) can be used as both tx and rx, we need 78 vchans in total.
+ */
+#define SUN4I_NDMA_NR_MAX_CHANNELS	8
+#define SUN4I_DDMA_NR_MAX_CHANNELS	8
+#define SUN4I_DMA_NR_MAX_CHANNELS					\
+	(SUN4I_NDMA_NR_MAX_CHANNELS + SUN4I_DDMA_NR_MAX_CHANNELS)
+#define SUN4I_NDMA_NR_MAX_VCHANS	(29 * 2 - 1)
+#define SUN4I_DDMA_NR_MAX_VCHANS	21
+#define SUN4I_DMA_NR_MAX_VCHANS						\
+	(SUN4I_NDMA_NR_MAX_VCHANS + SUN4I_DDMA_NR_MAX_VCHANS)
+
+/* This set of SUN4I_DDMA timing parameters was found experimentally while
+ * working with the SPI driver and seems to make it behave correctly */
+#define SUN4I_DDMA_MAGIC_SPI_PARAMETERS \
+	(SUN4I_DDMA_PARA_DST_DATA_BLK_SIZE(1) |			\
+	 SUN4I_DDMA_PARA_SRC_DATA_BLK_SIZE(1) |				\
+	 SUN4I_DDMA_PARA_DST_WAIT_CYCLES(2) |				\
+	 SUN4I_DDMA_PARA_SRC_WAIT_CYCLES(2))
+
+struct sun4i_dma_pchan {
+	/* Register base of channel */
+	void __iomem			*base;
+	/* vchan currently being serviced */
+	struct sun4i_dma_vchan		*vchan;
+	/* Is this a dedicated pchan? */
+	int				is_dedicated;
+};
+
+struct sun4i_dma_vchan {
+	struct virt_dma_chan		vc;
+	struct dma_slave_config		cfg;
+	struct sun4i_dma_pchan		*pchan;
+	struct sun4i_dma_promise	*processing;
+	struct sun4i_dma_contract	*contract;
+	u8				endpoint;
+	int				is_dedicated;
+};
+
+struct sun4i_dma_promise {
+	u32				cfg;
+	u32				para;
+	dma_addr_t			src;
+	dma_addr_t			dst;
+	size_t				len;
+	struct list_head		list;
+};
+
+/* A contract is a set of promises */
+struct sun4i_dma_contract {
+	struct virt_dma_desc		vd;
+	struct list_head		demands;
+	struct list_head		completed_demands;
+	int				is_cyclic;
+};
+
+struct sun4i_dma_dev {
+	DECLARE_BITMAP(pchans_used, SUN4I_DMA_NR_MAX_CHANNELS);
+	struct dma_device		slave;
+	struct sun4i_dma_pchan		*pchans;
+	struct sun4i_dma_vchan		*vchans;
+	void __iomem			*base;
+	struct clk			*clk;
+	int				irq;
+	spinlock_t			lock;
+};
+
+static struct sun4i_dma_dev *to_sun4i_dma_dev(struct dma_device *dev)
+{
+	return container_of(dev, struct sun4i_dma_dev, slave);
+}
+
+static struct sun4i_dma_vchan *to_sun4i_dma_vchan(struct dma_chan *chan)
+{
+	return container_of(chan, struct sun4i_dma_vchan, vc.chan);
+}
+
+static struct sun4i_dma_contract *to_sun4i_dma_contract(struct virt_dma_desc *vd)
+{
+	return container_of(vd, struct sun4i_dma_contract, vd);
+}
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+	return &chan->dev->device;
+}
+
+static int convert_burst(u32 maxburst)
+{
+	if (maxburst > 8)
+		return -EINVAL;
+
+	/* 1 -> 0, 4 -> 1, 8 -> 2 */
+	return (maxburst >> 2);
+}
+
+static int convert_buswidth(enum dma_slave_buswidth addr_width)
+{
+	if (addr_width > DMA_SLAVE_BUSWIDTH_4_BYTES)
+		return -EINVAL;
+
+	/* 8 (1 byte) -> 0, 16 (2 bytes) -> 1, 32 (4 bytes) -> 2 */
+	return (addr_width >> 1);
+}
+
+static void sun4i_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan);
+
+	vchan_free_chan_resources(&vchan->vc);
+}
+
+static struct sun4i_dma_pchan *find_and_use_pchan(struct sun4i_dma_dev *priv,
+						  struct sun4i_dma_vchan *vchan)
+{
+	struct sun4i_dma_pchan *pchan = NULL, *pchans = priv->pchans;
+	unsigned long flags;
+	int i, max;
+
+	/*
+	 * pchans 0 to SUN4I_NDMA_NR_MAX_CHANNELS - 1 are normal, and the
+	 * ones from SUN4I_NDMA_NR_MAX_CHANNELS onwards are dedicated
+	 */
+	if (vchan->is_dedicated) {
+		i = SUN4I_NDMA_NR_MAX_CHANNELS;
+		max = SUN4I_DMA_NR_MAX_CHANNELS;
+	} else {
+		i = 0;
+		max = SUN4I_NDMA_NR_MAX_CHANNELS;
+	}
+
+	spin_lock_irqsave(&priv->lock, flags);
+	for_each_clear_bit_from(i, priv->pchans_used, max) {
+		pchan = &pchans[i];
+		pchan->vchan = vchan;
+		set_bit(i, priv->pchans_used);
+		break;
+	}
+	spin_unlock_irqrestore(&priv->lock, flags);
+
+	return pchan;
+}
+
+static void release_pchan(struct sun4i_dma_dev *priv,
+			  struct sun4i_dma_pchan *pchan)
+{
+	unsigned long flags;
+	int nr = pchan - priv->pchans;
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	pchan->vchan = NULL;
+	clear_bit(nr, priv->pchans_used);
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static void configure_pchan(struct sun4i_dma_pchan *pchan,
+			    struct sun4i_dma_promise *d)
+{
+	/*
+	 * Configure addresses and misc parameters depending on type.
+	 * SUN4I_DDMA has an extra field with timing parameters.
+	 */
+	if (pchan->is_dedicated) {
+		writel_relaxed(d->src, pchan->base + SUN4I_DDMA_SRC_ADDR_REG);
+		writel_relaxed(d->dst, pchan->base + SUN4I_DDMA_DST_ADDR_REG);
+		writel_relaxed(d->len, pchan->base + SUN4I_DDMA_BYTE_COUNT_REG);
+		writel_relaxed(d->para, pchan->base + SUN4I_DDMA_PARA_REG);
+		writel_relaxed(d->cfg, pchan->base + SUN4I_DDMA_CFG_REG);
+	} else {
+		writel_relaxed(d->src, pchan->base + SUN4I_NDMA_SRC_ADDR_REG);
+		writel_relaxed(d->dst, pchan->base + SUN4I_NDMA_DST_ADDR_REG);
+		writel_relaxed(d->len, pchan->base + SUN4I_NDMA_BYTE_COUNT_REG);
+		writel_relaxed(d->cfg, pchan->base + SUN4I_NDMA_CFG_REG);
+	}
+}
+
+static void set_pchan_interrupt(struct sun4i_dma_dev *priv,
+				struct sun4i_dma_pchan *pchan,
+				int half, int end)
+{
+	u32 reg;
+	int pchan_number = pchan - priv->pchans;
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->lock, flags);
+
+	reg = readl_relaxed(priv->base + SUN4I_DMA_IRQ_ENABLE_REG);
+
+	if (half)
+		reg |= BIT(pchan_number * 2);
+	else
+		reg &= ~BIT(pchan_number * 2);
+
+	if (end)
+		reg |= BIT(pchan_number * 2 + 1);
+	else
+		reg &= ~BIT(pchan_number * 2 + 1);
+
+	writel_relaxed(reg, priv->base + SUN4I_DMA_IRQ_ENABLE_REG);
+
+	spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+/**
+ * Execute pending operations on a vchan
+ *
+ * When given a vchan, this function will try to acquire a suitable
+ * pchan and, if successful, will configure it to fulfill a promise
+ * from the next pending contract.
+ *
+ * This function must be called with &vchan->vc.lock held.
+ */
+static int __execute_vchan_pending(struct sun4i_dma_dev *priv,
+				   struct sun4i_dma_vchan *vchan)
+{
+	struct sun4i_dma_promise *promise = NULL;
+	struct sun4i_dma_contract *contract = NULL;
+	struct sun4i_dma_pchan *pchan;
+	struct virt_dma_desc *vd;
+	int ret;
+
+	lockdep_assert_held(&vchan->vc.lock);
+
+	/* We need a pchan to do anything, so secure one if available */
+	pchan = find_and_use_pchan(priv, vchan);
+	if (!pchan)
+		return -EBUSY;
+
+	/*
+	 * Channel endpoints must not be repeated, so if this vchan
+	 * has already submitted some work, we can't do anything else
+	 */
+	if (vchan->processing) {
+		dev_dbg(chan2dev(&vchan->vc.chan),
+			"processing something to this endpoint already\n");
+		ret = -EBUSY;
+		goto release_pchan;
+	}
+
+	do {
+		/* Figure out which contract we're working with today */
+		vd = vchan_next_desc(&vchan->vc);
+		if (!vd) {
+			dev_dbg(chan2dev(&vchan->vc.chan),
+				"No pending contract found");
+			ret = 0;
+			goto release_pchan;
+		}
+
+		contract = to_sun4i_dma_contract(vd);
+		if (list_empty(&contract->demands)) {
+			/* The contract has been completed so mark it as such */
+			list_del(&contract->vd.node);
+			vchan_cookie_complete(&contract->vd);
+			dev_dbg(chan2dev(&vchan->vc.chan),
+				"Empty contract found and marked complete");
+		}
+	} while (list_empty(&contract->demands));
+
+	/* Now find out what we need to do */
+	promise = list_first_entry(&contract->demands,
+				   struct sun4i_dma_promise, list);
+	vchan->processing = promise;
+
+	/* ... and make it reality */
+	if (promise) {
+		vchan->contract = contract;
+		vchan->pchan = pchan;
+		set_pchan_interrupt(priv, pchan, contract->is_cyclic, 1);
+		configure_pchan(pchan, promise);
+	}
+
+	return 0;
+
+release_pchan:
+	release_pchan(priv, pchan);
+	return ret;
+}
+
+static int sanitize_config(struct dma_slave_config *sconfig,
+			   enum dma_transfer_direction direction)
+{
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		if ((sconfig->dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) ||
+		    !sconfig->dst_maxburst)
+			return -EINVAL;
+
+		if (sconfig->src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED)
+			sconfig->src_addr_width = sconfig->dst_addr_width;
+
+		if (!sconfig->src_maxburst)
+			sconfig->src_maxburst = sconfig->dst_maxburst;
+
+		break;
+
+	case DMA_DEV_TO_MEM:
+		if ((sconfig->src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) ||
+		    !sconfig->src_maxburst)
+			return -EINVAL;
+
+		if (sconfig->dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED)
+			sconfig->dst_addr_width = sconfig->src_addr_width;
+
+		if (!sconfig->dst_maxburst)
+			sconfig->dst_maxburst = sconfig->src_maxburst;
+
+		break;
+	default:
+		return 0;
+	}
+
+	return 0;
+}
+
+/**
+ * Generate a promise, to be used in a normal DMA contract.
+ *
+ * An NDMA promise contains all the information required to program the
+ * normal part of the DMA Engine and get data copied. A non-executed
+ * promise will live in the demands list on a contract. Once it has been
+ * completed, it will be moved to the completed demands list for later freeing.
+ * All linked promises will be freed when the corresponding contract is freed
+ */
+static struct sun4i_dma_promise *
+generate_ndma_promise(struct dma_chan *chan, dma_addr_t src, dma_addr_t dest,
+		      size_t len, struct dma_slave_config *sconfig,
+		      enum dma_transfer_direction direction)
+{
+	struct sun4i_dma_promise *promise;
+	int ret;
+
+	ret = sanitize_config(sconfig, direction);
+	if (ret)
+		return NULL;
+
+	promise = kzalloc(sizeof(*promise), GFP_NOWAIT);
+	if (!promise)
+		return NULL;
+
+	promise->src = src;
+	promise->dst = dest;
+	promise->len = len;
+	promise->cfg = SUN4I_DMA_CFG_LOADING |
+		SUN4I_NDMA_CFG_BYTE_COUNT_MODE_REMAIN;
+
+	dev_dbg(chan2dev(chan),
+		"src burst %d, dst burst %d, src buswidth %d, dst buswidth %d",
+		sconfig->src_maxburst, sconfig->dst_maxburst,
+		sconfig->src_addr_width, sconfig->dst_addr_width);
+
+	/* Source burst */
+	ret = convert_burst(sconfig->src_maxburst);
+	if (ret < 0)
+		goto fail;
+	promise->cfg |= SUN4I_DMA_CFG_SRC_BURST_LENGTH(ret);
+
+	/* Destination burst */
+	ret = convert_burst(sconfig->dst_maxburst);
+	if (ret < 0)
+		goto fail;
+	promise->cfg |= SUN4I_DMA_CFG_DST_BURST_LENGTH(ret);
+
+	/* Source bus width */
+	ret = convert_buswidth(sconfig->src_addr_width);
+	if (ret < 0)
+		goto fail;
+	promise->cfg |= SUN4I_DMA_CFG_SRC_DATA_WIDTH(ret);
+
+	/* Destination bus width */
+	ret = convert_buswidth(sconfig->dst_addr_width);
+	if (ret < 0)
+		goto fail;
+	promise->cfg |= SUN4I_DMA_CFG_DST_DATA_WIDTH(ret);
+
+	return promise;
+
+fail:
+	kfree(promise);
+	return NULL;
+}
+
+/**
+ * Generate a promise, to be used in a dedicated DMA contract.
+ *
+ * A DDMA promise contains all the information required to program the
+ * Dedicated part of the DMA Engine and get data copied. A non-executed
+ * promise will live in the demands list on a contract. Once it has been
+ * completed, it will be moved to the completed demands list for later freeing.
+ * All linked promises will be freed when the corresponding contract is freed
+ */
+static struct sun4i_dma_promise *
+generate_ddma_promise(struct dma_chan *chan, dma_addr_t src, dma_addr_t dest,
+		      size_t len, struct dma_slave_config *sconfig)
+{
+	struct sun4i_dma_promise *promise;
+	int ret;
+
+	promise = kzalloc(sizeof(*promise), GFP_NOWAIT);
+	if (!promise)
+		return NULL;
+
+	promise->src = src;
+	promise->dst = dest;
+	promise->len = len;
+	promise->cfg = SUN4I_DMA_CFG_LOADING |
+		SUN4I_DDMA_CFG_BYTE_COUNT_MODE_REMAIN;
+
+	/* Source burst */
+	ret = convert_burst(sconfig->src_maxburst);
+	if (ret < 0)
+		goto fail;
+	promise->cfg |= SUN4I_DMA_CFG_SRC_BURST_LENGTH(ret);
+
+	/* Destination burst */
+	ret = convert_burst(sconfig->dst_maxburst);
+	if (ret < 0)
+		goto fail;
+	promise->cfg |= SUN4I_DMA_CFG_DST_BURST_LENGTH(ret);
+
+	/* Source bus width */
+	ret = convert_buswidth(sconfig->src_addr_width);
+	if (ret < 0)
+		goto fail;
+	promise->cfg |= SUN4I_DMA_CFG_SRC_DATA_WIDTH(ret);
+
+	/* Destination bus width */
+	ret = convert_buswidth(sconfig->dst_addr_width);
+	if (ret < 0)
+		goto fail;
+	promise->cfg |= SUN4I_DMA_CFG_DST_DATA_WIDTH(ret);
+
+	return promise;
+
+fail:
+	kfree(promise);
+	return NULL;
+}
+
+/**
+ * Generate a contract
+ *
+ * Contracts function as DMA descriptors. As our hardware does not support
+ * linked lists, we need to implement SG via software. We use a contract
+ * to hold all the pieces of the request and process them serially one
+ * after another. Each piece is represented as a promise.
+ */
+static struct sun4i_dma_contract *generate_dma_contract(void)
+{
+	struct sun4i_dma_contract *contract;
+
+	contract = kzalloc(sizeof(*contract), GFP_NOWAIT);
+	if (!contract)
+		return NULL;
+
+	INIT_LIST_HEAD(&contract->demands);
+	INIT_LIST_HEAD(&contract->completed_demands);
+
+	return contract;
+}
+
+/**
+ * Get next promise on a cyclic transfer
+ *
+ * Cyclic contracts contain a series of promises which are executed on a
+ * loop. This function returns the next promise from a cyclic contract,
+ * so it can be programmed into the hardware.
+ */
+static struct sun4i_dma_promise *
+get_next_cyclic_promise(struct sun4i_dma_contract *contract)
+{
+	struct sun4i_dma_promise *promise;
+
+	promise = list_first_entry_or_null(&contract->demands,
+					   struct sun4i_dma_promise, list);
+	if (!promise) {
+		list_splice_init(&contract->completed_demands,
+				 &contract->demands);
+		promise = list_first_entry(&contract->demands,
+					   struct sun4i_dma_promise, list);
+	}
+
+	return promise;
+}
+
+/**
+ * Free a contract and all its associated promises
+ */
+static void sun4i_dma_free_contract(struct virt_dma_desc *vd)
+{
+	struct sun4i_dma_contract *contract = to_sun4i_dma_contract(vd);
+	struct sun4i_dma_promise *promise, *tmp;
+
+	/* Free all the demands and completed demands */
+	list_for_each_entry_safe(promise, tmp, &contract->demands, list)
+		kfree(promise);
+
+	list_for_each_entry_safe(promise, tmp, &contract->completed_demands, list)
+		kfree(promise);
+
+	kfree(contract);
+}
+
+static struct dma_async_tx_descriptor *
+sun4i_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest,
+			  dma_addr_t src, size_t len, unsigned long flags)
+{
+	struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan);
+	struct dma_slave_config *sconfig = &vchan->cfg;
+	struct sun4i_dma_promise *promise;
+	struct sun4i_dma_contract *contract;
+
+	contract = generate_dma_contract();
+	if (!contract)
+		return NULL;
+
+	/*
+	 * We can only copy between bus-aligned addresses, so pick the
+	 * widest bus width to get decent performance. We also maximize
+	 * the burst size for the same reason.
+	 */
+	sconfig->src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	sconfig->dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	sconfig->src_maxburst = 8;
+	sconfig->dst_maxburst = 8;
+
+	if (vchan->is_dedicated)
+		promise = generate_ddma_promise(chan, src, dest, len, sconfig);
+	else
+		promise = generate_ndma_promise(chan, src, dest, len, sconfig,
+						DMA_MEM_TO_MEM);
+
+	if (!promise) {
+		kfree(contract);
+		return NULL;
+	}
+
+	/* Configure memcpy mode */
+	if (vchan->is_dedicated) {
+		promise->cfg |= SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_DDMA_DRQ_TYPE_SDRAM) |
+				SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_DDMA_DRQ_TYPE_SDRAM);
+	} else {
+		promise->cfg |= SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM) |
+				SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM);
+	}
+
+	/* Fill the contract with our only promise */
+	list_add_tail(&promise->list, &contract->demands);
+
+	/* And add it to the vchan */
+	return vchan_tx_prep(&vchan->vc, &contract->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+sun4i_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf, size_t len,
+			  size_t period_len, enum dma_transfer_direction dir,
+			  unsigned long flags)
+{
+	struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan);
+	struct dma_slave_config *sconfig = &vchan->cfg;
+	struct sun4i_dma_promise *promise;
+	struct sun4i_dma_contract *contract;
+	dma_addr_t src, dest;
+	u32 endpoints;
+	int nr_periods, offset, plength, i;
+
+	if (!is_slave_direction(dir)) {
+		dev_err(chan2dev(chan), "Invalid DMA direction\n");
+		return NULL;
+	}
+
+	if (vchan->is_dedicated) {
+		/*
+		 * Cyclic transfers are currently only used for audio data,
+		 * which goes through normal DMA. There is nothing stopping
+		 * us from supporting dedicated DMA here as well, so if a
+		 * client ever requires it, it will be simple to implement.
+		 */
+		dev_err(chan2dev(chan),
+			"Cyclic transfers are only supported on Normal DMA\n");
+		return NULL;
+	}
+
+	contract = generate_dma_contract();
+	if (!contract)
+		return NULL;
+
+	contract->is_cyclic = 1;
+
+	/* Figure out the endpoints and the address we need */
+	if (dir == DMA_MEM_TO_DEV) {
+		src = buf;
+		dest = sconfig->dst_addr;
+		endpoints = SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM) |
+			    SUN4I_DMA_CFG_DST_DRQ_TYPE(vchan->endpoint) |
+			    SUN4I_DMA_CFG_DST_ADDR_MODE(SUN4I_NDMA_ADDR_MODE_IO);
+	} else {
+		src = sconfig->src_addr;
+		dest = buf;
+		endpoints = SUN4I_DMA_CFG_SRC_DRQ_TYPE(vchan->endpoint) |
+			    SUN4I_DMA_CFG_SRC_ADDR_MODE(SUN4I_NDMA_ADDR_MODE_IO) |
+			    SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM);
+	}
+
+	/*
+	 * We will be using half-done interrupts to make two periods
+	 * out of a promise, so we need to program the DMA engine less
+	 * often.
+	 */
+
+	/*
+	 * The engine can interrupt on half-transfer, so we can use
+	 * this feature to program the engine half as often as if we
+	 * didn't use it (keep in mind the hardware doesn't support
+	 * linked lists).
+	 *
+	 * Say you have a set of periods (| marks the start/end, I for
+	 * interrupt, P for programming the engine to do a new
+	 * transfer), the easy but slow way would be to do
+	 *
+	 *  |---|---|---|---| (periods / promises)
+	 *  P  I,P I,P I,P  I
+	 *
+	 * Using half transfer interrupts you can do
+	 *
+	 *  |-------|-------| (promises as configured on hw)
+	 *  |---|---|---|---| (periods)
+	 *  P   I  I,P  I   I
+	 *
+	 * Which requires half the engine programming for the same
+	 * functionality.
+	 */
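+	/* e.g. len = 4 * period_len yields nr_periods = 2 promises */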
+	nr_periods = DIV_ROUND_UP(len / period_len, 2);
+	for (i = 0; i < nr_periods; i++) {
+		/* Calculate the offset in the buffer and the length needed */
+		offset = i * period_len * 2;
+		plength = min((len - offset), (period_len * 2));
+		if (dir == DMA_MEM_TO_DEV)
+			src = buf + offset;
+		else
+			dest = buf + offset;
+
+		/* Make the promise */
+		promise = generate_ndma_promise(chan, src, dest,
+						plength, sconfig, dir);
+		if (!promise) {
+			/* TODO: should we free everything? */
+			return NULL;
+		}
+		promise->cfg |= endpoints;
+
+		/* Then add it to the contract */
+		list_add_tail(&promise->list, &contract->demands);
+	}
+
+	/* And add it to the vchan */
+	return vchan_tx_prep(&vchan->vc, &contract->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+sun4i_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+			unsigned int sg_len, enum dma_transfer_direction dir,
+			unsigned long flags, void *context)
+{
+	struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan);
+	struct dma_slave_config *sconfig = &vchan->cfg;
+	struct sun4i_dma_promise *promise;
+	struct sun4i_dma_contract *contract;
+	u8 ram_type, io_mode, linear_mode;
+	struct scatterlist *sg;
+	dma_addr_t srcaddr, dstaddr;
+	u32 endpoints, para;
+	int i;
+
+	if (!sgl)
+		return NULL;
+
+	if (!is_slave_direction(dir)) {
+		dev_err(chan2dev(chan), "Invalid DMA direction\n");
+		return NULL;
+	}
+
+	contract = generate_dma_contract();
+	if (!contract)
+		return NULL;
+
+	if (vchan->is_dedicated) {
+		io_mode = SUN4I_DDMA_ADDR_MODE_IO;
+		linear_mode = SUN4I_DDMA_ADDR_MODE_LINEAR;
+		ram_type = SUN4I_DDMA_DRQ_TYPE_SDRAM;
+	} else {
+		io_mode = SUN4I_NDMA_ADDR_MODE_IO;
+		linear_mode = SUN4I_NDMA_ADDR_MODE_LINEAR;
+		ram_type = SUN4I_NDMA_DRQ_TYPE_SDRAM;
+	}
+
+	if (dir == DMA_MEM_TO_DEV)
+		endpoints = SUN4I_DMA_CFG_DST_DRQ_TYPE(vchan->endpoint) |
+			    SUN4I_DMA_CFG_DST_ADDR_MODE(io_mode) |
+			    SUN4I_DMA_CFG_SRC_DRQ_TYPE(ram_type) |
+			    SUN4I_DMA_CFG_SRC_ADDR_MODE(linear_mode);
+	else
+		endpoints = SUN4I_DMA_CFG_DST_DRQ_TYPE(ram_type) |
+			    SUN4I_DMA_CFG_DST_ADDR_MODE(linear_mode) |
+			    SUN4I_DMA_CFG_SRC_DRQ_TYPE(vchan->endpoint) |
+			    SUN4I_DMA_CFG_SRC_ADDR_MODE(io_mode);
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		/* Figure out addresses */
+		if (dir == DMA_MEM_TO_DEV) {
+			srcaddr = sg_dma_address(sg);
+			dstaddr = sconfig->dst_addr;
+		} else {
+			srcaddr = sconfig->src_addr;
+			dstaddr = sg_dma_address(sg);
+		}
+
+		/*
+		 * These are the magic DMA engine timings that keep SPI going.
+		 * I haven't seen any interface on DMAEngine to configure
+		 * timings, and so far they seem to work for everything we
+		 * support, so I've kept them here. I don't know if other
+		 * devices need different timings because, as usual, we only
+		 * have the "para" bitfield meanings, but no comment on what
+		 * the values should be when doing a certain operation :|
+		 */
+		para = SUN4I_DDMA_MAGIC_SPI_PARAMETERS;
+
+		/* And make a suitable promise */
+		if (vchan->is_dedicated)
+			promise = generate_ddma_promise(chan, srcaddr, dstaddr,
+							sg_dma_len(sg),
+							sconfig);
+		else
+			promise = generate_ndma_promise(chan, srcaddr, dstaddr,
+							sg_dma_len(sg),
+							sconfig, dir);
+
+		if (!promise)
+			return NULL; /* TODO: should we free everything? */
+
+		promise->cfg |= endpoints;
+		promise->para = para;
+
+		/* Then add it to the contract */
+		list_add_tail(&promise->list, &contract->demands);
+	}
+
+	/*
+	 * Once we've got all the promises ready, add the contract
+	 * to the pending list on the vchan
+	 */
+	return vchan_tx_prep(&vchan->vc, &contract->vd, flags);
+}
+
+static int sun4i_dma_terminate_all(struct dma_chan *chan)
+{
+	struct sun4i_dma_dev *priv = to_sun4i_dma_dev(chan->device);
+	struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan);
+	struct sun4i_dma_pchan *pchan = vchan->pchan;
+	LIST_HEAD(head);
+	unsigned long flags;
+
+	spin_lock_irqsave(&vchan->vc.lock, flags);
+	vchan_get_all_descriptors(&vchan->vc, &head);
+	spin_unlock_irqrestore(&vchan->vc.lock, flags);
+
+	/*
+	 * Clearing the configuration register will halt the pchan. Interrupts
+	 * may still trigger, so don't forget to disable them.
+	 */
+	if (pchan) {
+		if (pchan->is_dedicated)
+			writel(0, pchan->base + SUN4I_DDMA_CFG_REG);
+		else
+			writel(0, pchan->base + SUN4I_NDMA_CFG_REG);
+		set_pchan_interrupt(priv, pchan, 0, 0);
+		release_pchan(priv, pchan);
+	}
+
+	spin_lock_irqsave(&vchan->vc.lock, flags);
+	vchan_dma_desc_free_list(&vchan->vc, &head);
+	/* Clear these so the vchan is usable again */
+	vchan->processing = NULL;
+	vchan->pchan = NULL;
+	spin_unlock_irqrestore(&vchan->vc.lock, flags);
+
+	return 0;
+}
+
+static int sun4i_dma_config(struct dma_chan *chan,
+			    struct dma_slave_config *config)
+{
+	struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan);
+
+	memcpy(&vchan->cfg, config, sizeof(*config));
+
+	return 0;
+}
+
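+/*
+ * Translate a DT phandle plus two cells into a channel: the first cell
+ * selects normal (0) or dedicated (1) DMA, the second one the DRQ
+ * endpoint, e.g. (illustrative values):
+ *
+ *	dmas = <&dma 0 21>;
+ */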
+static struct dma_chan *sun4i_dma_of_xlate(struct of_phandle_args *dma_spec,
+					   struct of_dma *ofdma)
+{
+	struct sun4i_dma_dev *priv = ofdma->of_dma_data;
+	struct sun4i_dma_vchan *vchan;
+	struct dma_chan *chan;
+	u8 is_dedicated = dma_spec->args[0];
+	u8 endpoint = dma_spec->args[1];
+
+	/* Check if type is Normal or Dedicated */
+	if (is_dedicated != 0 && is_dedicated != 1)
+		return NULL;
+
+	/* Make sure the endpoint looks sane */
+	if ((is_dedicated && endpoint >= SUN4I_DDMA_DRQ_TYPE_LIMIT) ||
+	    (!is_dedicated && endpoint >= SUN4I_NDMA_DRQ_TYPE_LIMIT))
+		return NULL;
+
+	chan = dma_get_any_slave_channel(&priv->slave);
+	if (!chan)
+		return NULL;
+
+	/* Assign the endpoint to the vchan */
+	vchan = to_sun4i_dma_vchan(chan);
+	vchan->is_dedicated = is_dedicated;
+	vchan->endpoint = endpoint;
+
+	return chan;
+}
+
+static enum dma_status sun4i_dma_tx_status(struct dma_chan *chan,
+					   dma_cookie_t cookie,
+					   struct dma_tx_state *state)
+{
+	struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan);
+	struct sun4i_dma_pchan *pchan = vchan->pchan;
+	struct sun4i_dma_contract *contract;
+	struct sun4i_dma_promise *promise;
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+	enum dma_status ret;
+	size_t bytes = 0;
+
+	ret = dma_cookie_status(chan, cookie, state);
+	if (!state || (ret == DMA_COMPLETE))
+		return ret;
+
+	spin_lock_irqsave(&vchan->vc.lock, flags);
+	vd = vchan_find_desc(&vchan->vc, cookie);
+	if (!vd)
+		goto exit;
+	contract = to_sun4i_dma_contract(vd);
+
+	list_for_each_entry(promise, &contract->demands, list)
+		bytes += promise->len;
+
+	/*
+	 * The hardware is configured to return the remaining byte
+	 * quantity. If possible, replace the first listed element's
+	 * full size with the actual remaining amount
+	 */
+	promise = list_first_entry_or_null(&contract->demands,
+					   struct sun4i_dma_promise, list);
+	if (promise && pchan) {
+		bytes -= promise->len;
+		if (pchan->is_dedicated)
+			bytes += readl(pchan->base + SUN4I_DDMA_BYTE_COUNT_REG);
+		else
+			bytes += readl(pchan->base + SUN4I_NDMA_BYTE_COUNT_REG);
+	}
+
+exit:
+	dma_set_residue(state, bytes);
+	spin_unlock_irqrestore(&vchan->vc.lock, flags);
+
+	return ret;
+}
+
+static void sun4i_dma_issue_pending(struct dma_chan *chan)
+{
+	struct sun4i_dma_dev *priv = to_sun4i_dma_dev(chan->device);
+	struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&vchan->vc.lock, flags);
+
+	/*
+	 * If there are pending transactions for this vchan, push one of
+	 * them into the engine to get the ball rolling.
+	 */
+	if (vchan_issue_pending(&vchan->vc))
+		__execute_vchan_pending(priv, vchan);
+
+	spin_unlock_irqrestore(&vchan->vc.lock, flags);
+}
+
+static irqreturn_t sun4i_dma_interrupt(int irq, void *dev_id)
+{
+	struct sun4i_dma_dev *priv = dev_id;
+	struct sun4i_dma_pchan *pchans = priv->pchans, *pchan;
+	struct sun4i_dma_vchan *vchan;
+	struct sun4i_dma_contract *contract;
+	struct sun4i_dma_promise *promise;
+	unsigned long pendirq, irqs, disableirqs;
+	int bit, i, free_room, allow_mitigation = 1;
+
+	pendirq = readl_relaxed(priv->base + SUN4I_DMA_IRQ_PENDING_STATUS_REG);
+
+handle_pending:
+
+	disableirqs = 0;
+	free_room = 0;
+
+	for_each_set_bit(bit, &pendirq, 32) {
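+		/* Each pchan has two IRQ bits: half-done (even), end (odd) */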
+		pchan = &pchans[bit >> 1];
+		vchan = pchan->vchan;
+		if (!vchan) /* a terminated channel may still interrupt */
+			continue;
+		contract = vchan->contract;
+
+		/*
+		 * Disable the IRQ and free the pchan if it's an end
+		 * interrupt (odd bit)
+		 */
+		if (bit & 1) {
+			spin_lock(&vchan->vc.lock);
+
+			/*
+			 * Move the promise into the completed list now that
+			 * we're done with it
+			 */
+			list_del(&vchan->processing->list);
+			list_add_tail(&vchan->processing->list,
+				      &contract->completed_demands);
+
+			/*
+			 * Cyclic DMA transfers are special:
+			 * - There's always something we can dispatch
+			 * - We need to run the callback
+			 * - Latency is very important, as this is used by audio
+			 * We therefore just cycle through the list and dispatch
+			 * whatever we have here, reusing the pchan. There's
+			 * no need to run the thread after this.
+			 *
+			 * For non-cyclic transfers we need to look around,
+			 * so we can program some more work, or notify the
+			 * client that their transfers have been completed.
+			 */
+			if (contract->is_cyclic) {
+				promise = get_next_cyclic_promise(contract);
+				vchan->processing = promise;
+				configure_pchan(pchan, promise);
+				vchan_cyclic_callback(&contract->vd);
+			} else {
+				vchan->processing = NULL;
+				vchan->pchan = NULL;
+
+				free_room = 1;
+				disableirqs |= BIT(bit);
+				release_pchan(priv, pchan);
+			}
+
+			spin_unlock(&vchan->vc.lock);
+		} else {
+			/* Half done interrupt */
+			if (contract->is_cyclic)
+				vchan_cyclic_callback(&contract->vd);
+			else
+				disableirqs |= BIT(bit);
+		}
+	}
+
+	/* Disable the IRQs for events we handled */
+	spin_lock(&priv->lock);
+	irqs = readl_relaxed(priv->base + SUN4I_DMA_IRQ_ENABLE_REG);
+	writel_relaxed(irqs & ~disableirqs,
+		       priv->base + SUN4I_DMA_IRQ_ENABLE_REG);
+	spin_unlock(&priv->lock);
+
+	/* Writing 1 to the pending field will clear the pending interrupt */
+	writel_relaxed(pendirq, priv->base + SUN4I_DMA_IRQ_PENDING_STATUS_REG);
+
+	/*
+	 * If a pchan was freed, we may be able to schedule something else,
+	 * so have a look around
+	 */
+	if (free_room) {
+		for (i = 0; i < SUN4I_DMA_NR_MAX_VCHANS; i++) {
+			vchan = &priv->vchans[i];
+			spin_lock(&vchan->vc.lock);
+			__execute_vchan_pending(priv, vchan);
+			spin_unlock(&vchan->vc.lock);
+		}
+	}
+
+	/*
+	 * Handle any interrupts that showed up in the meantime, but only
+	 * do it once to avoid looping for too long
+	 */
+	if (allow_mitigation) {
+		pendirq = readl_relaxed(priv->base +
+					SUN4I_DMA_IRQ_PENDING_STATUS_REG);
+		if (pendirq) {
+			allow_mitigation = 0;
+			goto handle_pending;
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int sun4i_dma_probe(struct platform_device *pdev)
+{
+	struct sun4i_dma_dev *priv;
+	struct resource *res;
+	int i, j, ret;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	priv->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(priv->base))
+		return PTR_ERR(priv->base);
+
+	priv->irq = platform_get_irq(pdev, 0);
+	if (priv->irq < 0) {
+		dev_err(&pdev->dev, "Cannot claim IRQ\n");
+		return priv->irq;
+	}
+
+	priv->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(priv->clk)) {
+		dev_err(&pdev->dev, "No clock specified\n");
+		return PTR_ERR(priv->clk);
+	}
+
+	platform_set_drvdata(pdev, priv);
+	spin_lock_init(&priv->lock);
+
+	dma_cap_zero(priv->slave.cap_mask);
+	dma_cap_set(DMA_PRIVATE, priv->slave.cap_mask);
+	dma_cap_set(DMA_MEMCPY, priv->slave.cap_mask);
+	dma_cap_set(DMA_CYCLIC, priv->slave.cap_mask);
+	dma_cap_set(DMA_SLAVE, priv->slave.cap_mask);
+
+	INIT_LIST_HEAD(&priv->slave.channels);
+	priv->slave.device_free_chan_resources	= sun4i_dma_free_chan_resources;
+	priv->slave.device_tx_status		= sun4i_dma_tx_status;
+	priv->slave.device_issue_pending	= sun4i_dma_issue_pending;
+	priv->slave.device_prep_slave_sg	= sun4i_dma_prep_slave_sg;
+	priv->slave.device_prep_dma_memcpy	= sun4i_dma_prep_dma_memcpy;
+	priv->slave.device_prep_dma_cyclic	= sun4i_dma_prep_dma_cyclic;
+	priv->slave.device_config		= sun4i_dma_config;
+	priv->slave.device_terminate_all	= sun4i_dma_terminate_all;
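+	/* copy_align is log2 of the alignment: 2 means 4-byte alignment */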
+	priv->slave.copy_align			= 2;
+	priv->slave.src_addr_widths		= BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+						  BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+						  BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	priv->slave.dst_addr_widths		= BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+						  BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+						  BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	priv->slave.directions			= BIT(DMA_DEV_TO_MEM) |
+						  BIT(DMA_MEM_TO_DEV);
+	priv->slave.residue_granularity		= DMA_RESIDUE_GRANULARITY_BURST;
+
+	priv->slave.dev = &pdev->dev;
+
+	priv->pchans = devm_kcalloc(&pdev->dev, SUN4I_DMA_NR_MAX_CHANNELS,
+				    sizeof(struct sun4i_dma_pchan), GFP_KERNEL);
+	priv->vchans = devm_kcalloc(&pdev->dev, SUN4I_DMA_NR_MAX_VCHANS,
+				    sizeof(struct sun4i_dma_vchan), GFP_KERNEL);
+	if (!priv->vchans || !priv->pchans)
+		return -ENOMEM;
+
+	/*
+	 * [0..SUN4I_NDMA_NR_MAX_CHANNELS) are normal pchans, and
+	 * [SUN4I_NDMA_NR_MAX_CHANNELS..SUN4I_DMA_NR_MAX_CHANNELS) are
+	 * dedicated ones
+	 */
+	for (i = 0; i < SUN4I_NDMA_NR_MAX_CHANNELS; i++)
+		priv->pchans[i].base = priv->base +
+			SUN4I_NDMA_CHANNEL_REG_BASE(i);
+
+	for (j = 0; i < SUN4I_DMA_NR_MAX_CHANNELS; i++, j++) {
+		priv->pchans[i].base = priv->base +
+			SUN4I_DDMA_CHANNEL_REG_BASE(j);
+		priv->pchans[i].is_dedicated = 1;
+	}
+
+	for (i = 0; i < SUN4I_DMA_NR_MAX_VCHANS; i++) {
+		struct sun4i_dma_vchan *vchan = &priv->vchans[i];
+
+		spin_lock_init(&vchan->vc.lock);
+		vchan->vc.desc_free = sun4i_dma_free_contract;
+		vchan_init(&vchan->vc, &priv->slave);
+	}
+
+	ret = clk_prepare_enable(priv->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Couldn't enable the clock\n");
+		return ret;
+	}
+
+	/*
+	 * Make sure the IRQs are all disabled and accounted for. The bootloader
+	 * likes to leave these dirty
+	 */
+	writel(0, priv->base + SUN4I_DMA_IRQ_ENABLE_REG);
+	writel(0xFFFFFFFF, priv->base + SUN4I_DMA_IRQ_PENDING_STATUS_REG);
+
+	ret = devm_request_irq(&pdev->dev, priv->irq, sun4i_dma_interrupt,
+			       0, dev_name(&pdev->dev), priv);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot request IRQ\n");
+		goto err_clk_disable;
+	}
+
+	ret = dma_async_device_register(&priv->slave);
+	if (ret) {
+		dev_warn(&pdev->dev, "Failed to register DMA engine device\n");
+		goto err_clk_disable;
+	}
+
+	ret = of_dma_controller_register(pdev->dev.of_node, sun4i_dma_of_xlate,
+					 priv);
+	if (ret) {
+		dev_err(&pdev->dev, "of_dma_controller_register failed\n");
+		goto err_dma_unregister;
+	}
+
+	dev_dbg(&pdev->dev, "Successfully probed SUN4I_DMA\n");
+
+	return 0;
+
+err_dma_unregister:
+	dma_async_device_unregister(&priv->slave);
+err_clk_disable:
+	clk_disable_unprepare(priv->clk);
+	return ret;
+}
+
+static int sun4i_dma_remove(struct platform_device *pdev)
+{
+	struct sun4i_dma_dev *priv = platform_get_drvdata(pdev);
+
+	/* Disable IRQ so no more work is scheduled */
+	disable_irq(priv->irq);
+
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&priv->slave);
+
+	clk_disable_unprepare(priv->clk);
+
+	return 0;
+}
+
+static const struct of_device_id sun4i_dma_match[] = {
+	{ .compatible = "allwinner,sun4i-a10-dma" },
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, sun4i_dma_match);
+
+static struct platform_driver sun4i_dma_driver = {
+	.probe	= sun4i_dma_probe,
+	.remove	= sun4i_dma_remove,
+	.driver	= {
+		.name		= "sun4i-dma",
+		.of_match_table	= sun4i_dma_match,
+	},
+};
+
+module_platform_driver(sun4i_dma_driver);
+
+MODULE_DESCRIPTION("Allwinner A10 Dedicated DMA Controller Driver");
+MODULE_AUTHOR("Emilio López <emilio@elopez.com.ar>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c
new file mode 100644
index 0000000..0cd13f1
--- /dev/null
+++ b/drivers/dma/sun6i-dma.c
@@ -0,0 +1,1383 @@
+/*
+ * Copyright (C) 2013-2014 Allwinner Tech Co., Ltd
+ * Author: Sugar <shuge@allwinnertech.com>
+ *
+ * Copyright (C) 2014 Maxime Ripard
+ * Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of_dma.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "virt-dma.h"
+
+/*
+ * Common registers
+ */
+#define DMA_IRQ_EN(x)		((x) * 0x04)
+#define DMA_IRQ_HALF			BIT(0)
+#define DMA_IRQ_PKG			BIT(1)
+#define DMA_IRQ_QUEUE			BIT(2)
+
+#define DMA_IRQ_CHAN_NR			8
+#define DMA_IRQ_CHAN_WIDTH		4
+
+#define DMA_IRQ_STAT(x)		((x) * 0x04 + 0x10)
+
+#define DMA_STAT		0x30
+
+/* Offset between DMA_IRQ_EN and DMA_IRQ_STAT limits number of channels */
+#define DMA_MAX_CHANNELS	(DMA_IRQ_CHAN_NR * 0x10 / 4)
+
+/*
+ * sun8i specific registers
+ */
+#define SUN8I_DMA_GATE		0x20
+#define SUN8I_DMA_GATE_ENABLE	0x4
+
+#define SUNXI_H3_SECURE_REG		0x20
+#define SUNXI_H3_DMA_GATE		0x28
+#define SUNXI_H3_DMA_GATE_ENABLE	0x4
+/*
+ * Channel-specific registers
+ */
+#define DMA_CHAN_ENABLE		0x00
+#define DMA_CHAN_ENABLE_START		BIT(0)
+#define DMA_CHAN_ENABLE_STOP		0
+
+#define DMA_CHAN_PAUSE		0x04
+#define DMA_CHAN_PAUSE_PAUSE		BIT(1)
+#define DMA_CHAN_PAUSE_RESUME		0
+
+#define DMA_CHAN_LLI_ADDR	0x08
+
+#define DMA_CHAN_CUR_CFG	0x0c
+#define DMA_CHAN_MAX_DRQ		0x1f
+#define DMA_CHAN_CFG_SRC_DRQ(x)		((x) & DMA_CHAN_MAX_DRQ)
+#define DMA_CHAN_CFG_SRC_IO_MODE	BIT(5)
+#define DMA_CHAN_CFG_SRC_LINEAR_MODE	(0 << 5)
+#define DMA_CHAN_CFG_SRC_BURST_A31(x)	(((x) & 0x3) << 7)
+#define DMA_CHAN_CFG_SRC_BURST_H3(x)	(((x) & 0x3) << 6)
+#define DMA_CHAN_CFG_SRC_WIDTH(x)	(((x) & 0x3) << 9)
+
+#define DMA_CHAN_CFG_DST_DRQ(x)		(DMA_CHAN_CFG_SRC_DRQ(x) << 16)
+#define DMA_CHAN_CFG_DST_IO_MODE	(DMA_CHAN_CFG_SRC_IO_MODE << 16)
+#define DMA_CHAN_CFG_DST_LINEAR_MODE	(DMA_CHAN_CFG_SRC_LINEAR_MODE << 16)
+#define DMA_CHAN_CFG_DST_BURST_A31(x)	(DMA_CHAN_CFG_SRC_BURST_A31(x) << 16)
+#define DMA_CHAN_CFG_DST_BURST_H3(x)	(DMA_CHAN_CFG_SRC_BURST_H3(x) << 16)
+#define DMA_CHAN_CFG_DST_WIDTH(x)	(DMA_CHAN_CFG_SRC_WIDTH(x) << 16)
+
+#define DMA_CHAN_CUR_SRC	0x10
+
+#define DMA_CHAN_CUR_DST	0x14
+
+#define DMA_CHAN_CUR_CNT	0x18
+
+#define DMA_CHAN_CUR_PARA	0x1c
+
+/*
+ * Various hardware related defines
+ */
+#define LLI_LAST_ITEM	0xfffff800
+#define NORMAL_WAIT	8
+#define DRQ_SDRAM	1
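+
+/*
+ * NORMAL_WAIT is stored in the "para" word of each LLI and is understood
+ * to be the number of wait cycles inserted between bursts; DRQ_SDRAM is
+ * the request port used for plain memory accesses.
+ */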
+
+/* forward declaration */
+struct sun6i_dma_dev;
+
+/*
+ * Hardware channels / ports representation
+ *
+ * The hardware is used in several SoCs, with differing numbers
+ * of channels and endpoints. This structure ties those numbers
+ * to a certain compatible string.
+ */
+struct sun6i_dma_config {
+	u32 nr_max_channels;
+	u32 nr_max_requests;
+	u32 nr_max_vchans;
+	/*
+	 * In the datasheets/user manuals of newer Allwinner SoCs, a special
+	 * bit (bit 2 at register 0x20) is present.
+	 * It's named "DMA MCLK interface circuit auto gating bit" in the
+	 * documents, and the footnote of this register says that this bit
+	 * should be set up when initializing the DMA controller.
+	 * The Allwinner A23/A33 user manuals do not document this bit,
+	 * but these SoCs do have it and need it set, as seen in the
+	 * BSP kernel source code.
+	 */
+	void (*clock_autogate_enable)(struct sun6i_dma_dev *);
+	void (*set_burst_length)(u32 *p_cfg, s8 src_burst, s8 dst_burst);
+	u32 src_burst_lengths;
+	u32 dst_burst_lengths;
+	u32 src_addr_widths;
+	u32 dst_addr_widths;
+};
+
+/*
+ * Hardware representation of the LLI
+ *
+ * The hardware will be fed the physical address of this structure,
+ * and read its content in order to start the transfer.
+ */
+struct sun6i_dma_lli {
+	u32			cfg;
+	u32			src;
+	u32			dst;
+	u32			len;
+	u32			para;
+	u32			p_lli_next;
+
+	/*
+	 * This field is not used by the DMA controller, but will be
+	 * used by the CPU to go through the list (mostly for dumping
+	 * or freeing it).
+	 */
+	struct sun6i_dma_lli	*v_lli_next;
+};
+
+struct sun6i_desc {
+	struct virt_dma_desc	vd;
+	dma_addr_t		p_lli;
+	struct sun6i_dma_lli	*v_lli;
+};
+
+struct sun6i_pchan {
+	u32			idx;
+	void __iomem		*base;
+	struct sun6i_vchan	*vchan;
+	struct sun6i_desc	*desc;
+	struct sun6i_desc	*done;
+};
+
+struct sun6i_vchan {
+	struct virt_dma_chan	vc;
+	struct list_head	node;
+	struct dma_slave_config	cfg;
+	struct sun6i_pchan	*phy;
+	u8			port;
+	u8			irq_type;
+	bool			cyclic;
+};
+
+struct sun6i_dma_dev {
+	struct dma_device	slave;
+	void __iomem		*base;
+	struct clk		*clk;
+	int			irq;
+	spinlock_t		lock;
+	struct reset_control	*rstc;
+	struct tasklet_struct	task;
+	atomic_t		tasklet_shutdown;
+	struct list_head	pending;
+	struct dma_pool		*pool;
+	struct sun6i_pchan	*pchans;
+	struct sun6i_vchan	*vchans;
+	const struct sun6i_dma_config *cfg;
+	u32			num_pchans;
+	u32			num_vchans;
+	u32			max_request;
+};
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+	return &chan->dev->device;
+}
+
+static inline struct sun6i_dma_dev *to_sun6i_dma_dev(struct dma_device *d)
+{
+	return container_of(d, struct sun6i_dma_dev, slave);
+}
+
+static inline struct sun6i_vchan *to_sun6i_vchan(struct dma_chan *chan)
+{
+	return container_of(chan, struct sun6i_vchan, vc.chan);
+}
+
+static inline struct sun6i_desc *
+to_sun6i_desc(struct dma_async_tx_descriptor *tx)
+{
+	return container_of(tx, struct sun6i_desc, vd.tx);
+}
+
+static inline void sun6i_dma_dump_com_regs(struct sun6i_dma_dev *sdev)
+{
+	dev_dbg(sdev->slave.dev, "Common register:\n"
+		"\tmask0(%04x): 0x%08x\n"
+		"\tmask1(%04x): 0x%08x\n"
+		"\tpend0(%04x): 0x%08x\n"
+		"\tpend1(%04x): 0x%08x\n"
+		"\tstats(%04x): 0x%08x\n",
+		DMA_IRQ_EN(0), readl(sdev->base + DMA_IRQ_EN(0)),
+		DMA_IRQ_EN(1), readl(sdev->base + DMA_IRQ_EN(1)),
+		DMA_IRQ_STAT(0), readl(sdev->base + DMA_IRQ_STAT(0)),
+		DMA_IRQ_STAT(1), readl(sdev->base + DMA_IRQ_STAT(1)),
+		DMA_STAT, readl(sdev->base + DMA_STAT));
+}
+
+static inline void sun6i_dma_dump_chan_regs(struct sun6i_dma_dev *sdev,
+					    struct sun6i_pchan *pchan)
+{
+	phys_addr_t reg = virt_to_phys(pchan->base);
+
+	dev_dbg(sdev->slave.dev, "Chan %d reg: %pa\n"
+		"\t___en(%04x): \t0x%08x\n"
+		"\tpause(%04x): \t0x%08x\n"
+		"\tstart(%04x): \t0x%08x\n"
+		"\t__cfg(%04x): \t0x%08x\n"
+		"\t__src(%04x): \t0x%08x\n"
+		"\t__dst(%04x): \t0x%08x\n"
+		"\tcount(%04x): \t0x%08x\n"
+		"\t_para(%04x): \t0x%08x\n\n",
+		pchan->idx, &reg,
+		DMA_CHAN_ENABLE,
+		readl(pchan->base + DMA_CHAN_ENABLE),
+		DMA_CHAN_PAUSE,
+		readl(pchan->base + DMA_CHAN_PAUSE),
+		DMA_CHAN_LLI_ADDR,
+		readl(pchan->base + DMA_CHAN_LLI_ADDR),
+		DMA_CHAN_CUR_CFG,
+		readl(pchan->base + DMA_CHAN_CUR_CFG),
+		DMA_CHAN_CUR_SRC,
+		readl(pchan->base + DMA_CHAN_CUR_SRC),
+		DMA_CHAN_CUR_DST,
+		readl(pchan->base + DMA_CHAN_CUR_DST),
+		DMA_CHAN_CUR_CNT,
+		readl(pchan->base + DMA_CHAN_CUR_CNT),
+		DMA_CHAN_CUR_PARA,
+		readl(pchan->base + DMA_CHAN_CUR_PARA));
+}
+
+static inline s8 convert_burst(u32 maxburst)
+{
+	switch (maxburst) {
+	case 1:
+		return 0;
+	case 4:
+		return 1;
+	case 8:
+		return 2;
+	case 16:
+		return 3;
+	default:
+		return -EINVAL;
+	}
+}
+
+static inline s8 convert_buswidth(enum dma_slave_buswidth addr_width)
+{
+	return ilog2(addr_width);
+}
+
+static void sun6i_enable_clock_autogate_a23(struct sun6i_dma_dev *sdev)
+{
+	writel(SUN8I_DMA_GATE_ENABLE, sdev->base + SUN8I_DMA_GATE);
+}
+
+static void sun6i_enable_clock_autogate_h3(struct sun6i_dma_dev *sdev)
+{
+	writel(SUNXI_H3_DMA_GATE_ENABLE, sdev->base + SUNXI_H3_DMA_GATE);
+}
+
+static void sun6i_set_burst_length_a31(u32 *p_cfg, s8 src_burst, s8 dst_burst)
+{
+	*p_cfg |= DMA_CHAN_CFG_SRC_BURST_A31(src_burst) |
+		  DMA_CHAN_CFG_DST_BURST_A31(dst_burst);
+}
+
+static void sun6i_set_burst_length_h3(u32 *p_cfg, s8 src_burst, s8 dst_burst)
+{
+	*p_cfg |= DMA_CHAN_CFG_SRC_BURST_H3(src_burst) |
+		  DMA_CHAN_CFG_DST_BURST_H3(dst_burst);
+}
+
+static size_t sun6i_get_chan_size(struct sun6i_pchan *pchan)
+{
+	struct sun6i_desc *txd = pchan->desc;
+	struct sun6i_dma_lli *lli;
+	size_t bytes;
+	dma_addr_t pos;
+
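+	/*
+	 * Residue = bytes left in the LLI currently being processed, plus
+	 * the full length of every LLI that has not been started yet.
+	 */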
+	pos = readl(pchan->base + DMA_CHAN_LLI_ADDR);
+	bytes = readl(pchan->base + DMA_CHAN_CUR_CNT);
+
+	if (pos == LLI_LAST_ITEM)
+		return bytes;
+
+	for (lli = txd->v_lli; lli; lli = lli->v_lli_next) {
+		if (lli->p_lli_next == pos) {
+			for (lli = lli->v_lli_next; lli; lli = lli->v_lli_next)
+				bytes += lli->len;
+			break;
+		}
+	}
+
+	return bytes;
+}
+
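+/*
+ * Append @next (physical address @next_phy) to the LLI chain: it becomes
+ * the head of @txd when there is no @prev, otherwise it is linked after
+ * @prev. The new item is terminated with LLI_LAST_ITEM until something
+ * else is chained behind it.
+ */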
+static void *sun6i_dma_lli_add(struct sun6i_dma_lli *prev,
+			       struct sun6i_dma_lli *next,
+			       dma_addr_t next_phy,
+			       struct sun6i_desc *txd)
+{
+	if ((!prev && !txd) || !next)
+		return NULL;
+
+	if (!prev) {
+		txd->p_lli = next_phy;
+		txd->v_lli = next;
+	} else {
+		prev->p_lli_next = next_phy;
+		prev->v_lli_next = next;
+	}
+
+	next->p_lli_next = LLI_LAST_ITEM;
+	next->v_lli_next = NULL;
+
+	return next;
+}
+
+static inline void sun6i_dma_dump_lli(struct sun6i_vchan *vchan,
+				      struct sun6i_dma_lli *lli)
+{
+	phys_addr_t p_lli = virt_to_phys(lli);
+
+	dev_dbg(chan2dev(&vchan->vc.chan),
+		"\n\tdesc:   p - %pa v - 0x%p\n"
+		"\t\tc - 0x%08x s - 0x%08x d - 0x%08x\n"
+		"\t\tl - 0x%08x p - 0x%08x n - 0x%08x\n",
+		&p_lli, lli,
+		lli->cfg, lli->src, lli->dst,
+		lli->len, lli->para, lli->p_lli_next);
+}
+
+static void sun6i_dma_free_desc(struct virt_dma_desc *vd)
+{
+	struct sun6i_desc *txd = to_sun6i_desc(&vd->tx);
+	struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(vd->tx.chan->device);
+	struct sun6i_dma_lli *v_lli, *v_next;
+	dma_addr_t p_lli, p_next;
+
+	if (unlikely(!txd))
+		return;
+
+	p_lli = txd->p_lli;
+	v_lli = txd->v_lli;
+
+	while (v_lli) {
+		v_next = v_lli->v_lli_next;
+		p_next = v_lli->p_lli_next;
+
+		dma_pool_free(sdev->pool, v_lli, p_lli);
+
+		v_lli = v_next;
+		p_lli = p_next;
+	}
+
+	kfree(txd);
+}
+
+static int sun6i_dma_start_desc(struct sun6i_vchan *vchan)
+{
+	struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(vchan->vc.chan.device);
+	struct virt_dma_desc *desc = vchan_next_desc(&vchan->vc);
+	struct sun6i_pchan *pchan = vchan->phy;
+	u32 irq_val, irq_reg, irq_offset;
+
+	if (!pchan)
+		return -EAGAIN;
+
+	if (!desc) {
+		pchan->desc = NULL;
+		pchan->done = NULL;
+		return -EAGAIN;
+	}
+
+	list_del(&desc->node);
+
+	pchan->desc = to_sun6i_desc(&desc->tx);
+	pchan->done = NULL;
+
+	sun6i_dma_dump_lli(vchan, pchan->desc->v_lli);
+
+	irq_reg = pchan->idx / DMA_IRQ_CHAN_NR;
+	irq_offset = pchan->idx % DMA_IRQ_CHAN_NR;
+
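+	/*
+	 * Cyclic transfers want an interrupt per package (one per period),
+	 * ordinary transfers only need the end-of-queue interrupt.
+	 */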
+	vchan->irq_type = vchan->cyclic ? DMA_IRQ_PKG : DMA_IRQ_QUEUE;
+
+	irq_val = readl(sdev->base + DMA_IRQ_EN(irq_reg));
+	irq_val &= ~((DMA_IRQ_HALF | DMA_IRQ_PKG | DMA_IRQ_QUEUE) <<
+			(irq_offset * DMA_IRQ_CHAN_WIDTH));
+	irq_val |= vchan->irq_type << (irq_offset * DMA_IRQ_CHAN_WIDTH);
+	writel(irq_val, sdev->base + DMA_IRQ_EN(irq_reg));
+
+	writel(pchan->desc->p_lli, pchan->base + DMA_CHAN_LLI_ADDR);
+	writel(DMA_CHAN_ENABLE_START, pchan->base + DMA_CHAN_ENABLE);
+
+	sun6i_dma_dump_com_regs(sdev);
+	sun6i_dma_dump_chan_regs(sdev, pchan);
+
+	return 0;
+}
+
+static void sun6i_dma_tasklet(unsigned long data)
+{
+	struct sun6i_dma_dev *sdev = (struct sun6i_dma_dev *)data;
+	struct sun6i_vchan *vchan;
+	struct sun6i_pchan *pchan;
+	unsigned int pchan_alloc = 0;
+	unsigned int pchan_idx;
+
+	list_for_each_entry(vchan, &sdev->slave.channels, vc.chan.device_node) {
+		spin_lock_irq(&vchan->vc.lock);
+
+		pchan = vchan->phy;
+
+		if (pchan && pchan->done) {
+			if (sun6i_dma_start_desc(vchan)) {
+				/*
+				 * No current txd associated with this channel
+				 */
+				dev_dbg(sdev->slave.dev, "pchan %u: free\n",
+					pchan->idx);
+
+				/* Mark this channel free */
+				vchan->phy = NULL;
+				pchan->vchan = NULL;
+			}
+		}
+		spin_unlock_irq(&vchan->vc.lock);
+	}
+
+	spin_lock_irq(&sdev->lock);
+	for (pchan_idx = 0; pchan_idx < sdev->num_pchans; pchan_idx++) {
+		pchan = &sdev->pchans[pchan_idx];
+
+		if (pchan->vchan || list_empty(&sdev->pending))
+			continue;
+
+		vchan = list_first_entry(&sdev->pending,
+					 struct sun6i_vchan, node);
+
+		/* Remove from pending channels */
+		list_del_init(&vchan->node);
+		pchan_alloc |= BIT(pchan_idx);
+
+		/* Mark this channel allocated */
+		pchan->vchan = vchan;
+		vchan->phy = pchan;
+		dev_dbg(sdev->slave.dev, "pchan %u: alloc vchan %p\n",
+			pchan->idx, &vchan->vc);
+	}
+	spin_unlock_irq(&sdev->lock);
+
+	for (pchan_idx = 0; pchan_idx < sdev->num_pchans; pchan_idx++) {
+		if (!(pchan_alloc & BIT(pchan_idx)))
+			continue;
+
+		pchan = sdev->pchans + pchan_idx;
+		vchan = pchan->vchan;
+		if (vchan) {
+			spin_lock_irq(&vchan->vc.lock);
+			sun6i_dma_start_desc(vchan);
+			spin_unlock_irq(&vchan->vc.lock);
+		}
+	}
+}
+
+static irqreturn_t sun6i_dma_interrupt(int irq, void *dev_id)
+{
+	struct sun6i_dma_dev *sdev = dev_id;
+	struct sun6i_vchan *vchan;
+	struct sun6i_pchan *pchan;
+	int i, j, ret = IRQ_NONE;
+	u32 status;
+
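+	/* Each IRQ register covers DMA_IRQ_CHAN_NR channels, 4 bits each */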
+	for (i = 0; i < sdev->num_pchans / DMA_IRQ_CHAN_NR; i++) {
+		status = readl(sdev->base + DMA_IRQ_STAT(i));
+		if (!status)
+			continue;
+
+		dev_dbg(sdev->slave.dev, "DMA irq status %s: 0x%x\n",
+			i ? "high" : "low", status);
+
+		writel(status, sdev->base + DMA_IRQ_STAT(i));
+
+		for (j = 0; (j < DMA_IRQ_CHAN_NR) && status; j++) {
+			pchan = sdev->pchans + j;
+			vchan = pchan->vchan;
+			if (vchan && (status & vchan->irq_type)) {
+				if (vchan->cyclic) {
+					vchan_cyclic_callback(&pchan->desc->vd);
+				} else {
+					spin_lock(&vchan->vc.lock);
+					vchan_cookie_complete(&pchan->desc->vd);
+					pchan->done = pchan->desc;
+					spin_unlock(&vchan->vc.lock);
+				}
+			}
+
+			status = status >> DMA_IRQ_CHAN_WIDTH;
+		}
+
+		if (!atomic_read(&sdev->tasklet_shutdown))
+			tasklet_schedule(&sdev->task);
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
+
+static int set_config(struct sun6i_dma_dev *sdev,
+			struct dma_slave_config *sconfig,
+			enum dma_transfer_direction direction,
+			u32 *p_cfg)
+{
+	enum dma_slave_buswidth src_addr_width, dst_addr_width;
+	u32 src_maxburst, dst_maxburst;
+	s8 src_width, dst_width, src_burst, dst_burst;
+
+	src_addr_width = sconfig->src_addr_width;
+	dst_addr_width = sconfig->dst_addr_width;
+	src_maxburst = sconfig->src_maxburst;
+	dst_maxburst = sconfig->dst_maxburst;
+
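+	/*
+	 * Default the memory side of the transfer to a 4-byte width and a
+	 * burst of 8 when the client did not specify them.
+	 */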
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		if (src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED)
+			src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		src_maxburst = src_maxburst ? src_maxburst : 8;
+		break;
+	case DMA_DEV_TO_MEM:
+		if (dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED)
+			dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+		dst_maxburst = dst_maxburst ? dst_maxburst : 8;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (!(BIT(src_addr_width) & sdev->slave.src_addr_widths))
+		return -EINVAL;
+	if (!(BIT(dst_addr_width) & sdev->slave.dst_addr_widths))
+		return -EINVAL;
+	if (!(BIT(src_maxburst) & sdev->cfg->src_burst_lengths))
+		return -EINVAL;
+	if (!(BIT(dst_maxburst) & sdev->cfg->dst_burst_lengths))
+		return -EINVAL;
+
+	src_width = convert_buswidth(src_addr_width);
+	dst_width = convert_buswidth(dst_addr_width);
+	dst_burst = convert_burst(dst_maxburst);
+	src_burst = convert_burst(src_maxburst);
+
+	*p_cfg = DMA_CHAN_CFG_SRC_WIDTH(src_width) |
+		DMA_CHAN_CFG_DST_WIDTH(dst_width);
+
+	sdev->cfg->set_burst_length(p_cfg, src_burst, dst_burst);
+
+	return 0;
+}
+
+static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_memcpy(
+		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
+	struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+	struct sun6i_dma_lli *v_lli;
+	struct sun6i_desc *txd;
+	dma_addr_t p_lli;
+	s8 burst, width;
+
+	dev_dbg(chan2dev(chan),
+		"%s; chan: %d, dest: %pad, src: %pad, len: %zu. flags: 0x%08lx\n",
+		__func__, vchan->vc.chan.chan_id, &dest, &src, len, flags);
+
+	if (!len)
+		return NULL;
+
+	txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+	if (!txd)
+		return NULL;
+
+	v_lli = dma_pool_alloc(sdev->pool, GFP_NOWAIT, &p_lli);
+	if (!v_lli) {
+		dev_err(sdev->slave.dev, "Failed to alloc lli memory\n");
+		goto err_txd_free;
+	}
+
+	v_lli->src = src;
+	v_lli->dst = dest;
+	v_lli->len = len;
+	v_lli->para = NORMAL_WAIT;
+
+	burst = convert_burst(8);
+	width = convert_buswidth(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	v_lli->cfg = DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) |
+		DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) |
+		DMA_CHAN_CFG_DST_LINEAR_MODE |
+		DMA_CHAN_CFG_SRC_LINEAR_MODE |
+		DMA_CHAN_CFG_SRC_WIDTH(width) |
+		DMA_CHAN_CFG_DST_WIDTH(width);
+
+	sdev->cfg->set_burst_length(&v_lli->cfg, burst, burst);
+
+	sun6i_dma_lli_add(NULL, v_lli, p_lli, txd);
+
+	sun6i_dma_dump_lli(vchan, v_lli);
+
+	return vchan_tx_prep(&vchan->vc, &txd->vd, flags);
+
+err_txd_free:
+	kfree(txd);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *sun6i_dma_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction dir,
+		unsigned long flags, void *context)
+{
+	struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
+	struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+	struct dma_slave_config *sconfig = &vchan->cfg;
+	struct sun6i_dma_lli *v_lli, *prev = NULL;
+	struct sun6i_desc *txd;
+	struct scatterlist *sg;
+	dma_addr_t p_lli;
+	u32 lli_cfg;
+	int i, ret;
+
+	if (!sgl)
+		return NULL;
+
+	ret = set_config(sdev, sconfig, dir, &lli_cfg);
+	if (ret) {
+		dev_err(chan2dev(chan), "Invalid DMA configuration\n");
+		return NULL;
+	}
+
+	txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+	if (!txd)
+		return NULL;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		v_lli = dma_pool_alloc(sdev->pool, GFP_NOWAIT, &p_lli);
+		if (!v_lli)
+			goto err_lli_free;
+
+		v_lli->len = sg_dma_len(sg);
+		v_lli->para = NORMAL_WAIT;
+
+		if (dir == DMA_MEM_TO_DEV) {
+			v_lli->src = sg_dma_address(sg);
+			v_lli->dst = sconfig->dst_addr;
+			v_lli->cfg = lli_cfg |
+				DMA_CHAN_CFG_DST_IO_MODE |
+				DMA_CHAN_CFG_SRC_LINEAR_MODE |
+				DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) |
+				DMA_CHAN_CFG_DST_DRQ(vchan->port);
+
+			dev_dbg(chan2dev(chan),
+				"%s; chan: %d, dest: %pad, src: %pad, len: %u. flags: 0x%08lx\n",
+				__func__, vchan->vc.chan.chan_id,
+				&sconfig->dst_addr, &sg_dma_address(sg),
+				sg_dma_len(sg), flags);
+
+		} else {
+			v_lli->src = sconfig->src_addr;
+			v_lli->dst = sg_dma_address(sg);
+			v_lli->cfg = lli_cfg |
+				DMA_CHAN_CFG_DST_LINEAR_MODE |
+				DMA_CHAN_CFG_SRC_IO_MODE |
+				DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) |
+				DMA_CHAN_CFG_SRC_DRQ(vchan->port);
+
+			dev_dbg(chan2dev(chan),
+				"%s; chan: %d, dest: %pad, src: %pad, len: %u. flags: 0x%08lx\n",
+				__func__, vchan->vc.chan.chan_id,
+				&sg_dma_address(sg), &sconfig->src_addr,
+				sg_dma_len(sg), flags);
+		}
+
+		prev = sun6i_dma_lli_add(prev, v_lli, p_lli, txd);
+	}
+
+	dev_dbg(chan2dev(chan), "First: %pad\n", &txd->p_lli);
+	for (prev = txd->v_lli; prev; prev = prev->v_lli_next)
+		sun6i_dma_dump_lli(vchan, prev);
+
+	return vchan_tx_prep(&vchan->vc, &txd->vd, flags);
+
+err_lli_free:
+	for (prev = txd->v_lli; prev; prev = prev->v_lli_next)
+		dma_pool_free(sdev->pool, prev, virt_to_phys(prev));
+	kfree(txd);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *sun6i_dma_prep_dma_cyclic(
+					struct dma_chan *chan,
+					dma_addr_t buf_addr,
+					size_t buf_len,
+					size_t period_len,
+					enum dma_transfer_direction dir,
+					unsigned long flags)
+{
+	struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
+	struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+	struct dma_slave_config *sconfig = &vchan->cfg;
+	struct sun6i_dma_lli *v_lli, *prev = NULL;
+	struct sun6i_desc *txd;
+	dma_addr_t p_lli;
+	u32 lli_cfg;
+	unsigned int i, periods = buf_len / period_len;
+	int ret;
+
+	ret = set_config(sdev, sconfig, dir, &lli_cfg);
+	if (ret) {
+		dev_err(chan2dev(chan), "Invalid DMA configuration\n");
+		return NULL;
+	}
+
+	txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+	if (!txd)
+		return NULL;
+
+	for (i = 0; i < periods; i++) {
+		v_lli = dma_pool_alloc(sdev->pool, GFP_NOWAIT, &p_lli);
+		if (!v_lli) {
+			dev_err(sdev->slave.dev, "Failed to alloc lli memory\n");
+			goto err_lli_free;
+		}
+
+		v_lli->len = period_len;
+		v_lli->para = NORMAL_WAIT;
+
+		if (dir == DMA_MEM_TO_DEV) {
+			v_lli->src = buf_addr + period_len * i;
+			v_lli->dst = sconfig->dst_addr;
+			v_lli->cfg = lli_cfg |
+				DMA_CHAN_CFG_DST_IO_MODE |
+				DMA_CHAN_CFG_SRC_LINEAR_MODE |
+				DMA_CHAN_CFG_SRC_DRQ(DRQ_SDRAM) |
+				DMA_CHAN_CFG_DST_DRQ(vchan->port);
+		} else {
+			v_lli->src = sconfig->src_addr;
+			v_lli->dst = buf_addr + period_len * i;
+			v_lli->cfg = lli_cfg |
+				DMA_CHAN_CFG_DST_LINEAR_MODE |
+				DMA_CHAN_CFG_SRC_IO_MODE |
+				DMA_CHAN_CFG_DST_DRQ(DRQ_SDRAM) |
+				DMA_CHAN_CFG_SRC_DRQ(vchan->port);
+		}
+
+		prev = sun6i_dma_lli_add(prev, v_lli, p_lli, txd);
+	}
+
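+	/* Close the ring: the last LLI points back to the first one */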
+	prev->p_lli_next = txd->p_lli;		/* cyclic list */
+
+	vchan->cyclic = true;
+
+	return vchan_tx_prep(&vchan->vc, &txd->vd, flags);
+
+err_lli_free:
+	for (prev = txd->v_lli; prev; prev = prev->v_lli_next)
+		dma_pool_free(sdev->pool, prev, virt_to_phys(prev));
+	kfree(txd);
+	return NULL;
+}
+
+static int sun6i_dma_config(struct dma_chan *chan,
+			    struct dma_slave_config *config)
+{
+	struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+
+	memcpy(&vchan->cfg, config, sizeof(*config));
+
+	return 0;
+}
+
+static int sun6i_dma_pause(struct dma_chan *chan)
+{
+	struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
+	struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+	struct sun6i_pchan *pchan = vchan->phy;
+
+	dev_dbg(chan2dev(chan), "vchan %p: pause\n", &vchan->vc);
+
+	if (pchan) {
+		writel(DMA_CHAN_PAUSE_PAUSE,
+		       pchan->base + DMA_CHAN_PAUSE);
+	} else {
+		spin_lock(&sdev->lock);
+		list_del_init(&vchan->node);
+		spin_unlock(&sdev->lock);
+	}
+
+	return 0;
+}
+
+static int sun6i_dma_resume(struct dma_chan *chan)
+{
+	struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
+	struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+	struct sun6i_pchan *pchan = vchan->phy;
+	unsigned long flags;
+
+	dev_dbg(chan2dev(chan), "vchan %p: resume\n", &vchan->vc);
+
+	spin_lock_irqsave(&vchan->vc.lock, flags);
+
+	if (pchan) {
+		writel(DMA_CHAN_PAUSE_RESUME,
+		       pchan->base + DMA_CHAN_PAUSE);
+	} else if (!list_empty(&vchan->vc.desc_issued)) {
+		spin_lock(&sdev->lock);
+		list_add_tail(&vchan->node, &sdev->pending);
+		spin_unlock(&sdev->lock);
+	}
+
+	spin_unlock_irqrestore(&vchan->vc.lock, flags);
+
+	return 0;
+}
+
+static int sun6i_dma_terminate_all(struct dma_chan *chan)
+{
+	struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
+	struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+	struct sun6i_pchan *pchan = vchan->phy;
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock(&sdev->lock);
+	list_del_init(&vchan->node);
+	spin_unlock(&sdev->lock);
+
+	spin_lock_irqsave(&vchan->vc.lock, flags);
+
+	if (vchan->cyclic) {
+		vchan->cyclic = false;
+		if (pchan && pchan->desc) {
+			struct virt_dma_desc *vd = &pchan->desc->vd;
+			struct virt_dma_chan *vc = &vchan->vc;
+
+			list_add_tail(&vd->node, &vc->desc_completed);
+		}
+	}
+
+	vchan_get_all_descriptors(&vchan->vc, &head);
+
+	if (pchan) {
+		writel(DMA_CHAN_ENABLE_STOP, pchan->base + DMA_CHAN_ENABLE);
+		writel(DMA_CHAN_PAUSE_RESUME, pchan->base + DMA_CHAN_PAUSE);
+
+		vchan->phy = NULL;
+		pchan->vchan = NULL;
+		pchan->desc = NULL;
+		pchan->done = NULL;
+	}
+
+	spin_unlock_irqrestore(&vchan->vc.lock, flags);
+
+	vchan_dma_desc_free_list(&vchan->vc, &head);
+
+	return 0;
+}
+
+static enum dma_status sun6i_dma_tx_status(struct dma_chan *chan,
+					   dma_cookie_t cookie,
+					   struct dma_tx_state *state)
+{
+	struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+	struct sun6i_pchan *pchan = vchan->phy;
+	struct sun6i_dma_lli *lli;
+	struct virt_dma_desc *vd;
+	struct sun6i_desc *txd;
+	enum dma_status ret;
+	unsigned long flags;
+	size_t bytes = 0;
+
+	ret = dma_cookie_status(chan, cookie, state);
+	if (ret == DMA_COMPLETE || !state)
+		return ret;
+
+	spin_lock_irqsave(&vchan->vc.lock, flags);
+
+	vd = vchan_find_desc(&vchan->vc, cookie);
+
+	if (vd) {
+		txd = to_sun6i_desc(&vd->tx);
+		for (lli = txd->v_lli; lli != NULL; lli = lli->v_lli_next)
+			bytes += lli->len;
+	} else if (!pchan || !pchan->desc) {
+		bytes = 0;
+	} else {
+		bytes = sun6i_get_chan_size(pchan);
+	}
+
+	spin_unlock_irqrestore(&vchan->vc.lock, flags);
+
+	dma_set_residue(state, bytes);
+
+	return ret;
+}
+
+static void sun6i_dma_issue_pending(struct dma_chan *chan)
+{
+	struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
+	struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&vchan->vc.lock, flags);
+
+	if (vchan_issue_pending(&vchan->vc)) {
+		spin_lock(&sdev->lock);
+
+		if (!vchan->phy && list_empty(&vchan->node)) {
+			list_add_tail(&vchan->node, &sdev->pending);
+			tasklet_schedule(&sdev->task);
+			dev_dbg(chan2dev(chan), "vchan %p: issued\n",
+				&vchan->vc);
+		}
+
+		spin_unlock(&sdev->lock);
+	} else {
+		dev_dbg(chan2dev(chan), "vchan %p: nothing to issue\n",
+			&vchan->vc);
+	}
+
+	spin_unlock_irqrestore(&vchan->vc.lock, flags);
+}
+
+static void sun6i_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
+	struct sun6i_vchan *vchan = to_sun6i_vchan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&sdev->lock, flags);
+	list_del_init(&vchan->node);
+	spin_unlock_irqrestore(&sdev->lock, flags);
+
+	vchan_free_chan_resources(&vchan->vc);
+}
+
+static struct dma_chan *sun6i_dma_of_xlate(struct of_phandle_args *dma_spec,
+					   struct of_dma *ofdma)
+{
+	struct sun6i_dma_dev *sdev = ofdma->of_dma_data;
+	struct sun6i_vchan *vchan;
+	struct dma_chan *chan;
+	u8 port = dma_spec->args[0];
+
+	if (port > sdev->max_request)
+		return NULL;
+
+	chan = dma_get_any_slave_channel(&sdev->slave);
+	if (!chan)
+		return NULL;
+
+	vchan = to_sun6i_vchan(chan);
+	vchan->port = port;
+
+	return chan;
+}
+
+static inline void sun6i_kill_tasklet(struct sun6i_dma_dev *sdev)
+{
+	/* Disable all interrupts from DMA */
+	writel(0, sdev->base + DMA_IRQ_EN(0));
+	writel(0, sdev->base + DMA_IRQ_EN(1));
+
+	/* Prevent spurious interrupts from scheduling the tasklet */
+	atomic_inc(&sdev->tasklet_shutdown);
+
+	/* Make sure we won't have any further interrupts */
+	devm_free_irq(sdev->slave.dev, sdev->irq, sdev);
+
+	/* Actually prevent the tasklet from being scheduled */
+	tasklet_kill(&sdev->task);
+}
+
+static inline void sun6i_dma_free(struct sun6i_dma_dev *sdev)
+{
+	int i;
+
+	for (i = 0; i < sdev->num_vchans; i++) {
+		struct sun6i_vchan *vchan = &sdev->vchans[i];
+
+		list_del(&vchan->vc.chan.device_node);
+		tasklet_kill(&vchan->vc.task);
+	}
+}
+
+/*
+ * For A31:
+ *
+ * There are 16 physical channels that can work in parallel.
+ *
+ * However, we have 30 different endpoints for our requests.
+ *
+ * Since each channel can only handle a unidirectional transfer, we
+ * need to allocate more virtual channels so that everyone can grab
+ * one.
+ *
+ * Some devices can't work in both directions (mostly because it
+ * wouldn't make sense), so we have a bit fewer virtual channels than
+ * two per endpoint.
+ */
+
+static struct sun6i_dma_config sun6i_a31_dma_cfg = {
+	.nr_max_channels = 16,
+	.nr_max_requests = 30,
+	.nr_max_vchans   = 53,
+	.set_burst_length = sun6i_set_burst_length_a31,
+	.src_burst_lengths = BIT(1) | BIT(8),
+	.dst_burst_lengths = BIT(1) | BIT(8),
+	.src_addr_widths   = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+			     BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+			     BIT(DMA_SLAVE_BUSWIDTH_4_BYTES),
+	.dst_addr_widths   = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+			     BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+			     BIT(DMA_SLAVE_BUSWIDTH_4_BYTES),
+};
+
+/*
+ * The A23 only has 8 physical channels, a maximum DRQ port id of 24,
+ * and a total of 37 usable source and destination endpoints.
+ */
+
+static struct sun6i_dma_config sun8i_a23_dma_cfg = {
+	.nr_max_channels = 8,
+	.nr_max_requests = 24,
+	.nr_max_vchans   = 37,
+	.clock_autogate_enable = sun6i_enable_clock_autogate_a23,
+	.set_burst_length = sun6i_set_burst_length_a31,
+	.src_burst_lengths = BIT(1) | BIT(8),
+	.dst_burst_lengths = BIT(1) | BIT(8),
+	.src_addr_widths   = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+			     BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+			     BIT(DMA_SLAVE_BUSWIDTH_4_BYTES),
+	.dst_addr_widths   = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+			     BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+			     BIT(DMA_SLAVE_BUSWIDTH_4_BYTES),
+};
+
+static struct sun6i_dma_config sun8i_a83t_dma_cfg = {
+	.nr_max_channels = 8,
+	.nr_max_requests = 28,
+	.nr_max_vchans   = 39,
+	.clock_autogate_enable = sun6i_enable_clock_autogate_a23,
+	.set_burst_length = sun6i_set_burst_length_a31,
+	.src_burst_lengths = BIT(1) | BIT(8),
+	.dst_burst_lengths = BIT(1) | BIT(8),
+	.src_addr_widths   = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+			     BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+			     BIT(DMA_SLAVE_BUSWIDTH_4_BYTES),
+	.dst_addr_widths   = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+			     BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+			     BIT(DMA_SLAVE_BUSWIDTH_4_BYTES),
+};
+
+/*
+ * The H3 has 12 physical channels, a maximum DRQ port id of 27,
+ * and a total of 34 usable source and destination endpoints.
+ * It also supports additional burst lengths and bus widths,
+ * and the burst length fields have different offsets.
+ */
+
+static struct sun6i_dma_config sun8i_h3_dma_cfg = {
+	.nr_max_channels = 12,
+	.nr_max_requests = 27,
+	.nr_max_vchans   = 34,
+	.clock_autogate_enable = sun6i_enable_clock_autogate_h3,
+	.set_burst_length = sun6i_set_burst_length_h3,
+	.src_burst_lengths = BIT(1) | BIT(4) | BIT(8) | BIT(16),
+	.dst_burst_lengths = BIT(1) | BIT(4) | BIT(8) | BIT(16),
+	.src_addr_widths   = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+			     BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+			     BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |
+			     BIT(DMA_SLAVE_BUSWIDTH_8_BYTES),
+	.dst_addr_widths   = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+			     BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+			     BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |
+			     BIT(DMA_SLAVE_BUSWIDTH_8_BYTES),
+};
+
+/*
+ * The A64 binding uses the number of dma channels from the
+ * device tree node.
+ */
+static struct sun6i_dma_config sun50i_a64_dma_cfg = {
+	.clock_autogate_enable = sun6i_enable_clock_autogate_h3,
+	.set_burst_length = sun6i_set_burst_length_h3,
+	.src_burst_lengths = BIT(1) | BIT(4) | BIT(8) | BIT(16),
+	.dst_burst_lengths = BIT(1) | BIT(4) | BIT(8) | BIT(16),
+	.src_addr_widths   = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+			     BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+			     BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |
+			     BIT(DMA_SLAVE_BUSWIDTH_8_BYTES),
+	.dst_addr_widths   = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+			     BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+			     BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |
+			     BIT(DMA_SLAVE_BUSWIDTH_8_BYTES),
+};
+
+/*
+ * The V3s has only 8 physical channels, a maximum DRQ port id of 23,
+ * and a total of 24 usable source and destination endpoints.
+ */
+
+static struct sun6i_dma_config sun8i_v3s_dma_cfg = {
+	.nr_max_channels = 8,
+	.nr_max_requests = 23,
+	.nr_max_vchans   = 24,
+	.clock_autogate_enable = sun6i_enable_clock_autogate_a23,
+	.set_burst_length = sun6i_set_burst_length_a31,
+	.src_burst_lengths = BIT(1) | BIT(8),
+	.dst_burst_lengths = BIT(1) | BIT(8),
+	.src_addr_widths   = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+			     BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+			     BIT(DMA_SLAVE_BUSWIDTH_4_BYTES),
+	.dst_addr_widths   = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+			     BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+			     BIT(DMA_SLAVE_BUSWIDTH_4_BYTES),
+};
+
+static const struct of_device_id sun6i_dma_match[] = {
+	{ .compatible = "allwinner,sun6i-a31-dma", .data = &sun6i_a31_dma_cfg },
+	{ .compatible = "allwinner,sun8i-a23-dma", .data = &sun8i_a23_dma_cfg },
+	{ .compatible = "allwinner,sun8i-a83t-dma", .data = &sun8i_a83t_dma_cfg },
+	{ .compatible = "allwinner,sun8i-h3-dma", .data = &sun8i_h3_dma_cfg },
+	{ .compatible = "allwinner,sun8i-v3s-dma", .data = &sun8i_v3s_dma_cfg },
+	{ .compatible = "allwinner,sun50i-a64-dma", .data = &sun50i_a64_dma_cfg },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, sun6i_dma_match);
+
+static int sun6i_dma_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct sun6i_dma_dev *sdc;
+	struct resource *res;
+	int ret, i;
+
+	sdc = devm_kzalloc(&pdev->dev, sizeof(*sdc), GFP_KERNEL);
+	if (!sdc)
+		return -ENOMEM;
+
+	sdc->cfg = of_device_get_match_data(&pdev->dev);
+	if (!sdc->cfg)
+		return -ENODEV;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	sdc->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(sdc->base))
+		return PTR_ERR(sdc->base);
+
+	sdc->irq = platform_get_irq(pdev, 0);
+	if (sdc->irq < 0) {
+		dev_err(&pdev->dev, "Cannot claim IRQ\n");
+		return sdc->irq;
+	}
+
+	sdc->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(sdc->clk)) {
+		dev_err(&pdev->dev, "No clock specified\n");
+		return PTR_ERR(sdc->clk);
+	}
+
+	sdc->rstc = devm_reset_control_get(&pdev->dev, NULL);
+	if (IS_ERR(sdc->rstc)) {
+		dev_err(&pdev->dev, "No reset controller specified\n");
+		return PTR_ERR(sdc->rstc);
+	}
+
+	sdc->pool = dmam_pool_create(dev_name(&pdev->dev), &pdev->dev,
+				     sizeof(struct sun6i_dma_lli), 4, 0);
+	if (!sdc->pool) {
+		dev_err(&pdev->dev, "No memory for descriptors dma pool\n");
+		return -ENOMEM;
+	}
+
+	platform_set_drvdata(pdev, sdc);
+	INIT_LIST_HEAD(&sdc->pending);
+	spin_lock_init(&sdc->lock);
+
+	dma_cap_set(DMA_PRIVATE, sdc->slave.cap_mask);
+	dma_cap_set(DMA_MEMCPY, sdc->slave.cap_mask);
+	dma_cap_set(DMA_SLAVE, sdc->slave.cap_mask);
+	dma_cap_set(DMA_CYCLIC, sdc->slave.cap_mask);
+
+	INIT_LIST_HEAD(&sdc->slave.channels);
+	sdc->slave.device_free_chan_resources	= sun6i_dma_free_chan_resources;
+	sdc->slave.device_tx_status		= sun6i_dma_tx_status;
+	sdc->slave.device_issue_pending		= sun6i_dma_issue_pending;
+	sdc->slave.device_prep_slave_sg		= sun6i_dma_prep_slave_sg;
+	sdc->slave.device_prep_dma_memcpy	= sun6i_dma_prep_dma_memcpy;
+	sdc->slave.device_prep_dma_cyclic	= sun6i_dma_prep_dma_cyclic;
+	sdc->slave.copy_align			= DMAENGINE_ALIGN_4_BYTES;
+	sdc->slave.device_config		= sun6i_dma_config;
+	sdc->slave.device_pause			= sun6i_dma_pause;
+	sdc->slave.device_resume		= sun6i_dma_resume;
+	sdc->slave.device_terminate_all		= sun6i_dma_terminate_all;
+	sdc->slave.src_addr_widths		= sdc->cfg->src_addr_widths;
+	sdc->slave.dst_addr_widths		= sdc->cfg->dst_addr_widths;
+	sdc->slave.directions			= BIT(DMA_DEV_TO_MEM) |
+						  BIT(DMA_MEM_TO_DEV);
+	sdc->slave.residue_granularity		= DMA_RESIDUE_GRANULARITY_BURST;
+	sdc->slave.dev = &pdev->dev;
+
+	sdc->num_pchans = sdc->cfg->nr_max_channels;
+	sdc->num_vchans = sdc->cfg->nr_max_vchans;
+	sdc->max_request = sdc->cfg->nr_max_requests;
+
+	ret = of_property_read_u32(np, "dma-channels", &sdc->num_pchans);
+	if (ret && !sdc->num_pchans) {
+		dev_err(&pdev->dev, "Can't get dma-channels.\n");
+		return ret;
+	}
+
+	ret = of_property_read_u32(np, "dma-requests", &sdc->max_request);
+	if (ret && !sdc->max_request) {
+		dev_info(&pdev->dev, "Missing dma-requests, using %u.\n",
+			 DMA_CHAN_MAX_DRQ);
+		sdc->max_request = DMA_CHAN_MAX_DRQ;
+	}
+
+	/*
+	 * If the number of vchans is not specified, derive it from the
+	 * highest port number, at most one channel per port and direction.
+	 */
+	if (!sdc->num_vchans)
+		sdc->num_vchans = 2 * (sdc->max_request + 1);
+
+	sdc->pchans = devm_kcalloc(&pdev->dev, sdc->num_pchans,
+				   sizeof(struct sun6i_pchan), GFP_KERNEL);
+	if (!sdc->pchans)
+		return -ENOMEM;
+
+	sdc->vchans = devm_kcalloc(&pdev->dev, sdc->num_vchans,
+				   sizeof(struct sun6i_vchan), GFP_KERNEL);
+	if (!sdc->vchans)
+		return -ENOMEM;
+
+	tasklet_init(&sdc->task, sun6i_dma_tasklet, (unsigned long)sdc);
+
+	for (i = 0; i < sdc->num_pchans; i++) {
+		struct sun6i_pchan *pchan = &sdc->pchans[i];
+
+		pchan->idx = i;
+		pchan->base = sdc->base + 0x100 + i * 0x40;
+	}
+
+	for (i = 0; i < sdc->num_vchans; i++) {
+		struct sun6i_vchan *vchan = &sdc->vchans[i];
+
+		INIT_LIST_HEAD(&vchan->node);
+		vchan->vc.desc_free = sun6i_dma_free_desc;
+		vchan_init(&vchan->vc, &sdc->slave);
+	}
+
+	ret = reset_control_deassert(sdc->rstc);
+	if (ret) {
+		dev_err(&pdev->dev, "Couldn't deassert the device from reset\n");
+		goto err_chan_free;
+	}
+
+	ret = clk_prepare_enable(sdc->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Couldn't enable the clock\n");
+		goto err_reset_assert;
+	}
+
+	ret = devm_request_irq(&pdev->dev, sdc->irq, sun6i_dma_interrupt, 0,
+			       dev_name(&pdev->dev), sdc);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot request IRQ\n");
+		goto err_clk_disable;
+	}
+
+	ret = dma_async_device_register(&sdc->slave);
+	if (ret) {
+		dev_warn(&pdev->dev, "Failed to register DMA engine device\n");
+		goto err_irq_disable;
+	}
+
+	ret = of_dma_controller_register(pdev->dev.of_node, sun6i_dma_of_xlate,
+					 sdc);
+	if (ret) {
+		dev_err(&pdev->dev, "of_dma_controller_register failed\n");
+		goto err_dma_unregister;
+	}
+
+	if (sdc->cfg->clock_autogate_enable)
+		sdc->cfg->clock_autogate_enable(sdc);
+
+	return 0;
+
+err_dma_unregister:
+	dma_async_device_unregister(&sdc->slave);
+err_irq_disable:
+	sun6i_kill_tasklet(sdc);
+err_clk_disable:
+	clk_disable_unprepare(sdc->clk);
+err_reset_assert:
+	reset_control_assert(sdc->rstc);
+err_chan_free:
+	sun6i_dma_free(sdc);
+	return ret;
+}
+
+static int sun6i_dma_remove(struct platform_device *pdev)
+{
+	struct sun6i_dma_dev *sdc = platform_get_drvdata(pdev);
+
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&sdc->slave);
+
+	sun6i_kill_tasklet(sdc);
+
+	clk_disable_unprepare(sdc->clk);
+	reset_control_assert(sdc->rstc);
+
+	sun6i_dma_free(sdc);
+
+	return 0;
+}
+
+static struct platform_driver sun6i_dma_driver = {
+	.probe		= sun6i_dma_probe,
+	.remove		= sun6i_dma_remove,
+	.driver = {
+		.name		= "sun6i-dma",
+		.of_match_table	= sun6i_dma_match,
+	},
+};
+module_platform_driver(sun6i_dma_driver);
+
+MODULE_DESCRIPTION("Allwinner A31 DMA Controller Driver");
+MODULE_AUTHOR("Sugar <shuge@allwinnertech.com>");
+MODULE_AUTHOR("Maxime Ripard <maxime.ripard@free-electrons.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
new file mode 100644
index 0000000..9a558e3
--- /dev/null
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -0,0 +1,1609 @@
+/*
+ * DMA driver for Nvidia's Tegra20 APB DMA controller.
+ *
+ * Copyright (c) 2012-2013, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+#include <linux/slab.h>
+
+#include "dmaengine.h"
+
+#define TEGRA_APBDMA_GENERAL			0x0
+#define TEGRA_APBDMA_GENERAL_ENABLE		BIT(31)
+
+#define TEGRA_APBDMA_CONTROL			0x010
+#define TEGRA_APBDMA_IRQ_MASK			0x01c
+#define TEGRA_APBDMA_IRQ_MASK_SET		0x020
+
+/* CSR register */
+#define TEGRA_APBDMA_CHAN_CSR			0x00
+#define TEGRA_APBDMA_CSR_ENB			BIT(31)
+#define TEGRA_APBDMA_CSR_IE_EOC			BIT(30)
+#define TEGRA_APBDMA_CSR_HOLD			BIT(29)
+#define TEGRA_APBDMA_CSR_DIR			BIT(28)
+#define TEGRA_APBDMA_CSR_ONCE			BIT(27)
+#define TEGRA_APBDMA_CSR_FLOW			BIT(21)
+#define TEGRA_APBDMA_CSR_REQ_SEL_SHIFT		16
+#define TEGRA_APBDMA_CSR_REQ_SEL_MASK		0x1F
+#define TEGRA_APBDMA_CSR_WCOUNT_MASK		0xFFFC
+
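+	/*
+	 * First pass: for every vchan whose pchan has retired a descriptor,
+	 * start the next queued one or release the pchan. The passes below
+	 * then hand the free pchans out to pending vchans and start them.
+	 */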
+/* STATUS register */
+#define TEGRA_APBDMA_CHAN_STATUS		0x004
+#define TEGRA_APBDMA_STATUS_BUSY		BIT(31)
+#define TEGRA_APBDMA_STATUS_ISE_EOC		BIT(30)
+#define TEGRA_APBDMA_STATUS_HALT		BIT(29)
+#define TEGRA_APBDMA_STATUS_PING_PONG		BIT(28)
+#define TEGRA_APBDMA_STATUS_COUNT_SHIFT		2
+#define TEGRA_APBDMA_STATUS_COUNT_MASK		0xFFFC
+
+#define TEGRA_APBDMA_CHAN_CSRE			0x00C
+#define TEGRA_APBDMA_CHAN_CSRE_PAUSE		(1 << 31)
+
+/* AHB memory address */
+#define TEGRA_APBDMA_CHAN_AHBPTR		0x010
+
+/* AHB sequence register */
+#define TEGRA_APBDMA_CHAN_AHBSEQ		0x14
+#define TEGRA_APBDMA_AHBSEQ_INTR_ENB		BIT(31)
+#define TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_8		(0 << 28)
+#define TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_16	(1 << 28)
+#define TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_32	(2 << 28)
+#define TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_64	(3 << 28)
+#define TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_128	(4 << 28)
+#define TEGRA_APBDMA_AHBSEQ_DATA_SWAP		BIT(27)
+#define TEGRA_APBDMA_AHBSEQ_BURST_1		(4 << 24)
+#define TEGRA_APBDMA_AHBSEQ_BURST_4		(5 << 24)
+#define TEGRA_APBDMA_AHBSEQ_BURST_8		(6 << 24)
+#define TEGRA_APBDMA_AHBSEQ_DBL_BUF		BIT(19)
+#define TEGRA_APBDMA_AHBSEQ_WRAP_SHIFT		16
+#define TEGRA_APBDMA_AHBSEQ_WRAP_NONE		0
+
+/* APB address */
+#define TEGRA_APBDMA_CHAN_APBPTR		0x018
+
+/* APB sequence register */
+#define TEGRA_APBDMA_CHAN_APBSEQ		0x01c
+#define TEGRA_APBDMA_APBSEQ_BUS_WIDTH_8		(0 << 28)
+#define TEGRA_APBDMA_APBSEQ_BUS_WIDTH_16	(1 << 28)
+#define TEGRA_APBDMA_APBSEQ_BUS_WIDTH_32	(2 << 28)
+#define TEGRA_APBDMA_APBSEQ_BUS_WIDTH_64	(3 << 28)
+#define TEGRA_APBDMA_APBSEQ_BUS_WIDTH_128	(4 << 28)
+#define TEGRA_APBDMA_APBSEQ_DATA_SWAP		BIT(27)
+#define TEGRA_APBDMA_APBSEQ_WRAP_WORD_1		(1 << 16)
+
+/* Tegra148 specific registers */
+#define TEGRA_APBDMA_CHAN_WCOUNT		0x20
+
+#define TEGRA_APBDMA_CHAN_WORD_TRANSFER		0x24
+
+/*
+ * If any burst is in flight while the DMA is paused, this is the time
+ * needed for the in-flight burst to complete and for the DMA status
+ * register to be updated.
+ */
+#define TEGRA_APBDMA_BURST_COMPLETE_TIME	20
+
+/* Channel base address offset from APBDMA base address */
+#define TEGRA_APBDMA_CHANNEL_BASE_ADD_OFFSET	0x1000
+
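+/* Sentinel meaning "no slave id configured yet" (one past the REQ_SEL mask) */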
+#define TEGRA_APBDMA_SLAVE_ID_INVALID	(TEGRA_APBDMA_CSR_REQ_SEL_MASK + 1)
+
+struct tegra_dma;
+
+/*
+ * tegra_dma_chip_data: Tegra chip-specific DMA data
+ * @nr_channels: Number of channels available in the controller.
+ * @channel_reg_size: Channel register size/stride.
+ * @max_dma_count: Maximum DMA transfer count supported by DMA controller.
+ * @support_channel_pause: Support channel-wise pausing of the DMA.
+ * @support_separate_wcount_reg: Support separate word count register.
+ */
+struct tegra_dma_chip_data {
+	int nr_channels;
+	int channel_reg_size;
+	int max_dma_count;
+	bool support_channel_pause;
+	bool support_separate_wcount_reg;
+};
+
+/* DMA channel registers */
+struct tegra_dma_channel_regs {
+	unsigned long	csr;
+	unsigned long	ahb_ptr;
+	unsigned long	apb_ptr;
+	unsigned long	ahb_seq;
+	unsigned long	apb_seq;
+	unsigned long	wcount;
+};
+
+/*
+ * tegra_dma_sg_req: DMA request details used to configure the hardware.
+ * This holds the details of one sub-transfer as programmed into the DMA hw.
+ * The client's data transfer request may be broken into multiple
+ * sub-transfers, depending on the requester details and hw support.
+ * Each sub-transfer is added to the channel's transfer list and points to
+ * the Tegra DMA descriptor that manages the overall transfer.
+ */
+struct tegra_dma_sg_req {
+	struct tegra_dma_channel_regs	ch_regs;
+	int				req_len;
+	bool				configured;
+	bool				last_sg;
+	struct list_head		node;
+	struct tegra_dma_desc		*dma_desc;
+};
+
+/*
+ * tegra_dma_desc: Tegra DMA descriptor that manages a client request.
+ * It keeps track of the transfer status, callbacks, request counts, etc.
+ */
+struct tegra_dma_desc {
+	struct dma_async_tx_descriptor	txd;
+	int				bytes_requested;
+	int				bytes_transferred;
+	enum dma_status			dma_status;
+	struct list_head		node;
+	struct list_head		tx_list;
+	struct list_head		cb_node;
+	int				cb_count;
+};
+
+struct tegra_dma_channel;
+
+typedef void (*dma_isr_handler)(struct tegra_dma_channel *tdc,
+				bool to_terminate);
+
+/* tegra_dma_channel: Channel specific information */
+struct tegra_dma_channel {
+	struct dma_chan		dma_chan;
+	char			name[30];
+	bool			config_init;
+	int			id;
+	int			irq;
+	void __iomem		*chan_addr;
+	spinlock_t		lock;
+	bool			busy;
+	struct tegra_dma	*tdma;
+	bool			cyclic;
+
+	/* Different lists for managing the requests */
+	struct list_head	free_sg_req;
+	struct list_head	pending_sg_req;
+	struct list_head	free_dma_desc;
+	struct list_head	cb_desc;
+
+	/* ISR handler and tasklet for bottom half of isr handling */
+	dma_isr_handler		isr_handler;
+	struct tasklet_struct	tasklet;
+
+	/* Channel-slave specific configuration */
+	unsigned int slave_id;
+	struct dma_slave_config dma_sconfig;
+	struct tegra_dma_channel_regs	channel_reg;
+};
+
+/* tegra_dma: Tegra DMA specific information */
+struct tegra_dma {
+	struct dma_device		dma_dev;
+	struct device			*dev;
+	struct clk			*dma_clk;
+	struct reset_control		*rst;
+	spinlock_t			global_lock;
+	void __iomem			*base_addr;
+	const struct tegra_dma_chip_data *chip_data;
+
+	/*
+	 * Counter for managing global pausing of the DMA controller.
+	 * Only applicable for devices that don't support individual
+	 * channel pausing.
+	 */
+	u32				global_pause_count;
+
+	/* Some registers need to be cached before suspend */
+	u32				reg_gen;
+
+	/* Last member of the structure */
+	struct tegra_dma_channel channels[0];
+};
+
+static inline void tdma_write(struct tegra_dma *tdma, u32 reg, u32 val)
+{
+	writel(val, tdma->base_addr + reg);
+}
+
+static inline u32 tdma_read(struct tegra_dma *tdma, u32 reg)
+{
+	return readl(tdma->base_addr + reg);
+}
+
+static inline void tdc_write(struct tegra_dma_channel *tdc,
+		u32 reg, u32 val)
+{
+	writel(val, tdc->chan_addr + reg);
+}
+
+static inline u32 tdc_read(struct tegra_dma_channel *tdc, u32 reg)
+{
+	return readl(tdc->chan_addr + reg);
+}
+
+static inline struct tegra_dma_channel *to_tegra_dma_chan(struct dma_chan *dc)
+{
+	return container_of(dc, struct tegra_dma_channel, dma_chan);
+}
+
+static inline struct tegra_dma_desc *txd_to_tegra_dma_desc(
+		struct dma_async_tx_descriptor *td)
+{
+	return container_of(td, struct tegra_dma_desc, txd);
+}
+
+static inline struct device *tdc2dev(struct tegra_dma_channel *tdc)
+{
+	return &tdc->dma_chan.dev->device;
+}
+
+static dma_cookie_t tegra_dma_tx_submit(struct dma_async_tx_descriptor *tx);
+static int tegra_dma_runtime_suspend(struct device *dev);
+static int tegra_dma_runtime_resume(struct device *dev);
+
+/* Get a DMA desc from the free list; if none is available, allocate one. */
+static struct tegra_dma_desc *tegra_dma_desc_get(
+		struct tegra_dma_channel *tdc)
+{
+	struct tegra_dma_desc *dma_desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tdc->lock, flags);
+
+	/* Only reuse a free descriptor once it has been acked */
+	list_for_each_entry(dma_desc, &tdc->free_dma_desc, node) {
+		if (async_tx_test_ack(&dma_desc->txd)) {
+			list_del(&dma_desc->node);
+			spin_unlock_irqrestore(&tdc->lock, flags);
+			dma_desc->txd.flags = 0;
+			return dma_desc;
+		}
+	}
+
+	spin_unlock_irqrestore(&tdc->lock, flags);
+
+	/* Allocate DMA desc */
+	dma_desc = kzalloc(sizeof(*dma_desc), GFP_NOWAIT);
+	if (!dma_desc)
+		return NULL;
+
+	dma_async_tx_descriptor_init(&dma_desc->txd, &tdc->dma_chan);
+	dma_desc->txd.tx_submit = tegra_dma_tx_submit;
+	dma_desc->txd.flags = 0;
+	return dma_desc;
+}
+
+static void tegra_dma_desc_put(struct tegra_dma_channel *tdc,
+		struct tegra_dma_desc *dma_desc)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&tdc->lock, flags);
+	if (!list_empty(&dma_desc->tx_list))
+		list_splice_init(&dma_desc->tx_list, &tdc->free_sg_req);
+	list_add_tail(&dma_desc->node, &tdc->free_dma_desc);
+	spin_unlock_irqrestore(&tdc->lock, flags);
+}
+
+static struct tegra_dma_sg_req *tegra_dma_sg_req_get(
+		struct tegra_dma_channel *tdc)
+{
+	struct tegra_dma_sg_req *sg_req = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tdc->lock, flags);
+	if (!list_empty(&tdc->free_sg_req)) {
+		sg_req = list_first_entry(&tdc->free_sg_req,
+					typeof(*sg_req), node);
+		list_del(&sg_req->node);
+		spin_unlock_irqrestore(&tdc->lock, flags);
+		return sg_req;
+	}
+	spin_unlock_irqrestore(&tdc->lock, flags);
+
+	sg_req = kzalloc(sizeof(struct tegra_dma_sg_req), GFP_NOWAIT);
+
+	return sg_req;
+}
+
+static int tegra_dma_slave_config(struct dma_chan *dc,
+		struct dma_slave_config *sconfig)
+{
+	struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+
+	if (!list_empty(&tdc->pending_sg_req)) {
+		dev_err(tdc2dev(tdc), "Configuration not allowed\n");
+		return -EBUSY;
+	}
+
+	memcpy(&tdc->dma_sconfig, sconfig, sizeof(*sconfig));
+	if (tdc->slave_id == TEGRA_APBDMA_SLAVE_ID_INVALID &&
+	    sconfig->device_fc) {
+		if (sconfig->slave_id > TEGRA_APBDMA_CSR_REQ_SEL_MASK)
+			return -EINVAL;
+		tdc->slave_id = sconfig->slave_id;
+	}
+	tdc->config_init = true;
+	return 0;
+}
+
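+/*
+ * Chips without per-channel pause support can only pause the whole
+ * controller; the pause is refcounted so overlapping users do not
+ * resume it prematurely.
+ */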
+static void tegra_dma_global_pause(struct tegra_dma_channel *tdc,
+	bool wait_for_burst_complete)
+{
+	struct tegra_dma *tdma = tdc->tdma;
+
+	spin_lock(&tdma->global_lock);
+
+	if (tdc->tdma->global_pause_count == 0) {
+		tdma_write(tdma, TEGRA_APBDMA_GENERAL, 0);
+		if (wait_for_burst_complete)
+			udelay(TEGRA_APBDMA_BURST_COMPLETE_TIME);
+	}
+
+	tdc->tdma->global_pause_count++;
+
+	spin_unlock(&tdma->global_lock);
+}
+
+static void tegra_dma_global_resume(struct tegra_dma_channel *tdc)
+{
+	struct tegra_dma *tdma = tdc->tdma;
+
+	spin_lock(&tdma->global_lock);
+
+	if (WARN_ON(tdc->tdma->global_pause_count == 0))
+		goto out;
+
+	if (--tdc->tdma->global_pause_count == 0)
+		tdma_write(tdma, TEGRA_APBDMA_GENERAL,
+			   TEGRA_APBDMA_GENERAL_ENABLE);
+
+out:
+	spin_unlock(&tdma->global_lock);
+}
+
+static void tegra_dma_pause(struct tegra_dma_channel *tdc,
+	bool wait_for_burst_complete)
+{
+	struct tegra_dma *tdma = tdc->tdma;
+
+	if (tdma->chip_data->support_channel_pause) {
+		tdc_write(tdc, TEGRA_APBDMA_CHAN_CSRE,
+				TEGRA_APBDMA_CHAN_CSRE_PAUSE);
+		if (wait_for_burst_complete)
+			udelay(TEGRA_APBDMA_BURST_COMPLETE_TIME);
+	} else {
+		tegra_dma_global_pause(tdc, wait_for_burst_complete);
+	}
+}
+
+static void tegra_dma_resume(struct tegra_dma_channel *tdc)
+{
+	struct tegra_dma *tdma = tdc->tdma;
+
+	if (tdma->chip_data->support_channel_pause) {
+		tdc_write(tdc, TEGRA_APBDMA_CHAN_CSRE, 0);
+	} else {
+		tegra_dma_global_resume(tdc);
+	}
+}
+
+static void tegra_dma_stop(struct tegra_dma_channel *tdc)
+{
+	u32 csr;
+	u32 status;
+
+	/* Disable interrupts */
+	csr = tdc_read(tdc, TEGRA_APBDMA_CHAN_CSR);
+	csr &= ~TEGRA_APBDMA_CSR_IE_EOC;
+	tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR, csr);
+
+	/* Disable DMA */
+	csr &= ~TEGRA_APBDMA_CSR_ENB;
+	tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR, csr);
+
+	/* Clear interrupt status if it is there */
+	status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS);
+	if (status & TEGRA_APBDMA_STATUS_ISE_EOC) {
+		dev_dbg(tdc2dev(tdc), "%s():clearing interrupt\n", __func__);
+		tdc_write(tdc, TEGRA_APBDMA_CHAN_STATUS, status);
+	}
+	tdc->busy = false;
+}
+
+static void tegra_dma_start(struct tegra_dma_channel *tdc,
+		struct tegra_dma_sg_req *sg_req)
+{
+	struct tegra_dma_channel_regs *ch_regs = &sg_req->ch_regs;
+
+	tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR, ch_regs->csr);
+	tdc_write(tdc, TEGRA_APBDMA_CHAN_APBSEQ, ch_regs->apb_seq);
+	tdc_write(tdc, TEGRA_APBDMA_CHAN_APBPTR, ch_regs->apb_ptr);
+	tdc_write(tdc, TEGRA_APBDMA_CHAN_AHBSEQ, ch_regs->ahb_seq);
+	tdc_write(tdc, TEGRA_APBDMA_CHAN_AHBPTR, ch_regs->ahb_ptr);
+	if (tdc->tdma->chip_data->support_separate_wcount_reg)
+		tdc_write(tdc, TEGRA_APBDMA_CHAN_WCOUNT, ch_regs->wcount);
+
+	/* Start DMA */
+	tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR,
+				ch_regs->csr | TEGRA_APBDMA_CSR_ENB);
+}
+
+static void tegra_dma_configure_for_next(struct tegra_dma_channel *tdc,
+		struct tegra_dma_sg_req *nsg_req)
+{
+	unsigned long status;
+
+	/*
+	 * The DMA controller reloads the new configuration for the next
+	 * transfer after the last burst of the current transfer completes.
+	 * If there is no IEC status, the last burst has not completed yet.
+	 * The last burst may still be in flight and complete while the DMA
+	 * is paused, but in that case it will neither raise an interrupt
+	 * nor reload the new configuration.
+	 * If the IEC status is already set, the interrupt handler has to
+	 * load the new configuration instead.
+	 */
+	tegra_dma_pause(tdc, false);
+	status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS);
+
+	/*
+	 * If an interrupt is pending, do nothing here; the ISR will handle
+	 * programming of the new request.
+	 */
+	if (status & TEGRA_APBDMA_STATUS_ISE_EOC) {
+		dev_err(tdc2dev(tdc),
+			"Skipping new configuration as interrupt is pending\n");
+		tegra_dma_resume(tdc);
+		return;
+	}
+
+	/* Safe to program new configuration */
+	tdc_write(tdc, TEGRA_APBDMA_CHAN_APBPTR, nsg_req->ch_regs.apb_ptr);
+	tdc_write(tdc, TEGRA_APBDMA_CHAN_AHBPTR, nsg_req->ch_regs.ahb_ptr);
+	if (tdc->tdma->chip_data->support_separate_wcount_reg)
+		tdc_write(tdc, TEGRA_APBDMA_CHAN_WCOUNT,
+						nsg_req->ch_regs.wcount);
+	tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR,
+				nsg_req->ch_regs.csr | TEGRA_APBDMA_CSR_ENB);
+	nsg_req->configured = true;
+
+	tegra_dma_resume(tdc);
+}
+
+static void tdc_start_head_req(struct tegra_dma_channel *tdc)
+{
+	struct tegra_dma_sg_req *sg_req;
+
+	if (list_empty(&tdc->pending_sg_req))
+		return;
+
+	sg_req = list_first_entry(&tdc->pending_sg_req,
+					typeof(*sg_req), node);
+	tegra_dma_start(tdc, sg_req);
+	sg_req->configured = true;
+	tdc->busy = true;
+}
+
+static void tdc_configure_next_head_desc(struct tegra_dma_channel *tdc)
+{
+	struct tegra_dma_sg_req *hsgreq;
+	struct tegra_dma_sg_req *hnsgreq;
+
+	if (list_empty(&tdc->pending_sg_req))
+		return;
+
+	hsgreq = list_first_entry(&tdc->pending_sg_req, typeof(*hsgreq), node);
+	if (!list_is_last(&hsgreq->node, &tdc->pending_sg_req)) {
+		hnsgreq = list_first_entry(&hsgreq->node,
+					typeof(*hnsgreq), node);
+		tegra_dma_configure_for_next(tdc, hnsgreq);
+	}
+}
+
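+/*
+ * Convert the remaining count from the channel status register into the
+ * number of bytes already transferred. The word count is programmed as
+ * (req_len - 4) (see tegra_dma_prep_wcount()), hence the extra -4 here.
+ */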
+static inline int get_current_xferred_count(struct tegra_dma_channel *tdc,
+	struct tegra_dma_sg_req *sg_req, unsigned long status)
+{
+	return sg_req->req_len - (status & TEGRA_APBDMA_STATUS_COUNT_MASK) - 4;
+}
+
+static void tegra_dma_abort_all(struct tegra_dma_channel *tdc)
+{
+	struct tegra_dma_sg_req *sgreq;
+	struct tegra_dma_desc *dma_desc;
+
+	while (!list_empty(&tdc->pending_sg_req)) {
+		sgreq = list_first_entry(&tdc->pending_sg_req,
+						typeof(*sgreq), node);
+		list_move_tail(&sgreq->node, &tdc->free_sg_req);
+		if (sgreq->last_sg) {
+			dma_desc = sgreq->dma_desc;
+			dma_desc->dma_status = DMA_ERROR;
+			list_add_tail(&dma_desc->node, &tdc->free_dma_desc);
+
+			/* Add to the cb list if it is not already there. */
+			if (!dma_desc->cb_count)
+				list_add_tail(&dma_desc->cb_node,
+							&tdc->cb_desc);
+			dma_desc->cb_count++;
+		}
+	}
+	tdc->isr_handler = NULL;
+}
+
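+/*
+ * Cyclic mode: check that the head request on the pending list is already
+ * in flight. If it is not, the reload did not happen in time, so stop the
+ * channel and abort all queued requests; otherwise pre-configure the next
+ * request unless the channel is being terminated.
+ */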
+static bool handle_continuous_head_request(struct tegra_dma_channel *tdc,
+		struct tegra_dma_sg_req *last_sg_req, bool to_terminate)
+{
+	struct tegra_dma_sg_req *hsgreq = NULL;
+
+	if (list_empty(&tdc->pending_sg_req)) {
+		dev_err(tdc2dev(tdc), "Dma is running without req\n");
+		tegra_dma_stop(tdc);
+		return false;
+	}
+
+	/*
+	 * Check that the head request on the list is in flight.
+	 * If it is not in flight, abort the transfer as the
+	 * transfer loop cannot continue.
+	 */
+	hsgreq = list_first_entry(&tdc->pending_sg_req, typeof(*hsgreq), node);
+	if (!hsgreq->configured) {
+		tegra_dma_stop(tdc);
+		dev_err(tdc2dev(tdc), "Error in dma transfer, aborting dma\n");
+		tegra_dma_abort_all(tdc);
+		return false;
+	}
+
+	/* Configure next request */
+	if (!to_terminate)
+		tdc_configure_next_head_desc(tdc);
+	return true;
+}
+
+static void handle_once_dma_done(struct tegra_dma_channel *tdc,
+	bool to_terminate)
+{
+	struct tegra_dma_sg_req *sgreq;
+	struct tegra_dma_desc *dma_desc;
+
+	tdc->busy = false;
+	sgreq = list_first_entry(&tdc->pending_sg_req, typeof(*sgreq), node);
+	dma_desc = sgreq->dma_desc;
+	dma_desc->bytes_transferred += sgreq->req_len;
+
+	list_del(&sgreq->node);
+	if (sgreq->last_sg) {
+		dma_desc->dma_status = DMA_COMPLETE;
+		dma_cookie_complete(&dma_desc->txd);
+		if (!dma_desc->cb_count)
+			list_add_tail(&dma_desc->cb_node, &tdc->cb_desc);
+		dma_desc->cb_count++;
+		list_add_tail(&dma_desc->node, &tdc->free_dma_desc);
+	}
+	list_add_tail(&sgreq->node, &tdc->free_sg_req);
+
+	/* Do not start DMA if it is going to be terminated */
+	if (to_terminate || list_empty(&tdc->pending_sg_req))
+		return;
+
+	tdc_start_head_req(tdc);
+}
+
+static void handle_cont_sngl_cycle_dma_done(struct tegra_dma_channel *tdc,
+		bool to_terminate)
+{
+	struct tegra_dma_sg_req *sgreq;
+	struct tegra_dma_desc *dma_desc;
+	bool st;
+
+	sgreq = list_first_entry(&tdc->pending_sg_req, typeof(*sgreq), node);
+	dma_desc = sgreq->dma_desc;
+	dma_desc->bytes_transferred += sgreq->req_len;
+
+	/* Callback needs to be called */
+	if (!dma_desc->cb_count)
+		list_add_tail(&dma_desc->cb_node, &tdc->cb_desc);
+	dma_desc->cb_count++;
+
+	/* If this is not the last req, move it to the end of the pending list */
+	if (!list_is_last(&sgreq->node, &tdc->pending_sg_req)) {
+		list_move_tail(&sgreq->node, &tdc->pending_sg_req);
+		sgreq->configured = false;
+		st = handle_continuous_head_request(tdc, sgreq, to_terminate);
+		if (!st)
+			dma_desc->dma_status = DMA_ERROR;
+	}
+}
+
+static void tegra_dma_tasklet(unsigned long data)
+{
+	struct tegra_dma_channel *tdc = (struct tegra_dma_channel *)data;
+	struct dmaengine_desc_callback cb;
+	struct tegra_dma_desc *dma_desc;
+	unsigned long flags;
+	int cb_count;
+
+	spin_lock_irqsave(&tdc->lock, flags);
+	while (!list_empty(&tdc->cb_desc)) {
+		dma_desc  = list_first_entry(&tdc->cb_desc,
+					typeof(*dma_desc), cb_node);
+		list_del(&dma_desc->cb_node);
+		dmaengine_desc_get_callback(&dma_desc->txd, &cb);
+		cb_count = dma_desc->cb_count;
+		dma_desc->cb_count = 0;
+		spin_unlock_irqrestore(&tdc->lock, flags);
+		while (cb_count--)
+			dmaengine_desc_callback_invoke(&cb, NULL);
+		spin_lock_irqsave(&tdc->lock, flags);
+	}
+	spin_unlock_irqrestore(&tdc->lock, flags);
+}
+
+static irqreturn_t tegra_dma_isr(int irq, void *dev_id)
+{
+	struct tegra_dma_channel *tdc = dev_id;
+	unsigned long status;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tdc->lock, flags);
+
+	status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS);
+	if (status & TEGRA_APBDMA_STATUS_ISE_EOC) {
+		tdc_write(tdc, TEGRA_APBDMA_CHAN_STATUS, status);
+		tdc->isr_handler(tdc, false);
+		tasklet_schedule(&tdc->tasklet);
+		spin_unlock_irqrestore(&tdc->lock, flags);
+		return IRQ_HANDLED;
+	}
+
+	spin_unlock_irqrestore(&tdc->lock, flags);
+	dev_info(tdc2dev(tdc),
+		"Interrupt already served status 0x%08lx\n", status);
+	return IRQ_NONE;
+}
+
+static dma_cookie_t tegra_dma_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+	struct tegra_dma_desc *dma_desc = txd_to_tegra_dma_desc(txd);
+	struct tegra_dma_channel *tdc = to_tegra_dma_chan(txd->chan);
+	unsigned long flags;
+	dma_cookie_t cookie;
+
+	spin_lock_irqsave(&tdc->lock, flags);
+	dma_desc->dma_status = DMA_IN_PROGRESS;
+	cookie = dma_cookie_assign(&dma_desc->txd);
+	list_splice_tail_init(&dma_desc->tx_list, &tdc->pending_sg_req);
+	spin_unlock_irqrestore(&tdc->lock, flags);
+	return cookie;
+}
+
+static void tegra_dma_issue_pending(struct dma_chan *dc)
+{
+	struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+	unsigned long flags;
+
+	spin_lock_irqsave(&tdc->lock, flags);
+	if (list_empty(&tdc->pending_sg_req)) {
+		dev_err(tdc2dev(tdc), "No DMA request\n");
+		goto end;
+	}
+	if (!tdc->busy) {
+		tdc_start_head_req(tdc);
+
+		/* Continuous single mode: Configure next req */
+		if (tdc->cyclic) {
+			/*
+			 * Wait for one burst time before configuring the
+			 * DMA for the next transfer.
+			 */
+			udelay(TEGRA_APBDMA_BURST_COMPLETE_TIME);
+			tdc_configure_next_head_desc(tdc);
+		}
+	}
+end:
+	spin_unlock_irqrestore(&tdc->lock, flags);
+}
+
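+/*
+ * Terminate all transfers on the channel: pause the DMA, let any pending
+ * EOC interrupt be handled, account for the bytes moved by the in-flight
+ * request, then stop the channel and drop all queued callbacks.
+ */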
+static int tegra_dma_terminate_all(struct dma_chan *dc)
+{
+	struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+	struct tegra_dma_sg_req *sgreq;
+	struct tegra_dma_desc *dma_desc;
+	unsigned long flags;
+	unsigned long status;
+	unsigned long wcount;
+	bool was_busy;
+
+	spin_lock_irqsave(&tdc->lock, flags);
+	if (list_empty(&tdc->pending_sg_req)) {
+		spin_unlock_irqrestore(&tdc->lock, flags);
+		return 0;
+	}
+
+	if (!tdc->busy)
+		goto skip_dma_stop;
+
+	/* Pause DMA before checking the queue status */
+	tegra_dma_pause(tdc, true);
+
+	status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS);
+	if (status & TEGRA_APBDMA_STATUS_ISE_EOC) {
+		dev_dbg(tdc2dev(tdc), "%s():handling isr\n", __func__);
+		tdc->isr_handler(tdc, true);
+		status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS);
+	}
+	if (tdc->tdma->chip_data->support_separate_wcount_reg)
+		wcount = tdc_read(tdc, TEGRA_APBDMA_CHAN_WORD_TRANSFER);
+	else
+		wcount = status;
+
+	was_busy = tdc->busy;
+	tegra_dma_stop(tdc);
+
+	if (!list_empty(&tdc->pending_sg_req) && was_busy) {
+		sgreq = list_first_entry(&tdc->pending_sg_req,
+					typeof(*sgreq), node);
+		sgreq->dma_desc->bytes_transferred +=
+				get_current_xferred_count(tdc, sgreq, wcount);
+	}
+	tegra_dma_resume(tdc);
+
+skip_dma_stop:
+	tegra_dma_abort_all(tdc);
+
+	while (!list_empty(&tdc->cb_desc)) {
+		dma_desc  = list_first_entry(&tdc->cb_desc,
+					typeof(*dma_desc), cb_node);
+		list_del(&dma_desc->cb_node);
+		dma_desc->cb_count = 0;
+	}
+	spin_unlock_irqrestore(&tdc->lock, flags);
+	return 0;
+}
+
+static enum dma_status tegra_dma_tx_status(struct dma_chan *dc,
+	dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+	struct tegra_dma_desc *dma_desc;
+	struct tegra_dma_sg_req *sg_req;
+	enum dma_status ret;
+	unsigned long flags;
+	unsigned int residual;
+
+	ret = dma_cookie_status(dc, cookie, txstate);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	spin_lock_irqsave(&tdc->lock, flags);
+
+	/* Check the status of descriptors waiting for ack on the free list */
+	list_for_each_entry(dma_desc, &tdc->free_dma_desc, node) {
+		if (dma_desc->txd.cookie == cookie) {
+			ret = dma_desc->dma_status;
+			goto found;
+		}
+	}
+
+	/* Check in pending list */
+	list_for_each_entry(sg_req, &tdc->pending_sg_req, node) {
+		dma_desc = sg_req->dma_desc;
+		if (dma_desc->txd.cookie == cookie) {
+			ret = dma_desc->dma_status;
+			goto found;
+		}
+	}
+
+	dev_dbg(tdc2dev(tdc), "cookie %d not found\n", cookie);
+	dma_desc = NULL;
+
+found:
+	if (dma_desc && txstate) {
+		residual = dma_desc->bytes_requested -
+			   (dma_desc->bytes_transferred %
+			    dma_desc->bytes_requested);
+		dma_set_residue(txstate, residual);
+	}
+
+	spin_unlock_irqrestore(&tdc->lock, flags);
+	return ret;
+}
+
+static inline int get_bus_width(struct tegra_dma_channel *tdc,
+		enum dma_slave_buswidth slave_bw)
+{
+	switch (slave_bw) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		return TEGRA_APBDMA_APBSEQ_BUS_WIDTH_8;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		return TEGRA_APBDMA_APBSEQ_BUS_WIDTH_16;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		return TEGRA_APBDMA_APBSEQ_BUS_WIDTH_32;
+	case DMA_SLAVE_BUSWIDTH_8_BYTES:
+		return TEGRA_APBDMA_APBSEQ_BUS_WIDTH_64;
+	default:
+		dev_warn(tdc2dev(tdc),
+			"slave bw is not supported, using 32bits\n");
+		return TEGRA_APBDMA_APBSEQ_BUS_WIDTH_32;
+	}
+}
+
+static inline int get_burst_size(struct tegra_dma_channel *tdc,
+	u32 burst_size, enum dma_slave_buswidth slave_bw, int len)
+{
+	int burst_byte;
+	int burst_ahb_width;
+
+	/*
+	 * The burst_size from the client is in units of the bus width.
+	 * Convert it into the AHB memory width, which is 4 bytes.
+	 */
+	burst_byte = burst_size * slave_bw;
+	burst_ahb_width = burst_byte / 4;
+
+	/* If burst size is 0 then calculate the burst size based on length */
+	if (!burst_ahb_width) {
+		if (len & 0xF)
+			return TEGRA_APBDMA_AHBSEQ_BURST_1;
+		else if ((len >> 4) & 0x1)
+			return TEGRA_APBDMA_AHBSEQ_BURST_4;
+		else
+			return TEGRA_APBDMA_AHBSEQ_BURST_8;
+	}
+	if (burst_ahb_width < 4)
+		return TEGRA_APBDMA_AHBSEQ_BURST_1;
+	else if (burst_ahb_width < 8)
+		return TEGRA_APBDMA_AHBSEQ_BURST_4;
+	else
+		return TEGRA_APBDMA_AHBSEQ_BURST_8;
+}
+
+static int get_transfer_param(struct tegra_dma_channel *tdc,
+	enum dma_transfer_direction direction, unsigned long *apb_addr,
+	unsigned long *apb_seq,	unsigned long *csr, unsigned int *burst_size,
+	enum dma_slave_buswidth *slave_bw)
+{
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		*apb_addr = tdc->dma_sconfig.dst_addr;
+		*apb_seq = get_bus_width(tdc, tdc->dma_sconfig.dst_addr_width);
+		*burst_size = tdc->dma_sconfig.dst_maxburst;
+		*slave_bw = tdc->dma_sconfig.dst_addr_width;
+		*csr = TEGRA_APBDMA_CSR_DIR;
+		return 0;
+
+	case DMA_DEV_TO_MEM:
+		*apb_addr = tdc->dma_sconfig.src_addr;
+		*apb_seq = get_bus_width(tdc, tdc->dma_sconfig.src_addr_width);
+		*burst_size = tdc->dma_sconfig.src_maxburst;
+		*slave_bw = tdc->dma_sconfig.src_addr_width;
+		*csr = 0;
+		return 0;
+
+	default:
+		dev_err(tdc2dev(tdc), "Dma direction is not supported\n");
+		return -EINVAL;
+	}
+	return -EINVAL;
+}
+
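+/*
+ * Program the transfer length: controllers with a separate WCOUNT register
+ * take it there, older ones encode the word count in the CSR register.
+ */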
+static void tegra_dma_prep_wcount(struct tegra_dma_channel *tdc,
+	struct tegra_dma_channel_regs *ch_regs, u32 len)
+{
+	u32 len_field = (len - 4) & 0xFFFC;
+
+	if (tdc->tdma->chip_data->support_separate_wcount_reg)
+		ch_regs->wcount = len_field;
+	else
+		ch_regs->csr |= len_field;
+}
+
+static struct dma_async_tx_descriptor *tegra_dma_prep_slave_sg(
+	struct dma_chan *dc, struct scatterlist *sgl, unsigned int sg_len,
+	enum dma_transfer_direction direction, unsigned long flags,
+	void *context)
+{
+	struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+	struct tegra_dma_desc *dma_desc;
+	unsigned int i;
+	struct scatterlist *sg;
+	unsigned long csr, ahb_seq, apb_ptr, apb_seq;
+	struct list_head req_list;
+	struct tegra_dma_sg_req  *sg_req = NULL;
+	u32 burst_size;
+	enum dma_slave_buswidth slave_bw;
+
+	if (!tdc->config_init) {
+		dev_err(tdc2dev(tdc), "dma channel is not configured\n");
+		return NULL;
+	}
+	if (sg_len < 1) {
+		dev_err(tdc2dev(tdc), "Invalid segment length %d\n", sg_len);
+		return NULL;
+	}
+
+	if (get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr,
+				&burst_size, &slave_bw) < 0)
+		return NULL;
+
+	INIT_LIST_HEAD(&req_list);
+
+	ahb_seq = TEGRA_APBDMA_AHBSEQ_INTR_ENB;
+	ahb_seq |= TEGRA_APBDMA_AHBSEQ_WRAP_NONE <<
+					TEGRA_APBDMA_AHBSEQ_WRAP_SHIFT;
+	ahb_seq |= TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_32;
+
+	csr |= TEGRA_APBDMA_CSR_ONCE;
+
+	if (tdc->slave_id != TEGRA_APBDMA_SLAVE_ID_INVALID) {
+		csr |= TEGRA_APBDMA_CSR_FLOW;
+		csr |= tdc->slave_id << TEGRA_APBDMA_CSR_REQ_SEL_SHIFT;
+	}
+
+	if (flags & DMA_PREP_INTERRUPT)
+		csr |= TEGRA_APBDMA_CSR_IE_EOC;
+
+	apb_seq |= TEGRA_APBDMA_APBSEQ_WRAP_WORD_1;
+
+	dma_desc = tegra_dma_desc_get(tdc);
+	if (!dma_desc) {
+		dev_err(tdc2dev(tdc), "Dma descriptors not available\n");
+		return NULL;
+	}
+	INIT_LIST_HEAD(&dma_desc->tx_list);
+	INIT_LIST_HEAD(&dma_desc->cb_node);
+	dma_desc->cb_count = 0;
+	dma_desc->bytes_requested = 0;
+	dma_desc->bytes_transferred = 0;
+	dma_desc->dma_status = DMA_IN_PROGRESS;
+
+	/* Make transfer requests */
+	for_each_sg(sgl, sg, sg_len, i) {
+		u32 len, mem;
+
+		mem = sg_dma_address(sg);
+		len = sg_dma_len(sg);
+
+		if ((len & 3) || (mem & 3) ||
+				(len > tdc->tdma->chip_data->max_dma_count)) {
+			dev_err(tdc2dev(tdc),
+				"Dma length/memory address is not supported\n");
+			tegra_dma_desc_put(tdc, dma_desc);
+			return NULL;
+		}
+
+		sg_req = tegra_dma_sg_req_get(tdc);
+		if (!sg_req) {
+			dev_err(tdc2dev(tdc), "Dma sg-req not available\n");
+			tegra_dma_desc_put(tdc, dma_desc);
+			return NULL;
+		}
+
+		ahb_seq |= get_burst_size(tdc, burst_size, slave_bw, len);
+		dma_desc->bytes_requested += len;
+
+		sg_req->ch_regs.apb_ptr = apb_ptr;
+		sg_req->ch_regs.ahb_ptr = mem;
+		sg_req->ch_regs.csr = csr;
+		tegra_dma_prep_wcount(tdc, &sg_req->ch_regs, len);
+		sg_req->ch_regs.apb_seq = apb_seq;
+		sg_req->ch_regs.ahb_seq = ahb_seq;
+		sg_req->configured = false;
+		sg_req->last_sg = false;
+		sg_req->dma_desc = dma_desc;
+		sg_req->req_len = len;
+
+		list_add_tail(&sg_req->node, &dma_desc->tx_list);
+	}
+	sg_req->last_sg = true;
+	if (flags & DMA_CTRL_ACK)
+		dma_desc->txd.flags = DMA_CTRL_ACK;
+
+	/*
+	 * Make sure this mode does not conflict with the currently
+	 * configured mode.
+	 */
+	if (!tdc->isr_handler) {
+		tdc->isr_handler = handle_once_dma_done;
+		tdc->cyclic = false;
+	} else {
+		if (tdc->cyclic) {
+			dev_err(tdc2dev(tdc), "DMA configured in cyclic mode\n");
+			tegra_dma_desc_put(tdc, dma_desc);
+			return NULL;
+		}
+	}
+
+	return &dma_desc->txd;
+}
+
+static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic(
+	struct dma_chan *dc, dma_addr_t buf_addr, size_t buf_len,
+	size_t period_len, enum dma_transfer_direction direction,
+	unsigned long flags)
+{
+	struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+	struct tegra_dma_desc *dma_desc = NULL;
+	struct tegra_dma_sg_req *sg_req = NULL;
+	unsigned long csr, ahb_seq, apb_ptr, apb_seq;
+	int len;
+	size_t remain_len;
+	dma_addr_t mem = buf_addr;
+	u32 burst_size;
+	enum dma_slave_buswidth slave_bw;
+
+	if (!buf_len || !period_len) {
+		dev_err(tdc2dev(tdc), "Invalid buffer/period len\n");
+		return NULL;
+	}
+
+	if (!tdc->config_init) {
+		dev_err(tdc2dev(tdc), "DMA slave is not configured\n");
+		return NULL;
+	}
+
+	/*
+	 * More requests may be queued as long as the DMA has not been
+	 * started; the driver will loop over all of them. Once the DMA
+	 * is started, new requests can be queued only after terminating
+	 * the DMA.
+	 */
+	if (tdc->busy) {
+		dev_err(tdc2dev(tdc), "Request not allowed when dma running\n");
+		return NULL;
+	}
+
+	/*
+	 * Cyclic transfers are only supported when buf_len is a multiple
+	 * of period_len.
+	 */
+	if (buf_len % period_len) {
+		dev_err(tdc2dev(tdc), "buf_len is not multiple of period_len\n");
+		return NULL;
+	}
+
+	len = period_len;
+	if ((len & 3) || (buf_addr & 3) ||
+			(len > tdc->tdma->chip_data->max_dma_count)) {
+		dev_err(tdc2dev(tdc), "Req len/mem address is not correct\n");
+		return NULL;
+	}
+
+	if (get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr,
+				&burst_size, &slave_bw) < 0)
+		return NULL;
+
+	ahb_seq = TEGRA_APBDMA_AHBSEQ_INTR_ENB;
+	ahb_seq |= TEGRA_APBDMA_AHBSEQ_WRAP_NONE <<
+					TEGRA_APBDMA_AHBSEQ_WRAP_SHIFT;
+	ahb_seq |= TEGRA_APBDMA_AHBSEQ_BUS_WIDTH_32;
+
+	if (tdc->slave_id != TEGRA_APBDMA_SLAVE_ID_INVALID) {
+		csr |= TEGRA_APBDMA_CSR_FLOW;
+		csr |= tdc->slave_id << TEGRA_APBDMA_CSR_REQ_SEL_SHIFT;
+	}
+
+	if (flags & DMA_PREP_INTERRUPT)
+		csr |= TEGRA_APBDMA_CSR_IE_EOC;
+
+	apb_seq |= TEGRA_APBDMA_APBSEQ_WRAP_WORD_1;
+
+	dma_desc = tegra_dma_desc_get(tdc);
+	if (!dma_desc) {
+		dev_err(tdc2dev(tdc), "not enough descriptors available\n");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&dma_desc->tx_list);
+	INIT_LIST_HEAD(&dma_desc->cb_node);
+	dma_desc->cb_count = 0;
+
+	dma_desc->bytes_transferred = 0;
+	dma_desc->bytes_requested = buf_len;
+	remain_len = buf_len;
+
+	/* Split the transfer into period-sized requests */
+	while (remain_len) {
+		sg_req = tegra_dma_sg_req_get(tdc);
+		if (!sg_req) {
+			dev_err(tdc2dev(tdc), "Dma sg-req not available\n");
+			tegra_dma_desc_put(tdc, dma_desc);
+			return NULL;
+		}
+
+		ahb_seq |= get_burst_size(tdc, burst_size, slave_bw, len);
+		sg_req->ch_regs.apb_ptr = apb_ptr;
+		sg_req->ch_regs.ahb_ptr = mem;
+		sg_req->ch_regs.csr = csr;
+		tegra_dma_prep_wcount(tdc, &sg_req->ch_regs, len);
+		sg_req->ch_regs.apb_seq = apb_seq;
+		sg_req->ch_regs.ahb_seq = ahb_seq;
+		sg_req->configured = false;
+		sg_req->last_sg = false;
+		sg_req->dma_desc = dma_desc;
+		sg_req->req_len = len;
+
+		list_add_tail(&sg_req->node, &dma_desc->tx_list);
+		remain_len -= len;
+		mem += len;
+	}
+	sg_req->last_sg = true;
+	if (flags & DMA_CTRL_ACK)
+		dma_desc->txd.flags = DMA_CTRL_ACK;
+
+	/*
+	 * Make sure this mode does not conflict with the currently
+	 * configured mode.
+	 */
+	if (!tdc->isr_handler) {
+		tdc->isr_handler = handle_cont_sngl_cycle_dma_done;
+		tdc->cyclic = true;
+	} else {
+		if (!tdc->cyclic) {
+			dev_err(tdc2dev(tdc), "DMA configuration conflict\n");
+			tegra_dma_desc_put(tdc, dma_desc);
+			return NULL;
+		}
+	}
+
+	return &dma_desc->txd;
+}
+
+static int tegra_dma_alloc_chan_resources(struct dma_chan *dc)
+{
+	struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+	struct tegra_dma *tdma = tdc->tdma;
+	int ret;
+
+	dma_cookie_init(&tdc->dma_chan);
+	tdc->config_init = false;
+
+	ret = pm_runtime_get_sync(tdma->dev);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static void tegra_dma_free_chan_resources(struct dma_chan *dc)
+{
+	struct tegra_dma_channel *tdc = to_tegra_dma_chan(dc);
+	struct tegra_dma *tdma = tdc->tdma;
+	struct tegra_dma_desc *dma_desc;
+	struct tegra_dma_sg_req *sg_req;
+	struct list_head dma_desc_list;
+	struct list_head sg_req_list;
+	unsigned long flags;
+
+	INIT_LIST_HEAD(&dma_desc_list);
+	INIT_LIST_HEAD(&sg_req_list);
+
+	dev_dbg(tdc2dev(tdc), "Freeing channel %d\n", tdc->id);
+
+	if (tdc->busy)
+		tegra_dma_terminate_all(dc);
+
+	spin_lock_irqsave(&tdc->lock, flags);
+	list_splice_init(&tdc->pending_sg_req, &sg_req_list);
+	list_splice_init(&tdc->free_sg_req, &sg_req_list);
+	list_splice_init(&tdc->free_dma_desc, &dma_desc_list);
+	INIT_LIST_HEAD(&tdc->cb_desc);
+	tdc->config_init = false;
+	tdc->isr_handler = NULL;
+	spin_unlock_irqrestore(&tdc->lock, flags);
+
+	while (!list_empty(&dma_desc_list)) {
+		dma_desc = list_first_entry(&dma_desc_list,
+					typeof(*dma_desc), node);
+		list_del(&dma_desc->node);
+		kfree(dma_desc);
+	}
+
+	while (!list_empty(&sg_req_list)) {
+		sg_req = list_first_entry(&sg_req_list, typeof(*sg_req), node);
+		list_del(&sg_req->node);
+		kfree(sg_req);
+	}
+	pm_runtime_put(tdma->dev);
+
+	tdc->slave_id = TEGRA_APBDMA_SLAVE_ID_INVALID;
+}
+
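+/*
+ * OF translation: the single cell of the DMA specifier is the APB DMA
+ * request-select (slave) id programmed into the channel CSR.
+ */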
+static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec,
+					   struct of_dma *ofdma)
+{
+	struct tegra_dma *tdma = ofdma->of_dma_data;
+	struct dma_chan *chan;
+	struct tegra_dma_channel *tdc;
+
+	if (dma_spec->args[0] > TEGRA_APBDMA_CSR_REQ_SEL_MASK) {
+		dev_err(tdma->dev, "Invalid slave id: %d\n", dma_spec->args[0]);
+		return NULL;
+	}
+
+	chan = dma_get_any_slave_channel(&tdma->dma_dev);
+	if (!chan)
+		return NULL;
+
+	tdc = to_tegra_dma_chan(chan);
+	tdc->slave_id = dma_spec->args[0];
+
+	return chan;
+}
+
+/* Tegra20 specific DMA controller information */
+static const struct tegra_dma_chip_data tegra20_dma_chip_data = {
+	.nr_channels		= 16,
+	.channel_reg_size	= 0x20,
+	.max_dma_count		= 1024UL * 64,
+	.support_channel_pause	= false,
+	.support_separate_wcount_reg = false,
+};
+
+/* Tegra30 specific DMA controller information */
+static const struct tegra_dma_chip_data tegra30_dma_chip_data = {
+	.nr_channels		= 32,
+	.channel_reg_size	= 0x20,
+	.max_dma_count		= 1024UL * 64,
+	.support_channel_pause	= false,
+	.support_separate_wcount_reg = false,
+};
+
+/* Tegra114 specific DMA controller information */
+static const struct tegra_dma_chip_data tegra114_dma_chip_data = {
+	.nr_channels		= 32,
+	.channel_reg_size	= 0x20,
+	.max_dma_count		= 1024UL * 64,
+	.support_channel_pause	= true,
+	.support_separate_wcount_reg = false,
+};
+
+/* Tegra148 specific DMA controller information */
+static const struct tegra_dma_chip_data tegra148_dma_chip_data = {
+	.nr_channels		= 32,
+	.channel_reg_size	= 0x40,
+	.max_dma_count		= 1024UL * 64,
+	.support_channel_pause	= true,
+	.support_separate_wcount_reg = true,
+};
+
+static int tegra_dma_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	struct tegra_dma *tdma;
+	int ret;
+	int i;
+	const struct tegra_dma_chip_data *cdata;
+
+	cdata = of_device_get_match_data(&pdev->dev);
+	if (!cdata) {
+		dev_err(&pdev->dev, "Error: No device match data found\n");
+		return -ENODEV;
+	}
+
+	tdma = devm_kzalloc(&pdev->dev, sizeof(*tdma) + cdata->nr_channels *
+			sizeof(struct tegra_dma_channel), GFP_KERNEL);
+	if (!tdma)
+		return -ENOMEM;
+
+	tdma->dev = &pdev->dev;
+	tdma->chip_data = cdata;
+	platform_set_drvdata(pdev, tdma);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tdma->base_addr = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(tdma->base_addr))
+		return PTR_ERR(tdma->base_addr);
+
+	tdma->dma_clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(tdma->dma_clk)) {
+		dev_err(&pdev->dev, "Error: Missing controller clock\n");
+		return PTR_ERR(tdma->dma_clk);
+	}
+
+	tdma->rst = devm_reset_control_get(&pdev->dev, "dma");
+	if (IS_ERR(tdma->rst)) {
+		dev_err(&pdev->dev, "Error: Missing reset\n");
+		return PTR_ERR(tdma->rst);
+	}
+
+	spin_lock_init(&tdma->global_lock);
+
+	pm_runtime_enable(&pdev->dev);
+	if (!pm_runtime_enabled(&pdev->dev))
+		ret = tegra_dma_runtime_resume(&pdev->dev);
+	else
+		ret = pm_runtime_get_sync(&pdev->dev);
+
+	if (ret < 0) {
+		pm_runtime_disable(&pdev->dev);
+		return ret;
+	}
+
+	/* Reset DMA controller */
+	reset_control_assert(tdma->rst);
+	udelay(2);
+	reset_control_deassert(tdma->rst);
+
+	/* Enable global DMA registers */
+	tdma_write(tdma, TEGRA_APBDMA_GENERAL, TEGRA_APBDMA_GENERAL_ENABLE);
+	tdma_write(tdma, TEGRA_APBDMA_CONTROL, 0);
+	tdma_write(tdma, TEGRA_APBDMA_IRQ_MASK_SET, 0xFFFFFFFFul);
+
+	pm_runtime_put(&pdev->dev);
+
+	INIT_LIST_HEAD(&tdma->dma_dev.channels);
+	for (i = 0; i < cdata->nr_channels; i++) {
+		struct tegra_dma_channel *tdc = &tdma->channels[i];
+
+		tdc->chan_addr = tdma->base_addr +
+				 TEGRA_APBDMA_CHANNEL_BASE_ADD_OFFSET +
+				 (i * cdata->channel_reg_size);
+
+		res = platform_get_resource(pdev, IORESOURCE_IRQ, i);
+		if (!res) {
+			ret = -EINVAL;
+			dev_err(&pdev->dev, "No irq resource for chan %d\n", i);
+			goto err_irq;
+		}
+		tdc->irq = res->start;
+		snprintf(tdc->name, sizeof(tdc->name), "apbdma.%d", i);
+		ret = request_irq(tdc->irq, tegra_dma_isr, 0, tdc->name, tdc);
+		if (ret) {
+			dev_err(&pdev->dev,
+				"request_irq failed with err %d channel %d\n",
+				ret, i);
+			goto err_irq;
+		}
+
+		tdc->dma_chan.device = &tdma->dma_dev;
+		dma_cookie_init(&tdc->dma_chan);
+		list_add_tail(&tdc->dma_chan.device_node,
+				&tdma->dma_dev.channels);
+		tdc->tdma = tdma;
+		tdc->id = i;
+		tdc->slave_id = TEGRA_APBDMA_SLAVE_ID_INVALID;
+
+		tasklet_init(&tdc->tasklet, tegra_dma_tasklet,
+				(unsigned long)tdc);
+		spin_lock_init(&tdc->lock);
+
+		INIT_LIST_HEAD(&tdc->pending_sg_req);
+		INIT_LIST_HEAD(&tdc->free_sg_req);
+		INIT_LIST_HEAD(&tdc->free_dma_desc);
+		INIT_LIST_HEAD(&tdc->cb_desc);
+	}
+
+	dma_cap_set(DMA_SLAVE, tdma->dma_dev.cap_mask);
+	dma_cap_set(DMA_PRIVATE, tdma->dma_dev.cap_mask);
+	dma_cap_set(DMA_CYCLIC, tdma->dma_dev.cap_mask);
+
+	tdma->global_pause_count = 0;
+	tdma->dma_dev.dev = &pdev->dev;
+	tdma->dma_dev.device_alloc_chan_resources =
+					tegra_dma_alloc_chan_resources;
+	tdma->dma_dev.device_free_chan_resources =
+					tegra_dma_free_chan_resources;
+	tdma->dma_dev.device_prep_slave_sg = tegra_dma_prep_slave_sg;
+	tdma->dma_dev.device_prep_dma_cyclic = tegra_dma_prep_dma_cyclic;
+	tdma->dma_dev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+		BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+		BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |
+		BIT(DMA_SLAVE_BUSWIDTH_8_BYTES);
+	tdma->dma_dev.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+		BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
+		BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) |
+		BIT(DMA_SLAVE_BUSWIDTH_8_BYTES);
+	tdma->dma_dev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	/*
+	 * XXX The hardware appears to support
+	 * DMA_RESIDUE_GRANULARITY_BURST-level reporting, but it's
+	 * only used by this driver during tegra_dma_terminate_all()
+	 */
+	tdma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+	tdma->dma_dev.device_config = tegra_dma_slave_config;
+	tdma->dma_dev.device_terminate_all = tegra_dma_terminate_all;
+	tdma->dma_dev.device_tx_status = tegra_dma_tx_status;
+	tdma->dma_dev.device_issue_pending = tegra_dma_issue_pending;
+
+	ret = dma_async_device_register(&tdma->dma_dev);
+	if (ret < 0) {
+		dev_err(&pdev->dev,
+			"Tegra20 APB DMA driver registration failed %d\n", ret);
+		goto err_irq;
+	}
+
+	ret = of_dma_controller_register(pdev->dev.of_node,
+					 tegra_dma_of_xlate, tdma);
+	if (ret < 0) {
+		dev_err(&pdev->dev,
+			"Tegra20 APB DMA OF registration failed %d\n", ret);
+		goto err_unregister_dma_dev;
+	}
+
+	dev_info(&pdev->dev, "Tegra20 APB DMA driver registered %d channels\n",
+			cdata->nr_channels);
+	return 0;
+
+err_unregister_dma_dev:
+	dma_async_device_unregister(&tdma->dma_dev);
+err_irq:
+	while (--i >= 0) {
+		struct tegra_dma_channel *tdc = &tdma->channels[i];
+
+		free_irq(tdc->irq, tdc);
+		tasklet_kill(&tdc->tasklet);
+	}
+
+	pm_runtime_disable(&pdev->dev);
+	if (!pm_runtime_status_suspended(&pdev->dev))
+		tegra_dma_runtime_suspend(&pdev->dev);
+	return ret;
+}
+
+static int tegra_dma_remove(struct platform_device *pdev)
+{
+	struct tegra_dma *tdma = platform_get_drvdata(pdev);
+	int i;
+	struct tegra_dma_channel *tdc;
+
+	dma_async_device_unregister(&tdma->dma_dev);
+
+	for (i = 0; i < tdma->chip_data->nr_channels; ++i) {
+		tdc = &tdma->channels[i];
+		free_irq(tdc->irq, tdc);
+		tasklet_kill(&tdc->tasklet);
+	}
+
+	pm_runtime_disable(&pdev->dev);
+	if (!pm_runtime_status_suspended(&pdev->dev))
+		tegra_dma_runtime_suspend(&pdev->dev);
+
+	return 0;
+}
+
+static int tegra_dma_runtime_suspend(struct device *dev)
+{
+	struct tegra_dma *tdma = dev_get_drvdata(dev);
+	int i;
+
+	tdma->reg_gen = tdma_read(tdma, TEGRA_APBDMA_GENERAL);
+	for (i = 0; i < tdma->chip_data->nr_channels; i++) {
+		struct tegra_dma_channel *tdc = &tdma->channels[i];
+		struct tegra_dma_channel_regs *ch_reg = &tdc->channel_reg;
+
+		/* Only save the state of DMA channels that are in use */
+		if (!tdc->config_init)
+			continue;
+
+		ch_reg->csr = tdc_read(tdc, TEGRA_APBDMA_CHAN_CSR);
+		ch_reg->ahb_ptr = tdc_read(tdc, TEGRA_APBDMA_CHAN_AHBPTR);
+		ch_reg->apb_ptr = tdc_read(tdc, TEGRA_APBDMA_CHAN_APBPTR);
+		ch_reg->ahb_seq = tdc_read(tdc, TEGRA_APBDMA_CHAN_AHBSEQ);
+		ch_reg->apb_seq = tdc_read(tdc, TEGRA_APBDMA_CHAN_APBSEQ);
+		if (tdma->chip_data->support_separate_wcount_reg)
+			ch_reg->wcount = tdc_read(tdc,
+						  TEGRA_APBDMA_CHAN_WCOUNT);
+	}
+
+	clk_disable_unprepare(tdma->dma_clk);
+
+	return 0;
+}
+
+static int tegra_dma_runtime_resume(struct device *dev)
+{
+	struct tegra_dma *tdma = dev_get_drvdata(dev);
+	int i, ret;
+
+	ret = clk_prepare_enable(tdma->dma_clk);
+	if (ret < 0) {
+		dev_err(dev, "clk_enable failed: %d\n", ret);
+		return ret;
+	}
+
+	tdma_write(tdma, TEGRA_APBDMA_GENERAL, tdma->reg_gen);
+	tdma_write(tdma, TEGRA_APBDMA_CONTROL, 0);
+	tdma_write(tdma, TEGRA_APBDMA_IRQ_MASK_SET, 0xFFFFFFFFul);
+
+	for (i = 0; i < tdma->chip_data->nr_channels; i++) {
+		struct tegra_dma_channel *tdc = &tdma->channels[i];
+		struct tegra_dma_channel_regs *ch_reg = &tdc->channel_reg;
+
+		/* Only restore the state of DMA channels that are in use */
+		if (!tdc->config_init)
+			continue;
+
+		if (tdma->chip_data->support_separate_wcount_reg)
+			tdc_write(tdc, TEGRA_APBDMA_CHAN_WCOUNT,
+				  ch_reg->wcount);
+		tdc_write(tdc, TEGRA_APBDMA_CHAN_APBSEQ, ch_reg->apb_seq);
+		tdc_write(tdc, TEGRA_APBDMA_CHAN_APBPTR, ch_reg->apb_ptr);
+		tdc_write(tdc, TEGRA_APBDMA_CHAN_AHBSEQ, ch_reg->ahb_seq);
+		tdc_write(tdc, TEGRA_APBDMA_CHAN_AHBPTR, ch_reg->ahb_ptr);
+		tdc_write(tdc, TEGRA_APBDMA_CHAN_CSR,
+			(ch_reg->csr & ~TEGRA_APBDMA_CSR_ENB));
+	}
+
+	return 0;
+}
+
+static const struct dev_pm_ops tegra_dma_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(tegra_dma_runtime_suspend, tegra_dma_runtime_resume,
+			   NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				pm_runtime_force_resume)
+};
+
+static const struct of_device_id tegra_dma_of_match[] = {
+	{
+		.compatible = "nvidia,tegra148-apbdma",
+		.data = &tegra148_dma_chip_data,
+	}, {
+		.compatible = "nvidia,tegra114-apbdma",
+		.data = &tegra114_dma_chip_data,
+	}, {
+		.compatible = "nvidia,tegra30-apbdma",
+		.data = &tegra30_dma_chip_data,
+	}, {
+		.compatible = "nvidia,tegra20-apbdma",
+		.data = &tegra20_dma_chip_data,
+	}, {
+	},
+};
+MODULE_DEVICE_TABLE(of, tegra_dma_of_match);
+
+static struct platform_driver tegra_dmac_driver = {
+	.driver = {
+		.name	= "tegra-apbdma",
+		.pm	= &tegra_dma_dev_pm_ops,
+		.of_match_table = tegra_dma_of_match,
+	},
+	.probe		= tegra_dma_probe,
+	.remove		= tegra_dma_remove,
+};
+
+module_platform_driver(tegra_dmac_driver);
+
+MODULE_ALIAS("platform:tegra20-apbdma");
+MODULE_DESCRIPTION("NVIDIA Tegra APB DMA Controller driver");
+MODULE_AUTHOR("Laxman Dewangan <ldewangan@nvidia.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c
new file mode 100644
index 0000000..b26256f
--- /dev/null
+++ b/drivers/dma/tegra210-adma.c
@@ -0,0 +1,830 @@
+/*
+ * ADMA driver for Nvidia's Tegra210 ADMA controller.
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/clk.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+#include <linux/of_irq.h>
+#include <linux/pm_clock.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+
+#include "virt-dma.h"
+
+#define ADMA_CH_CMD					0x00
+#define ADMA_CH_STATUS					0x0c
+#define ADMA_CH_STATUS_XFER_EN				BIT(0)
+
+#define ADMA_CH_INT_STATUS				0x10
+#define ADMA_CH_INT_STATUS_XFER_DONE			BIT(0)
+
+#define ADMA_CH_INT_CLEAR				0x1c
+#define ADMA_CH_CTRL					0x24
+#define ADMA_CH_CTRL_TX_REQ(val)			(((val) & 0xf) << 28)
+#define ADMA_CH_CTRL_TX_REQ_MAX				10
+#define ADMA_CH_CTRL_RX_REQ(val)			(((val) & 0xf) << 24)
+#define ADMA_CH_CTRL_RX_REQ_MAX				10
+#define ADMA_CH_CTRL_DIR(val)				(((val) & 0xf) << 12)
+#define ADMA_CH_CTRL_DIR_AHUB2MEM			2
+#define ADMA_CH_CTRL_DIR_MEM2AHUB			4
+#define ADMA_CH_CTRL_MODE_CONTINUOUS			(2 << 8)
+#define ADMA_CH_CTRL_FLOWCTRL_EN			BIT(1)
+
+#define ADMA_CH_CONFIG					0x28
+#define ADMA_CH_CONFIG_SRC_BUF(val)			(((val) & 0x7) << 28)
+#define ADMA_CH_CONFIG_TRG_BUF(val)			(((val) & 0x7) << 24)
+#define ADMA_CH_CONFIG_BURST_SIZE(val)			(((val) & 0x7) << 20)
+#define ADMA_CH_CONFIG_BURST_16				5
+#define ADMA_CH_CONFIG_WEIGHT_FOR_WRR(val)		((val) & 0xf)
+#define ADMA_CH_CONFIG_MAX_BUFS				8
+
+#define ADMA_CH_FIFO_CTRL				0x2c
+#define ADMA_CH_FIFO_CTRL_OVRFW_THRES(val)		(((val) & 0xf) << 24)
+#define ADMA_CH_FIFO_CTRL_STARV_THRES(val)		(((val) & 0xf) << 16)
+#define ADMA_CH_FIFO_CTRL_TX_SIZE(val)			(((val) & 0xf) << 8)
+#define ADMA_CH_FIFO_CTRL_RX_SIZE(val)			((val) & 0xf)
+
+#define ADMA_CH_LOWER_SRC_ADDR				0x34
+#define ADMA_CH_LOWER_TRG_ADDR				0x3c
+#define ADMA_CH_TC					0x44
+#define ADMA_CH_TC_COUNT_MASK				0x3ffffffc
+
+#define ADMA_CH_XFER_STATUS				0x54
+#define ADMA_CH_XFER_STATUS_COUNT_MASK			0xffff
+
+#define ADMA_GLOBAL_CMD					0xc00
+#define ADMA_GLOBAL_SOFT_RESET				0xc04
+#define ADMA_GLOBAL_INT_CLEAR				0xc20
+#define ADMA_GLOBAL_CTRL				0xc24
+
+#define ADMA_CH_REG_OFFSET(a)				(a * 0x80)
+
+#define ADMA_CH_FIFO_CTRL_DEFAULT	(ADMA_CH_FIFO_CTRL_OVRFW_THRES(1) | \
+					 ADMA_CH_FIFO_CTRL_STARV_THRES(1) | \
+					 ADMA_CH_FIFO_CTRL_TX_SIZE(3)     | \
+					 ADMA_CH_FIFO_CTRL_RX_SIZE(3))
+struct tegra_adma;
+
+/*
+ * struct tegra_adma_chip_data - Tegra chip specific data
+ * @nr_channels: Number of DMA channels available.
+ */
+struct tegra_adma_chip_data {
+	int nr_channels;
+};
+
+/*
+ * struct tegra_adma_chan_regs - Tegra ADMA channel registers
+ */
+struct tegra_adma_chan_regs {
+	unsigned int ctrl;
+	unsigned int config;
+	unsigned int src_addr;
+	unsigned int trg_addr;
+	unsigned int fifo_ctrl;
+	unsigned int tc;
+};
+
+/*
+ * struct tegra_adma_desc - Tegra ADMA descriptor to manage transfer requests.
+ */
+struct tegra_adma_desc {
+	struct virt_dma_desc		vd;
+	struct tegra_adma_chan_regs	ch_regs;
+	size_t				buf_len;
+	size_t				period_len;
+	size_t				num_periods;
+};
+
+/*
+ * struct tegra_adma_chan - Tegra ADMA channel information
+ */
+struct tegra_adma_chan {
+	struct virt_dma_chan		vc;
+	struct tegra_adma_desc		*desc;
+	struct tegra_adma		*tdma;
+	int				irq;
+	void __iomem			*chan_addr;
+
+	/* Slave channel configuration info */
+	struct dma_slave_config		sconfig;
+	enum dma_transfer_direction	sreq_dir;
+	unsigned int			sreq_index;
+	bool				sreq_reserved;
+
+	/* Transfer count and position info */
+	unsigned int			tx_buf_count;
+	unsigned int			tx_buf_pos;
+};
+
+/*
+ * struct tegra_adma - Tegra ADMA controller information
+ */
+struct tegra_adma {
+	struct dma_device		dma_dev;
+	struct device			*dev;
+	void __iomem			*base_addr;
+	unsigned int			nr_channels;
+	unsigned long			rx_requests_reserved;
+	unsigned long			tx_requests_reserved;
+
+	/* Used to store global command register state when suspending */
+	unsigned int			global_cmd;
+
+	/* Last member of the structure */
+	struct tegra_adma_chan		channels[0];
+};
+
+static inline void tdma_write(struct tegra_adma *tdma, u32 reg, u32 val)
+{
+	writel(val, tdma->base_addr + reg);
+}
+
+static inline u32 tdma_read(struct tegra_adma *tdma, u32 reg)
+{
+	return readl(tdma->base_addr + reg);
+}
+
+static inline void tdma_ch_write(struct tegra_adma_chan *tdc, u32 reg, u32 val)
+{
+	writel(val, tdc->chan_addr + reg);
+}
+
+static inline u32 tdma_ch_read(struct tegra_adma_chan *tdc, u32 reg)
+{
+	return readl(tdc->chan_addr + reg);
+}
+
+static inline struct tegra_adma_chan *to_tegra_adma_chan(struct dma_chan *dc)
+{
+	return container_of(dc, struct tegra_adma_chan, vc.chan);
+}
+
+static inline struct tegra_adma_desc *to_tegra_adma_desc(
+		struct dma_async_tx_descriptor *td)
+{
+	return container_of(td, struct tegra_adma_desc, vd.tx);
+}
+
+static inline struct device *tdc2dev(struct tegra_adma_chan *tdc)
+{
+	return tdc->tdma->dev;
+}
+
+static void tegra_adma_desc_free(struct virt_dma_desc *vd)
+{
+	kfree(container_of(vd, struct tegra_adma_desc, vd));
+}
+
+static int tegra_adma_slave_config(struct dma_chan *dc,
+				   struct dma_slave_config *sconfig)
+{
+	struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
+
+	memcpy(&tdc->sconfig, sconfig, sizeof(*sconfig));
+
+	return 0;
+}
+
+static int tegra_adma_init(struct tegra_adma *tdma)
+{
+	u32 status;
+	int ret;
+
+	/* Clear any interrupts */
+	tdma_write(tdma, ADMA_GLOBAL_INT_CLEAR, 0x1);
+
+	/* Assert soft reset */
+	tdma_write(tdma, ADMA_GLOBAL_SOFT_RESET, 0x1);
+
+	/* Wait for reset to clear */
+	ret = readx_poll_timeout(readl,
+				 tdma->base_addr + ADMA_GLOBAL_SOFT_RESET,
+				 status, status == 0, 20, 10000);
+	if (ret)
+		return ret;
+
+	/* Enable global ADMA registers */
+	tdma_write(tdma, ADMA_GLOBAL_CMD, 1);
+
+	return 0;
+}
+
+static int tegra_adma_request_alloc(struct tegra_adma_chan *tdc,
+				    enum dma_transfer_direction direction)
+{
+	struct tegra_adma *tdma = tdc->tdma;
+	unsigned int sreq_index = tdc->sreq_index;
+
+	if (tdc->sreq_reserved)
+		return tdc->sreq_dir == direction ? 0 : -EINVAL;
+
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		if (sreq_index > ADMA_CH_CTRL_TX_REQ_MAX) {
+			dev_err(tdma->dev, "invalid DMA request\n");
+			return -EINVAL;
+		}
+
+		if (test_and_set_bit(sreq_index, &tdma->tx_requests_reserved)) {
+			dev_err(tdma->dev, "DMA request reserved\n");
+			return -EINVAL;
+		}
+		break;
+
+	case DMA_DEV_TO_MEM:
+		if (sreq_index > ADMA_CH_CTRL_RX_REQ_MAX) {
+			dev_err(tdma->dev, "invalid DMA request\n");
+			return -EINVAL;
+		}
+
+		if (test_and_set_bit(sreq_index, &tdma->rx_requests_reserved)) {
+			dev_err(tdma->dev, "DMA request reserved\n");
+			return -EINVAL;
+		}
+		break;
+
+	default:
+		dev_WARN(tdma->dev, "channel %s has invalid transfer type\n",
+			 dma_chan_name(&tdc->vc.chan));
+		return -EINVAL;
+	}
+
+	tdc->sreq_dir = direction;
+	tdc->sreq_reserved = true;
+
+	return 0;
+}
+
+static void tegra_adma_request_free(struct tegra_adma_chan *tdc)
+{
+	struct tegra_adma *tdma = tdc->tdma;
+
+	if (!tdc->sreq_reserved)
+		return;
+
+	switch (tdc->sreq_dir) {
+	case DMA_MEM_TO_DEV:
+		clear_bit(tdc->sreq_index, &tdma->tx_requests_reserved);
+		break;
+
+	case DMA_DEV_TO_MEM:
+		clear_bit(tdc->sreq_index, &tdma->rx_requests_reserved);
+		break;
+
+	default:
+		dev_WARN(tdma->dev, "channel %s has invalid transfer type\n",
+			 dma_chan_name(&tdc->vc.chan));
+		return;
+	}
+
+	tdc->sreq_reserved = false;
+}
+
+static u32 tegra_adma_irq_status(struct tegra_adma_chan *tdc)
+{
+	u32 status = tdma_ch_read(tdc, ADMA_CH_INT_STATUS);
+
+	return status & ADMA_CH_INT_STATUS_XFER_DONE;
+}
+
+static u32 tegra_adma_irq_clear(struct tegra_adma_chan *tdc)
+{
+	u32 status = tegra_adma_irq_status(tdc);
+
+	if (status)
+		tdma_ch_write(tdc, ADMA_CH_INT_CLEAR, status);
+
+	return status;
+}
+
+static void tegra_adma_stop(struct tegra_adma_chan *tdc)
+{
+	unsigned int status;
+
+	/* Disable ADMA */
+	tdma_ch_write(tdc, ADMA_CH_CMD, 0);
+
+	/* Clear interrupt status */
+	tegra_adma_irq_clear(tdc);
+
+	if (readx_poll_timeout_atomic(readl, tdc->chan_addr + ADMA_CH_STATUS,
+			status, !(status & ADMA_CH_STATUS_XFER_EN),
+			20, 10000)) {
+		dev_err(tdc2dev(tdc), "unable to stop DMA channel\n");
+		return;
+	}
+
+	kfree(tdc->desc);
+	tdc->desc = NULL;
+}
+
+static void tegra_adma_start(struct tegra_adma_chan *tdc)
+{
+	struct virt_dma_desc *vd = vchan_next_desc(&tdc->vc);
+	struct tegra_adma_chan_regs *ch_regs;
+	struct tegra_adma_desc *desc;
+
+	if (!vd)
+		return;
+
+	list_del(&vd->node);
+
+	desc = to_tegra_adma_desc(&vd->tx);
+
+	if (!desc) {
+		dev_warn(tdc2dev(tdc), "unable to start DMA, no descriptor\n");
+		return;
+	}
+
+	ch_regs = &desc->ch_regs;
+
+	tdc->tx_buf_pos = 0;
+	tdc->tx_buf_count = 0;
+	tdma_ch_write(tdc, ADMA_CH_TC, ch_regs->tc);
+	tdma_ch_write(tdc, ADMA_CH_CTRL, ch_regs->ctrl);
+	tdma_ch_write(tdc, ADMA_CH_LOWER_SRC_ADDR, ch_regs->src_addr);
+	tdma_ch_write(tdc, ADMA_CH_LOWER_TRG_ADDR, ch_regs->trg_addr);
+	tdma_ch_write(tdc, ADMA_CH_FIFO_CTRL, ch_regs->fifo_ctrl);
+	tdma_ch_write(tdc, ADMA_CH_CONFIG, ch_regs->config);
+
+	/* Start ADMA */
+	tdma_ch_write(tdc, ADMA_CH_CMD, 1);
+
+	tdc->desc = desc;
+}
+
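+/*
+ * The transfer-status register counts completed buffers (periods) and wraps
+ * at 16 bits; accumulate it into tx_buf_count and turn the position within
+ * the current cycle into a byte residue.
+ */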
+static unsigned int tegra_adma_get_residue(struct tegra_adma_chan *tdc)
+{
+	struct tegra_adma_desc *desc = tdc->desc;
+	unsigned int max = ADMA_CH_XFER_STATUS_COUNT_MASK + 1;
+	unsigned int pos = tdma_ch_read(tdc, ADMA_CH_XFER_STATUS);
+	unsigned int periods_remaining;
+
+	/*
+	 * Handle wrap around of buffer count register
+	 */
+	if (pos < tdc->tx_buf_pos)
+		tdc->tx_buf_count += pos + (max - tdc->tx_buf_pos);
+	else
+		tdc->tx_buf_count += pos - tdc->tx_buf_pos;
+
+	periods_remaining = tdc->tx_buf_count % desc->num_periods;
+	tdc->tx_buf_pos = pos;
+
+	return desc->buf_len - (periods_remaining * desc->period_len);
+}
+
+static irqreturn_t tegra_adma_isr(int irq, void *dev_id)
+{
+	struct tegra_adma_chan *tdc = dev_id;
+	unsigned long status;
+	unsigned long flags;
+
+	spin_lock_irqsave(&tdc->vc.lock, flags);
+
+	status = tegra_adma_irq_clear(tdc);
+	if (status == 0 || !tdc->desc) {
+		spin_unlock_irqrestore(&tdc->vc.lock, flags);
+		return IRQ_NONE;
+	}
+
+	vchan_cyclic_callback(&tdc->desc->vd);
+
+	spin_unlock_irqrestore(&tdc->vc.lock, flags);
+
+	return IRQ_HANDLED;
+}
+
+static void tegra_adma_issue_pending(struct dma_chan *dc)
+{
+	struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
+	unsigned long flags;
+
+	spin_lock_irqsave(&tdc->vc.lock, flags);
+
+	if (vchan_issue_pending(&tdc->vc)) {
+		if (!tdc->desc)
+			tegra_adma_start(tdc);
+	}
+
+	spin_unlock_irqrestore(&tdc->vc.lock, flags);
+}
+
+static int tegra_adma_terminate_all(struct dma_chan *dc)
+{
+	struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&tdc->vc.lock, flags);
+
+	if (tdc->desc)
+		tegra_adma_stop(tdc);
+
+	tegra_adma_request_free(tdc);
+	vchan_get_all_descriptors(&tdc->vc, &head);
+	spin_unlock_irqrestore(&tdc->vc.lock, flags);
+	vchan_dma_desc_free_list(&tdc->vc, &head);
+
+	return 0;
+}
+
+static enum dma_status tegra_adma_tx_status(struct dma_chan *dc,
+					    dma_cookie_t cookie,
+					    struct dma_tx_state *txstate)
+{
+	struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
+	struct tegra_adma_desc *desc;
+	struct virt_dma_desc *vd;
+	enum dma_status ret;
+	unsigned long flags;
+	unsigned int residual;
+
+	ret = dma_cookie_status(dc, cookie, txstate);
+	if (ret == DMA_COMPLETE || !txstate)
+		return ret;
+
+	spin_lock_irqsave(&tdc->vc.lock, flags);
+
+	vd = vchan_find_desc(&tdc->vc, cookie);
+	if (vd) {
+		desc = to_tegra_adma_desc(&vd->tx);
+		residual = desc->ch_regs.tc;
+	} else if (tdc->desc && tdc->desc->vd.tx.cookie == cookie) {
+		residual = tegra_adma_get_residue(tdc);
+	} else {
+		residual = 0;
+	}
+
+	spin_unlock_irqrestore(&tdc->vc.lock, flags);
+
+	dma_set_residue(txstate, residual);
+
+	return ret;
+}
+
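+/*
+ * Fill in the channel register values (direction, request select, burst
+ * size, FIFO setup and transfer count) for a cyclic transfer and reserve
+ * the matching TX/RX request.
+ */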
+static int tegra_adma_set_xfer_params(struct tegra_adma_chan *tdc,
+				      struct tegra_adma_desc *desc,
+				      dma_addr_t buf_addr,
+				      enum dma_transfer_direction direction)
+{
+	struct tegra_adma_chan_regs *ch_regs = &desc->ch_regs;
+	unsigned int burst_size, adma_dir;
+
+	if (desc->num_periods > ADMA_CH_CONFIG_MAX_BUFS)
+		return -EINVAL;
+
+	switch (direction) {
+	case DMA_MEM_TO_DEV:
+		adma_dir = ADMA_CH_CTRL_DIR_MEM2AHUB;
+		burst_size = fls(tdc->sconfig.dst_maxburst);
+		ch_regs->config = ADMA_CH_CONFIG_SRC_BUF(desc->num_periods - 1);
+		ch_regs->ctrl = ADMA_CH_CTRL_TX_REQ(tdc->sreq_index);
+		ch_regs->src_addr = buf_addr;
+		break;
+
+	case DMA_DEV_TO_MEM:
+		adma_dir = ADMA_CH_CTRL_DIR_AHUB2MEM;
+		burst_size = fls(tdc->sconfig.src_maxburst);
+		ch_regs->config = ADMA_CH_CONFIG_TRG_BUF(desc->num_periods - 1);
+		ch_regs->ctrl = ADMA_CH_CTRL_RX_REQ(tdc->sreq_index);
+		ch_regs->trg_addr = buf_addr;
+		break;
+
+	default:
+		dev_err(tdc2dev(tdc), "DMA direction is not supported\n");
+		return -EINVAL;
+	}
+
+	if (!burst_size || burst_size > ADMA_CH_CONFIG_BURST_16)
+		burst_size = ADMA_CH_CONFIG_BURST_16;
+
+	ch_regs->ctrl |= ADMA_CH_CTRL_DIR(adma_dir) |
+			 ADMA_CH_CTRL_MODE_CONTINUOUS |
+			 ADMA_CH_CTRL_FLOWCTRL_EN;
+	ch_regs->config |= ADMA_CH_CONFIG_BURST_SIZE(burst_size);
+	ch_regs->config |= ADMA_CH_CONFIG_WEIGHT_FOR_WRR(1);
+	ch_regs->fifo_ctrl = ADMA_CH_FIFO_CTRL_DEFAULT;
+	ch_regs->tc = desc->period_len & ADMA_CH_TC_COUNT_MASK;
+
+	return tegra_adma_request_alloc(tdc, direction);
+}
+
+static struct dma_async_tx_descriptor *tegra_adma_prep_dma_cyclic(
+	struct dma_chan *dc, dma_addr_t buf_addr, size_t buf_len,
+	size_t period_len, enum dma_transfer_direction direction,
+	unsigned long flags)
+{
+	struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
+	struct tegra_adma_desc *desc = NULL;
+
+	if (!buf_len || !period_len || period_len > ADMA_CH_TC_COUNT_MASK) {
+		dev_err(tdc2dev(tdc), "invalid buffer/period len\n");
+		return NULL;
+	}
+
+	if (buf_len % period_len) {
+		dev_err(tdc2dev(tdc), "buf_len not a multiple of period_len\n");
+		return NULL;
+	}
+
+	if (!IS_ALIGNED(buf_addr, 4)) {
+		dev_err(tdc2dev(tdc), "invalid buffer alignment\n");
+		return NULL;
+	}
+
+	desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+	if (!desc)
+		return NULL;
+
+	desc->buf_len = buf_len;
+	desc->period_len = period_len;
+	desc->num_periods = buf_len / period_len;
+
+	if (tegra_adma_set_xfer_params(tdc, desc, buf_addr, direction)) {
+		kfree(desc);
+		return NULL;
+	}
+
+	return vchan_tx_prep(&tdc->vc, &desc->vd, flags);
+}
+
+static int tegra_adma_alloc_chan_resources(struct dma_chan *dc)
+{
+	struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
+	int ret;
+
+	ret = request_irq(tdc->irq, tegra_adma_isr, 0, dma_chan_name(dc), tdc);
+	if (ret) {
+		dev_err(tdc2dev(tdc), "failed to get interrupt for %s\n",
+			dma_chan_name(dc));
+		return ret;
+	}
+
+	ret = pm_runtime_get_sync(tdc2dev(tdc));
+	if (ret < 0) {
+		free_irq(tdc->irq, tdc);
+		return ret;
+	}
+
+	dma_cookie_init(&tdc->vc.chan);
+
+	return 0;
+}
+
+static void tegra_adma_free_chan_resources(struct dma_chan *dc)
+{
+	struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
+
+	tegra_adma_terminate_all(dc);
+	vchan_free_chan_resources(&tdc->vc);
+	tasklet_kill(&tdc->vc.task);
+	free_irq(tdc->irq, tdc);
+	pm_runtime_put(tdc2dev(tdc));
+
+	tdc->sreq_index = 0;
+	tdc->sreq_dir = DMA_TRANS_NONE;
+}
+
+static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec,
+					   struct of_dma *ofdma)
+{
+	struct tegra_adma *tdma = ofdma->of_dma_data;
+	struct tegra_adma_chan *tdc;
+	struct dma_chan *chan;
+	unsigned int sreq_index;
+
+	if (dma_spec->args_count != 1)
+		return NULL;
+
+	sreq_index = dma_spec->args[0];
+
+	if (sreq_index == 0) {
+		dev_err(tdma->dev, "DMA request must not be 0\n");
+		return NULL;
+	}
+
+	chan = dma_get_any_slave_channel(&tdma->dma_dev);
+	if (!chan)
+		return NULL;
+
+	tdc = to_tegra_adma_chan(chan);
+	tdc->sreq_index = sreq_index;
+
+	return chan;
+}
+
+static int tegra_adma_runtime_suspend(struct device *dev)
+{
+	struct tegra_adma *tdma = dev_get_drvdata(dev);
+
+	tdma->global_cmd = tdma_read(tdma, ADMA_GLOBAL_CMD);
+
+	return pm_clk_suspend(dev);
+}
+
+static int tegra_adma_runtime_resume(struct device *dev)
+{
+	struct tegra_adma *tdma = dev_get_drvdata(dev);
+	int ret;
+
+	ret = pm_clk_resume(dev);
+	if (ret)
+		return ret;
+
+	tdma_write(tdma, ADMA_GLOBAL_CMD, tdma->global_cmd);
+
+	return 0;
+}
+
+static const struct tegra_adma_chip_data tegra210_chip_data = {
+	.nr_channels = 22,
+};
+
+static const struct of_device_id tegra_adma_of_match[] = {
+	{ .compatible = "nvidia,tegra210-adma", .data = &tegra210_chip_data },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, tegra_adma_of_match);
+
+static int tegra_adma_probe(struct platform_device *pdev)
+{
+	const struct tegra_adma_chip_data *cdata;
+	struct tegra_adma *tdma;
+	struct resource	*res;
+	int ret, i;
+
+	cdata = of_device_get_match_data(&pdev->dev);
+	if (!cdata) {
+		dev_err(&pdev->dev, "device match data not found\n");
+		return -ENODEV;
+	}
+
+	tdma = devm_kzalloc(&pdev->dev, sizeof(*tdma) + cdata->nr_channels *
+			    sizeof(struct tegra_adma_chan), GFP_KERNEL);
+	if (!tdma)
+		return -ENOMEM;
+
+	tdma->dev = &pdev->dev;
+	tdma->nr_channels = cdata->nr_channels;
+	platform_set_drvdata(pdev, tdma);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tdma->base_addr = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(tdma->base_addr))
+		return PTR_ERR(tdma->base_addr);
+
+	ret = pm_clk_create(&pdev->dev);
+	if (ret)
+		return ret;
+
+	ret = of_pm_clk_add_clk(&pdev->dev, "d_audio");
+	if (ret)
+		goto clk_destroy;
+
+	pm_runtime_enable(&pdev->dev);
+
+	ret = pm_runtime_get_sync(&pdev->dev);
+	if (ret < 0)
+		goto rpm_disable;
+
+	ret = tegra_adma_init(tdma);
+	if (ret)
+		goto rpm_put;
+
+	INIT_LIST_HEAD(&tdma->dma_dev.channels);
+	for (i = 0; i < tdma->nr_channels; i++) {
+		struct tegra_adma_chan *tdc = &tdma->channels[i];
+
+		tdc->chan_addr = tdma->base_addr + ADMA_CH_REG_OFFSET(i);
+
+		tdc->irq = of_irq_get(pdev->dev.of_node, i);
+		if (tdc->irq <= 0) {
+			ret = tdc->irq ?: -ENXIO;
+			goto irq_dispose;
+		}
+
+		vchan_init(&tdc->vc, &tdma->dma_dev);
+		tdc->vc.desc_free = tegra_adma_desc_free;
+		tdc->tdma = tdma;
+	}
+
+	dma_cap_set(DMA_SLAVE, tdma->dma_dev.cap_mask);
+	dma_cap_set(DMA_PRIVATE, tdma->dma_dev.cap_mask);
+	dma_cap_set(DMA_CYCLIC, tdma->dma_dev.cap_mask);
+
+	tdma->dma_dev.dev = &pdev->dev;
+	tdma->dma_dev.device_alloc_chan_resources =
+					tegra_adma_alloc_chan_resources;
+	tdma->dma_dev.device_free_chan_resources =
+					tegra_adma_free_chan_resources;
+	tdma->dma_dev.device_issue_pending = tegra_adma_issue_pending;
+	tdma->dma_dev.device_prep_dma_cyclic = tegra_adma_prep_dma_cyclic;
+	tdma->dma_dev.device_config = tegra_adma_slave_config;
+	tdma->dma_dev.device_tx_status = tegra_adma_tx_status;
+	tdma->dma_dev.device_terminate_all = tegra_adma_terminate_all;
+	tdma->dma_dev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	tdma->dma_dev.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+	tdma->dma_dev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	tdma->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+
+	ret = dma_async_device_register(&tdma->dma_dev);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "ADMA registration failed: %d\n", ret);
+		goto irq_dispose;
+	}
+
+	ret = of_dma_controller_register(pdev->dev.of_node,
+					 tegra_dma_of_xlate, tdma);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "ADMA OF registration failed %d\n", ret);
+		goto dma_remove;
+	}
+
+	pm_runtime_put(&pdev->dev);
+
+	dev_info(&pdev->dev, "Tegra210 ADMA driver registered %d channels\n",
+		 tdma->nr_channels);
+
+	return 0;
+
+dma_remove:
+	dma_async_device_unregister(&tdma->dma_dev);
+irq_dispose:
+	while (--i >= 0)
+		irq_dispose_mapping(tdma->channels[i].irq);
+rpm_put:
+	pm_runtime_put_sync(&pdev->dev);
+rpm_disable:
+	pm_runtime_disable(&pdev->dev);
+clk_destroy:
+	pm_clk_destroy(&pdev->dev);
+
+	return ret;
+}
+
+static int tegra_adma_remove(struct platform_device *pdev)
+{
+	struct tegra_adma *tdma = platform_get_drvdata(pdev);
+	int i;
+
+	dma_async_device_unregister(&tdma->dma_dev);
+
+	for (i = 0; i < tdma->nr_channels; ++i)
+		irq_dispose_mapping(tdma->channels[i].irq);
+
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+	pm_clk_destroy(&pdev->dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int tegra_adma_pm_suspend(struct device *dev)
+{
+	return pm_runtime_suspended(dev) == false;
+}
+#endif
+
+static const struct dev_pm_ops tegra_adma_dev_pm_ops = {
+	SET_RUNTIME_PM_OPS(tegra_adma_runtime_suspend,
+			   tegra_adma_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(tegra_adma_pm_suspend, NULL)
+};
+
+static struct platform_driver tegra_admac_driver = {
+	.driver = {
+		.name	= "tegra-adma",
+		.pm	= &tegra_adma_dev_pm_ops,
+		.of_match_table = tegra_adma_of_match,
+	},
+	.probe		= tegra_adma_probe,
+	.remove		= tegra_adma_remove,
+};
+
+module_platform_driver(tegra_admac_driver);
+
+MODULE_ALIAS("platform:tegra210-adma");
+MODULE_DESCRIPTION("NVIDIA Tegra ADMA driver");
+MODULE_AUTHOR("Dara Ramesh <dramesh@nvidia.com>");
+MODULE_AUTHOR("Jon Hunter <jonathanh@nvidia.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/ti/Kconfig b/drivers/dma/ti/Kconfig
new file mode 100644
index 0000000..e5e74e1
--- /dev/null
+++ b/drivers/dma/ti/Kconfig
@@ -0,0 +1,37 @@
+#
+# Texas Instruments DMA drivers
+#
+
+config TI_CPPI41
+	tristate "Texas Instruments CPPI 4.1 DMA support"
+	depends on (ARCH_OMAP || ARCH_DAVINCI_DA8XX)
+	select DMA_ENGINE
+	help
+	  The Communications Port Programming Interface (CPPI) 4.1 DMA engine
+	  is currently used by the USB driver on AM335x and DA8xx platforms.
+
+config TI_EDMA
+	tristate "Texas Instruments EDMA support"
+	depends on ARCH_DAVINCI || ARCH_OMAP || ARCH_KEYSTONE || COMPILE_TEST
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	select TI_DMA_CROSSBAR if (ARCH_OMAP || COMPILE_TEST)
+	default y
+	help
+	  Enable support for the TI EDMA (Enhanced DMA) controller. This DMA
+	  engine is found on TI DaVinci, AM33xx, AM43xx, DRA7xx and Keystone 2
+	  parts.
+
+config DMA_OMAP
+	tristate "Texas Instruments sDMA (omap-dma) support"
+	depends on ARCH_OMAP || COMPILE_TEST
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	select TI_DMA_CROSSBAR if (SOC_DRA7XX || COMPILE_TEST)
+	default y
+	help
+	  Enable support for the TI sDMA (System DMA or DMA4) controller. This
+	  DMA engine is found on OMAP and DRA7xx parts.
+
+config TI_DMA_CROSSBAR
+	bool
diff --git a/drivers/dma/ti/Makefile b/drivers/dma/ti/Makefile
new file mode 100644
index 0000000..113e59e
--- /dev/null
+++ b/drivers/dma/ti/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_TI_CPPI41) += cppi41.o
+obj-$(CONFIG_TI_EDMA) += edma.o
+obj-$(CONFIG_DMA_OMAP) += omap-dma.o
+obj-$(CONFIG_TI_DMA_CROSSBAR) += dma-crossbar.o
diff --git a/drivers/dma/ti/cppi41.c b/drivers/dma/ti/cppi41.c
new file mode 100644
index 0000000..e507ec3
--- /dev/null
+++ b/drivers/dma/ti/cppi41.c
@@ -0,0 +1,1237 @@
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/of_dma.h>
+#include <linux/of_irq.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/of_address.h>
+#include <linux/pm_runtime.h>
+#include "../dmaengine.h"
+
+#define DESC_TYPE	27
+#define DESC_TYPE_HOST	0x10
+#define DESC_TYPE_TEARD	0x13
+
+#define TD_DESC_IS_RX	(1 << 16)
+#define TD_DESC_DMA_NUM	10
+
+#define DESC_LENGTH_BITS_NUM	21
+
+#define DESC_TYPE_USB	(5 << 26)
+#define DESC_PD_COMPLETE	(1 << 31)
+
+/* DMA engine */
+#define DMA_TDFDQ	4
+#define DMA_TXGCR(x)	(0x800 + (x) * 0x20)
+#define DMA_RXGCR(x)	(0x808 + (x) * 0x20)
+#define RXHPCRA0		4
+
+#define GCR_CHAN_ENABLE		(1 << 31)
+#define GCR_TEARDOWN		(1 << 30)
+#define GCR_STARV_RETRY		(1 << 24)
+#define GCR_DESC_TYPE_HOST	(1 << 14)
+
+/* DMA scheduler */
+#define DMA_SCHED_CTRL		0
+#define DMA_SCHED_CTRL_EN	(1 << 31)
+#define DMA_SCHED_WORD(x)	((x) * 4 + 0x800)
+
+#define SCHED_ENTRY0_CHAN(x)	((x) << 0)
+#define SCHED_ENTRY0_IS_RX	(1 << 7)
+
+#define SCHED_ENTRY1_CHAN(x)	((x) << 8)
+#define SCHED_ENTRY1_IS_RX	(1 << 15)
+
+#define SCHED_ENTRY2_CHAN(x)	((x) << 16)
+#define SCHED_ENTRY2_IS_RX	(1 << 23)
+
+#define SCHED_ENTRY3_CHAN(x)	((x) << 24)
+#define SCHED_ENTRY3_IS_RX	(1 << 31)
+
+/* Queue manager */
+/* 4 KiB of memory for descriptors, 2 for each endpoint */
+#define ALLOC_DECS_NUM		128
+#define DESCS_AREAS		1
+#define TOTAL_DESCS_NUM		(ALLOC_DECS_NUM * DESCS_AREAS)
+#define QMGR_SCRATCH_SIZE	(TOTAL_DESCS_NUM * 4)
+
+#define QMGR_LRAM0_BASE		0x80
+#define QMGR_LRAM_SIZE		0x84
+#define QMGR_LRAM1_BASE		0x88
+#define QMGR_MEMBASE(x)		(0x1000 + (x) * 0x10)
+#define QMGR_MEMCTRL(x)		(0x1004 + (x) * 0x10)
+#define QMGR_MEMCTRL_IDX_SH	16
+#define QMGR_MEMCTRL_DESC_SH	8
+
+#define QMGR_PEND(x)	(0x90 + (x) * 4)
+
+#define QMGR_PENDING_SLOT_Q(x)	(x / 32)
+#define QMGR_PENDING_BIT_Q(x)	(x % 32)
+
+#define QMGR_QUEUE_A(n)	(0x2000 + (n) * 0x10)
+#define QMGR_QUEUE_B(n)	(0x2004 + (n) * 0x10)
+#define QMGR_QUEUE_C(n)	(0x2008 + (n) * 0x10)
+#define QMGR_QUEUE_D(n)	(0x200c + (n) * 0x10)
+
+/* Packet Descriptor */
+#define PD2_ZERO_LENGTH		(1 << 19)
+
+struct cppi41_channel {
+	struct dma_chan chan;
+	struct dma_async_tx_descriptor txd;
+	struct cppi41_dd *cdd;
+	struct cppi41_desc *desc;
+	dma_addr_t desc_phys;
+	void __iomem *gcr_reg;
+	int is_tx;
+	u32 residue;
+
+	unsigned int q_num;
+	unsigned int q_comp_num;
+	unsigned int port_num;
+
+	unsigned td_retry;
+	unsigned td_queued:1;
+	unsigned td_seen:1;
+	unsigned td_desc_seen:1;
+
+	struct list_head node;		/* Node for pending list */
+};
+
+struct cppi41_desc {
+	u32 pd0;
+	u32 pd1;
+	u32 pd2;
+	u32 pd3;
+	u32 pd4;
+	u32 pd5;
+	u32 pd6;
+	u32 pd7;
+} __aligned(32);
+
+struct chan_queues {
+	u16 submit;
+	u16 complete;
+};
+
+struct cppi41_dd {
+	struct dma_device ddev;
+
+	void *qmgr_scratch;
+	dma_addr_t scratch_phys;
+
+	struct cppi41_desc *cd;
+	dma_addr_t descs_phys;
+	u32 first_td_desc;
+	struct cppi41_channel *chan_busy[ALLOC_DECS_NUM];
+
+	void __iomem *ctrl_mem;
+	void __iomem *sched_mem;
+	void __iomem *qmgr_mem;
+	unsigned int irq;
+	const struct chan_queues *queues_rx;
+	const struct chan_queues *queues_tx;
+	struct chan_queues td_queue;
+	u16 first_completion_queue;
+	u16 qmgr_num_pend;
+	u32 n_chans;
+	u8 platform;
+
+	struct list_head pending;	/* Pending queued transfers */
+	spinlock_t lock;		/* Lock for pending list */
+
+	/* context for suspend/resume */
+	unsigned int dma_tdfdq;
+
+	bool is_suspended;
+};
+
+static struct chan_queues am335x_usb_queues_tx[] = {
+	/* USB0 ENDP 1 */
+	[ 0] = { .submit = 32, .complete =  93},
+	[ 1] = { .submit = 34, .complete =  94},
+	[ 2] = { .submit = 36, .complete =  95},
+	[ 3] = { .submit = 38, .complete =  96},
+	[ 4] = { .submit = 40, .complete =  97},
+	[ 5] = { .submit = 42, .complete =  98},
+	[ 6] = { .submit = 44, .complete =  99},
+	[ 7] = { .submit = 46, .complete = 100},
+	[ 8] = { .submit = 48, .complete = 101},
+	[ 9] = { .submit = 50, .complete = 102},
+	[10] = { .submit = 52, .complete = 103},
+	[11] = { .submit = 54, .complete = 104},
+	[12] = { .submit = 56, .complete = 105},
+	[13] = { .submit = 58, .complete = 106},
+	[14] = { .submit = 60, .complete = 107},
+
+	/* USB1 ENDP1 */
+	[15] = { .submit = 62, .complete = 125},
+	[16] = { .submit = 64, .complete = 126},
+	[17] = { .submit = 66, .complete = 127},
+	[18] = { .submit = 68, .complete = 128},
+	[19] = { .submit = 70, .complete = 129},
+	[20] = { .submit = 72, .complete = 130},
+	[21] = { .submit = 74, .complete = 131},
+	[22] = { .submit = 76, .complete = 132},
+	[23] = { .submit = 78, .complete = 133},
+	[24] = { .submit = 80, .complete = 134},
+	[25] = { .submit = 82, .complete = 135},
+	[26] = { .submit = 84, .complete = 136},
+	[27] = { .submit = 86, .complete = 137},
+	[28] = { .submit = 88, .complete = 138},
+	[29] = { .submit = 90, .complete = 139},
+};
+
+static const struct chan_queues am335x_usb_queues_rx[] = {
+	/* USB0 ENDP 1 */
+	[ 0] = { .submit =  1, .complete = 109},
+	[ 1] = { .submit =  2, .complete = 110},
+	[ 2] = { .submit =  3, .complete = 111},
+	[ 3] = { .submit =  4, .complete = 112},
+	[ 4] = { .submit =  5, .complete = 113},
+	[ 5] = { .submit =  6, .complete = 114},
+	[ 6] = { .submit =  7, .complete = 115},
+	[ 7] = { .submit =  8, .complete = 116},
+	[ 8] = { .submit =  9, .complete = 117},
+	[ 9] = { .submit = 10, .complete = 118},
+	[10] = { .submit = 11, .complete = 119},
+	[11] = { .submit = 12, .complete = 120},
+	[12] = { .submit = 13, .complete = 121},
+	[13] = { .submit = 14, .complete = 122},
+	[14] = { .submit = 15, .complete = 123},
+
+	/* USB1 ENDP 1 */
+	[15] = { .submit = 16, .complete = 141},
+	[16] = { .submit = 17, .complete = 142},
+	[17] = { .submit = 18, .complete = 143},
+	[18] = { .submit = 19, .complete = 144},
+	[19] = { .submit = 20, .complete = 145},
+	[20] = { .submit = 21, .complete = 146},
+	[21] = { .submit = 22, .complete = 147},
+	[22] = { .submit = 23, .complete = 148},
+	[23] = { .submit = 24, .complete = 149},
+	[24] = { .submit = 25, .complete = 150},
+	[25] = { .submit = 26, .complete = 151},
+	[26] = { .submit = 27, .complete = 152},
+	[27] = { .submit = 28, .complete = 153},
+	[28] = { .submit = 29, .complete = 154},
+	[29] = { .submit = 30, .complete = 155},
+};
+
+static const struct chan_queues da8xx_usb_queues_tx[] = {
+	[0] = { .submit =  16, .complete = 24},
+	[1] = { .submit =  18, .complete = 24},
+	[2] = { .submit =  20, .complete = 24},
+	[3] = { .submit =  22, .complete = 24},
+};
+
+static const struct chan_queues da8xx_usb_queues_rx[] = {
+	[0] = { .submit =  1, .complete = 26},
+	[1] = { .submit =  3, .complete = 26},
+	[2] = { .submit =  5, .complete = 26},
+	[3] = { .submit =  7, .complete = 26},
+};
+
+struct cppi_glue_infos {
+	const struct chan_queues *queues_rx;
+	const struct chan_queues *queues_tx;
+	struct chan_queues td_queue;
+	u16 first_completion_queue;
+	u16 qmgr_num_pend;
+};
+
+static struct cppi41_channel *to_cpp41_chan(struct dma_chan *c)
+{
+	return container_of(c, struct cppi41_channel, chan);
+}
+
+static struct cppi41_channel *desc_to_chan(struct cppi41_dd *cdd, u32 desc)
+{
+	struct cppi41_channel *c;
+	u32 descs_size;
+	u32 desc_num;
+
+	descs_size = sizeof(struct cppi41_desc) * ALLOC_DECS_NUM;
+
+	if (!((desc >= cdd->descs_phys) &&
+			(desc < (cdd->descs_phys + descs_size)))) {
+		return NULL;
+	}
+
+	desc_num = (desc - cdd->descs_phys) / sizeof(struct cppi41_desc);
+	BUG_ON(desc_num >= ALLOC_DECS_NUM);
+	c = cdd->chan_busy[desc_num];
+	cdd->chan_busy[desc_num] = NULL;
+
+	/* Usecount for chan_busy[], paired with push_desc_queue() */
+	pm_runtime_put(cdd->ddev.dev);
+
+	return c;
+}
+
+static void cppi_writel(u32 val, void *__iomem *mem)
+{
+	__raw_writel(val, mem);
+}
+
+static u32 cppi_readl(void *__iomem *mem)
+{
+	return __raw_readl(mem);
+}
+
+static u32 pd_trans_len(u32 val)
+{
+	return val & ((1 << (DESC_LENGTH_BITS_NUM + 1)) - 1);
+}
+
+static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num)
+{
+	u32 desc;
+
+	desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(queue_num));
+	desc &= ~0x1f;
+	return desc;
+}
+
+static irqreturn_t cppi41_irq(int irq, void *data)
+{
+	struct cppi41_dd *cdd = data;
+	u16 first_completion_queue = cdd->first_completion_queue;
+	u16 qmgr_num_pend = cdd->qmgr_num_pend;
+	struct cppi41_channel *c;
+	int i;
+
+	for (i = QMGR_PENDING_SLOT_Q(first_completion_queue); i < qmgr_num_pend;
+			i++) {
+		u32 val;
+		u32 q_num;
+
+		val = cppi_readl(cdd->qmgr_mem + QMGR_PEND(i));
+		if (i == QMGR_PENDING_SLOT_Q(first_completion_queue) && val) {
+			u32 mask;
+			/* set the bit corresponding to completion queue 93 */
+			mask = 1 << QMGR_PENDING_BIT_Q(first_completion_queue);
+			/* now all bits below queue 93 are set */
+			mask--;
+			/* invert so that only queue 93 and above stay set */
+			val &= ~mask;
+			/* (a standalone sketch of this follows the function) */
+		}
+
+		if (val)
+			__iormb();
+
+		while (val) {
+			u32 desc, len;
+
+			/*
+			 * This should never trigger, see the comments in
+			 * push_desc_queue()
+			 */
+			WARN_ON(cdd->is_suspended);
+
+			q_num = __fls(val);
+			val &= ~(1 << q_num);
+			q_num += 32 * i;
+			desc = cppi41_pop_desc(cdd, q_num);
+			c = desc_to_chan(cdd, desc);
+			if (WARN_ON(!c)) {
+				pr_err("%s() q %d desc %08x\n", __func__,
+						q_num, desc);
+				continue;
+			}
+
+			if (c->desc->pd2 & PD2_ZERO_LENGTH)
+				len = 0;
+			else
+				len = pd_trans_len(c->desc->pd0);
+
+			c->residue = pd_trans_len(c->desc->pd6) - len;
+			dma_cookie_complete(&c->txd);
+			dmaengine_desc_get_callback_invoke(&c->txd, NULL);
+		}
+	}
+	return IRQ_HANDLED;
+}
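
The masking above is easier to follow with concrete numbers. The following
standalone, hosted-C sketch (not driver code; only the two macro names are
taken from this file) reproduces the slot/bit arithmetic for the AM335x case,
where the first completion queue is 93:

	#include <stdio.h>

	#define QMGR_PENDING_SLOT_Q(x)	((x) / 32)
	#define QMGR_PENDING_BIT_Q(x)	((x) % 32)

	int main(void)
	{
		unsigned int first_completion_queue = 93;	/* AM335x */
		unsigned int val = 0xffffffffu;	/* pretend all 32 queues of the slot are pending */
		unsigned int mask;

		/* queue 93 lives in pend register 2, bit 29 */
		printf("slot %u bit %u\n",
		       QMGR_PENDING_SLOT_Q(first_completion_queue),
		       QMGR_PENDING_BIT_Q(first_completion_queue));

		mask = 1u << QMGR_PENDING_BIT_Q(first_completion_queue);
		mask--;			/* all bits below queue 93 */
		val &= ~mask;		/* only queues 93..95 remain: 0xe0000000 */
		printf("val 0x%08x\n", val);
		return 0;
	}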
+
+static dma_cookie_t cppi41_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	dma_cookie_t cookie;
+
+	cookie = dma_cookie_assign(tx);
+
+	return cookie;
+}
+
+static int cppi41_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct cppi41_channel *c = to_cpp41_chan(chan);
+	struct cppi41_dd *cdd = c->cdd;
+	int error;
+
+	error = pm_runtime_get_sync(cdd->ddev.dev);
+	if (error < 0) {
+		dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n",
+			__func__, error);
+		pm_runtime_put_noidle(cdd->ddev.dev);
+
+		return error;
+	}
+
+	dma_cookie_init(chan);
+	dma_async_tx_descriptor_init(&c->txd, chan);
+	c->txd.tx_submit = cppi41_tx_submit;
+
+	if (!c->is_tx)
+		cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);
+
+	pm_runtime_mark_last_busy(cdd->ddev.dev);
+	pm_runtime_put_autosuspend(cdd->ddev.dev);
+
+	return 0;
+}
+
+static void cppi41_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct cppi41_channel *c = to_cpp41_chan(chan);
+	struct cppi41_dd *cdd = c->cdd;
+	int error;
+
+	error = pm_runtime_get_sync(cdd->ddev.dev);
+	if (error < 0) {
+		pm_runtime_put_noidle(cdd->ddev.dev);
+
+		return;
+	}
+
+	WARN_ON(!list_empty(&cdd->pending));
+
+	pm_runtime_mark_last_busy(cdd->ddev.dev);
+	pm_runtime_put_autosuspend(cdd->ddev.dev);
+}
+
+static enum dma_status cppi41_dma_tx_status(struct dma_chan *chan,
+	dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct cppi41_channel *c = to_cpp41_chan(chan);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+
+	dma_set_residue(txstate, c->residue);
+
+	return ret;
+}
+
+static void push_desc_queue(struct cppi41_channel *c)
+{
+	struct cppi41_dd *cdd = c->cdd;
+	u32 desc_num;
+	u32 desc_phys;
+	u32 reg;
+
+	c->residue = 0;
+
+	reg = GCR_CHAN_ENABLE;
+	if (!c->is_tx) {
+		reg |= GCR_STARV_RETRY;
+		reg |= GCR_DESC_TYPE_HOST;
+		reg |= c->q_comp_num;
+	}
+
+	cppi_writel(reg, c->gcr_reg);
+
+	/*
+	 * We use __raw_writel() rather than writel(), so we have to make sure
+	 * that the DMA descriptor in coherent memory has made it to main
+	 * memory before starting the DMA engine.
+	 */
+	__iowmb();
+
+	/*
+	 * DMA transfers can take at least 200ms to complete with USB mass
+	 * storage connected. To prevent autosuspend timeouts, we must use
+	 * pm_runtime_get/put() when chan_busy[] is modified. The reference is
+	 * dropped again in desc_to_chan() or cppi41_stop_chan(), depending on
+	 * the outcome of the transfer.
+	 */
+	pm_runtime_get(cdd->ddev.dev);
+
+	desc_phys = lower_32_bits(c->desc_phys);
+	desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
+	WARN_ON(cdd->chan_busy[desc_num]);
+	cdd->chan_busy[desc_num] = c;
+
+	reg = (sizeof(struct cppi41_desc) - 24) / 4;
+	reg |= desc_phys;
+	cppi_writel(reg, cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num));
+}
+
+/*
+ * Caller must hold cdd->lock to prevent push_desc_queue() from being
+ * called out of order. Both cppi41_dma_issue_pending() and
+ * cppi41_runtime_resume() call this function.
+ */
+static void cppi41_run_queue(struct cppi41_dd *cdd)
+{
+	struct cppi41_channel *c, *_c;
+
+	list_for_each_entry_safe(c, _c, &cdd->pending, node) {
+		push_desc_queue(c);
+		list_del(&c->node);
+	}
+}
+
+static void cppi41_dma_issue_pending(struct dma_chan *chan)
+{
+	struct cppi41_channel *c = to_cpp41_chan(chan);
+	struct cppi41_dd *cdd = c->cdd;
+	unsigned long flags;
+	int error;
+
+	error = pm_runtime_get(cdd->ddev.dev);
+	if ((error != -EINPROGRESS) && error < 0) {
+		pm_runtime_put_noidle(cdd->ddev.dev);
+		dev_err(cdd->ddev.dev, "Failed to pm_runtime_get: %i\n",
+			error);
+
+		return;
+	}
+
+	spin_lock_irqsave(&cdd->lock, flags);
+	list_add_tail(&c->node, &cdd->pending);
+	if (!cdd->is_suspended)
+		cppi41_run_queue(cdd);
+	spin_unlock_irqrestore(&cdd->lock, flags);
+
+	pm_runtime_mark_last_busy(cdd->ddev.dev);
+	pm_runtime_put_autosuspend(cdd->ddev.dev);
+}
+
+static u32 get_host_pd0(u32 length)
+{
+	u32 reg;
+
+	reg = DESC_TYPE_HOST << DESC_TYPE;
+	reg |= length;
+
+	return reg;
+}
+
+static u32 get_host_pd1(struct cppi41_channel *c)
+{
+	u32 reg;
+
+	reg = 0;
+
+	return reg;
+}
+
+static u32 get_host_pd2(struct cppi41_channel *c)
+{
+	u32 reg;
+
+	reg = DESC_TYPE_USB;
+	reg |= c->q_comp_num;
+
+	return reg;
+}
+
+static u32 get_host_pd3(u32 length)
+{
+	u32 reg;
+
+	/* PD3 = packet size */
+	reg = length;
+
+	return reg;
+}
+
+static u32 get_host_pd6(u32 length)
+{
+	u32 reg;
+
+	/* PD6 buffer size */
+	reg = DESC_PD_COMPLETE;
+	reg |= length;
+
+	return reg;
+}
+
+static u32 get_host_pd4_or_7(u32 addr)
+{
+	u32 reg;
+
+	reg = addr;
+
+	return reg;
+}
+
+static u32 get_host_pd5(void)
+{
+	u32 reg;
+
+	reg = 0;
+
+	return reg;
+}
+
+static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg(
+	struct dma_chan *chan, struct scatterlist *sgl, unsigned sg_len,
+	enum dma_transfer_direction dir, unsigned long tx_flags, void *context)
+{
+	struct cppi41_channel *c = to_cpp41_chan(chan);
+	struct cppi41_desc *d;
+	struct scatterlist *sg;
+	unsigned int i;
+
+	d = c->desc;
+	for_each_sg(sgl, sg, sg_len, i) {
+		u32 addr;
+		u32 len;
+
+		/* We need to use more than one desc once musb supports sg */
+		addr = lower_32_bits(sg_dma_address(sg));
+		len = sg_dma_len(sg);
+
+		d->pd0 = get_host_pd0(len);
+		d->pd1 = get_host_pd1(c);
+		d->pd2 = get_host_pd2(c);
+		d->pd3 = get_host_pd3(len);
+		d->pd4 = get_host_pd4_or_7(addr);
+		d->pd5 = get_host_pd5();
+		d->pd6 = get_host_pd6(len);
+		d->pd7 = get_host_pd4_or_7(addr);
+
+		d++;
+	}
+
+	return &c->txd;
+}
+
+static void cppi41_compute_td_desc(struct cppi41_desc *d)
+{
+	d->pd0 = DESC_TYPE_TEARD << DESC_TYPE;
+}
+
+static int cppi41_tear_down_chan(struct cppi41_channel *c)
+{
+	struct dmaengine_result abort_result;
+	struct cppi41_dd *cdd = c->cdd;
+	struct cppi41_desc *td;
+	u32 reg;
+	u32 desc_phys;
+	u32 td_desc_phys;
+
+	td = cdd->cd;
+	td += cdd->first_td_desc;
+
+	td_desc_phys = cdd->descs_phys;
+	td_desc_phys += cdd->first_td_desc * sizeof(struct cppi41_desc);
+
+	if (!c->td_queued) {
+		cppi41_compute_td_desc(td);
+		__iowmb();
+
+		reg = (sizeof(struct cppi41_desc) - 24) / 4;
+		reg |= td_desc_phys;
+		cppi_writel(reg, cdd->qmgr_mem +
+				QMGR_QUEUE_D(cdd->td_queue.submit));
+
+		reg = GCR_CHAN_ENABLE;
+		if (!c->is_tx) {
+			reg |= GCR_STARV_RETRY;
+			reg |= GCR_DESC_TYPE_HOST;
+			reg |= cdd->td_queue.complete;
+		}
+		reg |= GCR_TEARDOWN;
+		cppi_writel(reg, c->gcr_reg);
+		c->td_queued = 1;
+		c->td_retry = 500;
+	}
+
+	if (!c->td_seen || !c->td_desc_seen) {
+
+		desc_phys = cppi41_pop_desc(cdd, cdd->td_queue.complete);
+		if (!desc_phys && c->is_tx)
+			desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
+
+		if (desc_phys == c->desc_phys) {
+			c->td_desc_seen = 1;
+
+		} else if (desc_phys == td_desc_phys) {
+			u32 pd0;
+
+			__iormb();
+			pd0 = td->pd0;
+			WARN_ON((pd0 >> DESC_TYPE) != DESC_TYPE_TEARD);
+			WARN_ON(!c->is_tx && !(pd0 & TD_DESC_IS_RX));
+			WARN_ON((pd0 & 0x1f) != c->port_num);
+			c->td_seen = 1;
+		} else if (desc_phys) {
+			WARN_ON_ONCE(1);
+		}
+	}
+	c->td_retry--;
+	/*
+	 * If the TX descriptor / channel is in use, the caller needs to poke
+	 * the TD bit multiple times. After that the hardware releases the
+	 * transfer descriptor followed by the TD descriptor. Waiting in
+	 * between does not seem to make any difference.
+	 * RX seems to be thrown out right away. However, once the TearDown
+	 * descriptor gets through we are done. If we have seen the transfer
+	 * descriptor before the TD, we fetch it from the enqueue side; it has
+	 * to be there waiting for us.
+	 */
+	if (!c->td_seen && c->td_retry) {
+		udelay(1);
+		return -EAGAIN;
+	}
+	WARN_ON(!c->td_retry);
+
+	if (!c->td_desc_seen) {
+		desc_phys = cppi41_pop_desc(cdd, c->q_num);
+		if (!desc_phys)
+			desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
+		WARN_ON(!desc_phys);
+	}
+
+	c->td_queued = 0;
+	c->td_seen = 0;
+	c->td_desc_seen = 0;
+	cppi_writel(0, c->gcr_reg);
+
+	/* Invoke the callback to do the necessary clean-up */
+	abort_result.result = DMA_TRANS_ABORTED;
+	dma_cookie_complete(&c->txd);
+	dmaengine_desc_get_callback_invoke(&c->txd, &abort_result);
+
+	return 0;
+}
+
+static int cppi41_stop_chan(struct dma_chan *chan)
+{
+	struct cppi41_channel *c = to_cpp41_chan(chan);
+	struct cppi41_dd *cdd = c->cdd;
+	u32 desc_num;
+	u32 desc_phys;
+	int ret;
+
+	desc_phys = lower_32_bits(c->desc_phys);
+	desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
+	if (!cdd->chan_busy[desc_num]) {
+		struct cppi41_channel *cc, *_ct;
+
+		/*
+		 * Channels might still be in the pending list if
+		 * cppi41_dma_issue_pending() is called after
+		 * cppi41_runtime_suspend() has been called.
+		 */
+		list_for_each_entry_safe(cc, _ct, &cdd->pending, node) {
+			if (cc != c)
+				continue;
+			list_del(&cc->node);
+			break;
+		}
+		return 0;
+	}
+
+	ret = cppi41_tear_down_chan(c);
+	if (ret)
+		return ret;
+
+	WARN_ON(!cdd->chan_busy[desc_num]);
+	cdd->chan_busy[desc_num] = NULL;
+
+	/* Usecount for chan_busy[], paired with push_desc_queue() */
+	pm_runtime_put(cdd->ddev.dev);
+
+	return 0;
+}
+
+static int cppi41_add_chans(struct device *dev, struct cppi41_dd *cdd)
+{
+	struct cppi41_channel *cchan, *chans;
+	int i;
+	u32 n_chans = cdd->n_chans;
+
+	/*
+	 * Each hardware channel can only be used as TX or as RX, so we
+	 * register twice as many dmaengine channels: one RX and one TX
+	 * channel per USB port (see the sketch after this function).
+	 */
+	n_chans *= 2;
+
+	chans = devm_kcalloc(dev, n_chans, sizeof(*chans), GFP_KERNEL);
+	if (!chans)
+		return -ENOMEM;
+
+	for (i = 0; i < n_chans; i++) {
+		cchan = &chans[i];
+
+		cchan->cdd = cdd;
+		if (i & 1) {
+			cchan->gcr_reg = cdd->ctrl_mem + DMA_TXGCR(i >> 1);
+			cchan->is_tx = 1;
+		} else {
+			cchan->gcr_reg = cdd->ctrl_mem + DMA_RXGCR(i >> 1);
+			cchan->is_tx = 0;
+		}
+		cchan->port_num = i >> 1;
+		cchan->desc = &cdd->cd[i];
+		cchan->desc_phys = cdd->descs_phys;
+		cchan->desc_phys += i * sizeof(struct cppi41_desc);
+		cchan->chan.device = &cdd->ddev;
+		list_add_tail(&cchan->chan.device_node, &cdd->ddev.channels);
+	}
+	cdd->first_td_desc = n_chans;
+
+	return 0;
+}
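
As noted in the comment inside cppi41_add_chans(), channels are registered in
interleaved RX/TX pairs. A minimal standalone sketch of the index arithmetic
(illustrative only, the port count of 4 is made up):

	#include <stdio.h>

	int main(void)
	{
		int i, n_chans = 4 * 2;	/* e.g. 4 hardware ports -> 8 dmaengine channels */

		for (i = 0; i < n_chans; i++)
			printf("channel %d: port %d, %s\n", i, i >> 1,
			       (i & 1) ? "TX (DMA_TXGCR)" : "RX (DMA_RXGCR)");
		return 0;
	}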
+
+static void purge_descs(struct device *dev, struct cppi41_dd *cdd)
+{
+	unsigned int mem_decs;
+	int i;
+
+	mem_decs = ALLOC_DECS_NUM * sizeof(struct cppi41_desc);
+
+	for (i = 0; i < DESCS_AREAS; i++) {
+
+		cppi_writel(0, cdd->qmgr_mem + QMGR_MEMBASE(i));
+		cppi_writel(0, cdd->qmgr_mem + QMGR_MEMCTRL(i));
+
+		dma_free_coherent(dev, mem_decs, cdd->cd,
+				cdd->descs_phys);
+	}
+}
+
+static void disable_sched(struct cppi41_dd *cdd)
+{
+	cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
+}
+
+static void deinit_cppi41(struct device *dev, struct cppi41_dd *cdd)
+{
+	disable_sched(cdd);
+
+	purge_descs(dev, cdd);
+
+	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
+	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
+	dma_free_coherent(dev, QMGR_SCRATCH_SIZE, cdd->qmgr_scratch,
+			cdd->scratch_phys);
+}
+
+static int init_descs(struct device *dev, struct cppi41_dd *cdd)
+{
+	unsigned int desc_size;
+	unsigned int mem_decs;
+	int i;
+	u32 reg;
+	u32 idx;
+
+	BUILD_BUG_ON(sizeof(struct cppi41_desc) &
+			(sizeof(struct cppi41_desc) - 1));
+	BUILD_BUG_ON(sizeof(struct cppi41_desc) < 32);
+	BUILD_BUG_ON(ALLOC_DECS_NUM < 32);
+
+	desc_size = sizeof(struct cppi41_desc);
+	mem_decs = ALLOC_DECS_NUM * desc_size;
+
+	idx = 0;
+	for (i = 0; i < DESCS_AREAS; i++) {
+
+		reg = idx << QMGR_MEMCTRL_IDX_SH;
+		reg |= (ilog2(desc_size) - 5) << QMGR_MEMCTRL_DESC_SH;
+		reg |= ilog2(ALLOC_DECS_NUM) - 5;
+
+		BUILD_BUG_ON(DESCS_AREAS != 1);
+		cdd->cd = dma_alloc_coherent(dev, mem_decs,
+				&cdd->descs_phys, GFP_KERNEL);
+		if (!cdd->cd)
+			return -ENOMEM;
+
+		cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));
+		cppi_writel(reg, cdd->qmgr_mem + QMGR_MEMCTRL(i));
+
+		idx += ALLOC_DECS_NUM;
+	}
+	return 0;
+}
+
+static void init_sched(struct cppi41_dd *cdd)
+{
+	unsigned ch;
+	unsigned word;
+	u32 reg;
+
+	word = 0;
+	cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
+	for (ch = 0; ch < cdd->n_chans; ch += 2) {
+
+		reg = SCHED_ENTRY0_CHAN(ch);
+		reg |= SCHED_ENTRY1_CHAN(ch) | SCHED_ENTRY1_IS_RX;
+
+		reg |= SCHED_ENTRY2_CHAN(ch + 1);
+		reg |= SCHED_ENTRY3_CHAN(ch + 1) | SCHED_ENTRY3_IS_RX;
+		cppi_writel(reg, cdd->sched_mem + DMA_SCHED_WORD(word));
+		word++;
+	}
+	reg = cdd->n_chans * 2 - 1;
+	reg |= DMA_SCHED_CTRL_EN;
+	cppi_writel(reg, cdd->sched_mem + DMA_SCHED_CTRL);
+}
+
+static int init_cppi41(struct device *dev, struct cppi41_dd *cdd)
+{
+	int ret;
+
+	BUILD_BUG_ON(QMGR_SCRATCH_SIZE > ((1 << 14) - 1));
+	cdd->qmgr_scratch = dma_alloc_coherent(dev, QMGR_SCRATCH_SIZE,
+			&cdd->scratch_phys, GFP_KERNEL);
+	if (!cdd->qmgr_scratch)
+		return -ENOMEM;
+
+	cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
+	cppi_writel(TOTAL_DESCS_NUM, cdd->qmgr_mem + QMGR_LRAM_SIZE);
+	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);
+
+	ret = init_descs(dev, cdd);
+	if (ret)
+		goto err_td;
+
+	cppi_writel(cdd->td_queue.submit, cdd->ctrl_mem + DMA_TDFDQ);
+	init_sched(cdd);
+
+	return 0;
+err_td:
+	deinit_cppi41(dev, cdd);
+	return ret;
+}
+
+static struct platform_driver cpp41_dma_driver;
+/*
+ * The filter parameter is two u32 words:
+ * X Y
+ * X: port number
+ * Y: 0 = RX, anything else = TX
+ * (a hedged consumer-side sketch follows cpp41_dma_filter_fn below)
+ */
+#define INFO_PORT	0
+#define INFO_IS_TX	1
+
+static bool cpp41_dma_filter_fn(struct dma_chan *chan, void *param)
+{
+	struct cppi41_channel *cchan;
+	struct cppi41_dd *cdd;
+	const struct chan_queues *queues;
+	u32 *num = param;
+
+	if (chan->device->dev->driver != &cpp41_dma_driver.driver)
+		return false;
+
+	cchan = to_cpp41_chan(chan);
+
+	if (cchan->port_num != num[INFO_PORT])
+		return false;
+
+	if (cchan->is_tx && !num[INFO_IS_TX])
+		return false;
+	cdd = cchan->cdd;
+	if (cchan->is_tx)
+		queues = cdd->queues_tx;
+	else
+		queues = cdd->queues_rx;
+
+	BUILD_BUG_ON(ARRAY_SIZE(am335x_usb_queues_rx) !=
+		     ARRAY_SIZE(am335x_usb_queues_tx));
+	if (WARN_ON(cchan->port_num >= ARRAY_SIZE(am335x_usb_queues_rx)))
+		return false;
+
+	cchan->q_num = queues[cchan->port_num].submit;
+	cchan->q_comp_num = queues[cchan->port_num].complete;
+	return true;
+}
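
In-tree consumers reach this filter through the device-tree path via
cppi41_dma_xlate() below; the fragment here is only a hedged sketch of how the
two-word parameter described above feeds the filter through the generic
dmaengine API (cpp41_dma_filter_fn is static, so this is illustrative rather
than something an external driver could call directly). Error handling is
omitted:

	dma_cap_mask_t mask;
	u32 cppi41_param[2] = { 1, 1 };	/* INFO_PORT = 1, INFO_IS_TX = 1 (TX of port 1) */
	struct dma_chan *chan;

	dma_cap_zero(mask);
	dma_cap_set(DMA_SLAVE, mask);
	chan = dma_request_channel(mask, cpp41_dma_filter_fn, cppi41_param);
	/* chan is NULL if no channel matched */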
+
+static struct of_dma_filter_info cpp41_dma_info = {
+	.filter_fn = cpp41_dma_filter_fn,
+};
+
+static struct dma_chan *cppi41_dma_xlate(struct of_phandle_args *dma_spec,
+		struct of_dma *ofdma)
+{
+	int count = dma_spec->args_count;
+	struct of_dma_filter_info *info = ofdma->of_dma_data;
+
+	if (!info || !info->filter_fn)
+		return NULL;
+
+	if (count != 2)
+		return NULL;
+
+	return dma_request_channel(info->dma_cap, info->filter_fn,
+			&dma_spec->args[0]);
+}
+
+static const struct cppi_glue_infos am335x_usb_infos = {
+	.queues_rx = am335x_usb_queues_rx,
+	.queues_tx = am335x_usb_queues_tx,
+	.td_queue = { .submit = 31, .complete = 0 },
+	.first_completion_queue = 93,
+	.qmgr_num_pend = 5,
+};
+
+static const struct cppi_glue_infos da8xx_usb_infos = {
+	.queues_rx = da8xx_usb_queues_rx,
+	.queues_tx = da8xx_usb_queues_tx,
+	.td_queue = { .submit = 31, .complete = 0 },
+	.first_completion_queue = 24,
+	.qmgr_num_pend = 2,
+};
+
+static const struct of_device_id cppi41_dma_ids[] = {
+	{ .compatible = "ti,am3359-cppi41", .data = &am335x_usb_infos},
+	{ .compatible = "ti,da830-cppi41", .data = &da8xx_usb_infos},
+	{},
+};
+MODULE_DEVICE_TABLE(of, cppi41_dma_ids);
+
+static const struct cppi_glue_infos *get_glue_info(struct device *dev)
+{
+	const struct of_device_id *of_id;
+
+	of_id = of_match_node(cppi41_dma_ids, dev->of_node);
+	if (!of_id)
+		return NULL;
+	return of_id->data;
+}
+
+#define CPPI41_DMA_BUSWIDTHS	(BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+				BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+				BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
+				BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
+static int cppi41_dma_probe(struct platform_device *pdev)
+{
+	struct cppi41_dd *cdd;
+	struct device *dev = &pdev->dev;
+	const struct cppi_glue_infos *glue_info;
+	struct resource *mem;
+	int index;
+	int irq;
+	int ret;
+
+	glue_info = get_glue_info(dev);
+	if (!glue_info)
+		return -EINVAL;
+
+	cdd = devm_kzalloc(&pdev->dev, sizeof(*cdd), GFP_KERNEL);
+	if (!cdd)
+		return -ENOMEM;
+
+	dma_cap_set(DMA_SLAVE, cdd->ddev.cap_mask);
+	cdd->ddev.device_alloc_chan_resources = cppi41_dma_alloc_chan_resources;
+	cdd->ddev.device_free_chan_resources = cppi41_dma_free_chan_resources;
+	cdd->ddev.device_tx_status = cppi41_dma_tx_status;
+	cdd->ddev.device_issue_pending = cppi41_dma_issue_pending;
+	cdd->ddev.device_prep_slave_sg = cppi41_dma_prep_slave_sg;
+	cdd->ddev.device_terminate_all = cppi41_stop_chan;
+	cdd->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	cdd->ddev.src_addr_widths = CPPI41_DMA_BUSWIDTHS;
+	cdd->ddev.dst_addr_widths = CPPI41_DMA_BUSWIDTHS;
+	cdd->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+	cdd->ddev.dev = dev;
+	INIT_LIST_HEAD(&cdd->ddev.channels);
+	cpp41_dma_info.dma_cap = cdd->ddev.cap_mask;
+
+	index = of_property_match_string(dev->of_node,
+					 "reg-names", "controller");
+	if (index < 0)
+		return index;
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, index);
+	cdd->ctrl_mem = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(cdd->ctrl_mem))
+		return PTR_ERR(cdd->ctrl_mem);
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, index + 1);
+	cdd->sched_mem = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(cdd->sched_mem))
+		return PTR_ERR(cdd->sched_mem);
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, index + 2);
+	cdd->qmgr_mem = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(cdd->qmgr_mem))
+		return PTR_ERR(cdd->qmgr_mem);
+
+	spin_lock_init(&cdd->lock);
+	INIT_LIST_HEAD(&cdd->pending);
+
+	platform_set_drvdata(pdev, cdd);
+
+	pm_runtime_enable(dev);
+	pm_runtime_set_autosuspend_delay(dev, 100);
+	pm_runtime_use_autosuspend(dev);
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0)
+		goto err_get_sync;
+
+	cdd->queues_rx = glue_info->queues_rx;
+	cdd->queues_tx = glue_info->queues_tx;
+	cdd->td_queue = glue_info->td_queue;
+	cdd->qmgr_num_pend = glue_info->qmgr_num_pend;
+	cdd->first_completion_queue = glue_info->first_completion_queue;
+
+	ret = of_property_read_u32(dev->of_node,
+				   "#dma-channels", &cdd->n_chans);
+	if (ret)
+		goto err_get_n_chans;
+
+	ret = init_cppi41(dev, cdd);
+	if (ret)
+		goto err_init_cppi;
+
+	ret = cppi41_add_chans(dev, cdd);
+	if (ret)
+		goto err_chans;
+
+	irq = irq_of_parse_and_map(dev->of_node, 0);
+	if (!irq) {
+		ret = -EINVAL;
+		goto err_chans;
+	}
+
+	ret = devm_request_irq(&pdev->dev, irq, cppi41_irq, IRQF_SHARED,
+			dev_name(dev), cdd);
+	if (ret)
+		goto err_chans;
+	cdd->irq = irq;
+
+	ret = dma_async_device_register(&cdd->ddev);
+	if (ret)
+		goto err_chans;
+
+	ret = of_dma_controller_register(dev->of_node,
+			cppi41_dma_xlate, &cpp41_dma_info);
+	if (ret)
+		goto err_of;
+
+	pm_runtime_mark_last_busy(dev);
+	pm_runtime_put_autosuspend(dev);
+
+	return 0;
+err_of:
+	dma_async_device_unregister(&cdd->ddev);
+err_chans:
+	deinit_cppi41(dev, cdd);
+err_init_cppi:
+	pm_runtime_dont_use_autosuspend(dev);
+err_get_n_chans:
+err_get_sync:
+	pm_runtime_put_sync(dev);
+	pm_runtime_disable(dev);
+	return ret;
+}
+
+static int cppi41_dma_remove(struct platform_device *pdev)
+{
+	struct cppi41_dd *cdd = platform_get_drvdata(pdev);
+	int error;
+
+	error = pm_runtime_get_sync(&pdev->dev);
+	if (error < 0)
+		dev_err(&pdev->dev, "%s could not pm_runtime_get: %i\n",
+			__func__, error);
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&cdd->ddev);
+
+	devm_free_irq(&pdev->dev, cdd->irq, cdd);
+	deinit_cppi41(&pdev->dev, cdd);
+	pm_runtime_dont_use_autosuspend(&pdev->dev);
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+	return 0;
+}
+
+static int __maybe_unused cppi41_suspend(struct device *dev)
+{
+	struct cppi41_dd *cdd = dev_get_drvdata(dev);
+
+	cdd->dma_tdfdq = cppi_readl(cdd->ctrl_mem + DMA_TDFDQ);
+	disable_sched(cdd);
+
+	return 0;
+}
+
+static int __maybe_unused cppi41_resume(struct device *dev)
+{
+	struct cppi41_dd *cdd = dev_get_drvdata(dev);
+	struct cppi41_channel *c;
+	int i;
+
+	for (i = 0; i < DESCS_AREAS; i++)
+		cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));
+
+	list_for_each_entry(c, &cdd->ddev.channels, chan.device_node)
+		if (!c->is_tx)
+			cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);
+
+	init_sched(cdd);
+
+	cppi_writel(cdd->dma_tdfdq, cdd->ctrl_mem + DMA_TDFDQ);
+	cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
+	cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE);
+	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);
+
+	return 0;
+}
+
+static int __maybe_unused cppi41_runtime_suspend(struct device *dev)
+{
+	struct cppi41_dd *cdd = dev_get_drvdata(dev);
+	unsigned long flags;
+
+	spin_lock_irqsave(&cdd->lock, flags);
+	cdd->is_suspended = true;
+	WARN_ON(!list_empty(&cdd->pending));
+	spin_unlock_irqrestore(&cdd->lock, flags);
+
+	return 0;
+}
+
+static int __maybe_unused cppi41_runtime_resume(struct device *dev)
+{
+	struct cppi41_dd *cdd = dev_get_drvdata(dev);
+	unsigned long flags;
+
+	spin_lock_irqsave(&cdd->lock, flags);
+	cdd->is_suspended = false;
+	cppi41_run_queue(cdd);
+	spin_unlock_irqrestore(&cdd->lock, flags);
+
+	return 0;
+}
+
+static const struct dev_pm_ops cppi41_pm_ops = {
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(cppi41_suspend, cppi41_resume)
+	SET_RUNTIME_PM_OPS(cppi41_runtime_suspend,
+			   cppi41_runtime_resume,
+			   NULL)
+};
+
+static struct platform_driver cpp41_dma_driver = {
+	.probe  = cppi41_dma_probe,
+	.remove = cppi41_dma_remove,
+	.driver = {
+		.name = "cppi41-dma-engine",
+		.pm = &cppi41_pm_ops,
+		.of_match_table = of_match_ptr(cppi41_dma_ids),
+	},
+};
+
+module_platform_driver(cpp41_dma_driver);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Sebastian Andrzej Siewior <bigeasy@linutronix.de>");
diff --git a/drivers/dma/ti/dma-crossbar.c b/drivers/dma/ti/dma-crossbar.c
new file mode 100644
index 0000000..9272b17
--- /dev/null
+++ b/drivers/dma/ti/dma-crossbar.c
@@ -0,0 +1,478 @@
+/*
+ *  Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com
+ *  Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/io.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_dma.h>
+
+#define TI_XBAR_DRA7		0
+#define TI_XBAR_AM335X		1
+static const u32 ti_xbar_type[] = {
+	[TI_XBAR_DRA7] = TI_XBAR_DRA7,
+	[TI_XBAR_AM335X] = TI_XBAR_AM335X,
+};
+
+static const struct of_device_id ti_dma_xbar_match[] = {
+	{
+		.compatible = "ti,dra7-dma-crossbar",
+		.data = &ti_xbar_type[TI_XBAR_DRA7],
+	},
+	{
+		.compatible = "ti,am335x-edma-crossbar",
+		.data = &ti_xbar_type[TI_XBAR_AM335X],
+	},
+	{},
+};
+
+/* Crossbar on AM335x/AM437x family */
+#define TI_AM335X_XBAR_LINES	64
+
+struct ti_am335x_xbar_data {
+	void __iomem *iomem;
+
+	struct dma_router dmarouter;
+
+	u32 xbar_events; /* maximum number of events to select in xbar */
+	u32 dma_requests; /* number of DMA requests on eDMA */
+};
+
+struct ti_am335x_xbar_map {
+	u16 dma_line;
+	u8 mux_val;
+};
+
+static inline void ti_am335x_xbar_write(void __iomem *iomem, int event, u8 val)
+{
+	/*
+	 * The TPCC_EVT_MUX_60_63 register layout differs from the rest:
+	 * event 63 is mapped to the lowest byte and event 60 to the
+	 * highest, so handle that range separately (a quick check of the
+	 * resulting offsets follows this helper).
+	 */
+	if (event >= 60 && event <= 63)
+		writeb_relaxed(val, iomem + (63 - event % 4));
+	else
+		writeb_relaxed(val, iomem + event);
+}
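
A quick standalone check of the byte-swapped offsets handled above
(illustrative only): events 60..63 map to byte offsets 63..60, every other
event maps 1:1.

	#include <stdio.h>

	int main(void)
	{
		int event;

		for (event = 58; event <= 63; event++) {
			int offset = (event >= 60 && event <= 63) ?
				     63 - event % 4 : event;

			printf("event %d -> byte offset %d\n", event, offset);
		}
		return 0;	/* 58->58, 59->59, 60->63, 61->62, 62->61, 63->60 */
	}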
+
+static void ti_am335x_xbar_free(struct device *dev, void *route_data)
+{
+	struct ti_am335x_xbar_data *xbar = dev_get_drvdata(dev);
+	struct ti_am335x_xbar_map *map = route_data;
+
+	dev_dbg(dev, "Unmapping XBAR event %u on channel %u\n",
+		map->mux_val, map->dma_line);
+
+	ti_am335x_xbar_write(xbar->iomem, map->dma_line, 0);
+	kfree(map);
+}
+
+static void *ti_am335x_xbar_route_allocate(struct of_phandle_args *dma_spec,
+					   struct of_dma *ofdma)
+{
+	struct platform_device *pdev = of_find_device_by_node(ofdma->of_node);
+	struct ti_am335x_xbar_data *xbar = platform_get_drvdata(pdev);
+	struct ti_am335x_xbar_map *map;
+
+	if (dma_spec->args_count != 3)
+		return ERR_PTR(-EINVAL);
+
+	if (dma_spec->args[2] >= xbar->xbar_events) {
+		dev_err(&pdev->dev, "Invalid XBAR event number: %d\n",
+			dma_spec->args[2]);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (dma_spec->args[0] >= xbar->dma_requests) {
+		dev_err(&pdev->dev, "Invalid DMA request line number: %d\n",
+			dma_spec->args[0]);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* The of_node_put() will be done in the core for the node */
+	dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0);
+	if (!dma_spec->np) {
+		dev_err(&pdev->dev, "Can't get DMA master\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (!map) {
+		of_node_put(dma_spec->np);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	map->dma_line = (u16)dma_spec->args[0];
+	map->mux_val = (u8)dma_spec->args[2];
+
+	dma_spec->args[2] = 0;
+	dma_spec->args_count = 2;
+
+	dev_dbg(&pdev->dev, "Mapping XBAR event%u to DMA%u\n",
+		map->mux_val, map->dma_line);
+
+	ti_am335x_xbar_write(xbar->iomem, map->dma_line, map->mux_val);
+
+	return map;
+}
+
+static const struct of_device_id ti_am335x_master_match[] = {
+	{ .compatible = "ti,edma3-tpcc", },
+	{},
+};
+
+static int ti_am335x_xbar_probe(struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	const struct of_device_id *match;
+	struct device_node *dma_node;
+	struct ti_am335x_xbar_data *xbar;
+	struct resource *res;
+	void __iomem *iomem;
+	int i, ret;
+
+	if (!node)
+		return -ENODEV;
+
+	xbar = devm_kzalloc(&pdev->dev, sizeof(*xbar), GFP_KERNEL);
+	if (!xbar)
+		return -ENOMEM;
+
+	dma_node = of_parse_phandle(node, "dma-masters", 0);
+	if (!dma_node) {
+		dev_err(&pdev->dev, "Can't get DMA master node\n");
+		return -ENODEV;
+	}
+
+	match = of_match_node(ti_am335x_master_match, dma_node);
+	if (!match) {
+		dev_err(&pdev->dev, "DMA master is not supported\n");
+		of_node_put(dma_node);
+		return -EINVAL;
+	}
+
+	if (of_property_read_u32(dma_node, "dma-requests",
+				 &xbar->dma_requests)) {
+		dev_info(&pdev->dev,
+			 "Missing XBAR output information, using %u.\n",
+			 TI_AM335X_XBAR_LINES);
+		xbar->dma_requests = TI_AM335X_XBAR_LINES;
+	}
+	of_node_put(dma_node);
+
+	if (of_property_read_u32(node, "dma-requests", &xbar->xbar_events)) {
+		dev_info(&pdev->dev,
+			 "Missing XBAR input information, using %u.\n",
+			 TI_AM335X_XBAR_LINES);
+		xbar->xbar_events = TI_AM335X_XBAR_LINES;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	iomem = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(iomem))
+		return PTR_ERR(iomem);
+
+	xbar->iomem = iomem;
+
+	xbar->dmarouter.dev = &pdev->dev;
+	xbar->dmarouter.route_free = ti_am335x_xbar_free;
+
+	platform_set_drvdata(pdev, xbar);
+
+	/* Reset the crossbar */
+	for (i = 0; i < xbar->dma_requests; i++)
+		ti_am335x_xbar_write(xbar->iomem, i, 0);
+
+	ret = of_dma_router_register(node, ti_am335x_xbar_route_allocate,
+				     &xbar->dmarouter);
+
+	return ret;
+}
+
+/* Crossbar on DRA7xx family */
+#define TI_DRA7_XBAR_OUTPUTS	127
+#define TI_DRA7_XBAR_INPUTS	256
+
+struct ti_dra7_xbar_data {
+	void __iomem *iomem;
+
+	struct dma_router dmarouter;
+	struct mutex mutex;
+	unsigned long *dma_inuse;
+
+	u16 safe_val; /* Value to reset the crossbar lines */
+	u32 xbar_requests; /* number of DMA requests connected to XBAR */
+	u32 dma_requests; /* number of DMA requests forwarded to DMA */
+	u32 dma_offset;
+};
+
+struct ti_dra7_xbar_map {
+	u16 xbar_in;
+	int xbar_out;
+};
+
+static inline void ti_dra7_xbar_write(void __iomem *iomem, int xbar, u16 val)
+{
+	writew_relaxed(val, iomem + (xbar * 2));
+}
+
+static void ti_dra7_xbar_free(struct device *dev, void *route_data)
+{
+	struct ti_dra7_xbar_data *xbar = dev_get_drvdata(dev);
+	struct ti_dra7_xbar_map *map = route_data;
+
+	dev_dbg(dev, "Unmapping XBAR%u (was routed to %d)\n",
+		map->xbar_in, map->xbar_out);
+
+	ti_dra7_xbar_write(xbar->iomem, map->xbar_out, xbar->safe_val);
+	mutex_lock(&xbar->mutex);
+	clear_bit(map->xbar_out, xbar->dma_inuse);
+	mutex_unlock(&xbar->mutex);
+	kfree(map);
+}
+
+static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec,
+					 struct of_dma *ofdma)
+{
+	struct platform_device *pdev = of_find_device_by_node(ofdma->of_node);
+	struct ti_dra7_xbar_data *xbar = platform_get_drvdata(pdev);
+	struct ti_dra7_xbar_map *map;
+
+	if (dma_spec->args[0] >= xbar->xbar_requests) {
+		dev_err(&pdev->dev, "Invalid XBAR request number: %d\n",
+			dma_spec->args[0]);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* The of_node_put() will be done in the core for the node */
+	dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0);
+	if (!dma_spec->np) {
+		dev_err(&pdev->dev, "Can't get DMA master\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (!map) {
+		of_node_put(dma_spec->np);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	mutex_lock(&xbar->mutex);
+	map->xbar_out = find_first_zero_bit(xbar->dma_inuse,
+					    xbar->dma_requests);
+	if (map->xbar_out == xbar->dma_requests) {
+		mutex_unlock(&xbar->mutex);
+		dev_err(&pdev->dev, "Run out of free DMA requests\n");
+		kfree(map);
+		return ERR_PTR(-ENOMEM);
+	}
+	set_bit(map->xbar_out, xbar->dma_inuse);
+	mutex_unlock(&xbar->mutex);
+
+	map->xbar_in = (u16)dma_spec->args[0];
+
+	dma_spec->args[0] = map->xbar_out + xbar->dma_offset;
+
+	dev_dbg(&pdev->dev, "Mapping XBAR%u to DMA%d\n",
+		map->xbar_in, map->xbar_out);
+
+	ti_dra7_xbar_write(xbar->iomem, map->xbar_out, map->xbar_in);
+
+	return map;
+}
+
+#define TI_XBAR_EDMA_OFFSET	0
+#define TI_XBAR_SDMA_OFFSET	1
+static const u32 ti_dma_offset[] = {
+	[TI_XBAR_EDMA_OFFSET] = 0,
+	[TI_XBAR_SDMA_OFFSET] = 1,
+};
+
+static const struct of_device_id ti_dra7_master_match[] = {
+	{
+		.compatible = "ti,omap4430-sdma",
+		.data = &ti_dma_offset[TI_XBAR_SDMA_OFFSET],
+	},
+	{
+		.compatible = "ti,edma3",
+		.data = &ti_dma_offset[TI_XBAR_EDMA_OFFSET],
+	},
+	{
+		.compatible = "ti,edma3-tpcc",
+		.data = &ti_dma_offset[TI_XBAR_EDMA_OFFSET],
+	},
+	{},
+};
+
+static inline void ti_dra7_xbar_reserve(int offset, int len, unsigned long *p)
+{
+	for (; len > 0; len--)
+		set_bit(offset + (len - 1), p);
+}
+
+static int ti_dra7_xbar_probe(struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	const struct of_device_id *match;
+	struct device_node *dma_node;
+	struct ti_dra7_xbar_data *xbar;
+	struct property *prop;
+	struct resource *res;
+	u32 safe_val;
+	int sz;
+	void __iomem *iomem;
+	int i, ret;
+
+	if (!node)
+		return -ENODEV;
+
+	xbar = devm_kzalloc(&pdev->dev, sizeof(*xbar), GFP_KERNEL);
+	if (!xbar)
+		return -ENOMEM;
+
+	dma_node = of_parse_phandle(node, "dma-masters", 0);
+	if (!dma_node) {
+		dev_err(&pdev->dev, "Can't get DMA master node\n");
+		return -ENODEV;
+	}
+
+	match = of_match_node(ti_dra7_master_match, dma_node);
+	if (!match) {
+		dev_err(&pdev->dev, "DMA master is not supported\n");
+		of_node_put(dma_node);
+		return -EINVAL;
+	}
+
+	if (of_property_read_u32(dma_node, "dma-requests",
+				 &xbar->dma_requests)) {
+		dev_info(&pdev->dev,
+			 "Missing XBAR output information, using %u.\n",
+			 TI_DRA7_XBAR_OUTPUTS);
+		xbar->dma_requests = TI_DRA7_XBAR_OUTPUTS;
+	}
+	of_node_put(dma_node);
+
+	xbar->dma_inuse = devm_kcalloc(&pdev->dev,
+				       BITS_TO_LONGS(xbar->dma_requests),
+				       sizeof(unsigned long), GFP_KERNEL);
+	if (!xbar->dma_inuse)
+		return -ENOMEM;
+
+	if (of_property_read_u32(node, "dma-requests", &xbar->xbar_requests)) {
+		dev_info(&pdev->dev,
+			 "Missing XBAR input information, using %u.\n",
+			 TI_DRA7_XBAR_INPUTS);
+		xbar->xbar_requests = TI_DRA7_XBAR_INPUTS;
+	}
+
+	if (!of_property_read_u32(node, "ti,dma-safe-map", &safe_val))
+		xbar->safe_val = (u16)safe_val;
+
+
+	prop = of_find_property(node, "ti,reserved-dma-request-ranges", &sz);
+	if (prop) {
+		const char pname[] = "ti,reserved-dma-request-ranges";
+		u32 (*rsv_events)[2];
+		size_t nelm = sz / sizeof(*rsv_events);
+		int i;
+
+		if (!nelm)
+			return -EINVAL;
+
+		rsv_events = kcalloc(nelm, sizeof(*rsv_events), GFP_KERNEL);
+		if (!rsv_events)
+			return -ENOMEM;
+
+		ret = of_property_read_u32_array(node, pname, (u32 *)rsv_events,
+						 nelm * 2);
+		if (ret)
+			return ret;
+
+		for (i = 0; i < nelm; i++) {
+			ti_dra7_xbar_reserve(rsv_events[i][0], rsv_events[i][1],
+					     xbar->dma_inuse);
+		}
+		kfree(rsv_events);
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	iomem = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(iomem))
+		return PTR_ERR(iomem);
+
+	xbar->iomem = iomem;
+
+	xbar->dmarouter.dev = &pdev->dev;
+	xbar->dmarouter.route_free = ti_dra7_xbar_free;
+	xbar->dma_offset = *(u32 *)match->data;
+
+	mutex_init(&xbar->mutex);
+	platform_set_drvdata(pdev, xbar);
+
+	/* Reset the crossbar */
+	for (i = 0; i < xbar->dma_requests; i++) {
+		if (!test_bit(i, xbar->dma_inuse))
+			ti_dra7_xbar_write(xbar->iomem, i, xbar->safe_val);
+	}
+
+	ret = of_dma_router_register(node, ti_dra7_xbar_route_allocate,
+				     &xbar->dmarouter);
+	if (ret) {
+		/* Restore the defaults for the crossbar */
+		for (i = 0; i < xbar->dma_requests; i++) {
+			if (!test_bit(i, xbar->dma_inuse))
+				ti_dra7_xbar_write(xbar->iomem, i, i);
+		}
+	}
+
+	return ret;
+}
+
+static int ti_dma_xbar_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *match;
+	int ret;
+
+	match = of_match_node(ti_dma_xbar_match, pdev->dev.of_node);
+	if (unlikely(!match))
+		return -EINVAL;
+
+	switch (*(u32 *)match->data) {
+	case TI_XBAR_DRA7:
+		ret = ti_dra7_xbar_probe(pdev);
+		break;
+	case TI_XBAR_AM335X:
+		ret = ti_am335x_xbar_probe(pdev);
+		break;
+	default:
+		dev_err(&pdev->dev, "Unsupported crossbar\n");
+		ret = -ENODEV;
+		break;
+	}
+
+	return ret;
+}
+
+static struct platform_driver ti_dma_xbar_driver = {
+	.driver = {
+		.name = "ti-dma-crossbar",
+		.of_match_table = of_match_ptr(ti_dma_xbar_match),
+	},
+	.probe	= ti_dma_xbar_probe,
+};
+
+static int omap_dmaxbar_init(void)
+{
+	return platform_driver_register(&ti_dma_xbar_driver);
+}
+arch_initcall(omap_dmaxbar_init);
diff --git a/drivers/dma/ti/edma.c b/drivers/dma/ti/edma.c
new file mode 100644
index 0000000..ceabdea
--- /dev/null
+++ b/drivers/dma/ti/edma.c
@@ -0,0 +1,2565 @@
+/*
+ * TI EDMA DMA engine driver
+ *
+ * Copyright 2012 Texas Instruments
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/edma.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/pm_runtime.h>
+
+#include <linux/platform_data/edma.h>
+
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+/* Offsets matching "struct edmacc_param" */
+#define PARM_OPT		0x00
+#define PARM_SRC		0x04
+#define PARM_A_B_CNT		0x08
+#define PARM_DST		0x0c
+#define PARM_SRC_DST_BIDX	0x10
+#define PARM_LINK_BCNTRLD	0x14
+#define PARM_SRC_DST_CIDX	0x18
+#define PARM_CCNT		0x1c
+
+#define PARM_SIZE		0x20
+
+/* Offsets for EDMA CC global channel registers and their shadows */
+#define SH_ER			0x00	/* 64 bits */
+#define SH_ECR			0x08	/* 64 bits */
+#define SH_ESR			0x10	/* 64 bits */
+#define SH_CER			0x18	/* 64 bits */
+#define SH_EER			0x20	/* 64 bits */
+#define SH_EECR			0x28	/* 64 bits */
+#define SH_EESR			0x30	/* 64 bits */
+#define SH_SER			0x38	/* 64 bits */
+#define SH_SECR			0x40	/* 64 bits */
+#define SH_IER			0x50	/* 64 bits */
+#define SH_IECR			0x58	/* 64 bits */
+#define SH_IESR			0x60	/* 64 bits */
+#define SH_IPR			0x68	/* 64 bits */
+#define SH_ICR			0x70	/* 64 bits */
+#define SH_IEVAL		0x78
+#define SH_QER			0x80
+#define SH_QEER			0x84
+#define SH_QEECR		0x88
+#define SH_QEESR		0x8c
+#define SH_QSER			0x90
+#define SH_QSECR		0x94
+#define SH_SIZE			0x200
+
+/* Offsets for EDMA CC global registers */
+#define EDMA_REV		0x0000
+#define EDMA_CCCFG		0x0004
+#define EDMA_QCHMAP		0x0200	/* 8 registers */
+#define EDMA_DMAQNUM		0x0240	/* 8 registers (4 on OMAP-L1xx) */
+#define EDMA_QDMAQNUM		0x0260
+#define EDMA_QUETCMAP		0x0280
+#define EDMA_QUEPRI		0x0284
+#define EDMA_EMR		0x0300	/* 64 bits */
+#define EDMA_EMCR		0x0308	/* 64 bits */
+#define EDMA_QEMR		0x0310
+#define EDMA_QEMCR		0x0314
+#define EDMA_CCERR		0x0318
+#define EDMA_CCERRCLR		0x031c
+#define EDMA_EEVAL		0x0320
+#define EDMA_DRAE		0x0340	/* 4 x 64 bits*/
+#define EDMA_QRAE		0x0380	/* 4 registers */
+#define EDMA_QUEEVTENTRY	0x0400	/* 2 x 16 registers */
+#define EDMA_QSTAT		0x0600	/* 2 registers */
+#define EDMA_QWMTHRA		0x0620
+#define EDMA_QWMTHRB		0x0624
+#define EDMA_CCSTAT		0x0640
+
+#define EDMA_M			0x1000	/* global channel registers */
+#define EDMA_ECR		0x1008
+#define EDMA_ECRH		0x100C
+#define EDMA_SHADOW0		0x2000	/* 4 shadow regions */
+#define EDMA_PARM		0x4000	/* PaRAM entries */
+
+#define PARM_OFFSET(param_no)	(EDMA_PARM + ((param_no) << 5))
+
+#define EDMA_DCHMAP		0x0100  /* 64 registers */
+
+/* CCCFG register */
+#define GET_NUM_DMACH(x)	(x & 0x7) /* bits 0-2 */
+#define GET_NUM_QDMACH(x)	((x & 0x70) >> 4) /* bits 4-6 */
+#define GET_NUM_PAENTRY(x)	((x & 0x7000) >> 12) /* bits 12-14 */
+#define GET_NUM_EVQUE(x)	((x & 0x70000) >> 16) /* bits 16-18 */
+#define GET_NUM_REGN(x)		((x & 0x300000) >> 20) /* bits 20-21 */
+#define CHMAP_EXIST		BIT(24)
+
+/* CCSTAT register */
+#define EDMA_CCSTAT_ACTV	BIT(4)
+
+/*
+ * Max of 20 segments per channel to conserve PaRAM slots.
+ * Also note that MAX_NR_SG should be at least the number of periods
+ * that are required for ASoC, otherwise DMA prep calls will
+ * fail. Today davinci-pcm is the only user of this driver and
+ * requires at least 17 slots, so we set the default to 20.
+ */
+#define MAX_NR_SG		20
+#define EDMA_MAX_SLOTS		MAX_NR_SG
+#define EDMA_DESCRIPTORS	16
+
+#define EDMA_CHANNEL_ANY		-1	/* for edma_alloc_channel() */
+#define EDMA_SLOT_ANY			-1	/* for edma_alloc_slot() */
+#define EDMA_CONT_PARAMS_ANY		 1001
+#define EDMA_CONT_PARAMS_FIXED_EXACT	 1002
+#define EDMA_CONT_PARAMS_FIXED_NOT_EXACT 1003
+
+/* PaRAM slots are laid out like this */
+struct edmacc_param {
+	u32 opt;
+	u32 src;
+	u32 a_b_cnt;
+	u32 dst;
+	u32 src_dst_bidx;
+	u32 link_bcntrld;
+	u32 src_dst_cidx;
+	u32 ccnt;
+} __packed;
+
+/* fields in edmacc_param.opt */
+#define SAM		BIT(0)
+#define DAM		BIT(1)
+#define SYNCDIM		BIT(2)
+#define STATIC		BIT(3)
+#define EDMA_FWID	(0x07 << 8)
+#define TCCMODE		BIT(11)
+#define EDMA_TCC(t)	((t) << 12)
+#define TCINTEN		BIT(20)
+#define ITCINTEN	BIT(21)
+#define TCCHEN		BIT(22)
+#define ITCCHEN		BIT(23)
+
+struct edma_pset {
+	u32				len;
+	dma_addr_t			addr;
+	struct edmacc_param		param;
+};
+
+struct edma_desc {
+	struct virt_dma_desc		vdesc;
+	struct list_head		node;
+	enum dma_transfer_direction	direction;
+	int				cyclic;
+	int				absync;
+	int				pset_nr;
+	struct edma_chan		*echan;
+	int				processed;
+
+	/*
+	 * The following 4 elements are used for residue accounting.
+	 *
+	 * - processed_stat: the number of SG elements we have traversed
+	 * so far for accounting purposes. This is advanced towards processed
+	 * during edma_callback and is always <= processed, because processed
+	 * refers to the number of pending transfers (programmed to the EDMA
+	 * controller), whereas processed_stat tracks the number of transfers
+	 * accounted for so far.
+	 *
+	 * - residue: the number of bytes left to transfer for this descriptor
+	 *
+	 * - residue_stat: the residue in bytes of the data we have covered
+	 * so far for accounting. This is updated to match residue during
+	 * callbacks to keep it current.
+	 *
+	 * - sg_len: tracks the length of the current intermediate transfer;
+	 * this is required to update the residue during intermediate transfer
+	 * completion callbacks. (A toy walk-through follows the struct.)
+	 */
+	int				processed_stat;
+	u32				sg_len;
+	u32				residue;
+	u32				residue_stat;
+
+	struct edma_pset		pset[0];
+};
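
A toy, hosted-C walk-through of the residue bookkeeping described in the
comment above, assuming four 1 KiB SG elements; the variable names mirror the
struct fields, the numbers are made up:

	#include <stdio.h>

	int main(void)
	{
		unsigned int sg_len[4] = { 1024, 1024, 1024, 1024 };
		unsigned int residue = 4096;	/* total bytes still to transfer */
		unsigned int processed_stat;

		for (processed_stat = 0; processed_stat < 4; processed_stat++) {
			/* an intermediate completion accounts for one more element */
			residue -= sg_len[processed_stat];
			printf("elements accounted %u, residue %u bytes\n",
			       processed_stat + 1, residue);
		}
		return 0;
	}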
+
+struct edma_cc;
+
+struct edma_tc {
+	struct device_node		*node;
+	u16				id;
+};
+
+struct edma_chan {
+	struct virt_dma_chan		vchan;
+	struct list_head		node;
+	struct edma_desc		*edesc;
+	struct edma_cc			*ecc;
+	struct edma_tc			*tc;
+	int				ch_num;
+	bool				alloced;
+	bool				hw_triggered;
+	int				slot[EDMA_MAX_SLOTS];
+	int				missed;
+	struct dma_slave_config		cfg;
+};
+
+struct edma_cc {
+	struct device			*dev;
+	struct edma_soc_info		*info;
+	void __iomem			*base;
+	int				id;
+	bool				legacy_mode;
+
+	/* eDMA3 resource information */
+	unsigned			num_channels;
+	unsigned			num_qchannels;
+	unsigned			num_region;
+	unsigned			num_slots;
+	unsigned			num_tc;
+	bool				chmap_exist;
+	enum dma_event_q		default_queue;
+
+	unsigned int			ccint;
+	unsigned int			ccerrint;
+
+	/*
+	 * The slot_inuse bit for each PaRAM slot is clear unless the slot is
+	 * in use by Linux or if it is allocated to be used by DSP.
+	 */
+	unsigned long *slot_inuse;
+
+	struct dma_device		dma_slave;
+	struct dma_device		*dma_memcpy;
+	struct edma_chan		*slave_chans;
+	struct edma_tc			*tc_list;
+	int				dummy_slot;
+};
+
+/* dummy param set used to (re)initialize parameter RAM slots */
+static const struct edmacc_param dummy_paramset = {
+	.link_bcntrld = 0xffff,
+	.ccnt = 1,
+};
+
+#define EDMA_BINDING_LEGACY	0
+#define EDMA_BINDING_TPCC	1
+static const u32 edma_binding_type[] = {
+	[EDMA_BINDING_LEGACY] = EDMA_BINDING_LEGACY,
+	[EDMA_BINDING_TPCC] = EDMA_BINDING_TPCC,
+};
+
+static const struct of_device_id edma_of_ids[] = {
+	{
+		.compatible = "ti,edma3",
+		.data = &edma_binding_type[EDMA_BINDING_LEGACY],
+	},
+	{
+		.compatible = "ti,edma3-tpcc",
+		.data = &edma_binding_type[EDMA_BINDING_TPCC],
+	},
+	{}
+};
+MODULE_DEVICE_TABLE(of, edma_of_ids);
+
+static const struct of_device_id edma_tptc_of_ids[] = {
+	{ .compatible = "ti,edma3-tptc", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, edma_tptc_of_ids);
+
+static inline unsigned int edma_read(struct edma_cc *ecc, int offset)
+{
+	return (unsigned int)__raw_readl(ecc->base + offset);
+}
+
+static inline void edma_write(struct edma_cc *ecc, int offset, int val)
+{
+	__raw_writel(val, ecc->base + offset);
+}
+
+static inline void edma_modify(struct edma_cc *ecc, int offset, unsigned and,
+			       unsigned or)
+{
+	unsigned val = edma_read(ecc, offset);
+
+	val &= and;
+	val |= or;
+	edma_write(ecc, offset, val);
+}
+
+static inline void edma_and(struct edma_cc *ecc, int offset, unsigned and)
+{
+	unsigned val = edma_read(ecc, offset);
+
+	val &= and;
+	edma_write(ecc, offset, val);
+}
+
+static inline void edma_or(struct edma_cc *ecc, int offset, unsigned or)
+{
+	unsigned val = edma_read(ecc, offset);
+
+	val |= or;
+	edma_write(ecc, offset, val);
+}
+
+static inline unsigned int edma_read_array(struct edma_cc *ecc, int offset,
+					   int i)
+{
+	return edma_read(ecc, offset + (i << 2));
+}
+
+static inline void edma_write_array(struct edma_cc *ecc, int offset, int i,
+				    unsigned val)
+{
+	edma_write(ecc, offset + (i << 2), val);
+}
+
+static inline void edma_modify_array(struct edma_cc *ecc, int offset, int i,
+				     unsigned and, unsigned or)
+{
+	edma_modify(ecc, offset + (i << 2), and, or);
+}
+
+static inline void edma_or_array(struct edma_cc *ecc, int offset, int i,
+				 unsigned or)
+{
+	edma_or(ecc, offset + (i << 2), or);
+}
+
+static inline void edma_or_array2(struct edma_cc *ecc, int offset, int i, int j,
+				  unsigned or)
+{
+	edma_or(ecc, offset + ((i * 2 + j) << 2), or);
+}
+
+static inline void edma_write_array2(struct edma_cc *ecc, int offset, int i,
+				     int j, unsigned val)
+{
+	edma_write(ecc, offset + ((i * 2 + j) << 2), val);
+}
+
+static inline unsigned int edma_shadow0_read(struct edma_cc *ecc, int offset)
+{
+	return edma_read(ecc, EDMA_SHADOW0 + offset);
+}
+
+static inline unsigned int edma_shadow0_read_array(struct edma_cc *ecc,
+						   int offset, int i)
+{
+	return edma_read(ecc, EDMA_SHADOW0 + offset + (i << 2));
+}
+
+static inline void edma_shadow0_write(struct edma_cc *ecc, int offset,
+				      unsigned val)
+{
+	edma_write(ecc, EDMA_SHADOW0 + offset, val);
+}
+
+static inline void edma_shadow0_write_array(struct edma_cc *ecc, int offset,
+					    int i, unsigned val)
+{
+	edma_write(ecc, EDMA_SHADOW0 + offset + (i << 2), val);
+}
+
+static inline unsigned int edma_param_read(struct edma_cc *ecc, int offset,
+					   int param_no)
+{
+	return edma_read(ecc, EDMA_PARM + offset + (param_no << 5));
+}
+
+static inline void edma_param_write(struct edma_cc *ecc, int offset,
+				    int param_no, unsigned val)
+{
+	edma_write(ecc, EDMA_PARM + offset + (param_no << 5), val);
+}
+
+static inline void edma_param_modify(struct edma_cc *ecc, int offset,
+				     int param_no, unsigned and, unsigned or)
+{
+	edma_modify(ecc, EDMA_PARM + offset + (param_no << 5), and, or);
+}
+
+static inline void edma_param_and(struct edma_cc *ecc, int offset, int param_no,
+				  unsigned and)
+{
+	edma_and(ecc, EDMA_PARM + offset + (param_no << 5), and);
+}
+
+static inline void edma_param_or(struct edma_cc *ecc, int offset, int param_no,
+				 unsigned or)
+{
+	edma_or(ecc, EDMA_PARM + offset + (param_no << 5), or);
+}
+
+static inline void edma_set_bits(int offset, int len, unsigned long *p)
+{
+	for (; len > 0; len--)
+		set_bit(offset + (len - 1), p);
+}
+
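+/*
+ * QUEPRI holds one 4-bit priority field per event queue, so the field for
+ * queue N starts at bit N * 4; only the low 3 bits of the priority are used.
+ */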
+static void edma_assign_priority_to_queue(struct edma_cc *ecc, int queue_no,
+					  int priority)
+{
+	int bit = queue_no * 4;
+
+	edma_modify(ecc, EDMA_QUEPRI, ~(0x7 << bit), ((priority & 0x7) << bit));
+}
+
+static void edma_set_chmap(struct edma_chan *echan, int slot)
+{
+	struct edma_cc *ecc = echan->ecc;
+	int channel = EDMA_CHAN_SLOT(echan->ch_num);
+
+	if (ecc->chmap_exist) {
+		slot = EDMA_CHAN_SLOT(slot);
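+		/*
+		 * The entry PaRAM slot number occupies a bit field starting
+		 * at bit 5 of the channel's DCHMAP register, hence the shift.
+		 */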
+		edma_write_array(ecc, EDMA_DCHMAP, channel, (slot << 5));
+	}
+}
+
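+/*
+ * Channel-indexed registers are banked in groups of 32 channels: the word
+ * index is channel / 32 (channel >> 5) and the bit within that word is
+ * channel % 32 (BIT(channel & 0x1f)).  The same pattern is used throughout
+ * the interrupt, event and error handling below.
+ */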
+static void edma_setup_interrupt(struct edma_chan *echan, bool enable)
+{
+	struct edma_cc *ecc = echan->ecc;
+	int channel = EDMA_CHAN_SLOT(echan->ch_num);
+
+	if (enable) {
+		edma_shadow0_write_array(ecc, SH_ICR, channel >> 5,
+					 BIT(channel & 0x1f));
+		edma_shadow0_write_array(ecc, SH_IESR, channel >> 5,
+					 BIT(channel & 0x1f));
+	} else {
+		edma_shadow0_write_array(ecc, SH_IECR, channel >> 5,
+					 BIT(channel & 0x1f));
+	}
+}
+
+/*
+ * paRAM slot management functions
+ */
+static void edma_write_slot(struct edma_cc *ecc, unsigned slot,
+			    const struct edmacc_param *param)
+{
+	slot = EDMA_CHAN_SLOT(slot);
+	if (slot >= ecc->num_slots)
+		return;
+	memcpy_toio(ecc->base + PARM_OFFSET(slot), param, PARM_SIZE);
+}
+
+static int edma_read_slot(struct edma_cc *ecc, unsigned slot,
+			   struct edmacc_param *param)
+{
+	slot = EDMA_CHAN_SLOT(slot);
+	if (slot >= ecc->num_slots)
+		return -EINVAL;
+	memcpy_fromio(param, ecc->base + PARM_OFFSET(slot), PARM_SIZE);
+
+	return 0;
+}
+
+/**
+ * edma_alloc_slot - allocate DMA parameter RAM
+ * @ecc: pointer to edma_cc struct
+ * @slot: specific slot to allocate; negative for "any unused slot"
+ *
+ * This allocates a parameter RAM slot, initializing it to hold a
+ * dummy transfer.  Slots allocated using this routine have not been
+ * mapped to a hardware DMA channel, and will normally be used by
+ * linking to them from a slot associated with a DMA channel.
+ *
+ * Normal use is to pass EDMA_SLOT_ANY as the @slot, but specific
+ * slots may be allocated on behalf of DSP firmware.
+ *
+ * Returns the number of the slot, else negative errno.
+ */
+static int edma_alloc_slot(struct edma_cc *ecc, int slot)
+{
+	if (slot >= 0) {
+		slot = EDMA_CHAN_SLOT(slot);
+		/* Requesting entry paRAM slot for a HW triggered channel. */
+		if (ecc->chmap_exist && slot < ecc->num_channels)
+			slot = EDMA_SLOT_ANY;
+	}
+
+	if (slot < 0) {
+		if (ecc->chmap_exist)
+			slot = 0;
+		else
+			slot = ecc->num_channels;
+		for (;;) {
+			slot = find_next_zero_bit(ecc->slot_inuse,
+						  ecc->num_slots,
+						  slot);
+			if (slot == ecc->num_slots)
+				return -ENOMEM;
+			if (!test_and_set_bit(slot, ecc->slot_inuse))
+				break;
+		}
+	} else if (slot >= ecc->num_slots) {
+		return -EINVAL;
+	} else if (test_and_set_bit(slot, ecc->slot_inuse)) {
+		return -EBUSY;
+	}
+
+	edma_write_slot(ecc, slot, &dummy_paramset);
+
+	return EDMA_CTLR_CHAN(ecc->id, slot);
+}
+
+static void edma_free_slot(struct edma_cc *ecc, unsigned slot)
+{
+	slot = EDMA_CHAN_SLOT(slot);
+	if (slot >= ecc->num_slots)
+		return;
+
+	edma_write_slot(ecc, slot, &dummy_paramset);
+	clear_bit(slot, ecc->slot_inuse);
+}
+
+/**
+ * edma_link - link one parameter RAM slot to another
+ * @ecc: pointer to edma_cc struct
+ * @from: parameter RAM slot originating the link
+ * @to: parameter RAM slot which is the link target
+ *
+ * The originating slot should not be part of any active DMA transfer.
+ */
+static void edma_link(struct edma_cc *ecc, unsigned from, unsigned to)
+{
+	if (unlikely(EDMA_CTLR(from) != EDMA_CTLR(to)))
+		dev_warn(ecc->dev, "Ignoring eDMA instance for linking\n");
+
+	from = EDMA_CHAN_SLOT(from);
+	to = EDMA_CHAN_SLOT(to);
+	if (from >= ecc->num_slots || to >= ecc->num_slots)
+		return;
+
+	edma_param_modify(ecc, PARM_LINK_BCNTRLD, from, 0xffff0000,
+			  PARM_OFFSET(to));
+}
+
+/**
+ * edma_get_position - returns the current transfer point
+ * @ecc: pointer to edma_cc struct
+ * @slot: parameter RAM slot being examined
+ * @dst:  true selects the dest position, false the source
+ *
+ * Returns the position of the current active slot
+ */
+static dma_addr_t edma_get_position(struct edma_cc *ecc, unsigned slot,
+				    bool dst)
+{
+	u32 offs;
+
+	slot = EDMA_CHAN_SLOT(slot);
+	offs = PARM_OFFSET(slot);
+	offs += dst ? PARM_DST : PARM_SRC;
+
+	return edma_read(ecc, offs);
+}
+
+/*
+ * Channels with event associations will be triggered by their hardware
+ * events, and channels without such associations will be triggered by
+ * software.  (At this writing there is no interface for using software
+ * triggers except with channels that don't support hardware triggers.)
+ */
+static void edma_start(struct edma_chan *echan)
+{
+	struct edma_cc *ecc = echan->ecc;
+	int channel = EDMA_CHAN_SLOT(echan->ch_num);
+	int j = (channel >> 5);
+	unsigned int mask = BIT(channel & 0x1f);
+
+	if (!echan->hw_triggered) {
+		/* EDMA channels without event association */
+		dev_dbg(ecc->dev, "ESR%d %08x\n", j,
+			edma_shadow0_read_array(ecc, SH_ESR, j));
+		edma_shadow0_write_array(ecc, SH_ESR, j, mask);
+	} else {
+		/* EDMA channel with event association */
+		dev_dbg(ecc->dev, "ER%d %08x\n", j,
+			edma_shadow0_read_array(ecc, SH_ER, j));
+		/* Clear any pending event or error */
+		edma_write_array(ecc, EDMA_ECR, j, mask);
+		edma_write_array(ecc, EDMA_EMCR, j, mask);
+		/* Clear any SER */
+		edma_shadow0_write_array(ecc, SH_SECR, j, mask);
+		edma_shadow0_write_array(ecc, SH_EESR, j, mask);
+		dev_dbg(ecc->dev, "EER%d %08x\n", j,
+			edma_shadow0_read_array(ecc, SH_EER, j));
+	}
+}
+
+static void edma_stop(struct edma_chan *echan)
+{
+	struct edma_cc *ecc = echan->ecc;
+	int channel = EDMA_CHAN_SLOT(echan->ch_num);
+	int j = (channel >> 5);
+	unsigned int mask = BIT(channel & 0x1f);
+
+	edma_shadow0_write_array(ecc, SH_EECR, j, mask);
+	edma_shadow0_write_array(ecc, SH_ECR, j, mask);
+	edma_shadow0_write_array(ecc, SH_SECR, j, mask);
+	edma_write_array(ecc, EDMA_EMCR, j, mask);
+
+	/* clear possibly pending completion interrupt */
+	edma_shadow0_write_array(ecc, SH_ICR, j, mask);
+
+	dev_dbg(ecc->dev, "EER%d %08x\n", j,
+		edma_shadow0_read_array(ecc, SH_EER, j));
+
+	/* REVISIT:  consider guarding against inappropriate event
+	 * chaining by overwriting with dummy_paramset.
+	 */
+}
+
+/*
+ * Temporarily disable EDMA hardware events on the specified channel,
+ * preventing them from triggering new transfers
+ */
+static void edma_pause(struct edma_chan *echan)
+{
+	int channel = EDMA_CHAN_SLOT(echan->ch_num);
+	unsigned int mask = BIT(channel & 0x1f);
+
+	edma_shadow0_write_array(echan->ecc, SH_EECR, channel >> 5, mask);
+}
+
+/* Re-enable EDMA hardware events on the specified channel.  */
+static void edma_resume(struct edma_chan *echan)
+{
+	int channel = EDMA_CHAN_SLOT(echan->ch_num);
+	unsigned int mask = BIT(channel & 0x1f);
+
+	edma_shadow0_write_array(echan->ecc, SH_EESR, channel >> 5, mask);
+}
+
+static void edma_trigger_channel(struct edma_chan *echan)
+{
+	struct edma_cc *ecc = echan->ecc;
+	int channel = EDMA_CHAN_SLOT(echan->ch_num);
+	unsigned int mask = BIT(channel & 0x1f);
+
+	edma_shadow0_write_array(ecc, SH_ESR, (channel >> 5), mask);
+
+	dev_dbg(ecc->dev, "ESR%d %08x\n", (channel >> 5),
+		edma_shadow0_read_array(ecc, SH_ESR, (channel >> 5)));
+}
+
+static void edma_clean_channel(struct edma_chan *echan)
+{
+	struct edma_cc *ecc = echan->ecc;
+	int channel = EDMA_CHAN_SLOT(echan->ch_num);
+	int j = (channel >> 5);
+	unsigned int mask = BIT(channel & 0x1f);
+
+	dev_dbg(ecc->dev, "EMR%d %08x\n", j, edma_read_array(ecc, EDMA_EMR, j));
+	edma_shadow0_write_array(ecc, SH_ECR, j, mask);
+	/* Clear the corresponding EMR bits */
+	edma_write_array(ecc, EDMA_EMCR, j, mask);
+	/* Clear any SER */
+	edma_shadow0_write_array(ecc, SH_SECR, j, mask);
+	edma_write(ecc, EDMA_CCERRCLR, BIT(16) | BIT(1) | BIT(0));
+}
+
+/* Move channel to a specific event queue */
+static void edma_assign_channel_eventq(struct edma_chan *echan,
+				       enum dma_event_q eventq_no)
+{
+	struct edma_cc *ecc = echan->ecc;
+	int channel = EDMA_CHAN_SLOT(echan->ch_num);
+	int bit = (channel & 0x7) * 4;
+
+	/* default to low priority queue */
+	if (eventq_no == EVENTQ_DEFAULT)
+		eventq_no = ecc->default_queue;
+	if (eventq_no >= ecc->num_tc)
+		return;
+
+	eventq_no &= 7;
+	edma_modify_array(ecc, EDMA_DMAQNUM, (channel >> 3), ~(0x7 << bit),
+			  eventq_no << bit);
+}
+
+static int edma_alloc_channel(struct edma_chan *echan,
+			      enum dma_event_q eventq_no)
+{
+	struct edma_cc *ecc = echan->ecc;
+	int channel = EDMA_CHAN_SLOT(echan->ch_num);
+
+	/* ensure access through shadow region 0 */
+	edma_or_array2(ecc, EDMA_DRAE, 0, channel >> 5, BIT(channel & 0x1f));
+
+	/* ensure no events are pending */
+	edma_stop(echan);
+
+	edma_setup_interrupt(echan, true);
+
+	edma_assign_channel_eventq(echan, eventq_no);
+
+	return 0;
+}
+
+static void edma_free_channel(struct edma_chan *echan)
+{
+	/* ensure no events are pending */
+	edma_stop(echan);
+	/* REVISIT should probably take out of shadow region 0 */
+	edma_setup_interrupt(echan, false);
+}
+
+static inline struct edma_cc *to_edma_cc(struct dma_device *d)
+{
+	return container_of(d, struct edma_cc, dma_slave);
+}
+
+static inline struct edma_chan *to_edma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct edma_chan, vchan.chan);
+}
+
+static inline struct edma_desc *to_edma_desc(struct dma_async_tx_descriptor *tx)
+{
+	return container_of(tx, struct edma_desc, vdesc.tx);
+}
+
+static void edma_desc_free(struct virt_dma_desc *vdesc)
+{
+	kfree(container_of(vdesc, struct edma_desc, vdesc));
+}
+
+/* Dispatch a queued descriptor to the controller (caller holds lock) */
+static void edma_execute(struct edma_chan *echan)
+{
+	struct edma_cc *ecc = echan->ecc;
+	struct virt_dma_desc *vdesc;
+	struct edma_desc *edesc;
+	struct device *dev = echan->vchan.chan.device->dev;
+	int i, j, left, nslots;
+
+	if (!echan->edesc) {
+		/* Setup is needed for the first transfer */
+		vdesc = vchan_next_desc(&echan->vchan);
+		if (!vdesc)
+			return;
+		list_del(&vdesc->node);
+		echan->edesc = to_edma_desc(&vdesc->tx);
+	}
+
+	edesc = echan->edesc;
+
+	/* Find out how many PaRAM sets are left to process */
+	left = edesc->pset_nr - edesc->processed;
+	nslots = min(MAX_NR_SG, left);
+	edesc->sg_len = 0;
+
+	/* Write descriptor PaRAM set(s) */
+	for (i = 0; i < nslots; i++) {
+		j = i + edesc->processed;
+		edma_write_slot(ecc, echan->slot[i], &edesc->pset[j].param);
+		edesc->sg_len += edesc->pset[j].len;
+		dev_vdbg(dev,
+			 "\n pset[%d]:\n"
+			 "  chnum\t%d\n"
+			 "  slot\t%d\n"
+			 "  opt\t%08x\n"
+			 "  src\t%08x\n"
+			 "  dst\t%08x\n"
+			 "  abcnt\t%08x\n"
+			 "  ccnt\t%08x\n"
+			 "  bidx\t%08x\n"
+			 "  cidx\t%08x\n"
+			 "  lkrld\t%08x\n",
+			 j, echan->ch_num, echan->slot[i],
+			 edesc->pset[j].param.opt,
+			 edesc->pset[j].param.src,
+			 edesc->pset[j].param.dst,
+			 edesc->pset[j].param.a_b_cnt,
+			 edesc->pset[j].param.ccnt,
+			 edesc->pset[j].param.src_dst_bidx,
+			 edesc->pset[j].param.src_dst_cidx,
+			 edesc->pset[j].param.link_bcntrld);
+		/* Link to the previous slot if not the last set */
+		if (i != (nslots - 1))
+			edma_link(ecc, echan->slot[i], echan->slot[i + 1]);
+	}
+
+	edesc->processed += nslots;
+
+	/*
+	 * If this is the last set in a group of SG-list transactions,
+	 * set up a link to the dummy slot.  All future events are then
+	 * absorbed, which is fine because the transfer is done.
+	 */
+	if (edesc->processed == edesc->pset_nr) {
+		if (edesc->cyclic)
+			edma_link(ecc, echan->slot[nslots - 1], echan->slot[1]);
+		else
+			edma_link(ecc, echan->slot[nslots - 1],
+				  echan->ecc->dummy_slot);
+	}
+
+	if (echan->missed) {
+		/*
+		 * This happens due to setup times between intermediate
+		 * transfers in long SG lists which have to be broken up into
+		 * transfers of MAX_NR_SG
+		 */
+		dev_dbg(dev, "missed event on channel %d\n", echan->ch_num);
+		edma_clean_channel(echan);
+		edma_stop(echan);
+		edma_start(echan);
+		edma_trigger_channel(echan);
+		echan->missed = 0;
+	} else if (edesc->processed <= MAX_NR_SG) {
+		dev_dbg(dev, "first transfer starting on channel %d\n",
+			echan->ch_num);
+		edma_start(echan);
+	} else {
+		dev_dbg(dev, "chan: %d: completed %d elements, resuming\n",
+			echan->ch_num, edesc->processed);
+		edma_resume(echan);
+	}
+}
+
+static int edma_terminate_all(struct dma_chan *chan)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&echan->vchan.lock, flags);
+
+	/*
+	 * Stop DMA activity: we assume the callback will not be called
+	 * after this function returns (even if it does, it will see
+	 * echan->edesc is NULL and exit).
+	 */
+	if (echan->edesc) {
+		edma_stop(echan);
+		/* Move the cyclic channel back to default queue */
+		if (!echan->tc && echan->edesc->cyclic)
+			edma_assign_channel_eventq(echan, EVENTQ_DEFAULT);
+
+		vchan_terminate_vdesc(&echan->edesc->vdesc);
+		echan->edesc = NULL;
+	}
+
+	vchan_get_all_descriptors(&echan->vchan, &head);
+	spin_unlock_irqrestore(&echan->vchan.lock, flags);
+	vchan_dma_desc_free_list(&echan->vchan, &head);
+
+	return 0;
+}
+
+static void edma_synchronize(struct dma_chan *chan)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+
+	vchan_synchronize(&echan->vchan);
+}
+
+static int edma_slave_config(struct dma_chan *chan,
+	struct dma_slave_config *cfg)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+
+	if (cfg->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES ||
+	    cfg->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)
+		return -EINVAL;
+
+	if (cfg->src_maxburst > chan->device->max_burst ||
+	    cfg->dst_maxburst > chan->device->max_burst)
+		return -EINVAL;
+
+	memcpy(&echan->cfg, cfg, sizeof(echan->cfg));
+
+	return 0;
+}
+
+static int edma_dma_pause(struct dma_chan *chan)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+
+	if (!echan->edesc)
+		return -EINVAL;
+
+	edma_pause(echan);
+	return 0;
+}
+
+static int edma_dma_resume(struct dma_chan *chan)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+
+	edma_resume(echan);
+	return 0;
+}
+
+/*
+ * A PaRAM set configuration abstraction, used by the various prep calls
+ * @chan: Channel whose PaRAM set we're configuring
+ * @epset: PaRAM set to initialize and set up
+ * @src_addr: Source address of the DMA
+ * @dst_addr: Destination address of the DMA
+ * @burst: In units of acnt, how much to send
+ * @acnt: ACNT (bytes per element) of the transfer
+ * @dma_length: Total length of the DMA transfer
+ * @direction: Direction of the transfer
+ */
+static int edma_config_pset(struct dma_chan *chan, struct edma_pset *epset,
+			    dma_addr_t src_addr, dma_addr_t dst_addr, u32 burst,
+			    unsigned int acnt, unsigned int dma_length,
+			    enum dma_transfer_direction direction)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+	struct device *dev = chan->device->dev;
+	struct edmacc_param *param = &epset->param;
+	int bcnt, ccnt, cidx;
+	int src_bidx, dst_bidx, src_cidx, dst_cidx;
+	int absync;
+
+	/* src/dst_maxburst == 0 is the same case as src/dst_maxburst == 1 */
+	if (!burst)
+		burst = 1;
+	/*
+	 * If the maxburst is equal to the fifo width, use
+	 * A-synced transfers. This allows for large contiguous
+	 * buffer transfers using only one PaRAM set.
+	 */
+	if (burst == 1) {
+		/*
+		 * For the A-sync case, bcnt and ccnt are the remainder
+		 * and quotient respectively of (dma_length / acnt)
+		 * divided by (SZ_64K - 1), so that if bcnt would
+		 * overflow we still have ccnt to use.
+		 * Note: bcntrld is used in A-sync transfers only, and
+		 * it only matters when sg_dma_len(sg) >= SZ_64K.  In
+		 * that case the first frame uses the remainder below
+		 * as its bcnt, and every successive frame uses
+		 * SZ_64K - 1; this is assured by bcntrld = 0xffff at
+		 * the end of this function.
+		 */
+		absync = false;
+		ccnt = dma_length / acnt / (SZ_64K - 1);
+		bcnt = dma_length / acnt - ccnt * (SZ_64K - 1);
+		/*
+		 * If bcnt is non-zero, we have a remainder and hence an
+		 * extra frame to transfer, so increment ccnt.
+		 */
+		if (bcnt)
+			ccnt++;
+		else
+			bcnt = SZ_64K - 1;
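+		/*
+		 * Example: dma_length = 131072 and acnt = 1 gives 131072
+		 * elements: ccnt = 131072 / 65535 = 2 and bcnt = 2, so ccnt
+		 * is bumped to 3.  The first frame moves bcnt = 2 elements
+		 * and bcntrld reloads BCNT to 65535 for the remaining two
+		 * frames (2 + 65535 + 65535 = 131072).
+		 */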
+		cidx = acnt;
+	} else {
+		/*
+		 * If maxburst is greater than the fifo address_width,
+		 * use AB-synced transfers where A count is the fifo
+		 * address_width and B count is the maxburst. In this
+		 * case, we are limited to transfers of C count frames
+		 * of (address_width * maxburst) where C count is limited
+		 * to SZ_64K-1. This places an upper bound on the length
+		 * of an SG segment that can be handled.
+		 */
+		absync = true;
+		bcnt = burst;
+		ccnt = dma_length / (acnt * bcnt);
+		if (ccnt > (SZ_64K - 1)) {
+			dev_err(dev, "Exceeded max SG segment size\n");
+			return -EINVAL;
+		}
+		cidx = acnt * bcnt;
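+		/*
+		 * Example: acnt = 4 and burst = 8 moves 32 bytes per frame,
+		 * limiting one SG segment to 65535 * 32 bytes (~2 MiB).
+		 */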
+	}
+
+	epset->len = dma_length;
+
+	if (direction == DMA_MEM_TO_DEV) {
+		src_bidx = acnt;
+		src_cidx = cidx;
+		dst_bidx = 0;
+		dst_cidx = 0;
+		epset->addr = src_addr;
+	} else if (direction == DMA_DEV_TO_MEM)  {
+		src_bidx = 0;
+		src_cidx = 0;
+		dst_bidx = acnt;
+		dst_cidx = cidx;
+		epset->addr = dst_addr;
+	} else if (direction == DMA_MEM_TO_MEM)  {
+		src_bidx = acnt;
+		src_cidx = cidx;
+		dst_bidx = acnt;
+		dst_cidx = cidx;
+	} else {
+		dev_err(dev, "%s: direction not implemented yet\n", __func__);
+		return -EINVAL;
+	}
+
+	param->opt = EDMA_TCC(EDMA_CHAN_SLOT(echan->ch_num));
+	/* Configure A or AB synchronized transfers */
+	if (absync)
+		param->opt |= SYNCDIM;
+
+	param->src = src_addr;
+	param->dst = dst_addr;
+
+	param->src_dst_bidx = (dst_bidx << 16) | src_bidx;
+	param->src_dst_cidx = (dst_cidx << 16) | src_cidx;
+
+	param->a_b_cnt = bcnt << 16 | acnt;
+	param->ccnt = ccnt;
+	/*
+	 * The bcntrld auto-reload is only required in the A-sync case,
+	 * where the reload value is always SZ_64K - 1.  The link field is
+	 * initialized to 0xffff (no link) here and populated later by
+	 * edma_execute.
+	 */
+	param->link_bcntrld = 0xffffffff;
+	return absync;
+}
+
+static struct dma_async_tx_descriptor *edma_prep_slave_sg(
+	struct dma_chan *chan, struct scatterlist *sgl,
+	unsigned int sg_len, enum dma_transfer_direction direction,
+	unsigned long tx_flags, void *context)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+	struct device *dev = chan->device->dev;
+	struct edma_desc *edesc;
+	dma_addr_t src_addr = 0, dst_addr = 0;
+	enum dma_slave_buswidth dev_width;
+	u32 burst;
+	struct scatterlist *sg;
+	int i, nslots, ret;
+
+	if (unlikely(!echan || !sgl || !sg_len))
+		return NULL;
+
+	if (direction == DMA_DEV_TO_MEM) {
+		src_addr = echan->cfg.src_addr;
+		dev_width = echan->cfg.src_addr_width;
+		burst = echan->cfg.src_maxburst;
+	} else if (direction == DMA_MEM_TO_DEV) {
+		dst_addr = echan->cfg.dst_addr;
+		dev_width = echan->cfg.dst_addr_width;
+		burst = echan->cfg.dst_maxburst;
+	} else {
+		dev_err(dev, "%s: bad direction: %d\n", __func__, direction);
+		return NULL;
+	}
+
+	if (dev_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) {
+		dev_err(dev, "%s: Undefined slave buswidth\n", __func__);
+		return NULL;
+	}
+
+	edesc = kzalloc(struct_size(edesc, pset, sg_len), GFP_ATOMIC);
+	if (!edesc)
+		return NULL;
+
+	edesc->pset_nr = sg_len;
+	edesc->residue = 0;
+	edesc->direction = direction;
+	edesc->echan = echan;
+
+	/* Allocate a PaRAM slot, if needed */
+	nslots = min_t(unsigned, MAX_NR_SG, sg_len);
+
+	for (i = 0; i < nslots; i++) {
+		if (echan->slot[i] < 0) {
+			echan->slot[i] =
+				edma_alloc_slot(echan->ecc, EDMA_SLOT_ANY);
+			if (echan->slot[i] < 0) {
+				kfree(edesc);
+				dev_err(dev, "%s: Failed to allocate slot\n",
+					__func__);
+				return NULL;
+			}
+		}
+	}
+
+	/* Configure PaRAM sets for each SG */
+	for_each_sg(sgl, sg, sg_len, i) {
+		/* Get address for each SG */
+		if (direction == DMA_DEV_TO_MEM)
+			dst_addr = sg_dma_address(sg);
+		else
+			src_addr = sg_dma_address(sg);
+
+		ret = edma_config_pset(chan, &edesc->pset[i], src_addr,
+				       dst_addr, burst, dev_width,
+				       sg_dma_len(sg), direction);
+		if (ret < 0) {
+			kfree(edesc);
+			return NULL;
+		}
+
+		edesc->absync = ret;
+		edesc->residue += sg_dma_len(sg);
+
+		if (i == sg_len - 1)
+			/* Enable completion interrupt */
+			edesc->pset[i].param.opt |= TCINTEN;
+		else if (!((i+1) % MAX_NR_SG))
+			/*
+			 * Enable early completion interrupt for the
+			 * intermediate set.  The driver is then notified when
+			 * the paRAM set is submitted to the TC, which allows
+			 * more time to set up the next set of slots.
+			 */
+			edesc->pset[i].param.opt |= (TCINTEN | TCCMODE);
+	}
+	edesc->residue_stat = edesc->residue;
+
+	return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
+}
+
+static struct dma_async_tx_descriptor *edma_prep_dma_memcpy(
+	struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+	size_t len, unsigned long tx_flags)
+{
+	int ret, nslots;
+	struct edma_desc *edesc;
+	struct device *dev = chan->device->dev;
+	struct edma_chan *echan = to_edma_chan(chan);
+	unsigned int width, pset_len, array_size;
+
+	if (unlikely(!echan || !len))
+		return NULL;
+
+	/* Align the array size (acnt block) with the transfer properties */
+	switch (__ffs((src | dest | len))) {
+	case 0:
+		array_size = SZ_32K - 1;
+		break;
+	case 1:
+		array_size = SZ_32K - 2;
+		break;
+	default:
+		array_size = SZ_32K - 4;
+		break;
+	}
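+	/*
+	 * array_size is now the largest ACNT below 32K that preserves the
+	 * common alignment of src, dest and len (__ffs of their OR): byte,
+	 * 16-bit and 32-bit aligned transfers use 32767, 32766 and 32764.
+	 */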
+
+	if (len < SZ_64K) {
+		/*
+		 * Transfer size less than 64K can be handled with one paRAM
+		 * slot and with one burst.
+		 * ACNT = length
+		 */
+		width = len;
+		pset_len = len;
+		nslots = 1;
+	} else {
+		/*
+		 * Transfer sizes of 64K or more are handled with a maximum
+		 * of two paRAM slots:
+		 * slot1: (full_length / 32767) bursts of 32767 bytes.
+		 *	  ACNT = 32767, length1: (full_length / 32767) * 32767
+		 * slot2: the remaining amount of data after slot1.
+		 *	  ACNT = full_length - length1, length2 = ACNT
+		 *
+		 * When full_length is a multiple of 32767, a single slot
+		 * can complete the transfer.
+		 */
+		width = array_size;
+		pset_len = rounddown(len, width);
+		/* One slot is enough when the length is a multiple of the array size */
+		if (unlikely(pset_len == len))
+			nslots = 1;
+		else
+			nslots = 2;
+	}
+
+	edesc = kzalloc(struct_size(edesc, pset, nslots), GFP_ATOMIC);
+	if (!edesc)
+		return NULL;
+
+	edesc->pset_nr = nslots;
+	edesc->residue = edesc->residue_stat = len;
+	edesc->direction = DMA_MEM_TO_MEM;
+	edesc->echan = echan;
+
+	ret = edma_config_pset(chan, &edesc->pset[0], src, dest, 1,
+			       width, pset_len, DMA_MEM_TO_MEM);
+	if (ret < 0) {
+		kfree(edesc);
+		return NULL;
+	}
+
+	edesc->absync = ret;
+
+	edesc->pset[0].param.opt |= ITCCHEN;
+	if (nslots == 1) {
+		/* Enable transfer complete interrupt */
+		edesc->pset[0].param.opt |= TCINTEN;
+	} else {
+		/* Enable transfer complete chaining for the first slot */
+		edesc->pset[0].param.opt |= TCCHEN;
+
+		if (echan->slot[1] < 0) {
+			echan->slot[1] = edma_alloc_slot(echan->ecc,
+							 EDMA_SLOT_ANY);
+			if (echan->slot[1] < 0) {
+				kfree(edesc);
+				dev_err(dev, "%s: Failed to allocate slot\n",
+					__func__);
+				return NULL;
+			}
+		}
+		dest += pset_len;
+		src += pset_len;
+		pset_len = width = len % array_size;
+
+		ret = edma_config_pset(chan, &edesc->pset[1], src, dest, 1,
+				       width, pset_len, DMA_MEM_TO_MEM);
+		if (ret < 0) {
+			kfree(edesc);
+			return NULL;
+		}
+
+		edesc->pset[1].param.opt |= ITCCHEN;
+		edesc->pset[1].param.opt |= TCINTEN;
+	}
+
+	return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
+}
+
+static struct dma_async_tx_descriptor *edma_prep_dma_cyclic(
+	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+	size_t period_len, enum dma_transfer_direction direction,
+	unsigned long tx_flags)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+	struct device *dev = chan->device->dev;
+	struct edma_desc *edesc;
+	dma_addr_t src_addr, dst_addr;
+	enum dma_slave_buswidth dev_width;
+	bool use_intermediate = false;
+	u32 burst;
+	int i, ret, nslots;
+
+	if (unlikely(!echan || !buf_len || !period_len))
+		return NULL;
+
+	if (direction == DMA_DEV_TO_MEM) {
+		src_addr = echan->cfg.src_addr;
+		dst_addr = buf_addr;
+		dev_width = echan->cfg.src_addr_width;
+		burst = echan->cfg.src_maxburst;
+	} else if (direction == DMA_MEM_TO_DEV) {
+		src_addr = buf_addr;
+		dst_addr = echan->cfg.dst_addr;
+		dev_width = echan->cfg.dst_addr_width;
+		burst = echan->cfg.dst_maxburst;
+	} else {
+		dev_err(dev, "%s: bad direction: %d\n", __func__, direction);
+		return NULL;
+	}
+
+	if (dev_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) {
+		dev_err(dev, "%s: Undefined slave buswidth\n", __func__);
+		return NULL;
+	}
+
+	if (unlikely(buf_len % period_len)) {
+		dev_err(dev, "Buffer length should be a multiple of the period length\n");
+		return NULL;
+	}
+
+	nslots = (buf_len / period_len) + 1;
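+	/*
+	 * The extra slot beyond the number of periods makes the descriptor
+	 * circular: the last pset is a copy of pset[0] and edma_execute()
+	 * links it back to slot[1].
+	 */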
+
+	/*
+	 * Cyclic DMA users such as audio cannot tolerate delays introduced
+	 * by cases where the number of periods is more than the maximum
+	 * number of SGs the EDMA driver can handle at a time. For DMA types
+	 * such as Slave SGs, such delays are tolerable and synchronized,
+	 * but the synchronization is difficult to achieve with Cyclic and
+	 * cannot be guaranteed, so we error out early.
+	 */
+	if (nslots > MAX_NR_SG) {
+		/*
+		 * If the burst and period sizes are the same, we can put
+		 * the full buffer into a single period and activate
+		 * intermediate interrupts. This will produce interrupts
+		 * after each burst, which is also after each desired period.
+		 */
+		if (burst == period_len) {
+			period_len = buf_len;
+			nslots = 2;
+			use_intermediate = true;
+		} else {
+			return NULL;
+		}
+	}
+
+	edesc = kzalloc(struct_size(edesc, pset, nslots), GFP_ATOMIC);
+	if (!edesc)
+		return NULL;
+
+	edesc->cyclic = 1;
+	edesc->pset_nr = nslots;
+	edesc->residue = edesc->residue_stat = buf_len;
+	edesc->direction = direction;
+	edesc->echan = echan;
+
+	dev_dbg(dev, "%s: channel=%d nslots=%d period_len=%zu buf_len=%zu\n",
+		__func__, echan->ch_num, nslots, period_len, buf_len);
+
+	for (i = 0; i < nslots; i++) {
+		/* Allocate a PaRAM slot, if needed */
+		if (echan->slot[i] < 0) {
+			echan->slot[i] =
+				edma_alloc_slot(echan->ecc, EDMA_SLOT_ANY);
+			if (echan->slot[i] < 0) {
+				kfree(edesc);
+				dev_err(dev, "%s: Failed to allocate slot\n",
+					__func__);
+				return NULL;
+			}
+		}
+
+		if (i == nslots - 1) {
+			memcpy(&edesc->pset[i], &edesc->pset[0],
+			       sizeof(edesc->pset[0]));
+			break;
+		}
+
+		ret = edma_config_pset(chan, &edesc->pset[i], src_addr,
+				       dst_addr, burst, dev_width, period_len,
+				       direction);
+		if (ret < 0) {
+			kfree(edesc);
+			return NULL;
+		}
+
+		if (direction == DMA_DEV_TO_MEM)
+			dst_addr += period_len;
+		else
+			src_addr += period_len;
+
+		dev_vdbg(dev, "%s: Configure period %d of buf:\n", __func__, i);
+		dev_vdbg(dev,
+			"\n pset[%d]:\n"
+			"  chnum\t%d\n"
+			"  slot\t%d\n"
+			"  opt\t%08x\n"
+			"  src\t%08x\n"
+			"  dst\t%08x\n"
+			"  abcnt\t%08x\n"
+			"  ccnt\t%08x\n"
+			"  bidx\t%08x\n"
+			"  cidx\t%08x\n"
+			"  lkrld\t%08x\n",
+			i, echan->ch_num, echan->slot[i],
+			edesc->pset[i].param.opt,
+			edesc->pset[i].param.src,
+			edesc->pset[i].param.dst,
+			edesc->pset[i].param.a_b_cnt,
+			edesc->pset[i].param.ccnt,
+			edesc->pset[i].param.src_dst_bidx,
+			edesc->pset[i].param.src_dst_cidx,
+			edesc->pset[i].param.link_bcntrld);
+
+		edesc->absync = ret;
+
+		/*
+		 * Enable period interrupt only if it is requested
+		 */
+		if (tx_flags & DMA_PREP_INTERRUPT) {
+			edesc->pset[i].param.opt |= TCINTEN;
+
+			/* Also enable intermediate interrupts if necessary */
+			if (use_intermediate)
+				edesc->pset[i].param.opt |= ITCINTEN;
+		}
+	}
+
+	/* Place the cyclic channel to highest priority queue */
+	if (!echan->tc)
+		edma_assign_channel_eventq(echan, EVENTQ_0);
+
+	return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags);
+}
+
+static void edma_completion_handler(struct edma_chan *echan)
+{
+	struct device *dev = echan->vchan.chan.device->dev;
+	struct edma_desc *edesc;
+
+	spin_lock(&echan->vchan.lock);
+	edesc = echan->edesc;
+	if (edesc) {
+		if (edesc->cyclic) {
+			vchan_cyclic_callback(&edesc->vdesc);
+			spin_unlock(&echan->vchan.lock);
+			return;
+		} else if (edesc->processed == edesc->pset_nr) {
+			edesc->residue = 0;
+			edma_stop(echan);
+			vchan_cookie_complete(&edesc->vdesc);
+			echan->edesc = NULL;
+
+			dev_dbg(dev, "Transfer completed on channel %d\n",
+				echan->ch_num);
+		} else {
+			dev_dbg(dev, "Sub transfer completed on channel %d\n",
+				echan->ch_num);
+
+			edma_pause(echan);
+
+			/* Update statistics for tx_status */
+			edesc->residue -= edesc->sg_len;
+			edesc->residue_stat = edesc->residue;
+			edesc->processed_stat = edesc->processed;
+		}
+		edma_execute(echan);
+	}
+
+	spin_unlock(&echan->vchan.lock);
+}
+
+/* eDMA interrupt handler */
+static irqreturn_t dma_irq_handler(int irq, void *data)
+{
+	struct edma_cc *ecc = data;
+	int ctlr;
+	u32 sh_ier;
+	u32 sh_ipr;
+	u32 bank;
+
+	ctlr = ecc->id;
+	if (ctlr < 0)
+		return IRQ_NONE;
+
+	dev_vdbg(ecc->dev, "dma_irq_handler\n");
+
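+	/*
+	 * Bank 0 of the interrupt pending/enable registers covers channels
+	 * 0-31 and bank 1 covers channels 32-63; the channel number is
+	 * reconstructed below as (bank << 5) | slot.
+	 */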
+	sh_ipr = edma_shadow0_read_array(ecc, SH_IPR, 0);
+	if (!sh_ipr) {
+		sh_ipr = edma_shadow0_read_array(ecc, SH_IPR, 1);
+		if (!sh_ipr)
+			return IRQ_NONE;
+		sh_ier = edma_shadow0_read_array(ecc, SH_IER, 1);
+		bank = 1;
+	} else {
+		sh_ier = edma_shadow0_read_array(ecc, SH_IER, 0);
+		bank = 0;
+	}
+
+	do {
+		u32 slot;
+		u32 channel;
+
+		slot = __ffs(sh_ipr);
+		sh_ipr &= ~(BIT(slot));
+
+		if (sh_ier & BIT(slot)) {
+			channel = (bank << 5) | slot;
+			/* Clear the corresponding IPR bits */
+			edma_shadow0_write_array(ecc, SH_ICR, bank, BIT(slot));
+			edma_completion_handler(&ecc->slave_chans[channel]);
+		}
+	} while (sh_ipr);
+
+	edma_shadow0_write(ecc, SH_IEVAL, 1);
+	return IRQ_HANDLED;
+}
+
+static void edma_error_handler(struct edma_chan *echan)
+{
+	struct edma_cc *ecc = echan->ecc;
+	struct device *dev = echan->vchan.chan.device->dev;
+	struct edmacc_param p;
+	int err;
+
+	if (!echan->edesc)
+		return;
+
+	spin_lock(&echan->vchan.lock);
+
+	err = edma_read_slot(ecc, echan->slot[0], &p);
+
+	/*
+	 * Defer the (re)issue based on the missed flag; it is sure to be
+	 * acted upon because either:
+	 * (1) we finished transmitting an intermediate slot and
+	 *     edma_execute is coming up, or
+	 * (2) we finished the current transfer and the issue path will
+	 *     call edma_execute.
+	 *
+	 * Important note: issuing here can be dangerous and lead to some
+	 * nasty recursion when we are in a NULL slot, so we avoid doing
+	 * so and just set the missed flag.
+	 */
+	if (err || (p.a_b_cnt == 0 && p.ccnt == 0)) {
+		dev_dbg(dev, "Error on null slot, setting miss\n");
+		echan->missed = 1;
+	} else {
+		/*
+		 * The slot is already programmed but the event got
+		 * missed, so it's safe to issue it here.
+		 */
+		dev_dbg(dev, "Missed event, TRIGGERING\n");
+		edma_clean_channel(echan);
+		edma_stop(echan);
+		edma_start(echan);
+		edma_trigger_channel(echan);
+	}
+	spin_unlock(&echan->vchan.lock);
+}
+
+static inline bool edma_error_pending(struct edma_cc *ecc)
+{
+	if (edma_read_array(ecc, EDMA_EMR, 0) ||
+	    edma_read_array(ecc, EDMA_EMR, 1) ||
+	    edma_read(ecc, EDMA_QEMR) || edma_read(ecc, EDMA_CCERR))
+		return true;
+
+	return false;
+}
+
+/* eDMA error interrupt handler */
+static irqreturn_t dma_ccerr_handler(int irq, void *data)
+{
+	struct edma_cc *ecc = data;
+	int i, j;
+	int ctlr;
+	unsigned int cnt = 0;
+	unsigned int val;
+
+	ctlr = ecc->id;
+	if (ctlr < 0)
+		return IRQ_NONE;
+
+	dev_vdbg(ecc->dev, "dma_ccerr_handler\n");
+
+	if (!edma_error_pending(ecc)) {
+		/*
+		 * The registers indicate no pending error event but the irq
+		 * handler has been called.
+		 * Ask eDMA to re-evaluate the error registers.
+		 */
+		dev_err(ecc->dev, "%s: Error interrupt without error event!\n",
+			__func__);
+		edma_write(ecc, EDMA_EEVAL, 1);
+		return IRQ_NONE;
+	}
+
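+	/*
+	 * Keep re-reading the error registers in case new errors are raised
+	 * while the current ones are being serviced, but give up after ten
+	 * passes to avoid getting stuck here.
+	 */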
+	while (1) {
+		/* Event missed register(s) */
+		for (j = 0; j < 2; j++) {
+			unsigned long emr;
+
+			val = edma_read_array(ecc, EDMA_EMR, j);
+			if (!val)
+				continue;
+
+			dev_dbg(ecc->dev, "EMR%d 0x%08x\n", j, val);
+			emr = val;
+			for (i = find_next_bit(&emr, 32, 0); i < 32;
+			     i = find_next_bit(&emr, 32, i + 1)) {
+				int k = (j << 5) + i;
+
+				/* Clear the corresponding EMR bits */
+				edma_write_array(ecc, EDMA_EMCR, j, BIT(i));
+				/* Clear any SER */
+				edma_shadow0_write_array(ecc, SH_SECR, j,
+							 BIT(i));
+				edma_error_handler(&ecc->slave_chans[k]);
+			}
+		}
+
+		val = edma_read(ecc, EDMA_QEMR);
+		if (val) {
+			dev_dbg(ecc->dev, "QEMR 0x%02x\n", val);
+			/* Not reported, just clear the interrupt reason. */
+			edma_write(ecc, EDMA_QEMCR, val);
+			edma_shadow0_write(ecc, SH_QSECR, val);
+		}
+
+		val = edma_read(ecc, EDMA_CCERR);
+		if (val) {
+			dev_warn(ecc->dev, "CCERR 0x%08x\n", val);
+			/* Not reported, just clear the interrupt reason. */
+			edma_write(ecc, EDMA_CCERRCLR, val);
+		}
+
+		if (!edma_error_pending(ecc))
+			break;
+		cnt++;
+		if (cnt > 10)
+			break;
+	}
+	edma_write(ecc, EDMA_EEVAL, 1);
+	return IRQ_HANDLED;
+}
+
+/* Alloc channel resources */
+static int edma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+	struct edma_cc *ecc = echan->ecc;
+	struct device *dev = ecc->dev;
+	enum dma_event_q eventq_no = EVENTQ_DEFAULT;
+	int ret;
+
+	if (echan->tc) {
+		eventq_no = echan->tc->id;
+	} else if (ecc->tc_list) {
+		/* memcpy channel */
+		echan->tc = &ecc->tc_list[ecc->info->default_queue];
+		eventq_no = echan->tc->id;
+	}
+
+	ret = edma_alloc_channel(echan, eventq_no);
+	if (ret)
+		return ret;
+
+	echan->slot[0] = edma_alloc_slot(ecc, echan->ch_num);
+	if (echan->slot[0] < 0) {
+		dev_err(dev, "Entry slot allocation failed for channel %u\n",
+			EDMA_CHAN_SLOT(echan->ch_num));
+		ret = echan->slot[0];
+		goto err_slot;
+	}
+
+	/* Set up channel -> slot mapping for the entry slot */
+	edma_set_chmap(echan, echan->slot[0]);
+	echan->alloced = true;
+
+	dev_dbg(dev, "Got eDMA channel %d for virt channel %d (%s trigger)\n",
+		EDMA_CHAN_SLOT(echan->ch_num), chan->chan_id,
+		echan->hw_triggered ? "HW" : "SW");
+
+	return 0;
+
+err_slot:
+	edma_free_channel(echan);
+	return ret;
+}
+
+/* Free channel resources */
+static void edma_free_chan_resources(struct dma_chan *chan)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+	struct device *dev = echan->ecc->dev;
+	int i;
+
+	/* Terminate transfers */
+	edma_stop(echan);
+
+	vchan_free_chan_resources(&echan->vchan);
+
+	/* Free EDMA PaRAM slots */
+	for (i = 0; i < EDMA_MAX_SLOTS; i++) {
+		if (echan->slot[i] >= 0) {
+			edma_free_slot(echan->ecc, echan->slot[i]);
+			echan->slot[i] = -1;
+		}
+	}
+
+	/* Set entry slot to the dummy slot */
+	edma_set_chmap(echan, echan->ecc->dummy_slot);
+
+	/* Free EDMA channel */
+	if (echan->alloced) {
+		edma_free_channel(echan);
+		echan->alloced = false;
+	}
+
+	echan->tc = NULL;
+	echan->hw_triggered = false;
+
+	dev_dbg(dev, "Free eDMA channel %d for virt channel %d\n",
+		EDMA_CHAN_SLOT(echan->ch_num), chan->chan_id);
+}
+
+/* Send pending descriptor to hardware */
+static void edma_issue_pending(struct dma_chan *chan)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&echan->vchan.lock, flags);
+	if (vchan_issue_pending(&echan->vchan) && !echan->edesc)
+		edma_execute(echan);
+	spin_unlock_irqrestore(&echan->vchan.lock, flags);
+}
+
+/*
+ * This limit exists to avoid a possible infinite loop while waiting for
+ * proof that a particular transfer is completed.  This limit can be hit if
+ * there are large bursts to/from slow devices or if the CPU is never able
+ * to catch the DMA hardware idle.  On an AM335x transferring 48 bytes from
+ * the UART RX-FIFO, as many as 55 loops have been seen.
+ */
+#define EDMA_MAX_TR_WAIT_LOOPS 1000
+
+static u32 edma_residue(struct edma_desc *edesc)
+{
+	bool dst = edesc->direction == DMA_DEV_TO_MEM;
+	int loop_count = EDMA_MAX_TR_WAIT_LOOPS;
+	struct edma_chan *echan = edesc->echan;
+	struct edma_pset *pset = edesc->pset;
+	dma_addr_t done, pos;
+	int i;
+
+	/*
+	 * We always read the dst/src position from the channel's first
+	 * PaRAM slot (slot[0]); that is the one which is active now.
+	 */
+	pos = edma_get_position(echan->ecc, echan->slot[0], dst);
+
+	/*
+	 * "pos" may represent a transfer request that is still being
+	 * processed by the EDMACC or EDMATC. We will busy wait until
+	 * any one of the situations occurs:
+	 *   1. the DMA hardware is idle
+	 *   2. a new transfer request is setup
+	 *   3. we hit the loop limit
+	 */
+	while (edma_read(echan->ecc, EDMA_CCSTAT) & EDMA_CCSTAT_ACTV) {
+		/* check if a new transfer request is setup */
+		if (edma_get_position(echan->ecc,
+				      echan->slot[0], dst) != pos) {
+			break;
+		}
+
+		if (!--loop_count) {
+			dev_dbg_ratelimited(echan->vchan.chan.device->dev,
+				"%s: timeout waiting for PaRAM update\n",
+				__func__);
+			break;
+		}
+
+		cpu_relax();
+	}
+
+	/*
+	 * Cyclic is simple. Just subtract pset[0].addr from pos.
+	 *
+	 * We never update edesc->residue in the cyclic case, so we
+	 * can tell the remaining room to the end of the circular
+	 * buffer.
+	 */
+	if (edesc->cyclic) {
+		done = pos - pset->addr;
+		edesc->residue_stat = edesc->residue - done;
+		return edesc->residue_stat;
+	}
+
+	/*
+	 * For SG operation we catch up with the last processed
+	 * status.
+	 */
+	pset += edesc->processed_stat;
+
+	for (i = edesc->processed_stat; i < edesc->processed; i++, pset++) {
+		/*
+		 * If we are inside this pset address range, we know
+		 * this is the active one. Get the current delta and
+		 * stop walking the psets.
+		 */
+		if (pos >= pset->addr && pos < pset->addr + pset->len)
+			return edesc->residue_stat - (pos - pset->addr);
+
+		/* Otherwise mark it done and update residue_stat. */
+		edesc->processed_stat++;
+		edesc->residue_stat -= pset->len;
+	}
+	return edesc->residue_stat;
+}
+
+/* Check request completion status */
+static enum dma_status edma_tx_status(struct dma_chan *chan,
+				      dma_cookie_t cookie,
+				      struct dma_tx_state *txstate)
+{
+	struct edma_chan *echan = to_edma_chan(chan);
+	struct virt_dma_desc *vdesc;
+	enum dma_status ret;
+	unsigned long flags;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE || !txstate)
+		return ret;
+
+	spin_lock_irqsave(&echan->vchan.lock, flags);
+	if (echan->edesc && echan->edesc->vdesc.tx.cookie == cookie)
+		txstate->residue = edma_residue(echan->edesc);
+	else if ((vdesc = vchan_find_desc(&echan->vchan, cookie)))
+		txstate->residue = to_edma_desc(&vdesc->tx)->residue;
+	spin_unlock_irqrestore(&echan->vchan.lock, flags);
+
+	return ret;
+}
+
+static bool edma_is_memcpy_channel(int ch_num, s32 *memcpy_channels)
+{
+	if (!memcpy_channels)
+		return false;
+	while (*memcpy_channels != -1) {
+		if (*memcpy_channels == ch_num)
+			return true;
+		memcpy_channels++;
+	}
+	return false;
+}
+
+#define EDMA_DMA_BUSWIDTHS	(BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
+static void edma_dma_init(struct edma_cc *ecc, bool legacy_mode)
+{
+	struct dma_device *s_ddev = &ecc->dma_slave;
+	struct dma_device *m_ddev = NULL;
+	s32 *memcpy_channels = ecc->info->memcpy_channels;
+	int i, j;
+
+	dma_cap_zero(s_ddev->cap_mask);
+	dma_cap_set(DMA_SLAVE, s_ddev->cap_mask);
+	dma_cap_set(DMA_CYCLIC, s_ddev->cap_mask);
+	if (ecc->legacy_mode && !memcpy_channels) {
+		dev_warn(ecc->dev,
+			 "Legacy memcpy is enabled, things might not work\n");
+
+		dma_cap_set(DMA_MEMCPY, s_ddev->cap_mask);
+		s_ddev->device_prep_dma_memcpy = edma_prep_dma_memcpy;
+		s_ddev->directions = BIT(DMA_MEM_TO_MEM);
+	}
+
+	s_ddev->device_prep_slave_sg = edma_prep_slave_sg;
+	s_ddev->device_prep_dma_cyclic = edma_prep_dma_cyclic;
+	s_ddev->device_alloc_chan_resources = edma_alloc_chan_resources;
+	s_ddev->device_free_chan_resources = edma_free_chan_resources;
+	s_ddev->device_issue_pending = edma_issue_pending;
+	s_ddev->device_tx_status = edma_tx_status;
+	s_ddev->device_config = edma_slave_config;
+	s_ddev->device_pause = edma_dma_pause;
+	s_ddev->device_resume = edma_dma_resume;
+	s_ddev->device_terminate_all = edma_terminate_all;
+	s_ddev->device_synchronize = edma_synchronize;
+
+	s_ddev->src_addr_widths = EDMA_DMA_BUSWIDTHS;
+	s_ddev->dst_addr_widths = EDMA_DMA_BUSWIDTHS;
+	s_ddev->directions |= (BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV));
+	s_ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+	s_ddev->max_burst = SZ_32K - 1; /* CIDX: 16-bit signed */
+
+	s_ddev->dev = ecc->dev;
+	INIT_LIST_HEAD(&s_ddev->channels);
+
+	if (memcpy_channels) {
+		m_ddev = devm_kzalloc(ecc->dev, sizeof(*m_ddev), GFP_KERNEL);
+		if (!m_ddev) {
+			dev_warn(ecc->dev, "memcpy is disabled due to OoM\n");
+			memcpy_channels = NULL;
+			goto ch_setup;
+		}
+		ecc->dma_memcpy = m_ddev;
+
+		dma_cap_zero(m_ddev->cap_mask);
+		dma_cap_set(DMA_MEMCPY, m_ddev->cap_mask);
+
+		m_ddev->device_prep_dma_memcpy = edma_prep_dma_memcpy;
+		m_ddev->device_alloc_chan_resources = edma_alloc_chan_resources;
+		m_ddev->device_free_chan_resources = edma_free_chan_resources;
+		m_ddev->device_issue_pending = edma_issue_pending;
+		m_ddev->device_tx_status = edma_tx_status;
+		m_ddev->device_config = edma_slave_config;
+		m_ddev->device_pause = edma_dma_pause;
+		m_ddev->device_resume = edma_dma_resume;
+		m_ddev->device_terminate_all = edma_terminate_all;
+		m_ddev->device_synchronize = edma_synchronize;
+
+		m_ddev->src_addr_widths = EDMA_DMA_BUSWIDTHS;
+		m_ddev->dst_addr_widths = EDMA_DMA_BUSWIDTHS;
+		m_ddev->directions = BIT(DMA_MEM_TO_MEM);
+		m_ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+		m_ddev->dev = ecc->dev;
+		INIT_LIST_HEAD(&m_ddev->channels);
+	} else if (!ecc->legacy_mode) {
+		dev_info(ecc->dev, "memcpy is disabled\n");
+	}
+
+ch_setup:
+	for (i = 0; i < ecc->num_channels; i++) {
+		struct edma_chan *echan = &ecc->slave_chans[i];
+		echan->ch_num = EDMA_CTLR_CHAN(ecc->id, i);
+		echan->ecc = ecc;
+		echan->vchan.desc_free = edma_desc_free;
+
+		if (m_ddev && edma_is_memcpy_channel(i, memcpy_channels))
+			vchan_init(&echan->vchan, m_ddev);
+		else
+			vchan_init(&echan->vchan, s_ddev);
+
+		INIT_LIST_HEAD(&echan->node);
+		for (j = 0; j < EDMA_MAX_SLOTS; j++)
+			echan->slot[j] = -1;
+	}
+}
+
+static int edma_setup_from_hw(struct device *dev, struct edma_soc_info *pdata,
+			      struct edma_cc *ecc)
+{
+	int i;
+	u32 value, cccfg;
+	s8 (*queue_priority_map)[2];
+
+	/* Decode the eDMA3 configuration from CCCFG register */
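+	/*
+	 * The region, channel and slot counts are encoded logarithmically,
+	 * e.g. a NUM_PAENTRY field value of 4 means BIT(4 + 4) = 256 PaRAM
+	 * slots, while NUM_EVQUE and NUM_QDMACH are simple (value + 1) and
+	 * (value * 2) encodings.
+	 */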
+	cccfg = edma_read(ecc, EDMA_CCCFG);
+
+	value = GET_NUM_REGN(cccfg);
+	ecc->num_region = BIT(value);
+
+	value = GET_NUM_DMACH(cccfg);
+	ecc->num_channels = BIT(value + 1);
+
+	value = GET_NUM_QDMACH(cccfg);
+	ecc->num_qchannels = value * 2;
+
+	value = GET_NUM_PAENTRY(cccfg);
+	ecc->num_slots = BIT(value + 4);
+
+	value = GET_NUM_EVQUE(cccfg);
+	ecc->num_tc = value + 1;
+
+	ecc->chmap_exist = (cccfg & CHMAP_EXIST) ? true : false;
+
+	dev_dbg(dev, "eDMA3 CC HW configuration (cccfg: 0x%08x):\n", cccfg);
+	dev_dbg(dev, "num_region: %u\n", ecc->num_region);
+	dev_dbg(dev, "num_channels: %u\n", ecc->num_channels);
+	dev_dbg(dev, "num_qchannels: %u\n", ecc->num_qchannels);
+	dev_dbg(dev, "num_slots: %u\n", ecc->num_slots);
+	dev_dbg(dev, "num_tc: %u\n", ecc->num_tc);
+	dev_dbg(dev, "chmap_exist: %s\n", ecc->chmap_exist ? "yes" : "no");
+
+	/* Nothing needs to be done if a queue priority mapping is provided */
+	if (pdata->queue_priority_mapping)
+		return 0;
+
+	/*
+	 * Configure TC/queue priority as follows:
+	 * Q0 - priority 0
+	 * Q1 - priority 1
+	 * Q2 - priority 2
+	 * ...
+	 * The meaning of priority numbers: 0 highest priority, 7 lowest
+	 * priority. So Q0 is the highest priority queue and the last queue has
+	 * the lowest priority.
+	 */
+	queue_priority_map = devm_kcalloc(dev, ecc->num_tc + 1, sizeof(s8),
+					  GFP_KERNEL);
+	if (!queue_priority_map)
+		return -ENOMEM;
+
+	for (i = 0; i < ecc->num_tc; i++) {
+		queue_priority_map[i][0] = i;
+		queue_priority_map[i][1] = i;
+	}
+	queue_priority_map[i][0] = -1;
+	queue_priority_map[i][1] = -1;
+
+	pdata->queue_priority_mapping = queue_priority_map;
+	/* Default queue has the lowest priority */
+	pdata->default_queue = i - 1;
+
+	return 0;
+}
+
+#if IS_ENABLED(CONFIG_OF)
+static int edma_xbar_event_map(struct device *dev, struct edma_soc_info *pdata,
+			       size_t sz)
+{
+	const char pname[] = "ti,edma-xbar-event-map";
+	struct resource res;
+	void __iomem *xbar;
+	s16 (*xbar_chans)[2];
+	size_t nelm = sz / sizeof(s16);
+	u32 shift, offset, mux;
+	int ret, i;
+
+	xbar_chans = devm_kcalloc(dev, nelm + 2, sizeof(s16), GFP_KERNEL);
+	if (!xbar_chans)
+		return -ENOMEM;
+
+	ret = of_address_to_resource(dev->of_node, 1, &res);
+	if (ret)
+		return -ENOMEM;
+
+	xbar = devm_ioremap(dev, res.start, resource_size(&res));
+	if (!xbar)
+		return -ENOMEM;
+
+	ret = of_property_read_u16_array(dev->of_node, pname, (u16 *)xbar_chans,
+					 nelm);
+	if (ret)
+		return -EIO;
+
+	/* Invalidate last entry for the other user of this mess */
+	nelm >>= 1;
+	xbar_chans[nelm][0] = -1;
+	xbar_chans[nelm][1] = -1;
+
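+	/*
+	 * Each 32-bit crossbar register packs four 8-bit event mux fields,
+	 * so the register offset is the entry rounded down to a multiple of
+	 * four and the shift selects the byte within it.
+	 */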
+	for (i = 0; i < nelm; i++) {
+		shift = (xbar_chans[i][1] & 0x03) << 3;
+		offset = xbar_chans[i][1] & 0xfffffffc;
+		mux = readl(xbar + offset);
+		mux &= ~(0xff << shift);
+		mux |= xbar_chans[i][0] << shift;
+		writel(mux, (xbar + offset));
+	}
+
+	pdata->xbar_chans = (const s16 (*)[2]) xbar_chans;
+	return 0;
+}
+
+static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev,
+						     bool legacy_mode)
+{
+	struct edma_soc_info *info;
+	struct property *prop;
+	int sz, ret;
+
+	info = devm_kzalloc(dev, sizeof(struct edma_soc_info), GFP_KERNEL);
+	if (!info)
+		return ERR_PTR(-ENOMEM);
+
+	if (legacy_mode) {
+		prop = of_find_property(dev->of_node, "ti,edma-xbar-event-map",
+					&sz);
+		if (prop) {
+			ret = edma_xbar_event_map(dev, info, sz);
+			if (ret)
+				return ERR_PTR(ret);
+		}
+		return info;
+	}
+
+	/* Get the list of channels allocated to be used for memcpy */
+	prop = of_find_property(dev->of_node, "ti,edma-memcpy-channels", &sz);
+	if (prop) {
+		const char pname[] = "ti,edma-memcpy-channels";
+		size_t nelm = sz / sizeof(s32);
+		s32 *memcpy_ch;
+
+		memcpy_ch = devm_kcalloc(dev, nelm + 1, sizeof(s32),
+					 GFP_KERNEL);
+		if (!memcpy_ch)
+			return ERR_PTR(-ENOMEM);
+
+		ret = of_property_read_u32_array(dev->of_node, pname,
+						 (u32 *)memcpy_ch, nelm);
+		if (ret)
+			return ERR_PTR(ret);
+
+		memcpy_ch[nelm] = -1;
+		info->memcpy_channels = memcpy_ch;
+	}
+
+	prop = of_find_property(dev->of_node, "ti,edma-reserved-slot-ranges",
+				&sz);
+	if (prop) {
+		const char pname[] = "ti,edma-reserved-slot-ranges";
+		u32 (*tmp)[2];
+		s16 (*rsv_slots)[2];
+		size_t nelm = sz / sizeof(*tmp);
+		struct edma_rsv_info *rsv_info;
+		int i;
+
+		if (!nelm)
+			return info;
+
+		tmp = kcalloc(nelm, sizeof(*tmp), GFP_KERNEL);
+		if (!tmp)
+			return ERR_PTR(-ENOMEM);
+
+		rsv_info = devm_kzalloc(dev, sizeof(*rsv_info), GFP_KERNEL);
+		if (!rsv_info) {
+			kfree(tmp);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		rsv_slots = devm_kcalloc(dev, nelm + 1, sizeof(*rsv_slots),
+					 GFP_KERNEL);
+		if (!rsv_slots) {
+			kfree(tmp);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		ret = of_property_read_u32_array(dev->of_node, pname,
+						 (u32 *)tmp, nelm * 2);
+		if (ret) {
+			kfree(tmp);
+			return ERR_PTR(ret);
+		}
+
+		for (i = 0; i < nelm; i++) {
+			rsv_slots[i][0] = tmp[i][0];
+			rsv_slots[i][1] = tmp[i][1];
+		}
+		rsv_slots[nelm][0] = -1;
+		rsv_slots[nelm][1] = -1;
+
+		info->rsv = rsv_info;
+		info->rsv->rsv_slots = (const s16 (*)[2])rsv_slots;
+
+		kfree(tmp);
+	}
+
+	return info;
+}
+
+static struct dma_chan *of_edma_xlate(struct of_phandle_args *dma_spec,
+				      struct of_dma *ofdma)
+{
+	struct edma_cc *ecc = ofdma->of_dma_data;
+	struct dma_chan *chan = NULL;
+	struct edma_chan *echan;
+	int i;
+
+	if (!ecc || dma_spec->args_count < 1)
+		return NULL;
+
+	for (i = 0; i < ecc->num_channels; i++) {
+		echan = &ecc->slave_chans[i];
+		if (echan->ch_num == dma_spec->args[0]) {
+			chan = &echan->vchan.chan;
+			break;
+		}
+	}
+
+	if (!chan)
+		return NULL;
+
+	if (echan->ecc->legacy_mode && dma_spec->args_count == 1)
+		goto out;
+
+	if (!echan->ecc->legacy_mode && dma_spec->args_count == 2 &&
+	    dma_spec->args[1] < echan->ecc->num_tc) {
+		echan->tc = &echan->ecc->tc_list[dma_spec->args[1]];
+		goto out;
+	}
+
+	return NULL;
+out:
+	/* The channel is going to be used as HW synchronized */
+	echan->hw_triggered = true;
+	return dma_get_slave_channel(chan);
+}
+#else
+static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev,
+						     bool legacy_mode)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+static struct dma_chan *of_edma_xlate(struct of_phandle_args *dma_spec,
+				      struct of_dma *ofdma)
+{
+	return NULL;
+}
+#endif
+
+static int edma_probe(struct platform_device *pdev)
+{
+	struct edma_soc_info	*info = pdev->dev.platform_data;
+	s8			(*queue_priority_mapping)[2];
+	int			i, off, ln;
+	const s16		(*rsv_slots)[2];
+	const s16		(*xbar_chans)[2];
+	int			irq;
+	char			*irq_name;
+	struct resource		*mem;
+	struct device_node	*node = pdev->dev.of_node;
+	struct device		*dev = &pdev->dev;
+	struct edma_cc		*ecc;
+	bool			legacy_mode = true;
+	int ret;
+
+	if (node) {
+		const struct of_device_id *match;
+
+		match = of_match_node(edma_of_ids, node);
+		if (match && (*(u32 *)match->data) == EDMA_BINDING_TPCC)
+			legacy_mode = false;
+
+		info = edma_setup_info_from_dt(dev, legacy_mode);
+		if (IS_ERR(info)) {
+			dev_err(dev, "failed to get DT data\n");
+			return PTR_ERR(info);
+		}
+	}
+
+	if (!info)
+		return -ENODEV;
+
+	pm_runtime_enable(dev);
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		dev_err(dev, "pm_runtime_get_sync() failed\n");
+		return ret;
+	}
+
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+	if (ret)
+		return ret;
+
+	ecc = devm_kzalloc(dev, sizeof(*ecc), GFP_KERNEL);
+	if (!ecc)
+		return -ENOMEM;
+
+	ecc->dev = dev;
+	ecc->id = pdev->id;
+	ecc->legacy_mode = legacy_mode;
+	/* When booting with DT the pdev->id is -1 */
+	if (ecc->id < 0)
+		ecc->id = 0;
+
+	mem = platform_get_resource_byname(pdev, IORESOURCE_MEM, "edma3_cc");
+	if (!mem) {
+		dev_dbg(dev, "mem resource not found, using index 0\n");
+		mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (!mem) {
+			dev_err(dev, "no mem resource?\n");
+			return -ENODEV;
+		}
+	}
+	ecc->base = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(ecc->base))
+		return PTR_ERR(ecc->base);
+
+	platform_set_drvdata(pdev, ecc);
+
+	/* Get eDMA3 configuration from IP */
+	ret = edma_setup_from_hw(dev, info, ecc);
+	if (ret)
+		return ret;
+
+	/* Allocate memory based on the information we got from the IP */
+	ecc->slave_chans = devm_kcalloc(dev, ecc->num_channels,
+					sizeof(*ecc->slave_chans), GFP_KERNEL);
+	if (!ecc->slave_chans)
+		return -ENOMEM;
+
+	ecc->slot_inuse = devm_kcalloc(dev, BITS_TO_LONGS(ecc->num_slots),
+				       sizeof(unsigned long), GFP_KERNEL);
+	if (!ecc->slot_inuse)
+		return -ENOMEM;
+
+	ecc->default_queue = info->default_queue;
+
+	for (i = 0; i < ecc->num_slots; i++)
+		edma_write_slot(ecc, i, &dummy_paramset);
+
+	if (info->rsv) {
+		/* Set the reserved slots in inuse list */
+		rsv_slots = info->rsv->rsv_slots;
+		if (rsv_slots) {
+			for (i = 0; rsv_slots[i][0] != -1; i++) {
+				off = rsv_slots[i][0];
+				ln = rsv_slots[i][1];
+				edma_set_bits(off, ln, ecc->slot_inuse);
+			}
+		}
+	}
+
+	/* Clear the xbar mapped channels in unused list */
+	xbar_chans = info->xbar_chans;
+	if (xbar_chans) {
+		for (i = 0; xbar_chans[i][1] != -1; i++) {
+			off = xbar_chans[i][1];
+		}
+	}
+
+	irq = platform_get_irq_byname(pdev, "edma3_ccint");
+	if (irq < 0 && node)
+		irq = irq_of_parse_and_map(node, 0);
+
+	if (irq >= 0) {
+		irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccint",
+					  dev_name(dev));
+		ret = devm_request_irq(dev, irq, dma_irq_handler, 0, irq_name,
+				       ecc);
+		if (ret) {
+			dev_err(dev, "CCINT (%d) failed --> %d\n", irq, ret);
+			return ret;
+		}
+		ecc->ccint = irq;
+	}
+
+	irq = platform_get_irq_byname(pdev, "edma3_ccerrint");
+	if (irq < 0 && node)
+		irq = irq_of_parse_and_map(node, 2);
+
+	if (irq >= 0) {
+		irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccerrint",
+					  dev_name(dev));
+		ret = devm_request_irq(dev, irq, dma_ccerr_handler, 0, irq_name,
+				       ecc);
+		if (ret) {
+			dev_err(dev, "CCERRINT (%d) failed --> %d\n", irq, ret);
+			return ret;
+		}
+		ecc->ccerrint = irq;
+	}
+
+	ecc->dummy_slot = edma_alloc_slot(ecc, EDMA_SLOT_ANY);
+	if (ecc->dummy_slot < 0) {
+		dev_err(dev, "Can't allocate PaRAM dummy slot\n");
+		return ecc->dummy_slot;
+	}
+
+	queue_priority_mapping = info->queue_priority_mapping;
+
+	if (!ecc->legacy_mode) {
+		int lowest_priority = 0;
+		struct of_phandle_args tc_args;
+
+		ecc->tc_list = devm_kcalloc(dev, ecc->num_tc,
+					    sizeof(*ecc->tc_list), GFP_KERNEL);
+		if (!ecc->tc_list)
+			return -ENOMEM;
+
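+		/*
+		 * Walk the "ti,tptcs" phandle list; the single argument is
+		 * the TC priority.  The queue whose TC has the numerically
+		 * largest (i.e. lowest) priority becomes the default queue.
+		 */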
+		for (i = 0;; i++) {
+			ret = of_parse_phandle_with_fixed_args(node, "ti,tptcs",
+							       1, i, &tc_args);
+			if (ret || i == ecc->num_tc)
+				break;
+
+			ecc->tc_list[i].node = tc_args.np;
+			ecc->tc_list[i].id = i;
+			queue_priority_mapping[i][1] = tc_args.args[0];
+			if (queue_priority_mapping[i][1] > lowest_priority) {
+				lowest_priority = queue_priority_mapping[i][1];
+				info->default_queue = i;
+			}
+		}
+	}
+
+	/* Event queue priority mapping */
+	for (i = 0; queue_priority_mapping[i][0] != -1; i++)
+		edma_assign_priority_to_queue(ecc, queue_priority_mapping[i][0],
+					      queue_priority_mapping[i][1]);
+
+	for (i = 0; i < ecc->num_region; i++) {
+		edma_write_array2(ecc, EDMA_DRAE, i, 0, 0x0);
+		edma_write_array2(ecc, EDMA_DRAE, i, 1, 0x0);
+		edma_write_array(ecc, EDMA_QRAE, i, 0x0);
+	}
+	ecc->info = info;
+
+	/* Init the dma device and channels */
+	edma_dma_init(ecc, legacy_mode);
+
+	for (i = 0; i < ecc->num_channels; i++) {
+		/* Assign all channels to the default queue */
+		edma_assign_channel_eventq(&ecc->slave_chans[i],
+					   info->default_queue);
+		/* Set entry slot to the dummy slot */
+		edma_set_chmap(&ecc->slave_chans[i], ecc->dummy_slot);
+	}
+
+	ecc->dma_slave.filter.map = info->slave_map;
+	ecc->dma_slave.filter.mapcnt = info->slavecnt;
+	ecc->dma_slave.filter.fn = edma_filter_fn;
+
+	ret = dma_async_device_register(&ecc->dma_slave);
+	if (ret) {
+		dev_err(dev, "slave ddev registration failed (%d)\n", ret);
+		goto err_reg1;
+	}
+
+	if (ecc->dma_memcpy) {
+		ret = dma_async_device_register(ecc->dma_memcpy);
+		if (ret) {
+			dev_err(dev, "memcpy ddev registration failed (%d)\n",
+				ret);
+			dma_async_device_unregister(&ecc->dma_slave);
+			goto err_reg1;
+		}
+	}
+
+	if (node)
+		of_dma_controller_register(node, of_edma_xlate, ecc);
+
+	dev_info(dev, "TI EDMA DMA engine driver\n");
+
+	return 0;
+
+err_reg1:
+	edma_free_slot(ecc, ecc->dummy_slot);
+	return ret;
+}
+
+static void edma_cleanup_vchan(struct dma_device *dmadev)
+{
+	struct edma_chan *echan, *_echan;
+
+	list_for_each_entry_safe(echan, _echan,
+			&dmadev->channels, vchan.chan.device_node) {
+		list_del(&echan->vchan.chan.device_node);
+		tasklet_kill(&echan->vchan.task);
+	}
+}
+
+static int edma_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct edma_cc *ecc = dev_get_drvdata(dev);
+
+	devm_free_irq(dev, ecc->ccint, ecc);
+	devm_free_irq(dev, ecc->ccerrint, ecc);
+
+	edma_cleanup_vchan(&ecc->dma_slave);
+
+	if (dev->of_node)
+		of_dma_controller_free(dev->of_node);
+	dma_async_device_unregister(&ecc->dma_slave);
+	if (ecc->dma_memcpy)
+		dma_async_device_unregister(ecc->dma_memcpy);
+	edma_free_slot(ecc, ecc->dummy_slot);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int edma_pm_suspend(struct device *dev)
+{
+	struct edma_cc *ecc = dev_get_drvdata(dev);
+	struct edma_chan *echan = ecc->slave_chans;
+	int i;
+
+	for (i = 0; i < ecc->num_channels; i++) {
+		if (echan[i].alloced)
+			edma_setup_interrupt(&echan[i], false);
+	}
+
+	return 0;
+}
+
+static int edma_pm_resume(struct device *dev)
+{
+	struct edma_cc *ecc = dev_get_drvdata(dev);
+	struct edma_chan *echan = ecc->slave_chans;
+	int i;
+	s8 (*queue_priority_mapping)[2];
+
+	/* Re-initialize the dummy slot to the dummy PaRAM set */
+	edma_write_slot(ecc, ecc->dummy_slot, &dummy_paramset);
+
+	queue_priority_mapping = ecc->info->queue_priority_mapping;
+
+	/* Event queue priority mapping */
+	for (i = 0; queue_priority_mapping[i][0] != -1; i++)
+		edma_assign_priority_to_queue(ecc, queue_priority_mapping[i][0],
+					      queue_priority_mapping[i][1]);
+
+	for (i = 0; i < ecc->num_channels; i++) {
+		if (echan[i].alloced) {
+			/* ensure access through shadow region 0 */
+			edma_or_array2(ecc, EDMA_DRAE, 0, i >> 5,
+				       BIT(i & 0x1f));
+
+			edma_setup_interrupt(&echan[i], true);
+
+			/* Set up channel -> slot mapping for the entry slot */
+			edma_set_chmap(&echan[i], echan[i].slot[0]);
+		}
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops edma_pm_ops = {
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(edma_pm_suspend, edma_pm_resume)
+};
+
+static struct platform_driver edma_driver = {
+	.probe		= edma_probe,
+	.remove		= edma_remove,
+	.driver = {
+		.name	= "edma",
+		.pm	= &edma_pm_ops,
+		.of_match_table = edma_of_ids,
+	},
+};
+
+static int edma_tptc_probe(struct platform_device *pdev)
+{
+	pm_runtime_enable(&pdev->dev);
+	return pm_runtime_get_sync(&pdev->dev);
+}
+
+static struct platform_driver edma_tptc_driver = {
+	.probe		= edma_tptc_probe,
+	.driver = {
+		.name	= "edma3-tptc",
+		.of_match_table = edma_tptc_of_ids,
+	},
+};
+
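+/*
+ * Legacy (non-DT) clients can use this as a dmaengine channel filter.
+ * A minimal sketch (the channel number 20 below is purely hypothetical):
+ *
+ *	dma_cap_mask_t mask;
+ *	struct dma_chan *chan;
+ *	unsigned ch = 20;
+ *
+ *	dma_cap_zero(mask);
+ *	dma_cap_set(DMA_SLAVE, mask);
+ *	chan = dma_request_channel(mask, edma_filter_fn, &ch);
+ */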
+bool edma_filter_fn(struct dma_chan *chan, void *param)
+{
+	bool match = false;
+
+	if (chan->device->dev->driver == &edma_driver.driver) {
+		struct edma_chan *echan = to_edma_chan(chan);
+		unsigned ch_req = *(unsigned *)param;
+		if (ch_req == echan->ch_num) {
+			/* The channel is going to be used as HW synchronized */
+			echan->hw_triggered = true;
+			match = true;
+		}
+	}
+	return match;
+}
+EXPORT_SYMBOL(edma_filter_fn);
+
+static int edma_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&edma_tptc_driver);
+	if (ret)
+		return ret;
+
+	return platform_driver_register(&edma_driver);
+}
+subsys_initcall(edma_init);
+
+static void __exit edma_exit(void)
+{
+	platform_driver_unregister(&edma_driver);
+	platform_driver_unregister(&edma_tptc_driver);
+}
+module_exit(edma_exit);
+
+MODULE_AUTHOR("Matt Porter <matt.porter@linaro.org>");
+MODULE_DESCRIPTION("TI EDMA DMA engine driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/ti/omap-dma.c b/drivers/dma/ti/omap-dma.c
new file mode 100644
index 0000000..a4a931d
--- /dev/null
+++ b/drivers/dma/ti/omap-dma.c
@@ -0,0 +1,1672 @@
+/*
+ * OMAP DMAengine support
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/omap-dma.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/of_dma.h>
+#include <linux/of_device.h>
+
+#include "../virt-dma.h"
+
+#define OMAP_SDMA_REQUESTS	127
+#define OMAP_SDMA_CHANNELS	32
+
+struct omap_dmadev {
+	struct dma_device ddev;
+	spinlock_t lock;
+	void __iomem *base;
+	const struct omap_dma_reg *reg_map;
+	struct omap_system_dma_plat_info *plat;
+	bool legacy;
+	bool ll123_supported;
+	struct dma_pool *desc_pool;
+	unsigned dma_requests;
+	spinlock_t irq_lock;
+	uint32_t irq_enable_mask;
+	struct omap_chan **lch_map;
+};
+
+struct omap_chan {
+	struct virt_dma_chan vc;
+	void __iomem *channel_base;
+	const struct omap_dma_reg *reg_map;
+	uint32_t ccr;
+
+	struct dma_slave_config	cfg;
+	unsigned dma_sig;
+	bool cyclic;
+	bool paused;
+	bool running;
+
+	int dma_ch;
+	struct omap_desc *desc;
+	unsigned sgidx;
+};
+
+#define DESC_NXT_SV_REFRESH	(0x1 << 24)
+#define DESC_NXT_SV_REUSE	(0x2 << 24)
+#define DESC_NXT_DV_REFRESH	(0x1 << 26)
+#define DESC_NXT_DV_REUSE	(0x2 << 26)
+#define DESC_NTYPE_TYPE2	(0x2 << 29)
+
+/* Type 2 descriptor with Source or Destination address update */
+struct omap_type2_desc {
+	uint32_t next_desc;
+	uint32_t en;
+	uint32_t addr; /* src or dst */
+	uint16_t fn;
+	uint16_t cicr;
+	int16_t cdei;
+	int16_t csei;
+	int32_t cdfi;
+	int32_t csfi;
+} __packed;
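+/*
+ * The t2_desc descriptors allocated per scatterlist entry are chained
+ * through next_desc so the controller can walk the list in hardware;
+ * the last entry's next_desc is set to 0xfffffffc (see
+ * omap_dma_fill_type2_desc() below), which presumably marks end of list.
+ */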
+
+struct omap_sg {
+	dma_addr_t addr;
+	uint32_t en;		/* number of elements (24-bit) */
+	uint32_t fn;		/* number of frames (16-bit) */
+	int32_t fi;		/* for double indexing */
+	int16_t ei;		/* for double indexing */
+
+	/* Linked list */
+	struct omap_type2_desc *t2_desc;
+	dma_addr_t t2_desc_paddr;
+};
+
+struct omap_desc {
+	struct virt_dma_desc vd;
+	bool using_ll;
+	enum dma_transfer_direction dir;
+	dma_addr_t dev_addr;
+
+	int32_t fi;		/* for OMAP_DMA_SYNC_PACKET / double indexing */
+	int16_t ei;		/* for double indexing */
+	uint8_t es;		/* CSDP_DATA_TYPE_xxx */
+	uint32_t ccr;		/* CCR value */
+	uint16_t clnk_ctrl;	/* CLNK_CTRL value */
+	uint16_t cicr;		/* CICR value */
+	uint32_t csdp;		/* CSDP value */
+
+	unsigned sglen;
+	struct omap_sg sg[0];
+};
+
+enum {
+	CAPS_0_SUPPORT_LL123	= BIT(20),	/* Linked List type1/2/3 */
+	CAPS_0_SUPPORT_LL4	= BIT(21),	/* Linked List type4 */
+
+	CCR_FS			= BIT(5),
+	CCR_READ_PRIORITY	= BIT(6),
+	CCR_ENABLE		= BIT(7),
+	CCR_AUTO_INIT		= BIT(8),	/* OMAP1 only */
+	CCR_REPEAT		= BIT(9),	/* OMAP1 only */
+	CCR_OMAP31_DISABLE	= BIT(10),	/* OMAP1 only */
+	CCR_SUSPEND_SENSITIVE	= BIT(8),	/* OMAP2+ only */
+	CCR_RD_ACTIVE		= BIT(9),	/* OMAP2+ only */
+	CCR_WR_ACTIVE		= BIT(10),	/* OMAP2+ only */
+	CCR_SRC_AMODE_CONSTANT	= 0 << 12,
+	CCR_SRC_AMODE_POSTINC	= 1 << 12,
+	CCR_SRC_AMODE_SGLIDX	= 2 << 12,
+	CCR_SRC_AMODE_DBLIDX	= 3 << 12,
+	CCR_DST_AMODE_CONSTANT	= 0 << 14,
+	CCR_DST_AMODE_POSTINC	= 1 << 14,
+	CCR_DST_AMODE_SGLIDX	= 2 << 14,
+	CCR_DST_AMODE_DBLIDX	= 3 << 14,
+	CCR_CONSTANT_FILL	= BIT(16),
+	CCR_TRANSPARENT_COPY	= BIT(17),
+	CCR_BS			= BIT(18),
+	CCR_SUPERVISOR		= BIT(22),
+	CCR_PREFETCH		= BIT(23),
+	CCR_TRIGGER_SRC		= BIT(24),
+	CCR_BUFFERING_DISABLE	= BIT(25),
+	CCR_WRITE_PRIORITY	= BIT(26),
+	CCR_SYNC_ELEMENT	= 0,
+	CCR_SYNC_FRAME		= CCR_FS,
+	CCR_SYNC_BLOCK		= CCR_BS,
+	CCR_SYNC_PACKET		= CCR_BS | CCR_FS,
+
+	CSDP_DATA_TYPE_8	= 0,
+	CSDP_DATA_TYPE_16	= 1,
+	CSDP_DATA_TYPE_32	= 2,
+	CSDP_SRC_PORT_EMIFF	= 0 << 2, /* OMAP1 only */
+	CSDP_SRC_PORT_EMIFS	= 1 << 2, /* OMAP1 only */
+	CSDP_SRC_PORT_OCP_T1	= 2 << 2, /* OMAP1 only */
+	CSDP_SRC_PORT_TIPB	= 3 << 2, /* OMAP1 only */
+	CSDP_SRC_PORT_OCP_T2	= 4 << 2, /* OMAP1 only */
+	CSDP_SRC_PORT_MPUI	= 5 << 2, /* OMAP1 only */
+	CSDP_SRC_PACKED		= BIT(6),
+	CSDP_SRC_BURST_1	= 0 << 7,
+	CSDP_SRC_BURST_16	= 1 << 7,
+	CSDP_SRC_BURST_32	= 2 << 7,
+	CSDP_SRC_BURST_64	= 3 << 7,
+	CSDP_DST_PORT_EMIFF	= 0 << 9, /* OMAP1 only */
+	CSDP_DST_PORT_EMIFS	= 1 << 9, /* OMAP1 only */
+	CSDP_DST_PORT_OCP_T1	= 2 << 9, /* OMAP1 only */
+	CSDP_DST_PORT_TIPB	= 3 << 9, /* OMAP1 only */
+	CSDP_DST_PORT_OCP_T2	= 4 << 9, /* OMAP1 only */
+	CSDP_DST_PORT_MPUI	= 5 << 9, /* OMAP1 only */
+	CSDP_DST_PACKED		= BIT(13),
+	CSDP_DST_BURST_1	= 0 << 14,
+	CSDP_DST_BURST_16	= 1 << 14,
+	CSDP_DST_BURST_32	= 2 << 14,
+	CSDP_DST_BURST_64	= 3 << 14,
+	CSDP_WRITE_NON_POSTED	= 0 << 16,
+	CSDP_WRITE_POSTED	= 1 << 16,
+	CSDP_WRITE_LAST_NON_POSTED = 2 << 16,
+
+	CICR_TOUT_IE		= BIT(0),	/* OMAP1 only */
+	CICR_DROP_IE		= BIT(1),
+	CICR_HALF_IE		= BIT(2),
+	CICR_FRAME_IE		= BIT(3),
+	CICR_LAST_IE		= BIT(4),
+	CICR_BLOCK_IE		= BIT(5),
+	CICR_PKT_IE		= BIT(7),	/* OMAP2+ only */
+	CICR_TRANS_ERR_IE	= BIT(8),	/* OMAP2+ only */
+	CICR_SUPERVISOR_ERR_IE	= BIT(10),	/* OMAP2+ only */
+	CICR_MISALIGNED_ERR_IE	= BIT(11),	/* OMAP2+ only */
+	CICR_DRAIN_IE		= BIT(12),	/* OMAP2+ only */
+	CICR_SUPER_BLOCK_IE	= BIT(14),	/* OMAP2+ only */
+
+	CLNK_CTRL_ENABLE_LNK	= BIT(15),
+
+	CDP_DST_VALID_INC	= 0 << 0,
+	CDP_DST_VALID_RELOAD	= 1 << 0,
+	CDP_DST_VALID_REUSE	= 2 << 0,
+	CDP_SRC_VALID_INC	= 0 << 2,
+	CDP_SRC_VALID_RELOAD	= 1 << 2,
+	CDP_SRC_VALID_REUSE	= 2 << 2,
+	CDP_NTYPE_TYPE1		= 1 << 4,
+	CDP_NTYPE_TYPE2		= 2 << 4,
+	CDP_NTYPE_TYPE3		= 3 << 4,
+	CDP_TMODE_NORMAL	= 0 << 8,
+	CDP_TMODE_LLIST		= 1 << 8,
+	CDP_FAST		= BIT(10),
+};
+
+static const unsigned es_bytes[] = {
+	[CSDP_DATA_TYPE_8] = 1,
+	[CSDP_DATA_TYPE_16] = 2,
+	[CSDP_DATA_TYPE_32] = 4,
+};
+
+static struct of_dma_filter_info omap_dma_info = {
+	.filter_fn = omap_dma_filter_fn,
+};
+
+static inline struct omap_dmadev *to_omap_dma_dev(struct dma_device *d)
+{
+	return container_of(d, struct omap_dmadev, ddev);
+}
+
+static inline struct omap_chan *to_omap_dma_chan(struct dma_chan *c)
+{
+	return container_of(c, struct omap_chan, vc.chan);
+}
+
+static inline struct omap_desc *to_omap_dma_desc(struct dma_async_tx_descriptor *t)
+{
+	return container_of(t, struct omap_desc, vd.tx);
+}
+
+static void omap_dma_desc_free(struct virt_dma_desc *vd)
+{
+	struct omap_desc *d = to_omap_dma_desc(&vd->tx);
+
+	if (d->using_ll) {
+		struct omap_dmadev *od = to_omap_dma_dev(vd->tx.chan->device);
+		int i;
+
+		for (i = 0; i < d->sglen; i++) {
+			if (d->sg[i].t2_desc)
+				dma_pool_free(od->desc_pool, d->sg[i].t2_desc,
+					      d->sg[i].t2_desc_paddr);
+		}
+	}
+
+	kfree(d);
+}
+
+static void omap_dma_fill_type2_desc(struct omap_desc *d, int idx,
+				     enum dma_transfer_direction dir, bool last)
+{
+	struct omap_sg *sg = &d->sg[idx];
+	struct omap_type2_desc *t2_desc = sg->t2_desc;
+
+	if (idx)
+		d->sg[idx - 1].t2_desc->next_desc = sg->t2_desc_paddr;
+	if (last)
+		t2_desc->next_desc = 0xfffffffc;
+
+	t2_desc->en = sg->en;
+	t2_desc->addr = sg->addr;
+	t2_desc->fn = sg->fn & 0xffff;
+	t2_desc->cicr = d->cicr;
+	if (!last)
+		t2_desc->cicr &= ~CICR_BLOCK_IE;
+
+	switch (dir) {
+	case DMA_DEV_TO_MEM:
+		t2_desc->cdei = sg->ei;
+		t2_desc->csei = d->ei;
+		t2_desc->cdfi = sg->fi;
+		t2_desc->csfi = d->fi;
+
+		t2_desc->en |= DESC_NXT_DV_REFRESH;
+		t2_desc->en |= DESC_NXT_SV_REUSE;
+		break;
+	case DMA_MEM_TO_DEV:
+		t2_desc->cdei = d->ei;
+		t2_desc->csei = sg->ei;
+		t2_desc->cdfi = d->fi;
+		t2_desc->csfi = sg->fi;
+
+		t2_desc->en |= DESC_NXT_SV_REFRESH;
+		t2_desc->en |= DESC_NXT_DV_REUSE;
+		break;
+	default:
+		return;
+	}
+
+	t2_desc->en |= DESC_NTYPE_TYPE2;
+}
+
+static void omap_dma_write(uint32_t val, unsigned type, void __iomem *addr)
+{
+	switch (type) {
+	case OMAP_DMA_REG_16BIT:
+		writew_relaxed(val, addr);
+		break;
+	case OMAP_DMA_REG_2X16BIT:
+		writew_relaxed(val, addr);
+		writew_relaxed(val >> 16, addr + 2);
+		break;
+	case OMAP_DMA_REG_32BIT:
+		writel_relaxed(val, addr);
+		break;
+	default:
+		WARN_ON(1);
+	}
+}
+
+static unsigned omap_dma_read(unsigned type, void __iomem *addr)
+{
+	unsigned val;
+
+	switch (type) {
+	case OMAP_DMA_REG_16BIT:
+		val = readw_relaxed(addr);
+		break;
+	case OMAP_DMA_REG_2X16BIT:
+		val = readw_relaxed(addr);
+		val |= readw_relaxed(addr + 2) << 16;
+		break;
+	case OMAP_DMA_REG_32BIT:
+		val = readl_relaxed(addr);
+		break;
+	default:
+		WARN_ON(1);
+		val = 0;
+	}
+
+	return val;
+}
+
+static void omap_dma_glbl_write(struct omap_dmadev *od, unsigned reg, unsigned val)
+{
+	const struct omap_dma_reg *r = od->reg_map + reg;
+
+	WARN_ON(r->stride);
+
+	omap_dma_write(val, r->type, od->base + r->offset);
+}
+
+static unsigned omap_dma_glbl_read(struct omap_dmadev *od, unsigned reg)
+{
+	const struct omap_dma_reg *r = od->reg_map + reg;
+
+	WARN_ON(r->stride);
+
+	return omap_dma_read(r->type, od->base + r->offset);
+}
+
+static void omap_dma_chan_write(struct omap_chan *c, unsigned reg, unsigned val)
+{
+	const struct omap_dma_reg *r = c->reg_map + reg;
+
+	omap_dma_write(val, r->type, c->channel_base + r->offset);
+}
+
+static unsigned omap_dma_chan_read(struct omap_chan *c, unsigned reg)
+{
+	const struct omap_dma_reg *r = c->reg_map + reg;
+
+	return omap_dma_read(r->type, c->channel_base + r->offset);
+}
+
+static void omap_dma_clear_csr(struct omap_chan *c)
+{
+	if (dma_omap1())
+		omap_dma_chan_read(c, CSR);
+	else
+		omap_dma_chan_write(c, CSR, ~0);
+}
+
+static unsigned omap_dma_get_csr(struct omap_chan *c)
+{
+	unsigned val = omap_dma_chan_read(c, CSR);
+
+	if (!dma_omap1())
+		omap_dma_chan_write(c, CSR, val);
+
+	return val;
+}
+
+static void omap_dma_assign(struct omap_dmadev *od, struct omap_chan *c,
+	unsigned lch)
+{
+	c->channel_base = od->base + od->plat->channel_stride * lch;
+
+	od->lch_map[lch] = c;
+}
+
+static void omap_dma_start(struct omap_chan *c, struct omap_desc *d)
+{
+	struct omap_dmadev *od = to_omap_dma_dev(c->vc.chan.device);
+	uint16_t cicr = d->cicr;
+
+	if (__dma_omap15xx(od->plat->dma_attr))
+		omap_dma_chan_write(c, CPC, 0);
+	else
+		omap_dma_chan_write(c, CDAC, 0);
+
+	omap_dma_clear_csr(c);
+
+	if (d->using_ll) {
+		uint32_t cdp = CDP_TMODE_LLIST | CDP_NTYPE_TYPE2 | CDP_FAST;
+
+		if (d->dir == DMA_DEV_TO_MEM)
+			cdp |= (CDP_DST_VALID_RELOAD | CDP_SRC_VALID_REUSE);
+		else
+			cdp |= (CDP_DST_VALID_REUSE | CDP_SRC_VALID_RELOAD);
+		omap_dma_chan_write(c, CDP, cdp);
+
+		omap_dma_chan_write(c, CNDP, d->sg[0].t2_desc_paddr);
+		omap_dma_chan_write(c, CCDN, 0);
+		omap_dma_chan_write(c, CCFN, 0xffff);
+		omap_dma_chan_write(c, CCEN, 0xffffff);
+
+		cicr &= ~CICR_BLOCK_IE;
+	} else if (od->ll123_supported) {
+		omap_dma_chan_write(c, CDP, 0);
+	}
+
+	/* Enable interrupts */
+	omap_dma_chan_write(c, CICR, cicr);
+
+	/* Enable channel */
+	omap_dma_chan_write(c, CCR, d->ccr | CCR_ENABLE);
+
+	c->running = true;
+}
+
+static void omap_dma_drain_chan(struct omap_chan *c)
+{
+	int i;
+	u32 val;
+
+	/* Wait for sDMA FIFO to drain */
+	for (i = 0; ; i++) {
+		val = omap_dma_chan_read(c, CCR);
+		if (!(val & (CCR_RD_ACTIVE | CCR_WR_ACTIVE)))
+			break;
+
+		if (i > 100)
+			break;
+
+		udelay(5);
+	}
+
+	if (val & (CCR_RD_ACTIVE | CCR_WR_ACTIVE))
+		dev_err(c->vc.chan.device->dev,
+			"DMA drain did not complete on lch %d\n",
+			c->dma_ch);
+}
+
+static int omap_dma_stop(struct omap_chan *c)
+{
+	struct omap_dmadev *od = to_omap_dma_dev(c->vc.chan.device);
+	uint32_t val;
+
+	/* disable irq */
+	omap_dma_chan_write(c, CICR, 0);
+
+	omap_dma_clear_csr(c);
+
+	val = omap_dma_chan_read(c, CCR);
+	if (od->plat->errata & DMA_ERRATA_i541 && val & CCR_TRIGGER_SRC) {
+		uint32_t sysconfig;
+
+		sysconfig = omap_dma_glbl_read(od, OCP_SYSCONFIG);
+		val = sysconfig & ~DMA_SYSCONFIG_MIDLEMODE_MASK;
+		val |= DMA_SYSCONFIG_MIDLEMODE(DMA_IDLEMODE_NO_IDLE);
+		omap_dma_glbl_write(od, OCP_SYSCONFIG, val);
+
+		val = omap_dma_chan_read(c, CCR);
+		val &= ~CCR_ENABLE;
+		omap_dma_chan_write(c, CCR, val);
+
+		if (!(c->ccr & CCR_BUFFERING_DISABLE))
+			omap_dma_drain_chan(c);
+
+		omap_dma_glbl_write(od, OCP_SYSCONFIG, sysconfig);
+	} else {
+		if (!(val & CCR_ENABLE))
+			return -EINVAL;
+
+		val &= ~CCR_ENABLE;
+		omap_dma_chan_write(c, CCR, val);
+
+		if (!(c->ccr & CCR_BUFFERING_DISABLE))
+			omap_dma_drain_chan(c);
+	}
+
+	mb();
+
+	if (!__dma_omap15xx(od->plat->dma_attr) && c->cyclic) {
+		val = omap_dma_chan_read(c, CLNK_CTRL);
+
+		if (dma_omap1())
+			val |= 1 << 14; /* set the STOP_LNK bit */
+		else
+			val &= ~CLNK_CTRL_ENABLE_LNK;
+
+		omap_dma_chan_write(c, CLNK_CTRL, val);
+	}
+	c->running = false;
+	return 0;
+}
+
+static void omap_dma_start_sg(struct omap_chan *c, struct omap_desc *d)
+{
+	struct omap_sg *sg = d->sg + c->sgidx;
+	unsigned cxsa, cxei, cxfi;
+
+	if (d->dir == DMA_DEV_TO_MEM || d->dir == DMA_MEM_TO_MEM) {
+		cxsa = CDSA;
+		cxei = CDEI;
+		cxfi = CDFI;
+	} else {
+		cxsa = CSSA;
+		cxei = CSEI;
+		cxfi = CSFI;
+	}
+
+	omap_dma_chan_write(c, cxsa, sg->addr);
+	omap_dma_chan_write(c, cxei, sg->ei);
+	omap_dma_chan_write(c, cxfi, sg->fi);
+	omap_dma_chan_write(c, CEN, sg->en);
+	omap_dma_chan_write(c, CFN, sg->fn);
+
+	omap_dma_start(c, d);
+	c->sgidx++;
+}
+
+static void omap_dma_start_desc(struct omap_chan *c)
+{
+	struct virt_dma_desc *vd = vchan_next_desc(&c->vc);
+	struct omap_desc *d;
+	unsigned cxsa, cxei, cxfi;
+
+	if (!vd) {
+		c->desc = NULL;
+		return;
+	}
+
+	list_del(&vd->node);
+
+	c->desc = d = to_omap_dma_desc(&vd->tx);
+	c->sgidx = 0;
+
+	/*
+	 * This provides the necessary barrier to ensure data held in
+	 * DMA coherent memory is visible to the DMA engine prior to
+	 * the transfer starting.
+	 */
+	mb();
+
+	omap_dma_chan_write(c, CCR, d->ccr);
+	if (dma_omap1())
+		omap_dma_chan_write(c, CCR2, d->ccr >> 16);
+
+	if (d->dir == DMA_DEV_TO_MEM || d->dir == DMA_MEM_TO_MEM) {
+		cxsa = CSSA;
+		cxei = CSEI;
+		cxfi = CSFI;
+	} else {
+		cxsa = CDSA;
+		cxei = CDEI;
+		cxfi = CDFI;
+	}
+
+	omap_dma_chan_write(c, cxsa, d->dev_addr);
+	omap_dma_chan_write(c, cxei, d->ei);
+	omap_dma_chan_write(c, cxfi, d->fi);
+	omap_dma_chan_write(c, CSDP, d->csdp);
+	omap_dma_chan_write(c, CLNK_CTRL, d->clnk_ctrl);
+
+	omap_dma_start_sg(c, d);
+}
+
+static void omap_dma_callback(int ch, u16 status, void *data)
+{
+	struct omap_chan *c = data;
+	struct omap_desc *d;
+	unsigned long flags;
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+	d = c->desc;
+	if (d) {
+		if (c->cyclic) {
+			vchan_cyclic_callback(&d->vd);
+		} else if (d->using_ll || c->sgidx == d->sglen) {
+			omap_dma_start_desc(c);
+			vchan_cookie_complete(&d->vd);
+		} else {
+			omap_dma_start_sg(c, d);
+		}
+	}
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+}
+
+static irqreturn_t omap_dma_irq(int irq, void *devid)
+{
+	struct omap_dmadev *od = devid;
+	unsigned status, channel;
+
+	spin_lock(&od->irq_lock);
+
+	status = omap_dma_glbl_read(od, IRQSTATUS_L1);
+	status &= od->irq_enable_mask;
+	if (status == 0) {
+		spin_unlock(&od->irq_lock);
+		return IRQ_NONE;
+	}
+
+	while ((channel = ffs(status)) != 0) {
+		unsigned mask, csr;
+		struct omap_chan *c;
+
+		channel -= 1;
+		mask = BIT(channel);
+		status &= ~mask;
+
+		c = od->lch_map[channel];
+		if (c == NULL) {
+			/* This should never happen */
+			dev_err(od->ddev.dev, "invalid channel %u\n", channel);
+			continue;
+		}
+
+		csr = omap_dma_get_csr(c);
+		omap_dma_glbl_write(od, IRQSTATUS_L1, mask);
+
+		omap_dma_callback(channel, csr, c);
+	}
+
+	spin_unlock(&od->irq_lock);
+
+	return IRQ_HANDLED;
+}
+
+static int omap_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct omap_dmadev *od = to_omap_dma_dev(chan->device);
+	struct omap_chan *c = to_omap_dma_chan(chan);
+	struct device *dev = od->ddev.dev;
+	int ret;
+
+	if (od->legacy) {
+		ret = omap_request_dma(c->dma_sig, "DMA engine",
+				       omap_dma_callback, c, &c->dma_ch);
+	} else {
+		ret = omap_request_dma(c->dma_sig, "DMA engine", NULL, NULL,
+				       &c->dma_ch);
+	}
+
+	dev_dbg(dev, "allocating channel %u for %u\n", c->dma_ch, c->dma_sig);
+
+	if (ret >= 0) {
+		omap_dma_assign(od, c, c->dma_ch);
+
+		if (!od->legacy) {
+			unsigned val;
+
+			spin_lock_irq(&od->irq_lock);
+			val = BIT(c->dma_ch);
+			omap_dma_glbl_write(od, IRQSTATUS_L1, val);
+			od->irq_enable_mask |= val;
+			omap_dma_glbl_write(od, IRQENABLE_L1, od->irq_enable_mask);
+
+			val = omap_dma_glbl_read(od, IRQENABLE_L0);
+			val &= ~BIT(c->dma_ch);
+			omap_dma_glbl_write(od, IRQENABLE_L0, val);
+			spin_unlock_irq(&od->irq_lock);
+		}
+	}
+
+	if (dma_omap1()) {
+		if (__dma_omap16xx(od->plat->dma_attr)) {
+			c->ccr = CCR_OMAP31_DISABLE;
+			/* Duplicate what plat-omap/dma.c does */
+			c->ccr |= c->dma_ch + 1;
+		} else {
+			c->ccr = c->dma_sig & 0x1f;
+		}
+	} else {
+		c->ccr = c->dma_sig & 0x1f;
+		c->ccr |= (c->dma_sig & ~0x1f) << 14;
+	}
+	if (od->plat->errata & DMA_ERRATA_IFRAME_BUFFERING)
+		c->ccr |= CCR_BUFFERING_DISABLE;
+
+	return ret;
+}
+
+static void omap_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct omap_dmadev *od = to_omap_dma_dev(chan->device);
+	struct omap_chan *c = to_omap_dma_chan(chan);
+
+	if (!od->legacy) {
+		spin_lock_irq(&od->irq_lock);
+		od->irq_enable_mask &= ~BIT(c->dma_ch);
+		omap_dma_glbl_write(od, IRQENABLE_L1, od->irq_enable_mask);
+		spin_unlock_irq(&od->irq_lock);
+	}
+
+	c->channel_base = NULL;
+	od->lch_map[c->dma_ch] = NULL;
+	vchan_free_chan_resources(&c->vc);
+	omap_free_dma(c->dma_ch);
+
+	dev_dbg(od->ddev.dev, "freeing channel %u used for %u\n", c->dma_ch,
+		c->dma_sig);
+	c->dma_sig = 0;
+}
+
+static size_t omap_dma_sg_size(struct omap_sg *sg)
+{
+	return sg->en * sg->fn;
+}
+
+static size_t omap_dma_desc_size(struct omap_desc *d)
+{
+	unsigned i;
+	size_t size;
+
+	for (size = i = 0; i < d->sglen; i++)
+		size += omap_dma_sg_size(&d->sg[i]);
+
+	return size * es_bytes[d->es];
+}
+
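+/*
+ * Bytes left from the given bus address to the end of the descriptor:
+ * the sg entry containing @addr contributes only its remaining bytes,
+ * every following entry contributes in full.  Used for residue
+ * reporting in omap_dma_tx_status().
+ */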
+static size_t omap_dma_desc_size_pos(struct omap_desc *d, dma_addr_t addr)
+{
+	unsigned i;
+	size_t size, es_size = es_bytes[d->es];
+
+	for (size = i = 0; i < d->sglen; i++) {
+		size_t this_size = omap_dma_sg_size(&d->sg[i]) * es_size;
+
+		if (size)
+			size += this_size;
+		else if (addr >= d->sg[i].addr &&
+			 addr < d->sg[i].addr + this_size)
+			size += d->sg[i].addr + this_size - addr;
+	}
+	return size;
+}
+
+/*
+ * OMAP 3.2/3.3 erratum: sometimes 0 is returned if CSAC/CDAC is
+ * read before the DMA controller has finished disabling the channel.
+ */
+static uint32_t omap_dma_chan_read_3_3(struct omap_chan *c, unsigned reg)
+{
+	struct omap_dmadev *od = to_omap_dma_dev(c->vc.chan.device);
+	uint32_t val;
+
+	val = omap_dma_chan_read(c, reg);
+	if (val == 0 && od->plat->errata & DMA_ERRATA_3_3)
+		val = omap_dma_chan_read(c, reg);
+
+	return val;
+}
+
+static dma_addr_t omap_dma_get_src_pos(struct omap_chan *c)
+{
+	struct omap_dmadev *od = to_omap_dma_dev(c->vc.chan.device);
+	dma_addr_t addr, cdac;
+
+	if (__dma_omap15xx(od->plat->dma_attr)) {
+		addr = omap_dma_chan_read(c, CPC);
+	} else {
+		addr = omap_dma_chan_read_3_3(c, CSAC);
+		cdac = omap_dma_chan_read_3_3(c, CDAC);
+
+		/*
+		 * CDAC == 0 indicates that the DMA transfer on the channel has
+		 * not been started (no data has been transferred so far).
+		 * Return the programmed source start address in this case.
+		 */
+		if (cdac == 0)
+			addr = omap_dma_chan_read(c, CSSA);
+	}
+
+	if (dma_omap1())
+		addr |= omap_dma_chan_read(c, CSSA) & 0xffff0000;
+
+	return addr;
+}
+
+static dma_addr_t omap_dma_get_dst_pos(struct omap_chan *c)
+{
+	struct omap_dmadev *od = to_omap_dma_dev(c->vc.chan.device);
+	dma_addr_t addr;
+
+	if (__dma_omap15xx(od->plat->dma_attr)) {
+		addr = omap_dma_chan_read(c, CPC);
+	} else {
+		addr = omap_dma_chan_read_3_3(c, CDAC);
+
+		/*
+		 * CDAC == 0 indicates that the DMA transfer on the channel
+		 * has not been started (no data has been transferred so
+		 * far).  Return the programmed destination start address in
+		 * this case.
+		 */
+		if (addr == 0)
+			addr = omap_dma_chan_read(c, CDSA);
+	}
+
+	if (dma_omap1())
+		addr |= omap_dma_chan_read(c, CDSA) & 0xffff0000;
+
+	return addr;
+}
+
+static enum dma_status omap_dma_tx_status(struct dma_chan *chan,
+	dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct omap_chan *c = to_omap_dma_chan(chan);
+	struct virt_dma_desc *vd;
+	enum dma_status ret;
+	unsigned long flags;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+
+	if (!c->paused && c->running) {
+		uint32_t ccr = omap_dma_chan_read(c, CCR);
+		/*
+		 * The channel is no longer active, set the return value
+		 * accordingly
+		 */
+		if (!(ccr & CCR_ENABLE))
+			ret = DMA_COMPLETE;
+	}
+
+	if (ret == DMA_COMPLETE || !txstate)
+		return ret;
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+	vd = vchan_find_desc(&c->vc, cookie);
+	if (vd) {
+		txstate->residue = omap_dma_desc_size(to_omap_dma_desc(&vd->tx));
+	} else if (c->desc && c->desc->vd.tx.cookie == cookie) {
+		struct omap_desc *d = c->desc;
+		dma_addr_t pos;
+
+		if (d->dir == DMA_MEM_TO_DEV)
+			pos = omap_dma_get_src_pos(c);
+		else if (d->dir == DMA_DEV_TO_MEM || d->dir == DMA_MEM_TO_MEM)
+			pos = omap_dma_get_dst_pos(c);
+		else
+			pos = 0;
+
+		txstate->residue = omap_dma_desc_size_pos(d, pos);
+	} else {
+		txstate->residue = 0;
+	}
+	if (ret == DMA_IN_PROGRESS && c->paused)
+		ret = DMA_PAUSED;
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+
+	return ret;
+}
+
+static void omap_dma_issue_pending(struct dma_chan *chan)
+{
+	struct omap_chan *c = to_omap_dma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+	if (vchan_issue_pending(&c->vc) && !c->desc)
+		omap_dma_start_desc(c);
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+}
+
+static struct dma_async_tx_descriptor *omap_dma_prep_slave_sg(
+	struct dma_chan *chan, struct scatterlist *sgl, unsigned sglen,
+	enum dma_transfer_direction dir, unsigned long tx_flags, void *context)
+{
+	struct omap_dmadev *od = to_omap_dma_dev(chan->device);
+	struct omap_chan *c = to_omap_dma_chan(chan);
+	enum dma_slave_buswidth dev_width;
+	struct scatterlist *sgent;
+	struct omap_desc *d;
+	dma_addr_t dev_addr;
+	unsigned i, es, en, frame_bytes;
+	bool ll_failed = false;
+	u32 burst;
+	u32 port_window, port_window_bytes;
+
+	if (dir == DMA_DEV_TO_MEM) {
+		dev_addr = c->cfg.src_addr;
+		dev_width = c->cfg.src_addr_width;
+		burst = c->cfg.src_maxburst;
+		port_window = c->cfg.src_port_window_size;
+	} else if (dir == DMA_MEM_TO_DEV) {
+		dev_addr = c->cfg.dst_addr;
+		dev_width = c->cfg.dst_addr_width;
+		burst = c->cfg.dst_maxburst;
+		port_window = c->cfg.dst_port_window_size;
+	} else {
+		dev_err(chan->device->dev, "%s: bad direction?\n", __func__);
+		return NULL;
+	}
+
+	/* Bus width translates to the element size (ES) */
+	switch (dev_width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		es = CSDP_DATA_TYPE_8;
+		break;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		es = CSDP_DATA_TYPE_16;
+		break;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		es = CSDP_DATA_TYPE_32;
+		break;
+	default: /* not reached */
+		return NULL;
+	}
+
+	/* Now allocate and setup the descriptor. */
+	d = kzalloc(struct_size(d, sg, sglen), GFP_ATOMIC);
+	if (!d)
+		return NULL;
+
+	d->dir = dir;
+	d->dev_addr = dev_addr;
+	d->es = es;
+
+	/* When the port_window is used, one frame must cover the window */
+	if (port_window) {
+		burst = port_window;
+		port_window_bytes = port_window * es_bytes[es];
+
+		d->ei = 1;
+		/*
+		 * One frame covers the port_window and by configuring
+		 * the source frame index to be -1 * (port_window - 1)
+		 * we instruct the sDMA that after a frame is processed
+		 * it should move back to the start of the window.
+		 */
+		d->fi = -(port_window_bytes - 1);
+	}
+
+	d->ccr = c->ccr | CCR_SYNC_FRAME;
+	if (dir == DMA_DEV_TO_MEM) {
+		d->csdp = CSDP_DST_BURST_64 | CSDP_DST_PACKED;
+
+		d->ccr |= CCR_DST_AMODE_POSTINC;
+		if (port_window) {
+			d->ccr |= CCR_SRC_AMODE_DBLIDX;
+
+			if (port_window_bytes >= 64)
+				d->csdp |= CSDP_SRC_BURST_64;
+			else if (port_window_bytes >= 32)
+				d->csdp |= CSDP_SRC_BURST_32;
+			else if (port_window_bytes >= 16)
+				d->csdp |= CSDP_SRC_BURST_16;
+
+		} else {
+			d->ccr |= CCR_SRC_AMODE_CONSTANT;
+		}
+	} else {
+		d->csdp = CSDP_SRC_BURST_64 | CSDP_SRC_PACKED;
+
+		d->ccr |= CCR_SRC_AMODE_POSTINC;
+		if (port_window) {
+			d->ccr |= CCR_DST_AMODE_DBLIDX;
+
+			if (port_window_bytes >= 64)
+				d->csdp |= CSDP_DST_BURST_64;
+			else if (port_window_bytes >= 32)
+				d->csdp |= CSDP_DST_BURST_32;
+			else if (port_window_bytes >= 16)
+				d->csdp |= CSDP_DST_BURST_16;
+		} else {
+			d->ccr |= CCR_DST_AMODE_CONSTANT;
+		}
+	}
+
+	d->cicr = CICR_DROP_IE | CICR_BLOCK_IE;
+	d->csdp |= es;
+
+	if (dma_omap1()) {
+		d->cicr |= CICR_TOUT_IE;
+
+		if (dir == DMA_DEV_TO_MEM)
+			d->csdp |= CSDP_DST_PORT_EMIFF | CSDP_SRC_PORT_TIPB;
+		else
+			d->csdp |= CSDP_DST_PORT_TIPB | CSDP_SRC_PORT_EMIFF;
+	} else {
+		if (dir == DMA_DEV_TO_MEM)
+			d->ccr |= CCR_TRIGGER_SRC;
+
+		d->cicr |= CICR_MISALIGNED_ERR_IE | CICR_TRANS_ERR_IE;
+
+		if (port_window)
+			d->csdp |= CSDP_WRITE_LAST_NON_POSTED;
+	}
+	if (od->plat->errata & DMA_ERRATA_PARALLEL_CHANNELS)
+		d->clnk_ctrl = c->dma_ch;
+
+	/*
+	 * Build our scatterlist entries: each contains the address,
+	 * the number of elements (EN) in each frame, and the number of
+	 * frames (FN).  Number of bytes for this entry = ES * EN * FN.
+	 *
+	 * Burst size translates to number of elements with frame sync.
+	 * Note: DMA engine defines burst to be the number of dev-width
+	 * transfers.
+	 */
+	en = burst;
+	frame_bytes = es_bytes[es] * en;
+
+	if (sglen >= 2)
+		d->using_ll = od->ll123_supported;
+
+	for_each_sg(sgl, sgent, sglen, i) {
+		struct omap_sg *osg = &d->sg[i];
+
+		osg->addr = sg_dma_address(sgent);
+		osg->en = en;
+		osg->fn = sg_dma_len(sgent) / frame_bytes;
+
+		if (d->using_ll) {
+			osg->t2_desc = dma_pool_alloc(od->desc_pool, GFP_ATOMIC,
+						      &osg->t2_desc_paddr);
+			if (!osg->t2_desc) {
+				dev_err(chan->device->dev,
+					"t2_desc[%d] allocation failed\n", i);
+				ll_failed = true;
+				d->using_ll = false;
+				continue;
+			}
+
+			omap_dma_fill_type2_desc(d, i, dir, (i == sglen - 1));
+		}
+	}
+
+	d->sglen = sglen;
+
+	/* Release the dma_pool entries if one allocation failed */
+	if (ll_failed) {
+		for (i = 0; i < d->sglen; i++) {
+			struct omap_sg *osg = &d->sg[i];
+
+			if (osg->t2_desc) {
+				dma_pool_free(od->desc_pool, osg->t2_desc,
+					      osg->t2_desc_paddr);
+				osg->t2_desc = NULL;
+			}
+		}
+	}
+
+	return vchan_tx_prep(&c->vc, &d->vd, tx_flags);
+}
+
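+/*
+ * A cyclic transfer is normally set up through the generic dmaengine
+ * API; a rough sketch (buffer/period sizes and callback are hypothetical):
+ *
+ *	desc = dmaengine_prep_dma_cyclic(chan, buf_dma, 4 * period_bytes,
+ *					 period_bytes, DMA_MEM_TO_DEV,
+ *					 DMA_PREP_INTERRUPT);
+ *	desc->callback = period_elapsed;
+ *	dmaengine_submit(desc);
+ *	dma_async_issue_pending(chan);
+ */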
+static struct dma_async_tx_descriptor *omap_dma_prep_dma_cyclic(
+	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+	size_t period_len, enum dma_transfer_direction dir, unsigned long flags)
+{
+	struct omap_dmadev *od = to_omap_dma_dev(chan->device);
+	struct omap_chan *c = to_omap_dma_chan(chan);
+	enum dma_slave_buswidth dev_width;
+	struct omap_desc *d;
+	dma_addr_t dev_addr;
+	unsigned es;
+	u32 burst;
+
+	if (dir == DMA_DEV_TO_MEM) {
+		dev_addr = c->cfg.src_addr;
+		dev_width = c->cfg.src_addr_width;
+		burst = c->cfg.src_maxburst;
+	} else if (dir == DMA_MEM_TO_DEV) {
+		dev_addr = c->cfg.dst_addr;
+		dev_width = c->cfg.dst_addr_width;
+		burst = c->cfg.dst_maxburst;
+	} else {
+		dev_err(chan->device->dev, "%s: bad direction?\n", __func__);
+		return NULL;
+	}
+
+	/* Bus width translates to the element size (ES) */
+	switch (dev_width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+		es = CSDP_DATA_TYPE_8;
+		break;
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		es = CSDP_DATA_TYPE_16;
+		break;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		es = CSDP_DATA_TYPE_32;
+		break;
+	default: /* not reached */
+		return NULL;
+	}
+
+	/* Now allocate and setup the descriptor. */
+	d = kzalloc(sizeof(*d) + sizeof(d->sg[0]), GFP_ATOMIC);
+	if (!d)
+		return NULL;
+
+	d->dir = dir;
+	d->dev_addr = dev_addr;
+	d->fi = burst;
+	d->es = es;
+	d->sg[0].addr = buf_addr;
+	d->sg[0].en = period_len / es_bytes[es];
+	d->sg[0].fn = buf_len / period_len;
+	d->sglen = 1;
+
+	d->ccr = c->ccr;
+	if (dir == DMA_DEV_TO_MEM)
+		d->ccr |= CCR_DST_AMODE_POSTINC | CCR_SRC_AMODE_CONSTANT;
+	else
+		d->ccr |= CCR_DST_AMODE_CONSTANT | CCR_SRC_AMODE_POSTINC;
+
+	d->cicr = CICR_DROP_IE;
+	if (flags & DMA_PREP_INTERRUPT)
+		d->cicr |= CICR_FRAME_IE;
+
+	d->csdp = es;
+
+	if (dma_omap1()) {
+		d->cicr |= CICR_TOUT_IE;
+
+		if (dir == DMA_DEV_TO_MEM)
+			d->csdp |= CSDP_DST_PORT_EMIFF | CSDP_SRC_PORT_MPUI;
+		else
+			d->csdp |= CSDP_DST_PORT_MPUI | CSDP_SRC_PORT_EMIFF;
+	} else {
+		if (burst)
+			d->ccr |= CCR_SYNC_PACKET;
+		else
+			d->ccr |= CCR_SYNC_ELEMENT;
+
+		if (dir == DMA_DEV_TO_MEM) {
+			d->ccr |= CCR_TRIGGER_SRC;
+			d->csdp |= CSDP_DST_PACKED;
+		} else {
+			d->csdp |= CSDP_SRC_PACKED;
+		}
+
+		d->cicr |= CICR_MISALIGNED_ERR_IE | CICR_TRANS_ERR_IE;
+
+		d->csdp |= CSDP_DST_BURST_64 | CSDP_SRC_BURST_64;
+	}
+
+	if (__dma_omap15xx(od->plat->dma_attr))
+		d->ccr |= CCR_AUTO_INIT | CCR_REPEAT;
+	else
+		d->clnk_ctrl = c->dma_ch | CLNK_CTRL_ENABLE_LNK;
+
+	c->cyclic = true;
+
+	return vchan_tx_prep(&c->vc, &d->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *omap_dma_prep_dma_memcpy(
+	struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+	size_t len, unsigned long tx_flags)
+{
+	struct omap_chan *c = to_omap_dma_chan(chan);
+	struct omap_desc *d;
+	uint8_t data_type;
+
+	d = kzalloc(sizeof(*d) + sizeof(d->sg[0]), GFP_ATOMIC);
+	if (!d)
+		return NULL;
+
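+	/*
+	 * Pick the widest element size (8/16/32 bit) that the source
+	 * address, destination address and length are all aligned to.
+	 */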
+	data_type = __ffs((src | dest | len));
+	if (data_type > CSDP_DATA_TYPE_32)
+		data_type = CSDP_DATA_TYPE_32;
+
+	d->dir = DMA_MEM_TO_MEM;
+	d->dev_addr = src;
+	d->fi = 0;
+	d->es = data_type;
+	d->sg[0].en = len / BIT(data_type);
+	d->sg[0].fn = 1;
+	d->sg[0].addr = dest;
+	d->sglen = 1;
+	d->ccr = c->ccr;
+	d->ccr |= CCR_DST_AMODE_POSTINC | CCR_SRC_AMODE_POSTINC;
+
+	d->cicr = CICR_DROP_IE | CICR_FRAME_IE;
+
+	d->csdp = data_type;
+
+	if (dma_omap1()) {
+		d->cicr |= CICR_TOUT_IE;
+		d->csdp |= CSDP_DST_PORT_EMIFF | CSDP_SRC_PORT_EMIFF;
+	} else {
+		d->csdp |= CSDP_DST_PACKED | CSDP_SRC_PACKED;
+		d->cicr |= CICR_MISALIGNED_ERR_IE | CICR_TRANS_ERR_IE;
+		d->csdp |= CSDP_DST_BURST_64 | CSDP_SRC_BURST_64;
+	}
+
+	return vchan_tx_prep(&c->vc, &d->vd, tx_flags);
+}
+
+static struct dma_async_tx_descriptor *omap_dma_prep_dma_interleaved(
+	struct dma_chan *chan, struct dma_interleaved_template *xt,
+	unsigned long flags)
+{
+	struct omap_chan *c = to_omap_dma_chan(chan);
+	struct omap_desc *d;
+	struct omap_sg *sg;
+	uint8_t data_type;
+	size_t src_icg, dst_icg;
+
+	/* Slave mode is not supported */
+	if (is_slave_direction(xt->dir))
+		return NULL;
+
+	if (xt->frame_size != 1 || xt->numf == 0)
+		return NULL;
+
+	d = kzalloc(sizeof(*d) + sizeof(d->sg[0]), GFP_ATOMIC);
+	if (!d)
+		return NULL;
+
+	data_type = __ffs((xt->src_start | xt->dst_start | xt->sgl[0].size));
+	if (data_type > CSDP_DATA_TYPE_32)
+		data_type = CSDP_DATA_TYPE_32;
+
+	sg = &d->sg[0];
+	d->dir = DMA_MEM_TO_MEM;
+	d->dev_addr = xt->src_start;
+	d->es = data_type;
+	sg->en = xt->sgl[0].size / BIT(data_type);
+	sg->fn = xt->numf;
+	sg->addr = xt->dst_start;
+	d->sglen = 1;
+	d->ccr = c->ccr;
+
+	src_icg = dmaengine_get_src_icg(xt, &xt->sgl[0]);
+	dst_icg = dmaengine_get_dst_icg(xt, &xt->sgl[0]);
+	if (src_icg) {
+		d->ccr |= CCR_SRC_AMODE_DBLIDX;
+		d->ei = 1;
+		d->fi = src_icg;
+	} else if (xt->src_inc) {
+		d->ccr |= CCR_SRC_AMODE_POSTINC;
+		d->fi = 0;
+	} else {
+		dev_err(chan->device->dev,
+			"%s: SRC constant addressing is not supported\n",
+			__func__);
+		kfree(d);
+		return NULL;
+	}
+
+	if (dst_icg) {
+		d->ccr |= CCR_DST_AMODE_DBLIDX;
+		sg->ei = 1;
+		sg->fi = dst_icg;
+	} else if (xt->dst_inc) {
+		d->ccr |= CCR_DST_AMODE_POSTINC;
+		sg->fi = 0;
+	} else {
+		dev_err(chan->device->dev,
+			"%s: DST constant addressing is not supported\n",
+			__func__);
+		kfree(d);
+		return NULL;
+	}
+
+	d->cicr = CICR_DROP_IE | CICR_FRAME_IE;
+
+	d->csdp = data_type;
+
+	if (dma_omap1()) {
+		d->cicr |= CICR_TOUT_IE;
+		d->csdp |= CSDP_DST_PORT_EMIFF | CSDP_SRC_PORT_EMIFF;
+	} else {
+		d->csdp |= CSDP_DST_PACKED | CSDP_SRC_PACKED;
+		d->cicr |= CICR_MISALIGNED_ERR_IE | CICR_TRANS_ERR_IE;
+		d->csdp |= CSDP_DST_BURST_64 | CSDP_SRC_BURST_64;
+	}
+
+	return vchan_tx_prep(&c->vc, &d->vd, flags);
+}
+
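+/*
+ * Channel parameters come from the client via dmaengine_slave_config();
+ * a minimal device-to-memory sketch (rx_fifo_dma_addr is a hypothetical
+ * placeholder):
+ *
+ *	struct dma_slave_config cfg = {
+ *		.src_addr	= rx_fifo_dma_addr,
+ *		.src_addr_width	= DMA_SLAVE_BUSWIDTH_1_BYTE,
+ *		.src_maxburst	= 1,
+ *	};
+ *
+ *	dmaengine_slave_config(chan, &cfg);
+ */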
+static int omap_dma_slave_config(struct dma_chan *chan, struct dma_slave_config *cfg)
+{
+	struct omap_chan *c = to_omap_dma_chan(chan);
+
+	if (cfg->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES ||
+	    cfg->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)
+		return -EINVAL;
+
+	if (cfg->src_maxburst > chan->device->max_burst ||
+	    cfg->dst_maxburst > chan->device->max_burst)
+		return -EINVAL;
+
+	memcpy(&c->cfg, cfg, sizeof(c->cfg));
+
+	return 0;
+}
+
+static int omap_dma_terminate_all(struct dma_chan *chan)
+{
+	struct omap_chan *c = to_omap_dma_chan(chan);
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+
+	/*
+	 * Stop DMA activity: we assume the callback will not be called
+	 * after omap_dma_stop() returns (even if it does, it will see
+	 * c->desc is NULL and exit.)
+	 */
+	if (c->desc) {
+		vchan_terminate_vdesc(&c->desc->vd);
+		c->desc = NULL;
+		/* Avoid stopping the dma twice */
+		if (!c->paused)
+			omap_dma_stop(c);
+	}
+
+	c->cyclic = false;
+	c->paused = false;
+
+	vchan_get_all_descriptors(&c->vc, &head);
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+	vchan_dma_desc_free_list(&c->vc, &head);
+
+	return 0;
+}
+
+static void omap_dma_synchronize(struct dma_chan *chan)
+{
+	struct omap_chan *c = to_omap_dma_chan(chan);
+
+	vchan_synchronize(&c->vc);
+}
+
+static int omap_dma_pause(struct dma_chan *chan)
+{
+	struct omap_chan *c = to_omap_dma_chan(chan);
+	struct omap_dmadev *od = to_omap_dma_dev(chan->device);
+	unsigned long flags;
+	int ret = -EINVAL;
+	bool can_pause = false;
+
+	spin_lock_irqsave(&od->irq_lock, flags);
+
+	if (!c->desc)
+		goto out;
+
+	if (c->cyclic)
+		can_pause = true;
+
+	/*
+	 * We do not allow DMA_MEM_TO_DEV transfers to be paused.
+	 * From the AM572x TRM, 16.1.4.18 Disabling a Channel During Transfer:
+	 * "When a channel is disabled during a transfer, the channel undergoes
+	 * an abort, unless it is hardware-source-synchronized …".
+	 * A source-synchronised channel is one where the fetching of data is
+	 * under control of the device. In other words, a device-to-memory
+	 * transfer. So, a destination-synchronised channel (which would be a
+	 * memory-to-device transfer) undergoes an abort if the CCR_ENABLE
+	 * bit is cleared.
+	 * From 16.1.4.20.4.6.2 Abort: "If an abort trigger occurs, the channel
+	 * aborts immediately after completion of current read/write
+	 * transactions and then the FIFO is cleaned up." The term "cleaned up"
+	 * is not defined. TI recommends checking that RD_ACTIVE and WR_ACTIVE
+	 * are both clear _before_ disabling the channel, otherwise data loss
+	 * will occur.
+	 * The problem is that if the channel is active, then device activity
+	 * can result in DMA activity starting between reading those as both
+	 * clear and the write to DMA_CCR to clear the enable bit hitting the
+	 * hardware. If the DMA hardware can't drain the data in its FIFO to the
+	 * destination, then data loss "might" occur (say if we write to a UART
+	 * and the UART is not accepting any further data).
+	 */
+	else if (c->desc->dir == DMA_DEV_TO_MEM)
+		can_pause = true;
+
+	if (can_pause && !c->paused) {
+		ret = omap_dma_stop(c);
+		if (!ret)
+			c->paused = true;
+	}
+out:
+	spin_unlock_irqrestore(&od->irq_lock, flags);
+
+	return ret;
+}
+
+static int omap_dma_resume(struct dma_chan *chan)
+{
+	struct omap_chan *c = to_omap_dma_chan(chan);
+	struct omap_dmadev *od = to_omap_dma_dev(chan->device);
+	unsigned long flags;
+	int ret = -EINVAL;
+
+	spin_lock_irqsave(&od->irq_lock, flags);
+
+	if (c->paused && c->desc) {
+		mb();
+
+		/* Restore channel link register */
+		omap_dma_chan_write(c, CLNK_CTRL, c->desc->clnk_ctrl);
+
+		omap_dma_start(c, c->desc);
+		c->paused = false;
+		ret = 0;
+	}
+	spin_unlock_irqrestore(&od->irq_lock, flags);
+
+	return ret;
+}
+
+static int omap_dma_chan_init(struct omap_dmadev *od)
+{
+	struct omap_chan *c;
+
+	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	if (!c)
+		return -ENOMEM;
+
+	c->reg_map = od->reg_map;
+	c->vc.desc_free = omap_dma_desc_free;
+	vchan_init(&c->vc, &od->ddev);
+
+	return 0;
+}
+
+static void omap_dma_free(struct omap_dmadev *od)
+{
+	while (!list_empty(&od->ddev.channels)) {
+		struct omap_chan *c = list_first_entry(&od->ddev.channels,
+			struct omap_chan, vc.chan.device_node);
+
+		list_del(&c->vc.chan.device_node);
+		tasklet_kill(&c->vc.task);
+		kfree(c);
+	}
+}
+
+#define OMAP_DMA_BUSWIDTHS	(BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+				 BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
+static int omap_dma_probe(struct platform_device *pdev)
+{
+	struct omap_dmadev *od;
+	struct resource *res;
+	int rc, i, irq;
+	u32 lch_count;
+
+	od = devm_kzalloc(&pdev->dev, sizeof(*od), GFP_KERNEL);
+	if (!od)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	od->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(od->base))
+		return PTR_ERR(od->base);
+
+	od->plat = omap_get_plat_info();
+	if (!od->plat)
+		return -EPROBE_DEFER;
+
+	od->reg_map = od->plat->reg_map;
+
+	dma_cap_set(DMA_SLAVE, od->ddev.cap_mask);
+	dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask);
+	dma_cap_set(DMA_MEMCPY, od->ddev.cap_mask);
+	dma_cap_set(DMA_INTERLEAVE, od->ddev.cap_mask);
+	od->ddev.device_alloc_chan_resources = omap_dma_alloc_chan_resources;
+	od->ddev.device_free_chan_resources = omap_dma_free_chan_resources;
+	od->ddev.device_tx_status = omap_dma_tx_status;
+	od->ddev.device_issue_pending = omap_dma_issue_pending;
+	od->ddev.device_prep_slave_sg = omap_dma_prep_slave_sg;
+	od->ddev.device_prep_dma_cyclic = omap_dma_prep_dma_cyclic;
+	od->ddev.device_prep_dma_memcpy = omap_dma_prep_dma_memcpy;
+	od->ddev.device_prep_interleaved_dma = omap_dma_prep_dma_interleaved;
+	od->ddev.device_config = omap_dma_slave_config;
+	od->ddev.device_pause = omap_dma_pause;
+	od->ddev.device_resume = omap_dma_resume;
+	od->ddev.device_terminate_all = omap_dma_terminate_all;
+	od->ddev.device_synchronize = omap_dma_synchronize;
+	od->ddev.src_addr_widths = OMAP_DMA_BUSWIDTHS;
+	od->ddev.dst_addr_widths = OMAP_DMA_BUSWIDTHS;
+	od->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	if (__dma_omap15xx(od->plat->dma_attr))
+		od->ddev.residue_granularity =
+				DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+	else
+		od->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+	od->ddev.max_burst = SZ_16M - 1; /* CCEN: 24bit unsigned */
+	od->ddev.dev = &pdev->dev;
+	INIT_LIST_HEAD(&od->ddev.channels);
+	spin_lock_init(&od->lock);
+	spin_lock_init(&od->irq_lock);
+
+	/* Number of DMA requests */
+	od->dma_requests = OMAP_SDMA_REQUESTS;
+	if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node,
+						      "dma-requests",
+						      &od->dma_requests)) {
+		dev_info(&pdev->dev,
+			 "Missing dma-requests property, using %u.\n",
+			 OMAP_SDMA_REQUESTS);
+	}
+
+	/* Number of available logical channels */
+	if (!pdev->dev.of_node) {
+		lch_count = od->plat->dma_attr->lch_count;
+		if (unlikely(!lch_count))
+			lch_count = OMAP_SDMA_CHANNELS;
+	} else if (of_property_read_u32(pdev->dev.of_node, "dma-channels",
+					&lch_count)) {
+		dev_info(&pdev->dev,
+			 "Missing dma-channels property, using %u.\n",
+			 OMAP_SDMA_CHANNELS);
+		lch_count = OMAP_SDMA_CHANNELS;
+	}
+
+	od->lch_map = devm_kcalloc(&pdev->dev, lch_count, sizeof(*od->lch_map),
+				   GFP_KERNEL);
+	if (!od->lch_map)
+		return -ENOMEM;
+
+	for (i = 0; i < od->dma_requests; i++) {
+		rc = omap_dma_chan_init(od);
+		if (rc) {
+			omap_dma_free(od);
+			return rc;
+		}
+	}
+
+	irq = platform_get_irq(pdev, 1);
+	if (irq <= 0) {
+		dev_info(&pdev->dev, "failed to get L1 IRQ: %d\n", irq);
+		od->legacy = true;
+	} else {
+		/* Disable all interrupts */
+		od->irq_enable_mask = 0;
+		omap_dma_glbl_write(od, IRQENABLE_L1, 0);
+
+		rc = devm_request_irq(&pdev->dev, irq, omap_dma_irq,
+				      IRQF_SHARED, "omap-dma-engine", od);
+		if (rc)
+			return rc;
+	}
+
+	if (omap_dma_glbl_read(od, CAPS_0) & CAPS_0_SUPPORT_LL123)
+		od->ll123_supported = true;
+
+	od->ddev.filter.map = od->plat->slave_map;
+	od->ddev.filter.mapcnt = od->plat->slavecnt;
+	od->ddev.filter.fn = omap_dma_filter_fn;
+
+	if (od->ll123_supported) {
+		od->desc_pool = dma_pool_create(dev_name(&pdev->dev),
+						&pdev->dev,
+						sizeof(struct omap_type2_desc),
+						4, 0);
+		if (!od->desc_pool) {
+			dev_err(&pdev->dev,
+				"unable to allocate descriptor pool\n");
+			od->ll123_supported = false;
+		}
+	}
+
+	rc = dma_async_device_register(&od->ddev);
+	if (rc) {
+		pr_warn("OMAP-DMA: failed to register slave DMA engine device: %d\n",
+			rc);
+		omap_dma_free(od);
+		return rc;
+	}
+
+	platform_set_drvdata(pdev, od);
+
+	if (pdev->dev.of_node) {
+		omap_dma_info.dma_cap = od->ddev.cap_mask;
+
+		/* Device-tree DMA controller registration */
+		rc = of_dma_controller_register(pdev->dev.of_node,
+				of_dma_simple_xlate, &omap_dma_info);
+		if (rc) {
+			pr_warn("OMAP-DMA: failed to register DMA controller\n");
+			dma_async_device_unregister(&od->ddev);
+			omap_dma_free(od);
+		}
+	}
+
+	dev_info(&pdev->dev, "OMAP DMA engine driver%s\n",
+		 od->ll123_supported ? " (LinkedList1/2/3 supported)" : "");
+
+	return rc;
+}
+
+static int omap_dma_remove(struct platform_device *pdev)
+{
+	struct omap_dmadev *od = platform_get_drvdata(pdev);
+	int irq;
+
+	if (pdev->dev.of_node)
+		of_dma_controller_free(pdev->dev.of_node);
+
+	irq = platform_get_irq(pdev, 1);
+	devm_free_irq(&pdev->dev, irq, od);
+
+	dma_async_device_unregister(&od->ddev);
+
+	if (!od->legacy) {
+		/* Disable all interrupts */
+		omap_dma_glbl_write(od, IRQENABLE_L0, 0);
+	}
+
+	if (od->ll123_supported)
+		dma_pool_destroy(od->desc_pool);
+
+	omap_dma_free(od);
+
+	return 0;
+}
+
+static const struct of_device_id omap_dma_match[] = {
+	{ .compatible = "ti,omap2420-sdma", },
+	{ .compatible = "ti,omap2430-sdma", },
+	{ .compatible = "ti,omap3430-sdma", },
+	{ .compatible = "ti,omap3630-sdma", },
+	{ .compatible = "ti,omap4430-sdma", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, omap_dma_match);
+
+static struct platform_driver omap_dma_driver = {
+	.probe	= omap_dma_probe,
+	.remove	= omap_dma_remove,
+	.driver = {
+		.name = "omap-dma-engine",
+		.of_match_table = of_match_ptr(omap_dma_match),
+	},
+};
+
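+/*
+ * dmaengine filter for legacy (non-DT) clients: the parameter is the sDMA
+ * request line number, typically passed via dma_request_channel().  On DT
+ * systems of_dma_simple_xlate(), registered in probe, ends up invoking
+ * this filter with the request number taken from the DT binding.
+ */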
+bool omap_dma_filter_fn(struct dma_chan *chan, void *param)
+{
+	if (chan->device->dev->driver == &omap_dma_driver.driver) {
+		struct omap_dmadev *od = to_omap_dma_dev(chan->device);
+		struct omap_chan *c = to_omap_dma_chan(chan);
+		unsigned req = *(unsigned *)param;
+
+		if (req <= od->dma_requests) {
+			c->dma_sig = req;
+			return true;
+		}
+	}
+	return false;
+}
+EXPORT_SYMBOL_GPL(omap_dma_filter_fn);
+
+static int omap_dma_init(void)
+{
+	return platform_driver_register(&omap_dma_driver);
+}
+subsys_initcall(omap_dma_init);
+
+static void __exit omap_dma_exit(void)
+{
+	platform_driver_unregister(&omap_dma_driver);
+}
+module_exit(omap_dma_exit);
+
+MODULE_AUTHOR("Russell King");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c
new file mode 100644
index 0000000..395c698
--- /dev/null
+++ b/drivers/dma/timb_dma.c
@@ -0,0 +1,781 @@
+/*
+ * timb_dma.c timberdale FPGA DMA driver
+ * Copyright (c) 2010 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/* Supports:
+ * Timberdale FPGA DMA engine
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <linux/timb_dma.h>
+
+#include "dmaengine.h"
+
+#define DRIVER_NAME "timb-dma"
+
+/* Global DMA registers */
+#define TIMBDMA_ACR		0x34
+#define TIMBDMA_32BIT_ADDR	0x01
+
+#define TIMBDMA_ISR		0x080000
+#define TIMBDMA_IPR		0x080004
+#define TIMBDMA_IER		0x080008
+
+/* Channel specific registers */
+/* RX instances base addresses are 0x00, 0x40, 0x80 ...
+ * TX instances base addresses are 0x18, 0x58, 0x98 ...
+ */
+#define TIMBDMA_INSTANCE_OFFSET		0x40
+#define TIMBDMA_INSTANCE_TX_OFFSET	0x18
+
+/* RX registers, relative the instance base */
+#define TIMBDMA_OFFS_RX_DHAR	0x00
+#define TIMBDMA_OFFS_RX_DLAR	0x04
+#define TIMBDMA_OFFS_RX_LR	0x0C
+#define TIMBDMA_OFFS_RX_BLR	0x10
+#define TIMBDMA_OFFS_RX_ER	0x14
+#define TIMBDMA_RX_EN		0x01
+/* Bytes per row, video-specific register
+ * which is placed after the TX registers...
+ */
+#define TIMBDMA_OFFS_RX_BPRR	0x30
+
+/* TX registers, relative the instance base */
+#define TIMBDMA_OFFS_TX_DHAR	0x00
+#define TIMBDMA_OFFS_TX_DLAR	0x04
+#define TIMBDMA_OFFS_TX_BLR	0x0C
+#define TIMBDMA_OFFS_TX_LR	0x14
+
+
+#define TIMB_DMA_DESC_SIZE	8
+
+struct timb_dma_desc {
+	struct list_head		desc_node;
+	struct dma_async_tx_descriptor	txd;
+	u8				*desc_list;
+	unsigned int			desc_list_len;
+	bool				interrupt;
+};
+
+struct timb_dma_chan {
+	struct dma_chan		chan;
+	void __iomem		*membase;
+	spinlock_t		lock; /* Used to protect data structures,
+					especially the lists and descriptors,
+					from races between the tasklet and calls
+					from above */
+	bool			ongoing;
+	struct list_head	active_list;
+	struct list_head	queue;
+	struct list_head	free_list;
+	unsigned int		bytes_per_line;
+	enum dma_transfer_direction	direction;
+	unsigned int		descs; /* Descriptors to allocate */
+	unsigned int		desc_elems; /* number of elems per descriptor */
+};
+
+struct timb_dma {
+	struct dma_device	dma;
+	void __iomem		*membase;
+	struct tasklet_struct	tasklet;
+	struct timb_dma_chan	channels[0];
+};
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+	return &chan->dev->device;
+}
+static struct device *chan2dmadev(struct dma_chan *chan)
+{
+	return chan2dev(chan)->parent->parent;
+}
+
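+/*
+ * Recover the parent struct timb_dma from a channel pointer.  This relies
+ * on the channels[] zero-length array sitting directly after struct
+ * timb_dma within a single allocation.
+ */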
+static struct timb_dma *tdchantotd(struct timb_dma_chan *td_chan)
+{
+	int id = td_chan->chan.chan_id;
+	return (struct timb_dma *)((u8 *)td_chan -
+		id * sizeof(struct timb_dma_chan) - sizeof(struct timb_dma));
+}
+
+/* Must be called with the spinlock held */
+static void __td_enable_chan_irq(struct timb_dma_chan *td_chan)
+{
+	int id = td_chan->chan.chan_id;
+	struct timb_dma *td = tdchantotd(td_chan);
+	u32 ier;
+
+	/* enable interrupt for this channel */
+	ier = ioread32(td->membase + TIMBDMA_IER);
+	ier |= 1 << id;
+	dev_dbg(chan2dev(&td_chan->chan), "Enabling irq: %d, IER: 0x%x\n", id,
+		ier);
+	iowrite32(ier, td->membase + TIMBDMA_IER);
+}
+
+/* Should be called with the spinlock held */
+static bool __td_dma_done_ack(struct timb_dma_chan *td_chan)
+{
+	int id = td_chan->chan.chan_id;
+	struct timb_dma *td = (struct timb_dma *)((u8 *)td_chan -
+		id * sizeof(struct timb_dma_chan) - sizeof(struct timb_dma));
+	u32 isr;
+	bool done = false;
+
+	dev_dbg(chan2dev(&td_chan->chan), "Checking irq: %d, td: %p\n", id, td);
+
+	isr = ioread32(td->membase + TIMBDMA_ISR) & (1 << id);
+	if (isr) {
+		iowrite32(isr, td->membase + TIMBDMA_ISR);
+		done = true;
+	}
+
+	return done;
+}
+
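+/*
+ * Build one 8-byte (TIMB_DMA_DESC_SIZE) descriptor.  As written below,
+ * byte 0 holds the control flags (0x21, plus 0x02 for the last entry),
+ * byte 1 is zero, bytes 2-3 hold the transfer length and bytes 4-7 the
+ * bus address, least significant byte first.
+ */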
+static int td_fill_desc(struct timb_dma_chan *td_chan, u8 *dma_desc,
+	struct scatterlist *sg, bool last)
+{
+	if (sg_dma_len(sg) > USHRT_MAX) {
+		dev_err(chan2dev(&td_chan->chan), "Too big sg element\n");
+		return -EINVAL;
+	}
+
+	/* length must be word aligned */
+	if (sg_dma_len(sg) % sizeof(u32)) {
+		dev_err(chan2dev(&td_chan->chan), "Incorrect length: %d\n",
+			sg_dma_len(sg));
+		return -EINVAL;
+	}
+
+	dev_dbg(chan2dev(&td_chan->chan), "desc: %p, addr: 0x%llx\n",
+		dma_desc, (unsigned long long)sg_dma_address(sg));
+
+	dma_desc[7] = (sg_dma_address(sg) >> 24) & 0xff;
+	dma_desc[6] = (sg_dma_address(sg) >> 16) & 0xff;
+	dma_desc[5] = (sg_dma_address(sg) >> 8) & 0xff;
+	dma_desc[4] = (sg_dma_address(sg) >> 0) & 0xff;
+
+	dma_desc[3] = (sg_dma_len(sg) >> 8) & 0xff;
+	dma_desc[2] = (sg_dma_len(sg) >> 0) & 0xff;
+
+	dma_desc[1] = 0x00;
+	dma_desc[0] = 0x21 | (last ? 0x02 : 0); /* tran, valid */
+
+	return 0;
+}
+
+/* Must be called with the spinlock held */
+static void __td_start_dma(struct timb_dma_chan *td_chan)
+{
+	struct timb_dma_desc *td_desc;
+
+	if (td_chan->ongoing) {
+		dev_err(chan2dev(&td_chan->chan),
+			"Transfer already ongoing\n");
+		return;
+	}
+
+	td_desc = list_entry(td_chan->active_list.next, struct timb_dma_desc,
+		desc_node);
+
+	dev_dbg(chan2dev(&td_chan->chan),
+		"td_chan: %p, chan: %d, membase: %p\n",
+		td_chan, td_chan->chan.chan_id, td_chan->membase);
+
+	if (td_chan->direction == DMA_DEV_TO_MEM) {
+
+		/* descriptor address */
+		iowrite32(0, td_chan->membase + TIMBDMA_OFFS_RX_DHAR);
+		iowrite32(td_desc->txd.phys, td_chan->membase +
+			TIMBDMA_OFFS_RX_DLAR);
+		/* Bytes per line */
+		iowrite32(td_chan->bytes_per_line, td_chan->membase +
+			TIMBDMA_OFFS_RX_BPRR);
+		/* enable RX */
+		iowrite32(TIMBDMA_RX_EN, td_chan->membase + TIMBDMA_OFFS_RX_ER);
+	} else {
+		/* address high */
+		iowrite32(0, td_chan->membase + TIMBDMA_OFFS_TX_DHAR);
+		iowrite32(td_desc->txd.phys, td_chan->membase +
+			TIMBDMA_OFFS_TX_DLAR);
+	}
+
+	td_chan->ongoing = true;
+
+	if (td_desc->interrupt)
+		__td_enable_chan_irq(td_chan);
+}
+
+static void __td_finish(struct timb_dma_chan *td_chan)
+{
+	struct dmaengine_desc_callback	cb;
+	struct dma_async_tx_descriptor	*txd;
+	struct timb_dma_desc		*td_desc;
+
+	/* can happen if the descriptor is canceled */
+	if (list_empty(&td_chan->active_list))
+		return;
+
+	td_desc = list_entry(td_chan->active_list.next, struct timb_dma_desc,
+		desc_node);
+	txd = &td_desc->txd;
+
+	dev_dbg(chan2dev(&td_chan->chan), "descriptor %u complete\n",
+		txd->cookie);
+
+	/* make sure to stop the transfer */
+	if (td_chan->direction == DMA_DEV_TO_MEM)
+		iowrite32(0, td_chan->membase + TIMBDMA_OFFS_RX_ER);
+/* Currently no support for stopping DMA transfers
+	else
+		iowrite32(0, td_chan->membase + TIMBDMA_OFFS_TX_DLAR);
+*/
+	dma_cookie_complete(txd);
+	td_chan->ongoing = false;
+
+	dmaengine_desc_get_callback(txd, &cb);
+
+	list_move(&td_desc->desc_node, &td_chan->free_list);
+
+	dma_descriptor_unmap(txd);
+	/*
+	 * The API requires that no submissions are done from a
+	 * callback, so we don't need to drop the lock here
+	 */
+	dmaengine_desc_callback_invoke(&cb, NULL);
+}
+
+static u32 __td_ier_mask(struct timb_dma *td)
+{
+	int i;
+	u32 ret = 0;
+
+	for (i = 0; i < td->dma.chancnt; i++) {
+		struct timb_dma_chan *td_chan = td->channels + i;
+		if (td_chan->ongoing) {
+			struct timb_dma_desc *td_desc =
+				list_entry(td_chan->active_list.next,
+				struct timb_dma_desc, desc_node);
+			if (td_desc->interrupt)
+				ret |= 1 << i;
+		}
+	}
+
+	return ret;
+}
+
+static void __td_start_next(struct timb_dma_chan *td_chan)
+{
+	struct timb_dma_desc *td_desc;
+
+	BUG_ON(list_empty(&td_chan->queue));
+	BUG_ON(td_chan->ongoing);
+
+	td_desc = list_entry(td_chan->queue.next, struct timb_dma_desc,
+		desc_node);
+
+	dev_dbg(chan2dev(&td_chan->chan), "%s: started %u\n",
+		__func__, td_desc->txd.cookie);
+
+	list_move(&td_desc->desc_node, &td_chan->active_list);
+	__td_start_dma(td_chan);
+}
+
+static dma_cookie_t td_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+	struct timb_dma_desc *td_desc = container_of(txd, struct timb_dma_desc,
+		txd);
+	struct timb_dma_chan *td_chan = container_of(txd->chan,
+		struct timb_dma_chan, chan);
+	dma_cookie_t cookie;
+
+	spin_lock_bh(&td_chan->lock);
+	cookie = dma_cookie_assign(txd);
+
+	if (list_empty(&td_chan->active_list)) {
+		dev_dbg(chan2dev(txd->chan), "%s: started %u\n", __func__,
+			txd->cookie);
+		list_add_tail(&td_desc->desc_node, &td_chan->active_list);
+		__td_start_dma(td_chan);
+	} else {
+		dev_dbg(chan2dev(txd->chan), "tx_submit: queued %u\n",
+			txd->cookie);
+
+		list_add_tail(&td_desc->desc_node, &td_chan->queue);
+	}
+
+	spin_unlock_bh(&td_chan->lock);
+
+	return cookie;
+}
+
+static struct timb_dma_desc *td_alloc_init_desc(struct timb_dma_chan *td_chan)
+{
+	struct dma_chan *chan = &td_chan->chan;
+	struct timb_dma_desc *td_desc;
+	int err;
+
+	td_desc = kzalloc(sizeof(struct timb_dma_desc), GFP_KERNEL);
+	if (!td_desc)
+		goto out;
+
+	td_desc->desc_list_len = td_chan->desc_elems * TIMB_DMA_DESC_SIZE;
+
+	td_desc->desc_list = kzalloc(td_desc->desc_list_len, GFP_KERNEL);
+	if (!td_desc->desc_list)
+		goto err;
+
+	dma_async_tx_descriptor_init(&td_desc->txd, chan);
+	td_desc->txd.tx_submit = td_tx_submit;
+	td_desc->txd.flags = DMA_CTRL_ACK;
+
+	td_desc->txd.phys = dma_map_single(chan2dmadev(chan),
+		td_desc->desc_list, td_desc->desc_list_len, DMA_TO_DEVICE);
+
+	err = dma_mapping_error(chan2dmadev(chan), td_desc->txd.phys);
+	if (err) {
+		dev_err(chan2dev(chan), "DMA mapping error: %d\n", err);
+		goto err;
+	}
+
+	return td_desc;
+err:
+	kfree(td_desc->desc_list);
+	kfree(td_desc);
+out:
+	return NULL;
+
+}
+
+static void td_free_desc(struct timb_dma_desc *td_desc)
+{
+	dev_dbg(chan2dev(td_desc->txd.chan), "Freeing desc: %p\n", td_desc);
+	dma_unmap_single(chan2dmadev(td_desc->txd.chan), td_desc->txd.phys,
+		td_desc->desc_list_len, DMA_TO_DEVICE);
+
+	kfree(td_desc->desc_list);
+	kfree(td_desc);
+}
+
+static void td_desc_put(struct timb_dma_chan *td_chan,
+	struct timb_dma_desc *td_desc)
+{
+	dev_dbg(chan2dev(&td_chan->chan), "Putting desc: %p\n", td_desc);
+
+	spin_lock_bh(&td_chan->lock);
+	list_add(&td_desc->desc_node, &td_chan->free_list);
+	spin_unlock_bh(&td_chan->lock);
+}
+
+static struct timb_dma_desc *td_desc_get(struct timb_dma_chan *td_chan)
+{
+	struct timb_dma_desc *td_desc, *_td_desc;
+	struct timb_dma_desc *ret = NULL;
+
+	spin_lock_bh(&td_chan->lock);
+	list_for_each_entry_safe(td_desc, _td_desc, &td_chan->free_list,
+		desc_node) {
+		if (async_tx_test_ack(&td_desc->txd)) {
+			list_del(&td_desc->desc_node);
+			ret = td_desc;
+			break;
+		}
+		dev_dbg(chan2dev(&td_chan->chan), "desc %p not ACKed\n",
+			td_desc);
+	}
+	spin_unlock_bh(&td_chan->lock);
+
+	return ret;
+}
+
+static int td_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct timb_dma_chan *td_chan =
+		container_of(chan, struct timb_dma_chan, chan);
+	int i;
+
+	dev_dbg(chan2dev(chan), "%s: entry\n", __func__);
+
+	BUG_ON(!list_empty(&td_chan->free_list));
+	for (i = 0; i < td_chan->descs; i++) {
+		struct timb_dma_desc *td_desc = td_alloc_init_desc(td_chan);
+		if (!td_desc) {
+			if (i)
+				break;
+			else {
+				dev_err(chan2dev(chan),
+					"Couldn't allocate any descriptors\n");
+				return -ENOMEM;
+			}
+		}
+
+		td_desc_put(td_chan, td_desc);
+	}
+
+	spin_lock_bh(&td_chan->lock);
+	dma_cookie_init(chan);
+	spin_unlock_bh(&td_chan->lock);
+
+	return 0;
+}
+
+static void td_free_chan_resources(struct dma_chan *chan)
+{
+	struct timb_dma_chan *td_chan =
+		container_of(chan, struct timb_dma_chan, chan);
+	struct timb_dma_desc *td_desc, *_td_desc;
+	LIST_HEAD(list);
+
+	dev_dbg(chan2dev(chan), "%s: Entry\n", __func__);
+
+	/* check that all descriptors are free */
+	BUG_ON(!list_empty(&td_chan->active_list));
+	BUG_ON(!list_empty(&td_chan->queue));
+
+	spin_lock_bh(&td_chan->lock);
+	list_splice_init(&td_chan->free_list, &list);
+	spin_unlock_bh(&td_chan->lock);
+
+	list_for_each_entry_safe(td_desc, _td_desc, &list, desc_node) {
+		dev_dbg(chan2dev(chan), "%s: Freeing desc: %p\n", __func__,
+			td_desc);
+		td_free_desc(td_desc);
+	}
+}
+
+static enum dma_status td_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+				    struct dma_tx_state *txstate)
+{
+	enum dma_status ret;
+
+	dev_dbg(chan2dev(chan), "%s: Entry\n", __func__);
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+
+	dev_dbg(chan2dev(chan), "%s: exit, ret: %d\n", 	__func__, ret);
+
+	return ret;
+}
+
+static void td_issue_pending(struct dma_chan *chan)
+{
+	struct timb_dma_chan *td_chan =
+		container_of(chan, struct timb_dma_chan, chan);
+
+	dev_dbg(chan2dev(chan), "%s: Entry\n", __func__);
+	spin_lock_bh(&td_chan->lock);
+
+	if (!list_empty(&td_chan->active_list))
+		/* transfer ongoing */
+		if (__td_dma_done_ack(td_chan))
+			__td_finish(td_chan);
+
+	if (list_empty(&td_chan->active_list) && !list_empty(&td_chan->queue))
+		__td_start_next(td_chan);
+
+	spin_unlock_bh(&td_chan->lock);
+}
+
+static struct dma_async_tx_descriptor *td_prep_slave_sg(struct dma_chan *chan,
+	struct scatterlist *sgl, unsigned int sg_len,
+	enum dma_transfer_direction direction, unsigned long flags,
+	void *context)
+{
+	struct timb_dma_chan *td_chan =
+		container_of(chan, struct timb_dma_chan, chan);
+	struct timb_dma_desc *td_desc;
+	struct scatterlist *sg;
+	unsigned int i;
+	unsigned int desc_usage = 0;
+
+	if (!sgl || !sg_len) {
+		dev_err(chan2dev(chan), "%s: No SG list\n", __func__);
+		return NULL;
+	}
+
+	/* even channels are for RX, odd for TX */
+	if (td_chan->direction != direction) {
+		dev_err(chan2dev(chan),
+			"Requesting channel in wrong direction\n");
+		return NULL;
+	}
+
+	td_desc = td_desc_get(td_chan);
+	if (!td_desc) {
+		dev_err(chan2dev(chan), "Not enough descriptors available\n");
+		return NULL;
+	}
+
+	td_desc->interrupt = (flags & DMA_PREP_INTERRUPT) != 0;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		int err;
+		if (desc_usage + TIMB_DMA_DESC_SIZE > td_desc->desc_list_len) {
+			dev_err(chan2dev(chan), "No descriptor space\n");
+			td_desc_put(td_chan, td_desc);
+			return NULL;
+		}
+
+		err = td_fill_desc(td_chan, td_desc->desc_list + desc_usage, sg,
+			i == (sg_len - 1));
+		if (err) {
+			dev_err(chan2dev(chan), "Failed to update desc: %d\n",
+				err);
+			td_desc_put(td_chan, td_desc);
+			return NULL;
+		}
+		desc_usage += TIMB_DMA_DESC_SIZE;
+	}
+
+	dma_sync_single_for_device(chan2dmadev(chan), td_desc->txd.phys,
+		td_desc->desc_list_len, DMA_MEM_TO_DEV);
+
+	return &td_desc->txd;
+}
+
+static int td_terminate_all(struct dma_chan *chan)
+{
+	struct timb_dma_chan *td_chan =
+		container_of(chan, struct timb_dma_chan, chan);
+	struct timb_dma_desc *td_desc, *_td_desc;
+
+	dev_dbg(chan2dev(chan), "%s: Entry\n", __func__);
+
+	/* first the easy part, put the queue into the free list */
+	spin_lock_bh(&td_chan->lock);
+	list_for_each_entry_safe(td_desc, _td_desc, &td_chan->queue,
+		desc_node)
+		list_move(&td_desc->desc_node, &td_chan->free_list);
+
+	/* now tear down the running */
+	__td_finish(td_chan);
+	spin_unlock_bh(&td_chan->lock);
+
+	return 0;
+}
+
+static void td_tasklet(unsigned long data)
+{
+	struct timb_dma *td = (struct timb_dma *)data;
+	u32 isr;
+	u32 ipr;
+	u32 ier;
+	int i;
+
+	isr = ioread32(td->membase + TIMBDMA_ISR);
+	ipr = isr & __td_ier_mask(td);
+
+	/* ack the interrupts */
+	iowrite32(ipr, td->membase + TIMBDMA_ISR);
+
+	for (i = 0; i < td->dma.chancnt; i++)
+		if (ipr & (1 << i)) {
+			struct timb_dma_chan *td_chan = td->channels + i;
+			spin_lock(&td_chan->lock);
+			__td_finish(td_chan);
+			if (!list_empty(&td_chan->queue))
+				__td_start_next(td_chan);
+			spin_unlock(&td_chan->lock);
+		}
+
+	ier = __td_ier_mask(td);
+	iowrite32(ier, td->membase + TIMBDMA_IER);
+}
+
+
+static irqreturn_t td_irq(int irq, void *devid)
+{
+	struct timb_dma *td = devid;
+	u32 ipr = ioread32(td->membase + TIMBDMA_IPR);
+
+	if (ipr) {
+		/* disable interrupts, will be re-enabled in tasklet */
+		iowrite32(0, td->membase + TIMBDMA_IER);
+
+		tasklet_schedule(&td->tasklet);
+
+		return IRQ_HANDLED;
+	} else
+		return IRQ_NONE;
+}
+
+
+static int td_probe(struct platform_device *pdev)
+{
+	struct timb_dma_platform_data *pdata = dev_get_platdata(&pdev->dev);
+	struct timb_dma *td;
+	struct resource *iomem;
+	int irq;
+	int err;
+	int i;
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "No platform data\n");
+		return -EINVAL;
+	}
+
+	iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!iomem)
+		return -EINVAL;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	if (!request_mem_region(iomem->start, resource_size(iomem),
+		DRIVER_NAME))
+		return -EBUSY;
+
+	td  = kzalloc(sizeof(struct timb_dma) +
+		sizeof(struct timb_dma_chan) * pdata->nr_channels, GFP_KERNEL);
+	if (!td) {
+		err = -ENOMEM;
+		goto err_release_region;
+	}
+
+	dev_dbg(&pdev->dev, "Allocated TD: %p\n", td);
+
+	td->membase = ioremap(iomem->start, resource_size(iomem));
+	if (!td->membase) {
+		dev_err(&pdev->dev, "Failed to remap I/O memory\n");
+		err = -ENOMEM;
+		goto err_free_mem;
+	}
+
+	/* 32-bit addressing */
+	iowrite32(TIMBDMA_32BIT_ADDR, td->membase + TIMBDMA_ACR);
+
+	/* disable and clear any interrupts */
+	iowrite32(0x0, td->membase + TIMBDMA_IER);
+	iowrite32(0xFFFFFFFF, td->membase + TIMBDMA_ISR);
+
+	tasklet_init(&td->tasklet, td_tasklet, (unsigned long)td);
+
+	err = request_irq(irq, td_irq, IRQF_SHARED, DRIVER_NAME, td);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to request IRQ\n");
+		goto err_tasklet_kill;
+	}
+
+	td->dma.device_alloc_chan_resources	= td_alloc_chan_resources;
+	td->dma.device_free_chan_resources	= td_free_chan_resources;
+	td->dma.device_tx_status		= td_tx_status;
+	td->dma.device_issue_pending		= td_issue_pending;
+
+	dma_cap_set(DMA_SLAVE, td->dma.cap_mask);
+	dma_cap_set(DMA_PRIVATE, td->dma.cap_mask);
+	td->dma.device_prep_slave_sg = td_prep_slave_sg;
+	td->dma.device_terminate_all = td_terminate_all;
+
+	td->dma.dev = &pdev->dev;
+
+	INIT_LIST_HEAD(&td->dma.channels);
+
+	for (i = 0; i < pdata->nr_channels; i++) {
+		struct timb_dma_chan *td_chan = &td->channels[i];
+		struct timb_dma_platform_data_channel *pchan =
+			pdata->channels + i;
+
+		/* even channels are RX, odd are TX */
+		if ((i % 2) == pchan->rx) {
+			dev_err(&pdev->dev, "Wrong channel configuration\n");
+			err = -EINVAL;
+			goto err_free_irq;
+		}
+
+		td_chan->chan.device = &td->dma;
+		dma_cookie_init(&td_chan->chan);
+		spin_lock_init(&td_chan->lock);
+		INIT_LIST_HEAD(&td_chan->active_list);
+		INIT_LIST_HEAD(&td_chan->queue);
+		INIT_LIST_HEAD(&td_chan->free_list);
+
+		td_chan->descs = pchan->descriptors;
+		td_chan->desc_elems = pchan->descriptor_elements;
+		td_chan->bytes_per_line = pchan->bytes_per_line;
+		td_chan->direction = pchan->rx ? DMA_DEV_TO_MEM :
+			DMA_MEM_TO_DEV;
+
+		td_chan->membase = td->membase +
+			(i / 2) * TIMBDMA_INSTANCE_OFFSET +
+			(pchan->rx ? 0 : TIMBDMA_INSTANCE_TX_OFFSET);
+
+		dev_dbg(&pdev->dev, "Chan: %d, membase: %p\n",
+			i, td_chan->membase);
+
+		list_add_tail(&td_chan->chan.device_node, &td->dma.channels);
+	}
+
+	err = dma_async_device_register(&td->dma);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to register async device\n");
+		goto err_free_irq;
+	}
+
+	platform_set_drvdata(pdev, td);
+
+	dev_dbg(&pdev->dev, "Probe result: %d\n", err);
+	return err;
+
+err_free_irq:
+	free_irq(irq, td);
+err_tasklet_kill:
+	tasklet_kill(&td->tasklet);
+	iounmap(td->membase);
+err_free_mem:
+	kfree(td);
+err_release_region:
+	release_mem_region(iomem->start, resource_size(iomem));
+
+	return err;
+}
+
+static int td_remove(struct platform_device *pdev)
+{
+	struct timb_dma *td = platform_get_drvdata(pdev);
+	struct resource *iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	int irq = platform_get_irq(pdev, 0);
+
+	dma_async_device_unregister(&td->dma);
+	free_irq(irq, td);
+	tasklet_kill(&td->tasklet);
+	iounmap(td->membase);
+	kfree(td);
+	release_mem_region(iomem->start, resource_size(iomem));
+
+	dev_dbg(&pdev->dev, "Removed...\n");
+	return 0;
+}
+
+static struct platform_driver td_driver = {
+	.driver = {
+		.name	= DRIVER_NAME,
+	},
+	.probe	= td_probe,
+	.remove	= td_remove,
+};
+
+module_platform_driver(td_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Timberdale DMA controller driver");
+MODULE_AUTHOR("Pelagicore AB <info@pelagicore.com>");
+MODULE_ALIAS("platform:"DRIVER_NAME);
diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c
new file mode 100644
index 0000000..eb45af7
--- /dev/null
+++ b/drivers/dma/txx9dmac.c
@@ -0,0 +1,1314 @@
+/*
+ * Driver for the TXx9 SoC DMA Controller
+ *
+ * Copyright (C) 2009 Atsushi Nemoto
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/scatterlist.h>
+
+#include "dmaengine.h"
+#include "txx9dmac.h"
+
+static struct txx9dmac_chan *to_txx9dmac_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct txx9dmac_chan, chan);
+}
+
+static struct txx9dmac_cregs __iomem *__dma_regs(const struct txx9dmac_chan *dc)
+{
+	return dc->ch_regs;
+}
+
+static struct txx9dmac_cregs32 __iomem *__dma_regs32(
+	const struct txx9dmac_chan *dc)
+{
+	return dc->ch_regs;
+}
+
+#define channel64_readq(dc, name) \
+	__raw_readq(&(__dma_regs(dc)->name))
+#define channel64_writeq(dc, name, val) \
+	__raw_writeq((val), &(__dma_regs(dc)->name))
+#define channel64_readl(dc, name) \
+	__raw_readl(&(__dma_regs(dc)->name))
+#define channel64_writel(dc, name, val) \
+	__raw_writel((val), &(__dma_regs(dc)->name))
+
+#define channel32_readl(dc, name) \
+	__raw_readl(&(__dma_regs32(dc)->name))
+#define channel32_writel(dc, name, val) \
+	__raw_writel((val), &(__dma_regs32(dc)->name))
+
+#define channel_readq(dc, name) channel64_readq(dc, name)
+#define channel_writeq(dc, name, val) channel64_writeq(dc, name, val)
+#define channel_readl(dc, name) \
+	(is_dmac64(dc) ? \
+	 channel64_readl(dc, name) : channel32_readl(dc, name))
+#define channel_writel(dc, name, val) \
+	(is_dmac64(dc) ? \
+	 channel64_writel(dc, name, val) : channel32_writel(dc, name, val))
+
+static dma_addr_t channel64_read_CHAR(const struct txx9dmac_chan *dc)
+{
+	if (sizeof(__dma_regs(dc)->CHAR) == sizeof(u64))
+		return channel64_readq(dc, CHAR);
+	else
+		return channel64_readl(dc, CHAR);
+}
+
+static void channel64_write_CHAR(const struct txx9dmac_chan *dc, dma_addr_t val)
+{
+	if (sizeof(__dma_regs(dc)->CHAR) == sizeof(u64))
+		channel64_writeq(dc, CHAR, val);
+	else
+		channel64_writel(dc, CHAR, val);
+}
+
+static void channel64_clear_CHAR(const struct txx9dmac_chan *dc)
+{
+#if defined(CONFIG_32BIT) && !defined(CONFIG_PHYS_ADDR_T_64BIT)
+	channel64_writel(dc, CHAR, 0);
+	channel64_writel(dc, __pad_CHAR, 0);
+#else
+	channel64_writeq(dc, CHAR, 0);
+#endif
+}
+
+static dma_addr_t channel_read_CHAR(const struct txx9dmac_chan *dc)
+{
+	if (is_dmac64(dc))
+		return channel64_read_CHAR(dc);
+	else
+		return channel32_readl(dc, CHAR);
+}
+
+static void channel_write_CHAR(const struct txx9dmac_chan *dc, dma_addr_t val)
+{
+	if (is_dmac64(dc))
+		channel64_write_CHAR(dc, val);
+	else
+		channel32_writel(dc, CHAR, val);
+}
+
+static struct txx9dmac_regs __iomem *__txx9dmac_regs(
+	const struct txx9dmac_dev *ddev)
+{
+	return ddev->regs;
+}
+
+static struct txx9dmac_regs32 __iomem *__txx9dmac_regs32(
+	const struct txx9dmac_dev *ddev)
+{
+	return ddev->regs;
+}
+
+#define dma64_readl(ddev, name) \
+	__raw_readl(&(__txx9dmac_regs(ddev)->name))
+#define dma64_writel(ddev, name, val) \
+	__raw_writel((val), &(__txx9dmac_regs(ddev)->name))
+
+#define dma32_readl(ddev, name) \
+	__raw_readl(&(__txx9dmac_regs32(ddev)->name))
+#define dma32_writel(ddev, name, val) \
+	__raw_writel((val), &(__txx9dmac_regs32(ddev)->name))
+
+#define dma_readl(ddev, name) \
+	(__is_dmac64(ddev) ? \
+	dma64_readl(ddev, name) : dma32_readl(ddev, name))
+#define dma_writel(ddev, name, val) \
+	(__is_dmac64(ddev) ? \
+	dma64_writel(ddev, name, val) : dma32_writel(ddev, name, val))
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+	return &chan->dev->device;
+}
+static struct device *chan2parent(struct dma_chan *chan)
+{
+	return chan->dev->device.parent;
+}
+
+static struct txx9dmac_desc *
+txd_to_txx9dmac_desc(struct dma_async_tx_descriptor *txd)
+{
+	return container_of(txd, struct txx9dmac_desc, txd);
+}
+
+static dma_addr_t desc_read_CHAR(const struct txx9dmac_chan *dc,
+				 const struct txx9dmac_desc *desc)
+{
+	return is_dmac64(dc) ? desc->hwdesc.CHAR : desc->hwdesc32.CHAR;
+}
+
+static void desc_write_CHAR(const struct txx9dmac_chan *dc,
+			    struct txx9dmac_desc *desc, dma_addr_t val)
+{
+	if (is_dmac64(dc))
+		desc->hwdesc.CHAR = val;
+	else
+		desc->hwdesc32.CHAR = val;
+}
+
+#define TXX9_DMA_MAX_COUNT	0x04000000
+
+#define TXX9_DMA_INITIAL_DESC_COUNT	64
+
+static struct txx9dmac_desc *txx9dmac_first_active(struct txx9dmac_chan *dc)
+{
+	return list_entry(dc->active_list.next,
+			  struct txx9dmac_desc, desc_node);
+}
+
+static struct txx9dmac_desc *txx9dmac_last_active(struct txx9dmac_chan *dc)
+{
+	return list_entry(dc->active_list.prev,
+			  struct txx9dmac_desc, desc_node);
+}
+
+static struct txx9dmac_desc *txx9dmac_first_queued(struct txx9dmac_chan *dc)
+{
+	return list_entry(dc->queue.next, struct txx9dmac_desc, desc_node);
+}
+
+static struct txx9dmac_desc *txx9dmac_last_child(struct txx9dmac_desc *desc)
+{
+	if (!list_empty(&desc->tx_list))
+		desc = list_entry(desc->tx_list.prev, typeof(*desc), desc_node);
+	return desc;
+}
+
+static dma_cookie_t txx9dmac_tx_submit(struct dma_async_tx_descriptor *tx);
+
+static struct txx9dmac_desc *txx9dmac_desc_alloc(struct txx9dmac_chan *dc,
+						 gfp_t flags)
+{
+	struct txx9dmac_dev *ddev = dc->ddev;
+	struct txx9dmac_desc *desc;
+
+	desc = kzalloc(sizeof(*desc), flags);
+	if (!desc)
+		return NULL;
+	INIT_LIST_HEAD(&desc->tx_list);
+	dma_async_tx_descriptor_init(&desc->txd, &dc->chan);
+	desc->txd.tx_submit = txx9dmac_tx_submit;
+	/* txd.flags will be overwritten in prep funcs */
+	desc->txd.flags = DMA_CTRL_ACK;
+	desc->txd.phys = dma_map_single(chan2parent(&dc->chan), &desc->hwdesc,
+					ddev->descsize, DMA_TO_DEVICE);
+	return desc;
+}
+
+static struct txx9dmac_desc *txx9dmac_desc_get(struct txx9dmac_chan *dc)
+{
+	struct txx9dmac_desc *desc, *_desc;
+	struct txx9dmac_desc *ret = NULL;
+	unsigned int i = 0;
+
+	spin_lock_bh(&dc->lock);
+	list_for_each_entry_safe(desc, _desc, &dc->free_list, desc_node) {
+		if (async_tx_test_ack(&desc->txd)) {
+			list_del(&desc->desc_node);
+			ret = desc;
+			break;
+		}
+		dev_dbg(chan2dev(&dc->chan), "desc %p not ACKed\n", desc);
+		i++;
+	}
+	spin_unlock_bh(&dc->lock);
+
+	dev_vdbg(chan2dev(&dc->chan), "scanned %u descriptors on freelist\n",
+		 i);
+	if (!ret) {
+		ret = txx9dmac_desc_alloc(dc, GFP_ATOMIC);
+		if (ret) {
+			spin_lock_bh(&dc->lock);
+			dc->descs_allocated++;
+			spin_unlock_bh(&dc->lock);
+		} else
+			dev_err(chan2dev(&dc->chan),
+				"not enough descriptors available\n");
+	}
+	return ret;
+}
+
+static void txx9dmac_sync_desc_for_cpu(struct txx9dmac_chan *dc,
+				       struct txx9dmac_desc *desc)
+{
+	struct txx9dmac_dev *ddev = dc->ddev;
+	struct txx9dmac_desc *child;
+
+	list_for_each_entry(child, &desc->tx_list, desc_node)
+		dma_sync_single_for_cpu(chan2parent(&dc->chan),
+				child->txd.phys, ddev->descsize,
+				DMA_TO_DEVICE);
+	dma_sync_single_for_cpu(chan2parent(&dc->chan),
+			desc->txd.phys, ddev->descsize,
+			DMA_TO_DEVICE);
+}
+
+/*
+ * Move a descriptor, including any children, to the free list.
+ * `desc' must not be on any lists.
+ */
+static void txx9dmac_desc_put(struct txx9dmac_chan *dc,
+			      struct txx9dmac_desc *desc)
+{
+	if (desc) {
+		struct txx9dmac_desc *child;
+
+		txx9dmac_sync_desc_for_cpu(dc, desc);
+
+		spin_lock_bh(&dc->lock);
+		list_for_each_entry(child, &desc->tx_list, desc_node)
+			dev_vdbg(chan2dev(&dc->chan),
+				 "moving child desc %p to freelist\n",
+				 child);
+		list_splice_init(&desc->tx_list, &dc->free_list);
+		dev_vdbg(chan2dev(&dc->chan), "moving desc %p to freelist\n",
+			 desc);
+		list_add(&desc->desc_node, &dc->free_list);
+		spin_unlock_bh(&dc->lock);
+	}
+}
+
+/*----------------------------------------------------------------------*/
+
+static void txx9dmac_dump_regs(struct txx9dmac_chan *dc)
+{
+	if (is_dmac64(dc))
+		dev_err(chan2dev(&dc->chan),
+			"  CHAR: %#llx SAR: %#llx DAR: %#llx CNTR: %#x"
+			" SAIR: %#x DAIR: %#x CCR: %#x CSR: %#x\n",
+			(u64)channel64_read_CHAR(dc),
+			channel64_readq(dc, SAR),
+			channel64_readq(dc, DAR),
+			channel64_readl(dc, CNTR),
+			channel64_readl(dc, SAIR),
+			channel64_readl(dc, DAIR),
+			channel64_readl(dc, CCR),
+			channel64_readl(dc, CSR));
+	else
+		dev_err(chan2dev(&dc->chan),
+			"  CHAR: %#x SAR: %#x DAR: %#x CNTR: %#x"
+			" SAIR: %#x DAIR: %#x CCR: %#x CSR: %#x\n",
+			channel32_readl(dc, CHAR),
+			channel32_readl(dc, SAR),
+			channel32_readl(dc, DAR),
+			channel32_readl(dc, CNTR),
+			channel32_readl(dc, SAIR),
+			channel32_readl(dc, DAIR),
+			channel32_readl(dc, CCR),
+			channel32_readl(dc, CSR));
+}
+
+static void txx9dmac_reset_chan(struct txx9dmac_chan *dc)
+{
+	channel_writel(dc, CCR, TXX9_DMA_CCR_CHRST);
+	if (is_dmac64(dc)) {
+		channel64_clear_CHAR(dc);
+		channel_writeq(dc, SAR, 0);
+		channel_writeq(dc, DAR, 0);
+	} else {
+		channel_writel(dc, CHAR, 0);
+		channel_writel(dc, SAR, 0);
+		channel_writel(dc, DAR, 0);
+	}
+	channel_writel(dc, CNTR, 0);
+	channel_writel(dc, SAIR, 0);
+	channel_writel(dc, DAIR, 0);
+	channel_writel(dc, CCR, 0);
+	mmiowb();
+}
+
+/* Called with dc->lock held and bh disabled */
+static void txx9dmac_dostart(struct txx9dmac_chan *dc,
+			     struct txx9dmac_desc *first)
+{
+	struct txx9dmac_slave *ds = dc->chan.private;
+	u32 sai, dai;
+
+	dev_vdbg(chan2dev(&dc->chan), "dostart %u %p\n",
+		 first->txd.cookie, first);
+	/* ASSERT:  channel is idle */
+	if (channel_readl(dc, CSR) & TXX9_DMA_CSR_XFACT) {
+		dev_err(chan2dev(&dc->chan),
+			"BUG: Attempted to start non-idle channel\n");
+		txx9dmac_dump_regs(dc);
+		/* The tasklet will hopefully advance the queue... */
+		return;
+	}
+
+	if (is_dmac64(dc)) {
+		channel64_writel(dc, CNTR, 0);
+		channel64_writel(dc, CSR, 0xffffffff);
+		if (ds) {
+			if (ds->tx_reg) {
+				sai = ds->reg_width;
+				dai = 0;
+			} else {
+				sai = 0;
+				dai = ds->reg_width;
+			}
+		} else {
+			sai = 8;
+			dai = 8;
+		}
+		channel64_writel(dc, SAIR, sai);
+		channel64_writel(dc, DAIR, dai);
+		/* All 64-bit DMACs support SMPCHN */
+		channel64_writel(dc, CCR, dc->ccr);
+		/* Writing a non-zero value to CHAR will assert XFACT */
+		channel64_write_CHAR(dc, first->txd.phys);
+	} else {
+		channel32_writel(dc, CNTR, 0);
+		channel32_writel(dc, CSR, 0xffffffff);
+		if (ds) {
+			if (ds->tx_reg) {
+				sai = ds->reg_width;
+				dai = 0;
+			} else {
+				sai = 0;
+				dai = ds->reg_width;
+			}
+		} else {
+			sai = 4;
+			dai = 4;
+		}
+		channel32_writel(dc, SAIR, sai);
+		channel32_writel(dc, DAIR, dai);
+		if (txx9_dma_have_SMPCHN()) {
+			channel32_writel(dc, CCR, dc->ccr);
+			/* Writing a non-zero value to CHAR will assert XFACT */
+			channel32_writel(dc, CHAR, first->txd.phys);
+		} else {
+			channel32_writel(dc, CHAR, first->txd.phys);
+			channel32_writel(dc, CCR, dc->ccr);
+		}
+	}
+}
+
+/*----------------------------------------------------------------------*/
+
+static void
+txx9dmac_descriptor_complete(struct txx9dmac_chan *dc,
+			     struct txx9dmac_desc *desc)
+{
+	struct dmaengine_desc_callback cb;
+	struct dma_async_tx_descriptor *txd = &desc->txd;
+
+	dev_vdbg(chan2dev(&dc->chan), "descriptor %u %p complete\n",
+		 txd->cookie, desc);
+
+	dma_cookie_complete(txd);
+	dmaengine_desc_get_callback(txd, &cb);
+
+	txx9dmac_sync_desc_for_cpu(dc, desc);
+	list_splice_init(&desc->tx_list, &dc->free_list);
+	list_move(&desc->desc_node, &dc->free_list);
+
+	dma_descriptor_unmap(txd);
+	/*
+	 * The API requires that no submissions are done from a
+	 * callback, so we don't need to drop the lock here
+	 */
+	dmaengine_desc_callback_invoke(&cb, NULL);
+	dma_run_dependencies(txd);
+}
+
+static void txx9dmac_dequeue(struct txx9dmac_chan *dc, struct list_head *list)
+{
+	struct txx9dmac_dev *ddev = dc->ddev;
+	struct txx9dmac_desc *desc;
+	struct txx9dmac_desc *prev = NULL;
+
+	BUG_ON(!list_empty(list));
+	do {
+		desc = txx9dmac_first_queued(dc);
+		if (prev) {
+			desc_write_CHAR(dc, prev, desc->txd.phys);
+			dma_sync_single_for_device(chan2parent(&dc->chan),
+				prev->txd.phys, ddev->descsize,
+				DMA_TO_DEVICE);
+		}
+		prev = txx9dmac_last_child(desc);
+		list_move_tail(&desc->desc_node, list);
+		/* Make chain-completion interrupt happen */
+		if ((desc->txd.flags & DMA_PREP_INTERRUPT) &&
+		    !txx9dmac_chan_INTENT(dc))
+			break;
+	} while (!list_empty(&dc->queue));
+}
+
+static void txx9dmac_complete_all(struct txx9dmac_chan *dc)
+{
+	struct txx9dmac_desc *desc, *_desc;
+	LIST_HEAD(list);
+
+	/*
+	 * Submit queued descriptors ASAP, i.e. before we go through
+	 * the completed ones.
+	 */
+	list_splice_init(&dc->active_list, &list);
+	if (!list_empty(&dc->queue)) {
+		txx9dmac_dequeue(dc, &dc->active_list);
+		txx9dmac_dostart(dc, txx9dmac_first_active(dc));
+	}
+
+	list_for_each_entry_safe(desc, _desc, &list, desc_node)
+		txx9dmac_descriptor_complete(dc, desc);
+}
+
+static void txx9dmac_dump_desc(struct txx9dmac_chan *dc,
+			       struct txx9dmac_hwdesc *desc)
+{
+	if (is_dmac64(dc)) {
+#ifdef TXX9_DMA_USE_SIMPLE_CHAIN
+		dev_crit(chan2dev(&dc->chan),
+			 "  desc: ch%#llx s%#llx d%#llx c%#x\n",
+			 (u64)desc->CHAR, desc->SAR, desc->DAR, desc->CNTR);
+#else
+		dev_crit(chan2dev(&dc->chan),
+			 "  desc: ch%#llx s%#llx d%#llx c%#x"
+			 " si%#x di%#x cc%#x cs%#x\n",
+			 (u64)desc->CHAR, desc->SAR, desc->DAR, desc->CNTR,
+			 desc->SAIR, desc->DAIR, desc->CCR, desc->CSR);
+#endif
+	} else {
+		struct txx9dmac_hwdesc32 *d = (struct txx9dmac_hwdesc32 *)desc;
+#ifdef TXX9_DMA_USE_SIMPLE_CHAIN
+		dev_crit(chan2dev(&dc->chan),
+			 "  desc: ch%#x s%#x d%#x c%#x\n",
+			 d->CHAR, d->SAR, d->DAR, d->CNTR);
+#else
+		dev_crit(chan2dev(&dc->chan),
+			 "  desc: ch%#x s%#x d%#x c%#x"
+			 " si%#x di%#x cc%#x cs%#x\n",
+			 d->CHAR, d->SAR, d->DAR, d->CNTR,
+			 d->SAIR, d->DAIR, d->CCR, d->CSR);
+#endif
+	}
+}
+
+static void txx9dmac_handle_error(struct txx9dmac_chan *dc, u32 csr)
+{
+	struct txx9dmac_desc *bad_desc;
+	struct txx9dmac_desc *child;
+	u32 errors;
+
+	/*
+	 * The descriptor currently at the head of the active list is
+	 * borked. Since we don't have any way to report errors, we'll
+	 * just have to scream loudly and try to carry on.
+	 */
+	dev_crit(chan2dev(&dc->chan), "Abnormal Chain Completion\n");
+	txx9dmac_dump_regs(dc);
+
+	bad_desc = txx9dmac_first_active(dc);
+	list_del_init(&bad_desc->desc_node);
+
+	/* Clear all error flags and try to restart the controller */
+	errors = csr & (TXX9_DMA_CSR_ABCHC |
+			TXX9_DMA_CSR_CFERR | TXX9_DMA_CSR_CHERR |
+			TXX9_DMA_CSR_DESERR | TXX9_DMA_CSR_SORERR);
+	channel_writel(dc, CSR, errors);
+
+	if (list_empty(&dc->active_list) && !list_empty(&dc->queue))
+		txx9dmac_dequeue(dc, &dc->active_list);
+	if (!list_empty(&dc->active_list))
+		txx9dmac_dostart(dc, txx9dmac_first_active(dc));
+
+	dev_crit(chan2dev(&dc->chan),
+		 "Bad descriptor submitted for DMA! (cookie: %d)\n",
+		 bad_desc->txd.cookie);
+	txx9dmac_dump_desc(dc, &bad_desc->hwdesc);
+	list_for_each_entry(child, &bad_desc->tx_list, desc_node)
+		txx9dmac_dump_desc(dc, &child->hwdesc);
+	/* Pretend the descriptor completed successfully */
+	txx9dmac_descriptor_complete(dc, bad_desc);
+}
+
+static void txx9dmac_scan_descriptors(struct txx9dmac_chan *dc)
+{
+	dma_addr_t chain;
+	struct txx9dmac_desc *desc, *_desc;
+	struct txx9dmac_desc *child;
+	u32 csr;
+
+	if (is_dmac64(dc)) {
+		chain = channel64_read_CHAR(dc);
+		csr = channel64_readl(dc, CSR);
+		channel64_writel(dc, CSR, csr);
+	} else {
+		chain = channel32_readl(dc, CHAR);
+		csr = channel32_readl(dc, CSR);
+		channel32_writel(dc, CSR, csr);
+	}
+	/* For a dynamic chain, we should look at XFACT instead of NCHNC */
+	if (!(csr & (TXX9_DMA_CSR_XFACT | TXX9_DMA_CSR_ABCHC))) {
+		/* Everything we've submitted is done */
+		txx9dmac_complete_all(dc);
+		return;
+	}
+	if (!(csr & TXX9_DMA_CSR_CHNEN))
+		chain = 0;	/* last descriptor of this chain */
+
+	dev_vdbg(chan2dev(&dc->chan), "scan_descriptors: char=%#llx\n",
+		 (u64)chain);
+
+	list_for_each_entry_safe(desc, _desc, &dc->active_list, desc_node) {
+		if (desc_read_CHAR(dc, desc) == chain) {
+			/* This one is currently in progress */
+			if (csr & TXX9_DMA_CSR_ABCHC)
+				goto scan_done;
+			return;
+		}
+
+		list_for_each_entry(child, &desc->tx_list, desc_node)
+			if (desc_read_CHAR(dc, child) == chain) {
+				/* Currently in progress */
+				if (csr & TXX9_DMA_CSR_ABCHC)
+					goto scan_done;
+				return;
+			}
+
+		/*
+		 * No descriptors so far seem to be in progress, i.e.
+		 * this one must be done.
+		 */
+		txx9dmac_descriptor_complete(dc, desc);
+	}
+scan_done:
+	if (csr & TXX9_DMA_CSR_ABCHC) {
+		txx9dmac_handle_error(dc, csr);
+		return;
+	}
+
+	dev_err(chan2dev(&dc->chan),
+		"BUG: All descriptors done, but channel not idle!\n");
+
+	/* Try to continue after resetting the channel... */
+	txx9dmac_reset_chan(dc);
+
+	if (!list_empty(&dc->queue)) {
+		txx9dmac_dequeue(dc, &dc->active_list);
+		txx9dmac_dostart(dc, txx9dmac_first_active(dc));
+	}
+}
+
+static void txx9dmac_chan_tasklet(unsigned long data)
+{
+	int irq;
+	u32 csr;
+	struct txx9dmac_chan *dc;
+
+	dc = (struct txx9dmac_chan *)data;
+	csr = channel_readl(dc, CSR);
+	dev_vdbg(chan2dev(&dc->chan), "tasklet: status=%x\n", csr);
+
+	spin_lock(&dc->lock);
+	if (csr & (TXX9_DMA_CSR_ABCHC | TXX9_DMA_CSR_NCHNC |
+		   TXX9_DMA_CSR_NTRNFC))
+		txx9dmac_scan_descriptors(dc);
+	spin_unlock(&dc->lock);
+	irq = dc->irq;
+
+	enable_irq(irq);
+}
+
+static irqreturn_t txx9dmac_chan_interrupt(int irq, void *dev_id)
+{
+	struct txx9dmac_chan *dc = dev_id;
+
+	dev_vdbg(chan2dev(&dc->chan), "interrupt: status=%#x\n",
+			channel_readl(dc, CSR));
+
+	tasklet_schedule(&dc->tasklet);
+	/*
+	 * Just disable the interrupts. We'll turn them back on in the
+	 * softirq handler.
+	 */
+	disable_irq_nosync(irq);
+
+	return IRQ_HANDLED;
+}
+
+static void txx9dmac_tasklet(unsigned long data)
+{
+	int irq;
+	u32 csr;
+	struct txx9dmac_chan *dc;
+
+	struct txx9dmac_dev *ddev = (struct txx9dmac_dev *)data;
+	u32 mcr;
+	int i;
+
+	mcr = dma_readl(ddev, MCR);
+	dev_vdbg(ddev->chan[0]->dma.dev, "tasklet: mcr=%x\n", mcr);
+	for (i = 0; i < TXX9_DMA_MAX_NR_CHANNELS; i++) {
+		if ((mcr >> (24 + i)) & 0x11) {
+			dc = ddev->chan[i];
+			csr = channel_readl(dc, CSR);
+			dev_vdbg(chan2dev(&dc->chan), "tasklet: status=%x\n",
+				 csr);
+			spin_lock(&dc->lock);
+			if (csr & (TXX9_DMA_CSR_ABCHC | TXX9_DMA_CSR_NCHNC |
+				   TXX9_DMA_CSR_NTRNFC))
+				txx9dmac_scan_descriptors(dc);
+			spin_unlock(&dc->lock);
+		}
+	}
+	irq = ddev->irq;
+
+	enable_irq(irq);
+}
+
+static irqreturn_t txx9dmac_interrupt(int irq, void *dev_id)
+{
+	struct txx9dmac_dev *ddev = dev_id;
+
+	dev_vdbg(ddev->chan[0]->dma.dev, "interrupt: status=%#x\n",
+			dma_readl(ddev, MCR));
+
+	tasklet_schedule(&ddev->tasklet);
+	/*
+	 * Just disable the interrupts. We'll turn them back on in the
+	 * softirq handler.
+	 */
+	disable_irq_nosync(irq);
+
+	return IRQ_HANDLED;
+}
+
+/*----------------------------------------------------------------------*/
+
+static dma_cookie_t txx9dmac_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct txx9dmac_desc *desc = txd_to_txx9dmac_desc(tx);
+	struct txx9dmac_chan *dc = to_txx9dmac_chan(tx->chan);
+	dma_cookie_t cookie;
+
+	spin_lock_bh(&dc->lock);
+	cookie = dma_cookie_assign(tx);
+
+	dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u %p\n",
+		 desc->txd.cookie, desc);
+
+	list_add_tail(&desc->desc_node, &dc->queue);
+	spin_unlock_bh(&dc->lock);
+
+	return cookie;
+}
+
+static struct dma_async_tx_descriptor *
+txx9dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+	struct txx9dmac_dev *ddev = dc->ddev;
+	struct txx9dmac_desc *desc;
+	struct txx9dmac_desc *first;
+	struct txx9dmac_desc *prev;
+	size_t xfer_count;
+	size_t offset;
+
+	dev_vdbg(chan2dev(chan), "prep_dma_memcpy d%#llx s%#llx l%#zx f%#lx\n",
+		 (u64)dest, (u64)src, len, flags);
+
+	if (unlikely(!len)) {
+		dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n");
+		return NULL;
+	}
+
+	prev = first = NULL;
+
+	for (offset = 0; offset < len; offset += xfer_count) {
+		xfer_count = min_t(size_t, len - offset, TXX9_DMA_MAX_COUNT);
+		/*
+		 * Workaround for ERT-TX49H2-033, ERT-TX49H3-020,
+		 * ERT-TX49H4-016 (slightly conservative)
+		 */
+		if (__is_dmac64(ddev)) {
+			if (xfer_count > 0x100 &&
+			    (xfer_count & 0xff) >= 0xfa &&
+			    (xfer_count & 0xff) <= 0xff)
+				xfer_count -= 0x20;
+		} else {
+			if (xfer_count > 0x80 &&
+			    (xfer_count & 0x7f) >= 0x7e &&
+			    (xfer_count & 0x7f) <= 0x7f)
+				xfer_count -= 0x20;
+		}
+
+		desc = txx9dmac_desc_get(dc);
+		if (!desc) {
+			txx9dmac_desc_put(dc, first);
+			return NULL;
+		}
+
+		if (__is_dmac64(ddev)) {
+			desc->hwdesc.SAR = src + offset;
+			desc->hwdesc.DAR = dest + offset;
+			desc->hwdesc.CNTR = xfer_count;
+			txx9dmac_desc_set_nosimple(ddev, desc, 8, 8,
+					dc->ccr | TXX9_DMA_CCR_XFACT);
+		} else {
+			desc->hwdesc32.SAR = src + offset;
+			desc->hwdesc32.DAR = dest + offset;
+			desc->hwdesc32.CNTR = xfer_count;
+			txx9dmac_desc_set_nosimple(ddev, desc, 4, 4,
+					dc->ccr | TXX9_DMA_CCR_XFACT);
+		}
+
+		/*
+		 * The descriptors on tx_list are not reachable from
+		 * the dc->queue list or dc->active_list after a
+		 * submit.  If we put all descriptors on active_list,
+		 * invoking the callbacks on completion would be more
+		 * complex.
+		 */
+		if (!first) {
+			first = desc;
+		} else {
+			desc_write_CHAR(dc, prev, desc->txd.phys);
+			dma_sync_single_for_device(chan2parent(&dc->chan),
+					prev->txd.phys, ddev->descsize,
+					DMA_TO_DEVICE);
+			list_add_tail(&desc->desc_node, &first->tx_list);
+		}
+		prev = desc;
+	}
+
+	/* Trigger interrupt after last block */
+	if (flags & DMA_PREP_INTERRUPT)
+		txx9dmac_desc_set_INTENT(ddev, prev);
+
+	desc_write_CHAR(dc, prev, 0);
+	dma_sync_single_for_device(chan2parent(&dc->chan),
+			prev->txd.phys, ddev->descsize,
+			DMA_TO_DEVICE);
+
+	first->txd.flags = flags;
+	first->len = len;
+
+	return &first->txd;
+}
+
+static struct dma_async_tx_descriptor *
+txx9dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+	struct txx9dmac_dev *ddev = dc->ddev;
+	struct txx9dmac_slave *ds = chan->private;
+	struct txx9dmac_desc *prev;
+	struct txx9dmac_desc *first;
+	unsigned int i;
+	struct scatterlist *sg;
+
+	dev_vdbg(chan2dev(chan), "prep_dma_slave\n");
+
+	BUG_ON(!ds || !ds->reg_width);
+	if (ds->tx_reg)
+		BUG_ON(direction != DMA_MEM_TO_DEV);
+	else
+		BUG_ON(direction != DMA_DEV_TO_MEM);
+	if (unlikely(!sg_len))
+		return NULL;
+
+	prev = first = NULL;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		struct txx9dmac_desc *desc;
+		dma_addr_t mem;
+		u32 sai, dai;
+
+		desc = txx9dmac_desc_get(dc);
+		if (!desc) {
+			txx9dmac_desc_put(dc, first);
+			return NULL;
+		}
+
+		mem = sg_dma_address(sg);
+
+		if (__is_dmac64(ddev)) {
+			if (direction == DMA_MEM_TO_DEV) {
+				desc->hwdesc.SAR = mem;
+				desc->hwdesc.DAR = ds->tx_reg;
+			} else {
+				desc->hwdesc.SAR = ds->rx_reg;
+				desc->hwdesc.DAR = mem;
+			}
+			desc->hwdesc.CNTR = sg_dma_len(sg);
+		} else {
+			if (direction == DMA_MEM_TO_DEV) {
+				desc->hwdesc32.SAR = mem;
+				desc->hwdesc32.DAR = ds->tx_reg;
+			} else {
+				desc->hwdesc32.SAR = ds->rx_reg;
+				desc->hwdesc32.DAR = mem;
+			}
+			desc->hwdesc32.CNTR = sg_dma_len(sg);
+		}
+		if (direction == DMA_MEM_TO_DEV) {
+			sai = ds->reg_width;
+			dai = 0;
+		} else {
+			sai = 0;
+			dai = ds->reg_width;
+		}
+		txx9dmac_desc_set_nosimple(ddev, desc, sai, dai,
+					dc->ccr | TXX9_DMA_CCR_XFACT);
+
+		if (!first) {
+			first = desc;
+		} else {
+			desc_write_CHAR(dc, prev, desc->txd.phys);
+			dma_sync_single_for_device(chan2parent(&dc->chan),
+					prev->txd.phys,
+					ddev->descsize,
+					DMA_TO_DEVICE);
+			list_add_tail(&desc->desc_node, &first->tx_list);
+		}
+		prev = desc;
+	}
+
+	/* Trigger interrupt after last block */
+	if (flags & DMA_PREP_INTERRUPT)
+		txx9dmac_desc_set_INTENT(ddev, prev);
+
+	desc_write_CHAR(dc, prev, 0);
+	dma_sync_single_for_device(chan2parent(&dc->chan),
+			prev->txd.phys, ddev->descsize,
+			DMA_TO_DEVICE);
+
+	first->txd.flags = flags;
+	first->len = 0;
+
+	return &first->txd;
+}
+
+static int txx9dmac_terminate_all(struct dma_chan *chan)
+{
+	struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+	struct txx9dmac_desc *desc, *_desc;
+	LIST_HEAD(list);
+
+	dev_vdbg(chan2dev(chan), "terminate_all\n");
+	spin_lock_bh(&dc->lock);
+
+	txx9dmac_reset_chan(dc);
+
+	/* active_list entries will end up before queued entries */
+	list_splice_init(&dc->queue, &list);
+	list_splice_init(&dc->active_list, &list);
+
+	spin_unlock_bh(&dc->lock);
+
+	/* Flush all pending and queued descriptors */
+	list_for_each_entry_safe(desc, _desc, &list, desc_node)
+		txx9dmac_descriptor_complete(dc, desc);
+
+	return 0;
+}
+
+static enum dma_status
+txx9dmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+		   struct dma_tx_state *txstate)
+{
+	struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+	enum dma_status ret;
+
+	ret = dma_cookie_status(chan, cookie, txstate);
+	if (ret == DMA_COMPLETE)
+		return DMA_COMPLETE;
+
+	spin_lock_bh(&dc->lock);
+	txx9dmac_scan_descriptors(dc);
+	spin_unlock_bh(&dc->lock);
+
+	return dma_cookie_status(chan, cookie, txstate);
+}
+
+static void txx9dmac_chain_dynamic(struct txx9dmac_chan *dc,
+				   struct txx9dmac_desc *prev)
+{
+	struct txx9dmac_dev *ddev = dc->ddev;
+	struct txx9dmac_desc *desc;
+	LIST_HEAD(list);
+
+	prev = txx9dmac_last_child(prev);
+	txx9dmac_dequeue(dc, &list);
+	desc = list_entry(list.next, struct txx9dmac_desc, desc_node);
+	desc_write_CHAR(dc, prev, desc->txd.phys);
+	dma_sync_single_for_device(chan2parent(&dc->chan),
+				   prev->txd.phys, ddev->descsize,
+				   DMA_TO_DEVICE);
+	mmiowb();
+	if (!(channel_readl(dc, CSR) & TXX9_DMA_CSR_CHNEN) &&
+	    channel_read_CHAR(dc) == prev->txd.phys)
+		/* Restart chain DMA */
+		channel_write_CHAR(dc, desc->txd.phys);
+	list_splice_tail(&list, &dc->active_list);
+}
+
+static void txx9dmac_issue_pending(struct dma_chan *chan)
+{
+	struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+
+	spin_lock_bh(&dc->lock);
+
+	if (!list_empty(&dc->active_list))
+		txx9dmac_scan_descriptors(dc);
+	if (!list_empty(&dc->queue)) {
+		if (list_empty(&dc->active_list)) {
+			txx9dmac_dequeue(dc, &dc->active_list);
+			txx9dmac_dostart(dc, txx9dmac_first_active(dc));
+		} else if (txx9_dma_have_SMPCHN()) {
+			struct txx9dmac_desc *prev = txx9dmac_last_active(dc);
+
+			if (!(prev->txd.flags & DMA_PREP_INTERRUPT) ||
+			    txx9dmac_chan_INTENT(dc))
+				txx9dmac_chain_dynamic(dc, prev);
+		}
+	}
+
+	spin_unlock_bh(&dc->lock);
+}
+
+static int txx9dmac_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+	struct txx9dmac_slave *ds = chan->private;
+	struct txx9dmac_desc *desc;
+	int i;
+
+	dev_vdbg(chan2dev(chan), "alloc_chan_resources\n");
+
+	/* ASSERT:  channel is idle */
+	if (channel_readl(dc, CSR) & TXX9_DMA_CSR_XFACT) {
+		dev_dbg(chan2dev(chan), "DMA channel not idle?\n");
+		return -EIO;
+	}
+
+	dma_cookie_init(chan);
+
+	dc->ccr = TXX9_DMA_CCR_IMMCHN | TXX9_DMA_CCR_INTENE | CCR_LE;
+	txx9dmac_chan_set_SMPCHN(dc);
+	if (!txx9_dma_have_SMPCHN() || (dc->ccr & TXX9_DMA_CCR_SMPCHN))
+		dc->ccr |= TXX9_DMA_CCR_INTENC;
+	if (chan->device->device_prep_dma_memcpy) {
+		if (ds)
+			return -EINVAL;
+		dc->ccr |= TXX9_DMA_CCR_XFSZ_X8;
+	} else {
+		if (!ds ||
+		    (ds->tx_reg && ds->rx_reg) || (!ds->tx_reg && !ds->rx_reg))
+			return -EINVAL;
+		dc->ccr |= TXX9_DMA_CCR_EXTRQ |
+			TXX9_DMA_CCR_XFSZ(__ffs(ds->reg_width));
+		txx9dmac_chan_set_INTENT(dc);
+	}
+
+	spin_lock_bh(&dc->lock);
+	i = dc->descs_allocated;
+	while (dc->descs_allocated < TXX9_DMA_INITIAL_DESC_COUNT) {
+		spin_unlock_bh(&dc->lock);
+
+		desc = txx9dmac_desc_alloc(dc, GFP_KERNEL);
+		if (!desc) {
+			dev_info(chan2dev(chan),
+				"only allocated %d descriptors\n", i);
+			spin_lock_bh(&dc->lock);
+			break;
+		}
+		txx9dmac_desc_put(dc, desc);
+
+		spin_lock_bh(&dc->lock);
+		i = ++dc->descs_allocated;
+	}
+	spin_unlock_bh(&dc->lock);
+
+	dev_dbg(chan2dev(chan),
+		"alloc_chan_resources allocated %d descriptors\n", i);
+
+	return i;
+}
+
+static void txx9dmac_free_chan_resources(struct dma_chan *chan)
+{
+	struct txx9dmac_chan *dc = to_txx9dmac_chan(chan);
+	struct txx9dmac_dev *ddev = dc->ddev;
+	struct txx9dmac_desc *desc, *_desc;
+	LIST_HEAD(list);
+
+	dev_dbg(chan2dev(chan), "free_chan_resources (descs allocated=%u)\n",
+			dc->descs_allocated);
+
+	/* ASSERT:  channel is idle */
+	BUG_ON(!list_empty(&dc->active_list));
+	BUG_ON(!list_empty(&dc->queue));
+	BUG_ON(channel_readl(dc, CSR) & TXX9_DMA_CSR_XFACT);
+
+	spin_lock_bh(&dc->lock);
+	list_splice_init(&dc->free_list, &list);
+	dc->descs_allocated = 0;
+	spin_unlock_bh(&dc->lock);
+
+	list_for_each_entry_safe(desc, _desc, &list, desc_node) {
+		dev_vdbg(chan2dev(chan), "  freeing descriptor %p\n", desc);
+		dma_unmap_single(chan2parent(chan), desc->txd.phys,
+				 ddev->descsize, DMA_TO_DEVICE);
+		kfree(desc);
+	}
+
+	dev_vdbg(chan2dev(chan), "free_chan_resources done\n");
+}
+
+/*----------------------------------------------------------------------*/
+
+static void txx9dmac_off(struct txx9dmac_dev *ddev)
+{
+	dma_writel(ddev, MCR, 0);
+	mmiowb();
+}
+
+static int __init txx9dmac_chan_probe(struct platform_device *pdev)
+{
+	struct txx9dmac_chan_platform_data *cpdata =
+			dev_get_platdata(&pdev->dev);
+	struct platform_device *dmac_dev = cpdata->dmac_dev;
+	struct txx9dmac_platform_data *pdata = dev_get_platdata(&dmac_dev->dev);
+	struct txx9dmac_chan *dc;
+	int err;
+	int ch = pdev->id % TXX9_DMA_MAX_NR_CHANNELS;
+	int irq;
+
+	dc = devm_kzalloc(&pdev->dev, sizeof(*dc), GFP_KERNEL);
+	if (!dc)
+		return -ENOMEM;
+
+	dc->dma.dev = &pdev->dev;
+	dc->dma.device_alloc_chan_resources = txx9dmac_alloc_chan_resources;
+	dc->dma.device_free_chan_resources = txx9dmac_free_chan_resources;
+	dc->dma.device_terminate_all = txx9dmac_terminate_all;
+	dc->dma.device_tx_status = txx9dmac_tx_status;
+	dc->dma.device_issue_pending = txx9dmac_issue_pending;
+	if (pdata && pdata->memcpy_chan == ch) {
+		dc->dma.device_prep_dma_memcpy = txx9dmac_prep_dma_memcpy;
+		dma_cap_set(DMA_MEMCPY, dc->dma.cap_mask);
+	} else {
+		dc->dma.device_prep_slave_sg = txx9dmac_prep_slave_sg;
+		dma_cap_set(DMA_SLAVE, dc->dma.cap_mask);
+		dma_cap_set(DMA_PRIVATE, dc->dma.cap_mask);
+	}
+
+	INIT_LIST_HEAD(&dc->dma.channels);
+	dc->ddev = platform_get_drvdata(dmac_dev);
+	if (dc->ddev->irq < 0) {
+		irq = platform_get_irq(pdev, 0);
+		if (irq < 0)
+			return irq;
+		tasklet_init(&dc->tasklet, txx9dmac_chan_tasklet,
+				(unsigned long)dc);
+		dc->irq = irq;
+		err = devm_request_irq(&pdev->dev, dc->irq,
+			txx9dmac_chan_interrupt, 0, dev_name(&pdev->dev), dc);
+		if (err)
+			return err;
+	} else
+		dc->irq = -1;
+	dc->ddev->chan[ch] = dc;
+	dc->chan.device = &dc->dma;
+	list_add_tail(&dc->chan.device_node, &dc->chan.device->channels);
+	dma_cookie_init(&dc->chan);
+
+	if (is_dmac64(dc))
+		dc->ch_regs = &__txx9dmac_regs(dc->ddev)->CHAN[ch];
+	else
+		dc->ch_regs = &__txx9dmac_regs32(dc->ddev)->CHAN[ch];
+	spin_lock_init(&dc->lock);
+
+	INIT_LIST_HEAD(&dc->active_list);
+	INIT_LIST_HEAD(&dc->queue);
+	INIT_LIST_HEAD(&dc->free_list);
+
+	txx9dmac_reset_chan(dc);
+
+	platform_set_drvdata(pdev, dc);
+
+	err = dma_async_device_register(&dc->dma);
+	if (err)
+		return err;
+	dev_dbg(&pdev->dev, "TXx9 DMA Channel (dma%d%s%s)\n",
+		dc->dma.dev_id,
+		dma_has_cap(DMA_MEMCPY, dc->dma.cap_mask) ? " memcpy" : "",
+		dma_has_cap(DMA_SLAVE, dc->dma.cap_mask) ? " slave" : "");
+
+	return 0;
+}
+
+static int txx9dmac_chan_remove(struct platform_device *pdev)
+{
+	struct txx9dmac_chan *dc = platform_get_drvdata(pdev);
+
+	dma_async_device_unregister(&dc->dma);
+	if (dc->irq >= 0) {
+		devm_free_irq(&pdev->dev, dc->irq, dc);
+		tasklet_kill(&dc->tasklet);
+	}
+	dc->ddev->chan[pdev->id % TXX9_DMA_MAX_NR_CHANNELS] = NULL;
+	return 0;
+}
+
+static int __init txx9dmac_probe(struct platform_device *pdev)
+{
+	struct txx9dmac_platform_data *pdata = dev_get_platdata(&pdev->dev);
+	struct resource *io;
+	struct txx9dmac_dev *ddev;
+	u32 mcr;
+	int err;
+
+	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!io)
+		return -EINVAL;
+
+	ddev = devm_kzalloc(&pdev->dev, sizeof(*ddev), GFP_KERNEL);
+	if (!ddev)
+		return -ENOMEM;
+
+	if (!devm_request_mem_region(&pdev->dev, io->start, resource_size(io),
+				     dev_name(&pdev->dev)))
+		return -EBUSY;
+
+	ddev->regs = devm_ioremap(&pdev->dev, io->start, resource_size(io));
+	if (!ddev->regs)
+		return -ENOMEM;
+	ddev->have_64bit_regs = pdata->have_64bit_regs;
+	if (__is_dmac64(ddev))
+		ddev->descsize = sizeof(struct txx9dmac_hwdesc);
+	else
+		ddev->descsize = sizeof(struct txx9dmac_hwdesc32);
+
+	/* force dma off, just in case */
+	txx9dmac_off(ddev);
+
+	ddev->irq = platform_get_irq(pdev, 0);
+	if (ddev->irq >= 0) {
+		tasklet_init(&ddev->tasklet, txx9dmac_tasklet,
+				(unsigned long)ddev);
+		err = devm_request_irq(&pdev->dev, ddev->irq,
+			txx9dmac_interrupt, 0, dev_name(&pdev->dev), ddev);
+		if (err)
+			return err;
+	}
+
+	mcr = TXX9_DMA_MCR_MSTEN | MCR_LE;
+	if (pdata && pdata->memcpy_chan >= 0)
+		mcr |= TXX9_DMA_MCR_FIFUM(pdata->memcpy_chan);
+	dma_writel(ddev, MCR, mcr);
+
+	platform_set_drvdata(pdev, ddev);
+	return 0;
+}
+
+static int txx9dmac_remove(struct platform_device *pdev)
+{
+	struct txx9dmac_dev *ddev = platform_get_drvdata(pdev);
+
+	txx9dmac_off(ddev);
+	if (ddev->irq >= 0) {
+		devm_free_irq(&pdev->dev, ddev->irq, ddev);
+		tasklet_kill(&ddev->tasklet);
+	}
+	return 0;
+}
+
+static void txx9dmac_shutdown(struct platform_device *pdev)
+{
+	struct txx9dmac_dev *ddev = platform_get_drvdata(pdev);
+
+	txx9dmac_off(ddev);
+}
+
+static int txx9dmac_suspend_noirq(struct device *dev)
+{
+	struct txx9dmac_dev *ddev = dev_get_drvdata(dev);
+
+	txx9dmac_off(ddev);
+	return 0;
+}
+
+static int txx9dmac_resume_noirq(struct device *dev)
+{
+	struct txx9dmac_dev *ddev = dev_get_drvdata(dev);
+	struct txx9dmac_platform_data *pdata = dev_get_platdata(dev);
+	u32 mcr;
+
+	mcr = TXX9_DMA_MCR_MSTEN | MCR_LE;
+	if (pdata && pdata->memcpy_chan >= 0)
+		mcr |= TXX9_DMA_MCR_FIFUM(pdata->memcpy_chan);
+	dma_writel(ddev, MCR, mcr);
+	return 0;
+}
+
+static const struct dev_pm_ops txx9dmac_dev_pm_ops = {
+	.suspend_noirq = txx9dmac_suspend_noirq,
+	.resume_noirq = txx9dmac_resume_noirq,
+};
+
+static struct platform_driver txx9dmac_chan_driver = {
+	.remove		= txx9dmac_chan_remove,
+	.driver = {
+		.name	= "txx9dmac-chan",
+	},
+};
+
+static struct platform_driver txx9dmac_driver = {
+	.remove		= txx9dmac_remove,
+	.shutdown	= txx9dmac_shutdown,
+	.driver = {
+		.name	= "txx9dmac",
+		.pm	= &txx9dmac_dev_pm_ops,
+	},
+};
+
+static int __init txx9dmac_init(void)
+{
+	int rc;
+
+	rc = platform_driver_probe(&txx9dmac_driver, txx9dmac_probe);
+	if (!rc) {
+		rc = platform_driver_probe(&txx9dmac_chan_driver,
+					   txx9dmac_chan_probe);
+		if (rc)
+			platform_driver_unregister(&txx9dmac_driver);
+	}
+	return rc;
+}
+module_init(txx9dmac_init);
+
+static void __exit txx9dmac_exit(void)
+{
+	platform_driver_unregister(&txx9dmac_chan_driver);
+	platform_driver_unregister(&txx9dmac_driver);
+}
+module_exit(txx9dmac_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("TXx9 DMA Controller driver");
+MODULE_AUTHOR("Atsushi Nemoto <anemo@mba.ocn.ne.jp>");
+MODULE_ALIAS("platform:txx9dmac");
+MODULE_ALIAS("platform:txx9dmac-chan");
diff --git a/drivers/dma/txx9dmac.h b/drivers/dma/txx9dmac.h
new file mode 100644
index 0000000..f6517b9
--- /dev/null
+++ b/drivers/dma/txx9dmac.h
@@ -0,0 +1,307 @@
+/*
+ * Driver for the TXx9 SoC DMA Controller
+ *
+ * Copyright (C) 2009 Atsushi Nemoto
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef TXX9DMAC_H
+#define TXX9DMAC_H
+
+#include <linux/dmaengine.h>
+#include <asm/txx9/dmac.h>
+
+/*
+ * Design Notes:
+ *
+ * This DMAC has four channels and one FIFO buffer.  Each channel can
+ * be configured for memory-memory or device-memory transfers, but only
+ * one channel at a time can do alignment-free memory-memory transfers,
+ * since that channel has to occupy the FIFO buffer for the transfers
+ * to be effective.
+ *
+ * Instead of dynamically assigning the FIFO buffer to channels, I chose
+ * to dedicate one channel to memory-memory transfers.  The dedicated
+ * channel is public.  The other channels are private and used for
+ * slave transfers.  Some devices in the SoC are wired to certain
+ * DMA channels.
+ */
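+
+/*
+ * A slave channel is described by a struct txx9dmac_slave (from
+ * <asm/txx9/dmac.h>): exactly one of tx_reg/rx_reg holds the device
+ * FIFO address, and reg_width its access size in bytes.  A minimal
+ * sketch only -- the address below is hypothetical, the real
+ * descriptions live in the board code:
+ *
+ *	static struct txx9dmac_slave example_slave = {
+ *		.rx_reg		= 0x1f0f0020,
+ *		.reg_width	= 4,
+ *	};
+ *
+ * The pointer is handed to the driver through dma_chan->private;
+ * txx9dmac_alloc_chan_resources() and txx9dmac_prep_slave_sg() read
+ * the FIFO address and the address increments from it.
+ */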
+
+#ifdef CONFIG_MACH_TX49XX
+static inline bool txx9_dma_have_SMPCHN(void)
+{
+	return true;
+}
+#define TXX9_DMA_USE_SIMPLE_CHAIN
+#else
+static inline bool txx9_dma_have_SMPCHN(void)
+{
+	return false;
+}
+#endif
+
+#ifdef __LITTLE_ENDIAN
+#ifdef CONFIG_MACH_TX49XX
+#define CCR_LE	TXX9_DMA_CCR_LE
+#define MCR_LE	0
+#else
+#define CCR_LE	0
+#define MCR_LE	TXX9_DMA_MCR_LE
+#endif
+#else
+#define CCR_LE	0
+#define MCR_LE	0
+#endif
+
+/*
+ * TXX9_DMA_REG32() places a 32-bit register within a 64-bit register
+ * slot; which half of the slot is padding depends on the endianness.
+ */
+#ifdef __BIG_ENDIAN
+#define TXX9_DMA_REG32(name)		u32 __pad_##name; u32 name
+#else
+#define TXX9_DMA_REG32(name)		u32 name; u32 __pad_##name
+#endif
+
+/* Hardware register definitions. */
+struct txx9dmac_cregs {
+#if defined(CONFIG_32BIT) && !defined(CONFIG_PHYS_ADDR_T_64BIT)
+	TXX9_DMA_REG32(CHAR);	/* Chain Address Register */
+#else
+	u64 CHAR;		/* Chain Address Register */
+#endif
+	u64 SAR;		/* Source Address Register */
+	u64 DAR;		/* Destination Address Register */
+	TXX9_DMA_REG32(CNTR);	/* Count Register */
+	TXX9_DMA_REG32(SAIR);	/* Source Address Increment Register */
+	TXX9_DMA_REG32(DAIR);	/* Destination Address Increment Register */
+	TXX9_DMA_REG32(CCR);	/* Channel Control Register */
+	TXX9_DMA_REG32(CSR);	/* Channel Status Register */
+};
+struct txx9dmac_cregs32 {
+	u32 CHAR;
+	u32 SAR;
+	u32 DAR;
+	u32 CNTR;
+	u32 SAIR;
+	u32 DAIR;
+	u32 CCR;
+	u32 CSR;
+};
+
+struct txx9dmac_regs {
+	/* per-channel registers */
+	struct txx9dmac_cregs	CHAN[TXX9_DMA_MAX_NR_CHANNELS];
+	u64	__pad[9];
+	u64	MFDR;		/* Memory Fill Data Register */
+	TXX9_DMA_REG32(MCR);	/* Master Control Register */
+};
+struct txx9dmac_regs32 {
+	struct txx9dmac_cregs32	CHAN[TXX9_DMA_MAX_NR_CHANNELS];
+	u32	__pad[9];
+	u32	MFDR;
+	u32	MCR;
+};
+
+/* bits for MCR */
+#define TXX9_DMA_MCR_EIS(ch)	(0x10000000<<(ch))
+#define TXX9_DMA_MCR_DIS(ch)	(0x01000000<<(ch))
+#define TXX9_DMA_MCR_RSFIF	0x00000080
+#define TXX9_DMA_MCR_FIFUM(ch)	(0x00000008<<(ch))
+#define TXX9_DMA_MCR_LE		0x00000004
+#define TXX9_DMA_MCR_RPRT	0x00000002
+#define TXX9_DMA_MCR_MSTEN	0x00000001
+
+/* bits for CCRn */
+#define TXX9_DMA_CCR_IMMCHN	0x20000000
+#define TXX9_DMA_CCR_USEXFSZ	0x10000000
+#define TXX9_DMA_CCR_LE		0x08000000
+#define TXX9_DMA_CCR_DBINH	0x04000000
+#define TXX9_DMA_CCR_SBINH	0x02000000
+#define TXX9_DMA_CCR_CHRST	0x01000000
+#define TXX9_DMA_CCR_RVBYTE	0x00800000
+#define TXX9_DMA_CCR_ACKPOL	0x00400000
+#define TXX9_DMA_CCR_REQPL	0x00200000
+#define TXX9_DMA_CCR_EGREQ	0x00100000
+#define TXX9_DMA_CCR_CHDN	0x00080000
+#define TXX9_DMA_CCR_DNCTL	0x00060000
+#define TXX9_DMA_CCR_EXTRQ	0x00010000
+#define TXX9_DMA_CCR_INTRQD	0x0000e000
+#define TXX9_DMA_CCR_INTENE	0x00001000
+#define TXX9_DMA_CCR_INTENC	0x00000800
+#define TXX9_DMA_CCR_INTENT	0x00000400
+#define TXX9_DMA_CCR_CHNEN	0x00000200
+#define TXX9_DMA_CCR_XFACT	0x00000100
+#define TXX9_DMA_CCR_SMPCHN	0x00000020
+#define TXX9_DMA_CCR_XFSZ(order)	(((order) << 2) & 0x0000001c)
+#define TXX9_DMA_CCR_XFSZ_1	TXX9_DMA_CCR_XFSZ(0)
+#define TXX9_DMA_CCR_XFSZ_2	TXX9_DMA_CCR_XFSZ(1)
+#define TXX9_DMA_CCR_XFSZ_4	TXX9_DMA_CCR_XFSZ(2)
+#define TXX9_DMA_CCR_XFSZ_8	TXX9_DMA_CCR_XFSZ(3)
+#define TXX9_DMA_CCR_XFSZ_X4	TXX9_DMA_CCR_XFSZ(4)
+#define TXX9_DMA_CCR_XFSZ_X8	TXX9_DMA_CCR_XFSZ(5)
+#define TXX9_DMA_CCR_XFSZ_X16	TXX9_DMA_CCR_XFSZ(6)
+#define TXX9_DMA_CCR_XFSZ_X32	TXX9_DMA_CCR_XFSZ(7)
+#define TXX9_DMA_CCR_MEMIO	0x00000002
+#define TXX9_DMA_CCR_SNGAD	0x00000001
+
+/* bits for CSRn */
+#define TXX9_DMA_CSR_CHNEN	0x00000400
+#define TXX9_DMA_CSR_STLXFER	0x00000200
+#define TXX9_DMA_CSR_XFACT	0x00000100
+#define TXX9_DMA_CSR_ABCHC	0x00000080
+#define TXX9_DMA_CSR_NCHNC	0x00000040
+#define TXX9_DMA_CSR_NTRNFC	0x00000020
+#define TXX9_DMA_CSR_EXTDN	0x00000010
+#define TXX9_DMA_CSR_CFERR	0x00000008
+#define TXX9_DMA_CSR_CHERR	0x00000004
+#define TXX9_DMA_CSR_DESERR	0x00000002
+#define TXX9_DMA_CSR_SORERR	0x00000001
+
+struct txx9dmac_chan {
+	struct dma_chan		chan;
+	struct dma_device	dma;
+	struct txx9dmac_dev	*ddev;
+	void __iomem		*ch_regs;
+	struct tasklet_struct	tasklet;
+	int			irq;
+	u32			ccr;
+
+	spinlock_t		lock;
+
+	/* these other elements are all protected by lock */
+	struct list_head	active_list;
+	struct list_head	queue;
+	struct list_head	free_list;
+
+	unsigned int		descs_allocated;
+};
+
+struct txx9dmac_dev {
+	void __iomem		*regs;
+	struct tasklet_struct	tasklet;
+	int			irq;
+	struct txx9dmac_chan	*chan[TXX9_DMA_MAX_NR_CHANNELS];
+	bool			have_64bit_regs;
+	unsigned int		descsize;
+};
+
+static inline bool __is_dmac64(const struct txx9dmac_dev *ddev)
+{
+	return ddev->have_64bit_regs;
+}
+
+static inline bool is_dmac64(const struct txx9dmac_chan *dc)
+{
+	return __is_dmac64(dc->ddev);
+}
+
+#ifdef TXX9_DMA_USE_SIMPLE_CHAIN
+/* Hardware descriptor definition. (for simple-chain) */
+struct txx9dmac_hwdesc {
+#if defined(CONFIG_32BIT) && !defined(CONFIG_PHYS_ADDR_T_64BIT)
+	TXX9_DMA_REG32(CHAR);
+#else
+	u64 CHAR;
+#endif
+	u64 SAR;
+	u64 DAR;
+	TXX9_DMA_REG32(CNTR);
+};
+struct txx9dmac_hwdesc32 {
+	u32 CHAR;
+	u32 SAR;
+	u32 DAR;
+	u32 CNTR;
+};
+#else
+#define txx9dmac_hwdesc txx9dmac_cregs
+#define txx9dmac_hwdesc32 txx9dmac_cregs32
+#endif
+
+struct txx9dmac_desc {
+	/* FIRST values the hardware uses */
+	union {
+		struct txx9dmac_hwdesc hwdesc;
+		struct txx9dmac_hwdesc32 hwdesc32;
+	};
+
+	/* THEN values for driver housekeeping */
+	struct list_head		desc_node ____cacheline_aligned;
+	struct list_head		tx_list;
+	struct dma_async_tx_descriptor	txd;
+	size_t				len;
+};
+
+#ifdef TXX9_DMA_USE_SIMPLE_CHAIN
+
+static inline bool txx9dmac_chan_INTENT(struct txx9dmac_chan *dc)
+{
+	return (dc->ccr & TXX9_DMA_CCR_INTENT) != 0;
+}
+
+static inline void txx9dmac_chan_set_INTENT(struct txx9dmac_chan *dc)
+{
+	dc->ccr |= TXX9_DMA_CCR_INTENT;
+}
+
+static inline void txx9dmac_desc_set_INTENT(struct txx9dmac_dev *ddev,
+					    struct txx9dmac_desc *desc)
+{
+}
+
+static inline void txx9dmac_chan_set_SMPCHN(struct txx9dmac_chan *dc)
+{
+	dc->ccr |= TXX9_DMA_CCR_SMPCHN;
+}
+
+static inline void txx9dmac_desc_set_nosimple(struct txx9dmac_dev *ddev,
+					      struct txx9dmac_desc *desc,
+					      u32 sair, u32 dair, u32 ccr)
+{
+}
+
+#else /* TXX9_DMA_USE_SIMPLE_CHAIN */
+
+static inline bool txx9dmac_chan_INTENT(struct txx9dmac_chan *dc)
+{
+	return true;
+}
+
+static void txx9dmac_chan_set_INTENT(struct txx9dmac_chan *dc)
+{
+}
+
+static inline void txx9dmac_desc_set_INTENT(struct txx9dmac_dev *ddev,
+					    struct txx9dmac_desc *desc)
+{
+	if (__is_dmac64(ddev))
+		desc->hwdesc.CCR |= TXX9_DMA_CCR_INTENT;
+	else
+		desc->hwdesc32.CCR |= TXX9_DMA_CCR_INTENT;
+}
+
+static inline void txx9dmac_chan_set_SMPCHN(struct txx9dmac_chan *dc)
+{
+}
+
+static inline void txx9dmac_desc_set_nosimple(struct txx9dmac_dev *ddev,
+					      struct txx9dmac_desc *desc,
+					      u32 sai, u32 dai, u32 ccr)
+{
+	if (__is_dmac64(ddev)) {
+		desc->hwdesc.SAIR = sai;
+		desc->hwdesc.DAIR = dai;
+		desc->hwdesc.CCR = ccr;
+	} else {
+		desc->hwdesc32.SAIR = sai;
+		desc->hwdesc32.DAIR = dai;
+		desc->hwdesc32.CCR = ccr;
+	}
+}
+
+#endif /* TXX9_DMA_USE_SIMPLE_CHAIN */
+
+#endif /* TXX9DMAC_H */
diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c
new file mode 100644
index 0000000..88ad8ed
--- /dev/null
+++ b/drivers/dma/virt-dma.c
@@ -0,0 +1,150 @@
+/*
+ * Virtual DMA channel support for DMAengine
+ *
+ * Copyright (C) 2012 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+
+#include "virt-dma.h"
+
+static struct virt_dma_desc *to_virt_desc(struct dma_async_tx_descriptor *tx)
+{
+	return container_of(tx, struct virt_dma_desc, tx);
+}
+
+dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct virt_dma_chan *vc = to_virt_chan(tx->chan);
+	struct virt_dma_desc *vd = to_virt_desc(tx);
+	unsigned long flags;
+	dma_cookie_t cookie;
+
+	spin_lock_irqsave(&vc->lock, flags);
+	cookie = dma_cookie_assign(tx);
+
+	list_move_tail(&vd->node, &vc->desc_submitted);
+	spin_unlock_irqrestore(&vc->lock, flags);
+
+	dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: submitted\n",
+		vc, vd, cookie);
+
+	return cookie;
+}
+EXPORT_SYMBOL_GPL(vchan_tx_submit);
+
+/**
+ * vchan_tx_desc_free - free a reusable descriptor
+ * @tx: the transfer
+ *
+ * This function frees a previously allocated reusable descriptor. The only
+ * other way to free it is to clear the DMA_CTRL_REUSE flag and submit the
+ * transfer one last time.
+ *
+ * Returns 0 upon success
+ */
+int vchan_tx_desc_free(struct dma_async_tx_descriptor *tx)
+{
+	struct virt_dma_chan *vc = to_virt_chan(tx->chan);
+	struct virt_dma_desc *vd = to_virt_desc(tx);
+	unsigned long flags;
+
+	spin_lock_irqsave(&vc->lock, flags);
+	list_del(&vd->node);
+	spin_unlock_irqrestore(&vc->lock, flags);
+
+	dev_dbg(vc->chan.device->dev, "vchan %p: txd %p[%x]: freeing\n",
+		vc, vd, vd->tx.cookie);
+	vc->desc_free(vd);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vchan_tx_desc_free);
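+
+/*
+ * Client-side view of the reuse path, as a rough sketch (error handling
+ * omitted; "tx" is a descriptor obtained from a prep call on a channel
+ * whose driver supports descriptor reuse):
+ *
+ *	dmaengine_desc_set_reuse(tx);
+ *	...submit and reuse tx as often as needed...
+ *	dmaengine_desc_free(tx);
+ *
+ * The final dmaengine_desc_free() call reaches this function through
+ * tx->desc_free when the descriptor was prepared via vchan_tx_prep().
+ */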
+
+struct virt_dma_desc *vchan_find_desc(struct virt_dma_chan *vc,
+	dma_cookie_t cookie)
+{
+	struct virt_dma_desc *vd;
+
+	list_for_each_entry(vd, &vc->desc_issued, node)
+		if (vd->tx.cookie == cookie)
+			return vd;
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(vchan_find_desc);
+
+/*
+ * This tasklet handles the completion of DMA descriptors by calling their
+ * callbacks and freeing them, and invokes any pending cyclic callback.
+ */
+static void vchan_complete(unsigned long arg)
+{
+	struct virt_dma_chan *vc = (struct virt_dma_chan *)arg;
+	struct virt_dma_desc *vd, *_vd;
+	struct dmaengine_desc_callback cb;
+	LIST_HEAD(head);
+
+	spin_lock_irq(&vc->lock);
+	list_splice_tail_init(&vc->desc_completed, &head);
+	vd = vc->cyclic;
+	if (vd) {
+		vc->cyclic = NULL;
+		dmaengine_desc_get_callback(&vd->tx, &cb);
+	} else {
+		memset(&cb, 0, sizeof(cb));
+	}
+	spin_unlock_irq(&vc->lock);
+
+	dmaengine_desc_callback_invoke(&cb, NULL);
+
+	list_for_each_entry_safe(vd, _vd, &head, node) {
+		dmaengine_desc_get_callback(&vd->tx, &cb);
+
+		list_del(&vd->node);
+		vchan_vdesc_fini(vd);
+
+		dmaengine_desc_callback_invoke(&cb, NULL);
+	}
+}
+
+void vchan_dma_desc_free_list(struct virt_dma_chan *vc, struct list_head *head)
+{
+	struct virt_dma_desc *vd, *_vd;
+
+	list_for_each_entry_safe(vd, _vd, head, node) {
+		if (dmaengine_desc_test_reuse(&vd->tx)) {
+			list_move_tail(&vd->node, &vc->desc_allocated);
+		} else {
+			dev_dbg(vc->chan.device->dev, "txd %p: freeing\n", vd);
+			list_del(&vd->node);
+			vc->desc_free(vd);
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(vchan_dma_desc_free_list);
+
+void vchan_init(struct virt_dma_chan *vc, struct dma_device *dmadev)
+{
+	dma_cookie_init(&vc->chan);
+
+	spin_lock_init(&vc->lock);
+	INIT_LIST_HEAD(&vc->desc_allocated);
+	INIT_LIST_HEAD(&vc->desc_submitted);
+	INIT_LIST_HEAD(&vc->desc_issued);
+	INIT_LIST_HEAD(&vc->desc_completed);
+
+	tasklet_init(&vc->task, vchan_complete, (unsigned long)vc);
+
+	vc->chan.device = dmadev;
+	list_add_tail(&vc->chan.device_node, &dmadev->channels);
+}
+EXPORT_SYMBOL_GPL(vchan_init);
+
+MODULE_AUTHOR("Russell King");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/virt-dma.h b/drivers/dma/virt-dma.h
new file mode 100644
index 0000000..b09b75a
--- /dev/null
+++ b/drivers/dma/virt-dma.h
@@ -0,0 +1,221 @@
+/*
+ * Virtual DMA channel support for DMAengine
+ *
+ * Copyright (C) 2012 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef VIRT_DMA_H
+#define VIRT_DMA_H
+
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+
+#include "dmaengine.h"
+
+struct virt_dma_desc {
+	struct dma_async_tx_descriptor tx;
+	/* protected by vc.lock */
+	struct list_head node;
+};
+
+struct virt_dma_chan {
+	struct dma_chan	chan;
+	struct tasklet_struct task;
+	void (*desc_free)(struct virt_dma_desc *);
+
+	spinlock_t lock;
+
+	/* protected by vc.lock */
+	struct list_head desc_allocated;
+	struct list_head desc_submitted;
+	struct list_head desc_issued;
+	struct list_head desc_completed;
+
+	struct virt_dma_desc *cyclic;
+	struct virt_dma_desc *vd_terminated;
+};
+
+static inline struct virt_dma_chan *to_virt_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct virt_dma_chan, chan);
+}
+
+void vchan_dma_desc_free_list(struct virt_dma_chan *vc, struct list_head *head);
+void vchan_init(struct virt_dma_chan *vc, struct dma_device *dmadev);
+struct virt_dma_desc *vchan_find_desc(struct virt_dma_chan *, dma_cookie_t);
+extern dma_cookie_t vchan_tx_submit(struct dma_async_tx_descriptor *);
+extern int vchan_tx_desc_free(struct dma_async_tx_descriptor *);
+
+/**
+ * vchan_tx_prep - prepare a descriptor
+ * @vc: virtual channel allocating this descriptor
+ * @vd: virtual descriptor to prepare
+ * @tx_flags: flags argument passed in to prepare function
+ */
+static inline struct dma_async_tx_descriptor *vchan_tx_prep(struct virt_dma_chan *vc,
+	struct virt_dma_desc *vd, unsigned long tx_flags)
+{
+	unsigned long flags;
+
+	dma_async_tx_descriptor_init(&vd->tx, &vc->chan);
+	vd->tx.flags = tx_flags;
+	vd->tx.tx_submit = vchan_tx_submit;
+	vd->tx.desc_free = vchan_tx_desc_free;
+
+	spin_lock_irqsave(&vc->lock, flags);
+	list_add_tail(&vd->node, &vc->desc_allocated);
+	spin_unlock_irqrestore(&vc->lock, flags);
+
+	return &vd->tx;
+}
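+
+/*
+ * Illustrative sketch (not part of this header): a driver's prep callback
+ * usually embeds struct virt_dma_desc at the start of its own descriptor
+ * and hands it to vchan_tx_prep().  The "foo_*" names are hypothetical:
+ *
+ *	struct foo_desc *d = kzalloc(sizeof(*d), GFP_NOWAIT);
+ *
+ *	if (!d)
+ *		return NULL;
+ *	... fill in the hardware specific fields of *d ...
+ *	return vchan_tx_prep(&foo_chan->vc, &d->vd, flags);
+ */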
+
+/**
+ * vchan_issue_pending - move submitted descriptors to issued list
+ * @vc: virtual channel to update
+ *
+ * vc.lock must be held by caller
+ */
+static inline bool vchan_issue_pending(struct virt_dma_chan *vc)
+{
+	list_splice_tail_init(&vc->desc_submitted, &vc->desc_issued);
+	return !list_empty(&vc->desc_issued);
+}
+
+/**
+ * vchan_cookie_complete - report completion of a descriptor
+ * @vd: virtual descriptor to update
+ *
+ * vc.lock must be held by caller
+ */
+static inline void vchan_cookie_complete(struct virt_dma_desc *vd)
+{
+	struct virt_dma_chan *vc = to_virt_chan(vd->tx.chan);
+	dma_cookie_t cookie;
+
+	cookie = vd->tx.cookie;
+	dma_cookie_complete(&vd->tx);
+	dev_vdbg(vc->chan.device->dev, "txd %p[%x]: marked complete\n",
+		 vd, cookie);
+	list_add_tail(&vd->node, &vc->desc_completed);
+
+	tasklet_schedule(&vc->task);
+}
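+
+/*
+ * Illustrative sketch (not part of this header): a driver's completion
+ * interrupt handler typically reports the finished descriptor like this,
+ * with "foo_chan" and "cur_desc" being hypothetical driver fields:
+ *
+ *	spin_lock_irqsave(&foo_chan->vc.lock, flags);
+ *	if (foo_chan->cur_desc) {
+ *		vchan_cookie_complete(&foo_chan->cur_desc->vd);
+ *		foo_chan->cur_desc = NULL;
+ *	}
+ *	spin_unlock_irqrestore(&foo_chan->vc.lock, flags);
+ */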
+
+/**
+ * vchan_vdesc_fini - Free or reuse a descriptor
+ * @vd: virtual descriptor to free/reuse
+ */
+static inline void vchan_vdesc_fini(struct virt_dma_desc *vd)
+{
+	struct virt_dma_chan *vc = to_virt_chan(vd->tx.chan);
+
+	if (dmaengine_desc_test_reuse(&vd->tx))
+		list_add(&vd->node, &vc->desc_allocated);
+	else
+		vc->desc_free(vd);
+}
+
+/**
+ * vchan_cyclic_callback - report the completion of a period
+ * @vd: virtual descriptor
+ */
+static inline void vchan_cyclic_callback(struct virt_dma_desc *vd)
+{
+	struct virt_dma_chan *vc = to_virt_chan(vd->tx.chan);
+
+	vc->cyclic = vd;
+	tasklet_schedule(&vc->task);
+}
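+
+/*
+ * Illustrative sketch (not part of this header): for cyclic transfers the
+ * period interrupt handler reports the descriptor here instead of
+ * completing its cookie ("foo_chan"/"cur_desc" are hypothetical):
+ *
+ *	spin_lock_irqsave(&foo_chan->vc.lock, flags);
+ *	if (foo_chan->cur_desc)
+ *		vchan_cyclic_callback(&foo_chan->cur_desc->vd);
+ *	spin_unlock_irqrestore(&foo_chan->vc.lock, flags);
+ */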
+
+/**
+ * vchan_terminate_vdesc - Disable pending cyclic callback
+ * @vd: virtual descriptor to be terminated
+ *
+ * vc.lock must be held by caller
+ */
+static inline void vchan_terminate_vdesc(struct virt_dma_desc *vd)
+{
+	struct virt_dma_chan *vc = to_virt_chan(vd->tx.chan);
+
+	/* free up stuck descriptor */
+	if (vc->vd_terminated)
+		vchan_vdesc_fini(vc->vd_terminated);
+
+	vc->vd_terminated = vd;
+	if (vc->cyclic == vd)
+		vc->cyclic = NULL;
+}
+
+/**
+ * vchan_next_desc - peek at the next descriptor to be processed
+ * @vc: virtual channel to obtain descriptor from
+ *
+ * vc.lock must be held by caller
+ */
+static inline struct virt_dma_desc *vchan_next_desc(struct virt_dma_chan *vc)
+{
+	return list_first_entry_or_null(&vc->desc_issued,
+					struct virt_dma_desc, node);
+}
+
+/**
+ * vchan_get_all_descriptors - obtain all submitted and issued descriptors
+ * @vc: virtual channel to get descriptors from
+ * @head: list of descriptors found
+ *
+ * vc.lock must be held by caller
+ *
+ * Removes all submitted and issued descriptors from internal lists, and
+ * provides a list of all descriptors found
+ */
+static inline void vchan_get_all_descriptors(struct virt_dma_chan *vc,
+	struct list_head *head)
+{
+	list_splice_tail_init(&vc->desc_allocated, head);
+	list_splice_tail_init(&vc->desc_submitted, head);
+	list_splice_tail_init(&vc->desc_issued, head);
+	list_splice_tail_init(&vc->desc_completed, head);
+}
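+
+/*
+ * Illustrative sketch (not part of this header): the common pattern for a
+ * driver's device_terminate_all callback built on the helpers above (the
+ * hardware stop step is driver specific and hypothetical here):
+ *
+ *	spin_lock_irqsave(&vc->lock, flags);
+ *	... stop the hardware channel ...
+ *	vchan_get_all_descriptors(vc, &head);
+ *	spin_unlock_irqrestore(&vc->lock, flags);
+ *	vchan_dma_desc_free_list(vc, &head);
+ */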
+
+static inline void vchan_free_chan_resources(struct virt_dma_chan *vc)
+{
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&vc->lock, flags);
+	vchan_get_all_descriptors(vc, &head);
+	list_for_each_entry(vd, &head, node)
+		dmaengine_desc_clear_reuse(&vd->tx);
+	spin_unlock_irqrestore(&vc->lock, flags);
+
+	vchan_dma_desc_free_list(vc, &head);
+}
+
+/**
+ * vchan_synchronize() - synchronize callback execution to the current context
+ * @vc: virtual channel to synchronize
+ *
+ * Makes sure that all scheduled or active callbacks have finished running. For
+ * proper operation the caller has to ensure that no new callbacks are scheduled
+ * after the invocation of this function has started.
+ * Also frees up the terminated descriptor, if any, to prevent a memory leak.
+ */
+static inline void vchan_synchronize(struct virt_dma_chan *vc)
+{
+	unsigned long flags;
+
+	tasklet_kill(&vc->task);
+
+	spin_lock_irqsave(&vc->lock, flags);
+	if (vc->vd_terminated) {
+		vchan_vdesc_fini(vc->vd_terminated);
+		vc->vd_terminated = NULL;
+	}
+	spin_unlock_irqrestore(&vc->lock, flags);
+}
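+
+/*
+ * Illustrative sketch (not part of this header): drivers usually wire their
+ * device_synchronize callback straight to this helper ("foo_*" names are
+ * hypothetical):
+ *
+ *	static void foo_synchronize(struct dma_chan *chan)
+ *	{
+ *		vchan_synchronize(&to_foo_chan(chan)->vc);
+ *	}
+ */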
+
+#endif
diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c
new file mode 100644
index 0000000..1d59888
--- /dev/null
+++ b/drivers/dma/xgene-dma.c
@@ -0,0 +1,1848 @@
+/*
+ * Applied Micro X-Gene SoC DMA engine Driver
+ *
+ * Copyright (c) 2015, Applied Micro Circuits Corporation
+ * Authors: Rameshwar Prasad Sahu <rsahu@apm.com>
+ *	    Loc Ho <lho@apm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * NOTE: PM support is currently not available.
+ */
+
+#include <linux/acpi.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+
+#include "dmaengine.h"
+
+/* X-Gene DMA ring csr registers and bit definitions */
+#define XGENE_DMA_RING_CONFIG			0x04
+#define XGENE_DMA_RING_ENABLE			BIT(31)
+#define XGENE_DMA_RING_ID			0x08
+#define XGENE_DMA_RING_ID_SETUP(v)		((v) | BIT(31))
+#define XGENE_DMA_RING_ID_BUF			0x0C
+#define XGENE_DMA_RING_ID_BUF_SETUP(v)		(((v) << 9) | BIT(21))
+#define XGENE_DMA_RING_THRESLD0_SET1		0x30
+#define XGENE_DMA_RING_THRESLD0_SET1_VAL	0x64
+#define XGENE_DMA_RING_THRESLD1_SET1		0x34
+#define XGENE_DMA_RING_THRESLD1_SET1_VAL	0xC8
+#define XGENE_DMA_RING_HYSTERESIS		0x68
+#define XGENE_DMA_RING_HYSTERESIS_VAL		0xFFFFFFFF
+#define XGENE_DMA_RING_STATE			0x6C
+#define XGENE_DMA_RING_STATE_WR_BASE		0x70
+#define XGENE_DMA_RING_NE_INT_MODE		0x017C
+#define XGENE_DMA_RING_NE_INT_MODE_SET(m, v)	\
+	((m) = ((m) & ~BIT(31 - (v))) | BIT(31 - (v)))
+#define XGENE_DMA_RING_NE_INT_MODE_RESET(m, v)	\
+	((m) &= (~BIT(31 - (v))))
+#define XGENE_DMA_RING_CLKEN			0xC208
+#define XGENE_DMA_RING_SRST			0xC200
+#define XGENE_DMA_RING_MEM_RAM_SHUTDOWN		0xD070
+#define XGENE_DMA_RING_BLK_MEM_RDY		0xD074
+#define XGENE_DMA_RING_BLK_MEM_RDY_VAL		0xFFFFFFFF
+#define XGENE_DMA_RING_ID_GET(owner, num)	(((owner) << 6) | (num))
+#define XGENE_DMA_RING_DST_ID(v)		((1 << 10) | (v))
+#define XGENE_DMA_RING_CMD_OFFSET		0x2C
+#define XGENE_DMA_RING_CMD_BASE_OFFSET(v)	((v) << 6)
+#define XGENE_DMA_RING_COHERENT_SET(m)		\
+	(((u32 *)(m))[2] |= BIT(4))
+#define XGENE_DMA_RING_ADDRL_SET(m, v)		\
+	(((u32 *)(m))[2] |= (((v) >> 8) << 5))
+#define XGENE_DMA_RING_ADDRH_SET(m, v)		\
+	(((u32 *)(m))[3] |= ((v) >> 35))
+#define XGENE_DMA_RING_ACCEPTLERR_SET(m)	\
+	(((u32 *)(m))[3] |= BIT(19))
+#define XGENE_DMA_RING_SIZE_SET(m, v)		\
+	(((u32 *)(m))[3] |= ((v) << 23))
+#define XGENE_DMA_RING_RECOMBBUF_SET(m)		\
+	(((u32 *)(m))[3] |= BIT(27))
+#define XGENE_DMA_RING_RECOMTIMEOUTL_SET(m)	\
+	(((u32 *)(m))[3] |= (0x7 << 28))
+#define XGENE_DMA_RING_RECOMTIMEOUTH_SET(m)	\
+	(((u32 *)(m))[4] |= 0x3)
+#define XGENE_DMA_RING_SELTHRSH_SET(m)		\
+	(((u32 *)(m))[4] |= BIT(3))
+#define XGENE_DMA_RING_TYPE_SET(m, v)		\
+	(((u32 *)(m))[4] |= ((v) << 19))
+
+/* X-Gene DMA device csr registers and bit definitions */
+#define XGENE_DMA_IPBRR				0x0
+#define XGENE_DMA_DEV_ID_RD(v)			((v) & 0x00000FFF)
+#define XGENE_DMA_BUS_ID_RD(v)			(((v) >> 12) & 3)
+#define XGENE_DMA_REV_NO_RD(v)			(((v) >> 14) & 3)
+#define XGENE_DMA_GCR				0x10
+#define XGENE_DMA_CH_SETUP(v)			\
+	((v) = ((v) & ~0x000FFFFF) | 0x000AAFFF)
+#define XGENE_DMA_ENABLE(v)			((v) |= BIT(31))
+#define XGENE_DMA_DISABLE(v)			((v) &= ~BIT(31))
+#define XGENE_DMA_RAID6_CONT			0x14
+#define XGENE_DMA_RAID6_MULTI_CTRL(v)		((v) << 24)
+#define XGENE_DMA_INT				0x70
+#define XGENE_DMA_INT_MASK			0x74
+#define XGENE_DMA_INT_ALL_MASK			0xFFFFFFFF
+#define XGENE_DMA_INT_ALL_UNMASK		0x0
+#define XGENE_DMA_INT_MASK_SHIFT		0x14
+#define XGENE_DMA_RING_INT0_MASK		0x90A0
+#define XGENE_DMA_RING_INT1_MASK		0x90A8
+#define XGENE_DMA_RING_INT2_MASK		0x90B0
+#define XGENE_DMA_RING_INT3_MASK		0x90B8
+#define XGENE_DMA_RING_INT4_MASK		0x90C0
+#define XGENE_DMA_CFG_RING_WQ_ASSOC		0x90E0
+#define XGENE_DMA_ASSOC_RING_MNGR1		0xFFFFFFFF
+#define XGENE_DMA_MEM_RAM_SHUTDOWN		0xD070
+#define XGENE_DMA_BLK_MEM_RDY			0xD074
+#define XGENE_DMA_BLK_MEM_RDY_VAL		0xFFFFFFFF
+#define XGENE_DMA_RING_CMD_SM_OFFSET		0x8000
+
+/* X-Gene SoC EFUSE csr register and bit definition */
+#define XGENE_SOC_JTAG1_SHADOW			0x18
+#define XGENE_DMA_PQ_DISABLE_MASK		BIT(13)
+
+/* X-Gene DMA Descriptor format */
+#define XGENE_DMA_DESC_NV_BIT			BIT_ULL(50)
+#define XGENE_DMA_DESC_IN_BIT			BIT_ULL(55)
+#define XGENE_DMA_DESC_C_BIT			BIT_ULL(63)
+#define XGENE_DMA_DESC_DR_BIT			BIT_ULL(61)
+#define XGENE_DMA_DESC_ELERR_POS		46
+#define XGENE_DMA_DESC_RTYPE_POS		56
+#define XGENE_DMA_DESC_LERR_POS			60
+#define XGENE_DMA_DESC_BUFLEN_POS		48
+#define XGENE_DMA_DESC_HOENQ_NUM_POS		48
+#define XGENE_DMA_DESC_ELERR_RD(m)		\
+	(((m) >> XGENE_DMA_DESC_ELERR_POS) & 0x3)
+#define XGENE_DMA_DESC_LERR_RD(m)		\
+	(((m) >> XGENE_DMA_DESC_LERR_POS) & 0x7)
+#define XGENE_DMA_DESC_STATUS(elerr, lerr)	\
+	(((elerr) << 4) | (lerr))
+
+/* X-Gene DMA descriptor empty s/w signature */
+#define XGENE_DMA_DESC_EMPTY_SIGNATURE		~0ULL
+
+/* X-Gene DMA configurable parameters defines */
+#define XGENE_DMA_RING_NUM		512
+#define XGENE_DMA_BUFNUM		0x0
+#define XGENE_DMA_CPU_BUFNUM		0x18
+#define XGENE_DMA_RING_OWNER_DMA	0x03
+#define XGENE_DMA_RING_OWNER_CPU	0x0F
+#define XGENE_DMA_RING_TYPE_REGULAR	0x01
+#define XGENE_DMA_RING_WQ_DESC_SIZE	32	/* 32 Bytes */
+#define XGENE_DMA_RING_NUM_CONFIG	5
+#define XGENE_DMA_MAX_CHANNEL		4
+#define XGENE_DMA_XOR_CHANNEL		0
+#define XGENE_DMA_PQ_CHANNEL		1
+#define XGENE_DMA_MAX_BYTE_CNT		0x4000	/* 16 KB */
+#define XGENE_DMA_MAX_64B_DESC_BYTE_CNT	0x14000	/* 80 KB */
+#define XGENE_DMA_MAX_XOR_SRC		5
+#define XGENE_DMA_16K_BUFFER_LEN_CODE	0x0
+#define XGENE_DMA_INVALID_LEN_CODE	0x7800000000000000ULL
+
+/* X-Gene DMA descriptor error codes */
+#define ERR_DESC_AXI			0x01
+#define ERR_BAD_DESC			0x02
+#define ERR_READ_DATA_AXI		0x03
+#define ERR_WRITE_DATA_AXI		0x04
+#define ERR_FBP_TIMEOUT			0x05
+#define ERR_ECC				0x06
+#define ERR_DIFF_SIZE			0x08
+#define ERR_SCT_GAT_LEN			0x09
+#define ERR_CRC_ERR			0x11
+#define ERR_CHKSUM			0x12
+#define ERR_DIF				0x13
+
+/* X-Gene DMA error interrupt codes */
+#define ERR_DIF_SIZE_INT		0x0
+#define ERR_GS_ERR_INT			0x1
+#define ERR_FPB_TIMEO_INT		0x2
+#define ERR_WFIFO_OVF_INT		0x3
+#define ERR_RFIFO_OVF_INT		0x4
+#define ERR_WR_TIMEO_INT		0x5
+#define ERR_RD_TIMEO_INT		0x6
+#define ERR_WR_ERR_INT			0x7
+#define ERR_RD_ERR_INT			0x8
+#define ERR_BAD_DESC_INT		0x9
+#define ERR_DESC_DST_INT		0xA
+#define ERR_DESC_SRC_INT		0xB
+
+/* X-Gene DMA flyby operation code */
+#define FLYBY_2SRC_XOR			0x80
+#define FLYBY_3SRC_XOR			0x90
+#define FLYBY_4SRC_XOR			0xA0
+#define FLYBY_5SRC_XOR			0xB0
+
+/* X-Gene DMA SW descriptor flags */
+#define XGENE_DMA_FLAG_64B_DESC		BIT(0)
+
+/* Define to dump X-Gene DMA descriptor */
+#define XGENE_DMA_DESC_DUMP(desc, m)	\
+	print_hex_dump(KERN_ERR, (m),	\
+			DUMP_PREFIX_ADDRESS, 16, 8, (desc), 32, 0)
+
+#define to_dma_desc_sw(tx)		\
+	container_of(tx, struct xgene_dma_desc_sw, tx)
+#define to_dma_chan(dchan)		\
+	container_of(dchan, struct xgene_dma_chan, dma_chan)
+
+#define chan_dbg(chan, fmt, arg...)	\
+	dev_dbg(chan->dev, "%s: " fmt, chan->name, ##arg)
+#define chan_err(chan, fmt, arg...)	\
+	dev_err(chan->dev, "%s: " fmt, chan->name, ##arg)
+
+struct xgene_dma_desc_hw {
+	__le64 m0;
+	__le64 m1;
+	__le64 m2;
+	__le64 m3;
+};
+
+enum xgene_dma_ring_cfgsize {
+	XGENE_DMA_RING_CFG_SIZE_512B,
+	XGENE_DMA_RING_CFG_SIZE_2KB,
+	XGENE_DMA_RING_CFG_SIZE_16KB,
+	XGENE_DMA_RING_CFG_SIZE_64KB,
+	XGENE_DMA_RING_CFG_SIZE_512KB,
+	XGENE_DMA_RING_CFG_SIZE_INVALID
+};
+
+struct xgene_dma_ring {
+	struct xgene_dma *pdma;
+	u8 buf_num;
+	u16 id;
+	u16 num;
+	u16 head;
+	u16 owner;
+	u16 slots;
+	u16 dst_ring_num;
+	u32 size;
+	void __iomem *cmd;
+	void __iomem *cmd_base;
+	dma_addr_t desc_paddr;
+	u32 state[XGENE_DMA_RING_NUM_CONFIG];
+	enum xgene_dma_ring_cfgsize cfgsize;
+	union {
+		void *desc_vaddr;
+		struct xgene_dma_desc_hw *desc_hw;
+	};
+};
+
+struct xgene_dma_desc_sw {
+	struct xgene_dma_desc_hw desc1;
+	struct xgene_dma_desc_hw desc2;
+	u32 flags;
+	struct list_head node;
+	struct list_head tx_list;
+	struct dma_async_tx_descriptor tx;
+};
+
+/**
+ * struct xgene_dma_chan - internal representation of an X-Gene DMA channel
+ * @dma_chan: dmaengine channel object member
+ * @pdma: X-Gene DMA device structure reference
+ * @dev: struct device reference for dma mapping api
+ * @id: raw id of this channel
+ * @rx_irq: channel IRQ
+ * @name: name of X-Gene DMA channel
+ * @lock: serializes enqueue/dequeue operations to the descriptor pool
+ * @pending: number of transaction requests pushed to the DMA controller for
+ *	execution, but still waiting for completion
+ * @max_outstanding: max number of outstanding requests we can push to the channel
+ * @ld_pending: descriptors which are queued to run, but have not yet been
+ *	submitted to the hardware for execution
+ * @ld_running: descriptors which are currently being executed by the hardware
+ * @ld_completed: descriptors which have finished execution by the hardware.
+ *	These descriptors have already had their cleanup actions run. They
+ *	are waiting for the ACK bit to be set by the async tx API.
+ * @desc_pool: descriptor pool for DMA operations
+ * @tasklet: bottom half where all completed descriptors are cleaned up
+ * @tx_ring: transmit ring descriptor that we use to prepare actual
+ *	descriptors for further execution
+ * @rx_ring: receive ring descriptor that we use to get completed DMA
+ *	descriptors during cleanup time
+ */
+struct xgene_dma_chan {
+	struct dma_chan dma_chan;
+	struct xgene_dma *pdma;
+	struct device *dev;
+	int id;
+	int rx_irq;
+	char name[10];
+	spinlock_t lock;
+	int pending;
+	int max_outstanding;
+	struct list_head ld_pending;
+	struct list_head ld_running;
+	struct list_head ld_completed;
+	struct dma_pool *desc_pool;
+	struct tasklet_struct tasklet;
+	struct xgene_dma_ring tx_ring;
+	struct xgene_dma_ring rx_ring;
+};
+
+/**
+ * struct xgene_dma - internal representation of an X-Gene DMA device
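+ * @dev: struct device reference used for logging and resource management
+ * @clk: reference to the DMA engine clock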
+ * @err_irq: DMA error irq number
+ * @ring_num: start id number for DMA ring
+ * @csr_dma: base for DMA register access
+ * @csr_ring: base for DMA ring register access
+ * @csr_ring_cmd: base for DMA ring command register access
+ * @csr_efuse: base for efuse register access
+ * @dma_dev: embedded struct dma_device
+ * @chan: reference to X-Gene DMA channels
+ */
+struct xgene_dma {
+	struct device *dev;
+	struct clk *clk;
+	int err_irq;
+	int ring_num;
+	void __iomem *csr_dma;
+	void __iomem *csr_ring;
+	void __iomem *csr_ring_cmd;
+	void __iomem *csr_efuse;
+	struct dma_device dma_dev[XGENE_DMA_MAX_CHANNEL];
+	struct xgene_dma_chan chan[XGENE_DMA_MAX_CHANNEL];
+};
+
+static const char * const xgene_dma_desc_err[] = {
+	[ERR_DESC_AXI] = "AXI error when reading src/dst link list",
+	[ERR_BAD_DESC] = "ERR or El_ERR fields not set to zero in desc",
+	[ERR_READ_DATA_AXI] = "AXI error when reading data",
+	[ERR_WRITE_DATA_AXI] = "AXI error when writing data",
+	[ERR_FBP_TIMEOUT] = "Timeout on bufpool fetch",
+	[ERR_ECC] = "ECC double bit error",
+	[ERR_DIFF_SIZE] = "Bufpool too small to hold all the DIF result",
+	[ERR_SCT_GAT_LEN] = "Gather and scatter data length not same",
+	[ERR_CRC_ERR] = "CRC error",
+	[ERR_CHKSUM] = "Checksum error",
+	[ERR_DIF] = "DIF error",
+};
+
+static const char * const xgene_dma_err[] = {
+	[ERR_DIF_SIZE_INT] = "DIF size error",
+	[ERR_GS_ERR_INT] = "Gather scatter not same size error",
+	[ERR_FPB_TIMEO_INT] = "Free pool time out error",
+	[ERR_WFIFO_OVF_INT] = "Write FIFO overflow error",
+	[ERR_RFIFO_OVF_INT] = "Read FIFO overflow error",
+	[ERR_WR_TIMEO_INT] = "Write time out error",
+	[ERR_RD_TIMEO_INT] = "Read time out error",
+	[ERR_WR_ERR_INT] = "HBF bus write error",
+	[ERR_RD_ERR_INT] = "HBF bus read error",
+	[ERR_BAD_DESC_INT] = "Ring descriptor HE0 not set error",
+	[ERR_DESC_DST_INT] = "HFB reading dst link address error",
+	[ERR_DESC_SRC_INT] = "HFB reading src link address error",
+};
+
+static bool is_pq_enabled(struct xgene_dma *pdma)
+{
+	u32 val;
+
+	val = ioread32(pdma->csr_efuse + XGENE_SOC_JTAG1_SHADOW);
+	return !(val & XGENE_DMA_PQ_DISABLE_MASK);
+}
+
+static u64 xgene_dma_encode_len(size_t len)
+{
+	return (len < XGENE_DMA_MAX_BYTE_CNT) ?
+		((u64)len << XGENE_DMA_DESC_BUFLEN_POS) :
+		XGENE_DMA_16K_BUFFER_LEN_CODE;
+}
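+
+/*
+ * Worked example (derived from the encoding above): a 4 KB buffer is encoded
+ * as 0x1000ULL << XGENE_DMA_DESC_BUFLEN_POS, while a full 16 KB chunk
+ * (XGENE_DMA_MAX_BYTE_CNT) uses the special 16K length code of 0x0.
+ */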
+
+static u8 xgene_dma_encode_xor_flyby(u32 src_cnt)
+{
+	static u8 flyby_type[] = {
+		FLYBY_2SRC_XOR, /* Dummy */
+		FLYBY_2SRC_XOR, /* Dummy */
+		FLYBY_2SRC_XOR,
+		FLYBY_3SRC_XOR,
+		FLYBY_4SRC_XOR,
+		FLYBY_5SRC_XOR
+	};
+
+	return flyby_type[src_cnt];
+}
+
+static void xgene_dma_set_src_buffer(__le64 *ext8, size_t *len,
+				     dma_addr_t *paddr)
+{
+	size_t nbytes = (*len < XGENE_DMA_MAX_BYTE_CNT) ?
+			*len : XGENE_DMA_MAX_BYTE_CNT;
+
+	*ext8 |= cpu_to_le64(*paddr);
+	*ext8 |= cpu_to_le64(xgene_dma_encode_len(nbytes));
+	*len -= nbytes;
+	*paddr += nbytes;
+}
+
+static __le64 *xgene_dma_lookup_ext8(struct xgene_dma_desc_hw *desc, int idx)
+{
+	switch (idx) {
+	case 0:
+		return &desc->m1;
+	case 1:
+		return &desc->m0;
+	case 2:
+		return &desc->m3;
+	case 3:
+		return &desc->m2;
+	default:
+		pr_err("Invalid dma descriptor index\n");
+	}
+
+	return NULL;
+}
+
+static void xgene_dma_init_desc(struct xgene_dma_desc_hw *desc,
+				u16 dst_ring_num)
+{
+	desc->m0 |= cpu_to_le64(XGENE_DMA_DESC_IN_BIT);
+	desc->m0 |= cpu_to_le64((u64)XGENE_DMA_RING_OWNER_DMA <<
+				XGENE_DMA_DESC_RTYPE_POS);
+	desc->m1 |= cpu_to_le64(XGENE_DMA_DESC_C_BIT);
+	desc->m3 |= cpu_to_le64((u64)dst_ring_num <<
+				XGENE_DMA_DESC_HOENQ_NUM_POS);
+}
+
+static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan,
+				    struct xgene_dma_desc_sw *desc_sw,
+				    dma_addr_t *dst, dma_addr_t *src,
+				    u32 src_cnt, size_t *nbytes,
+				    const u8 *scf)
+{
+	struct xgene_dma_desc_hw *desc1, *desc2;
+	size_t len = *nbytes;
+	int i;
+
+	desc1 = &desc_sw->desc1;
+	desc2 = &desc_sw->desc2;
+
+	/* Initialize DMA descriptor */
+	xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num);
+
+	/* Set destination address */
+	desc1->m2 |= cpu_to_le64(XGENE_DMA_DESC_DR_BIT);
+	desc1->m3 |= cpu_to_le64(*dst);
+
+	/* We have multiple source addresses, so we need to set the NV bit */
+	desc1->m0 |= cpu_to_le64(XGENE_DMA_DESC_NV_BIT);
+
+	/* Set flyby opcode */
+	desc1->m2 |= cpu_to_le64(xgene_dma_encode_xor_flyby(src_cnt));
+
+	/* Set 1st to 5th source addresses */
+	for (i = 0; i < src_cnt; i++) {
+		len = *nbytes;
+		xgene_dma_set_src_buffer((i == 0) ? &desc1->m1 :
+					 xgene_dma_lookup_ext8(desc2, i - 1),
+					 &len, &src[i]);
+		desc1->m2 |= cpu_to_le64((scf[i] << ((i + 1) * 8)));
+	}
+
+	/* Update meta data */
+	*nbytes = len;
+	*dst += XGENE_DMA_MAX_BYTE_CNT;
+
+	/* We always need a 64B descriptor to perform xor or pq operations */
+	desc_sw->flags |= XGENE_DMA_FLAG_64B_DESC;
+}
+
+static dma_cookie_t xgene_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct xgene_dma_desc_sw *desc;
+	struct xgene_dma_chan *chan;
+	dma_cookie_t cookie;
+
+	if (unlikely(!tx))
+		return -EINVAL;
+
+	chan = to_dma_chan(tx->chan);
+	desc = to_dma_desc_sw(tx);
+
+	spin_lock_bh(&chan->lock);
+
+	cookie = dma_cookie_assign(tx);
+
+	/* Add this transaction list onto the tail of the pending queue */
+	list_splice_tail_init(&desc->tx_list, &chan->ld_pending);
+
+	spin_unlock_bh(&chan->lock);
+
+	return cookie;
+}
+
+static void xgene_dma_clean_descriptor(struct xgene_dma_chan *chan,
+				       struct xgene_dma_desc_sw *desc)
+{
+	list_del(&desc->node);
+	chan_dbg(chan, "LD %p free\n", desc);
+	dma_pool_free(chan->desc_pool, desc, desc->tx.phys);
+}
+
+static struct xgene_dma_desc_sw *xgene_dma_alloc_descriptor(
+				 struct xgene_dma_chan *chan)
+{
+	struct xgene_dma_desc_sw *desc;
+	dma_addr_t phys;
+
+	desc = dma_pool_zalloc(chan->desc_pool, GFP_NOWAIT, &phys);
+	if (!desc) {
+		chan_err(chan, "Failed to allocate LDs\n");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&desc->tx_list);
+	desc->tx.phys = phys;
+	desc->tx.tx_submit = xgene_dma_tx_submit;
+	dma_async_tx_descriptor_init(&desc->tx, &chan->dma_chan);
+
+	chan_dbg(chan, "LD %p allocated\n", desc);
+
+	return desc;
+}
+
+/**
+ * xgene_dma_clean_completed_descriptor - free all descriptors which
+ * have been completed and acked
+ * @chan: X-Gene DMA channel
+ *
+ * This function is used on all completed and acked descriptors.
+ */
+static void xgene_dma_clean_completed_descriptor(struct xgene_dma_chan *chan)
+{
+	struct xgene_dma_desc_sw *desc, *_desc;
+
+	/* Free each completed and acked descriptor, in order */
+	list_for_each_entry_safe(desc, _desc, &chan->ld_completed, node) {
+		if (async_tx_test_ack(&desc->tx))
+			xgene_dma_clean_descriptor(chan, desc);
+	}
+}
+
+/**
+ * xgene_dma_run_tx_complete_actions - cleanup a single link descriptor
+ * @chan: X-Gene DMA channel
+ * @desc: descriptor to cleanup and free
+ *
+ * This function is used on a descriptor which has been executed by the DMA
+ * controller. It will run any callbacks and submit any dependencies.
+ */
+static void xgene_dma_run_tx_complete_actions(struct xgene_dma_chan *chan,
+					      struct xgene_dma_desc_sw *desc)
+{
+	struct dma_async_tx_descriptor *tx = &desc->tx;
+
+	/*
+	 * If this is not the last transaction in the group, there is no
+	 * need to complete the cookie or run any callback, as this is not
+	 * the tx descriptor that was handed back to the caller of this
+	 * DMA request.
+	 */
+
+	if (tx->cookie == 0)
+		return;
+
+	dma_cookie_complete(tx);
+	dma_descriptor_unmap(tx);
+
+	/* Run the link descriptor callback function */
+	dmaengine_desc_get_callback_invoke(tx, NULL);
+
+	/* Run any dependencies */
+	dma_run_dependencies(tx);
+}
+
+/**
+ * xgene_dma_clean_running_descriptor - move the completed descriptor from
+ * ld_running to ld_completed
+ * @chan: X-Gene DMA channel
+ * @desc: the descriptor which is completed
+ *
+ * Free the descriptor directly if acked by async_tx api,
+ * else move it to queue ld_completed.
+ */
+static void xgene_dma_clean_running_descriptor(struct xgene_dma_chan *chan,
+					       struct xgene_dma_desc_sw *desc)
+{
+	/* Remove from the list of running transactions */
+	list_del(&desc->node);
+
+	/*
+	 * the client is allowed to attach dependent operations
+	 * until 'ack' is set
+	 */
+	if (!async_tx_test_ack(&desc->tx)) {
+		/*
+		 * Move this descriptor to the list of descriptors which are
+		 * completed, but still awaiting the 'ack' bit to be set.
+		 */
+		list_add_tail(&desc->node, &chan->ld_completed);
+		return;
+	}
+
+	chan_dbg(chan, "LD %p free\n", desc);
+	dma_pool_free(chan->desc_pool, desc, desc->tx.phys);
+}
+
+static void xgene_chan_xfer_request(struct xgene_dma_chan *chan,
+				    struct xgene_dma_desc_sw *desc_sw)
+{
+	struct xgene_dma_ring *ring = &chan->tx_ring;
+	struct xgene_dma_desc_hw *desc_hw;
+
+	/* Get hw descriptor from DMA tx ring */
+	desc_hw = &ring->desc_hw[ring->head];
+
+	/*
+	 * Advance the head to point to the next
+	 * descriptor for next time
+	 */
+	if (++ring->head == ring->slots)
+		ring->head = 0;
+
+	/* Copy prepared sw descriptor data to hw descriptor */
+	memcpy(desc_hw, &desc_sw->desc1, sizeof(*desc_hw));
+
+	/*
+	 * Check if we have prepared a 64B descriptor;
+	 * in that case we need one more hw descriptor
+	 */
+	if (desc_sw->flags & XGENE_DMA_FLAG_64B_DESC) {
+		desc_hw = &ring->desc_hw[ring->head];
+
+		if (++ring->head == ring->slots)
+			ring->head = 0;
+
+		memcpy(desc_hw, &desc_sw->desc2, sizeof(*desc_hw));
+	}
+
+	/* Increment the pending transaction count */
+	chan->pending += ((desc_sw->flags &
+			  XGENE_DMA_FLAG_64B_DESC) ? 2 : 1);
+
+	/* Notify the hw that we have a descriptor ready for execution */
+	iowrite32((desc_sw->flags & XGENE_DMA_FLAG_64B_DESC) ?
+		  2 : 1, ring->cmd);
+}
+
+/**
+ * xgene_chan_xfer_ld_pending - push any pending transactions to hw
+ * @chan : X-Gene DMA channel
+ *
+ * LOCKING: must hold chan->lock
+ */
+static void xgene_chan_xfer_ld_pending(struct xgene_dma_chan *chan)
+{
+	struct xgene_dma_desc_sw *desc_sw, *_desc_sw;
+
+	/*
+	 * If the list of pending descriptors is empty, then we
+	 * don't need to do any work at all
+	 */
+	if (list_empty(&chan->ld_pending)) {
+		chan_dbg(chan, "No pending LDs\n");
+		return;
+	}
+
+	/*
+	 * Move elements from the queue of pending transactions onto the list
+	 * of running transactions and push them to hw for execution
+	 */
+	list_for_each_entry_safe(desc_sw, _desc_sw, &chan->ld_pending, node) {
+		/*
+		 * Check if we have pushed the max number of transactions
+		 * the hw can take; if so, stop here and push the remaining
+		 * elements from the pending ld queue after some of the
+		 * descriptors we have already pushed have completed
+		 */
+		if (chan->pending >= chan->max_outstanding)
+			return;
+
+		xgene_chan_xfer_request(chan, desc_sw);
+
+		/*
+		 * Delete this element from ld pending queue and append it to
+		 * ld running queue
+		 */
+		list_move_tail(&desc_sw->node, &chan->ld_running);
+	}
+}
+
+/**
+ * xgene_dma_cleanup_descriptors - clean up link descriptors which have completed
+ * and move them to ld_completed, where they are freed once the 'ack' flag is set
+ * @chan: X-Gene DMA channel
+ *
+ * This function is used on descriptors which have been executed by the DMA
+ * controller. It will run any callbacks, submit any dependencies, and then
+ * free these descriptors once the 'ack' flag is set.
+ */
+static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan)
+{
+	struct xgene_dma_ring *ring = &chan->rx_ring;
+	struct xgene_dma_desc_sw *desc_sw, *_desc_sw;
+	struct xgene_dma_desc_hw *desc_hw;
+	struct list_head ld_completed;
+	u8 status;
+
+	INIT_LIST_HEAD(&ld_completed);
+
+	spin_lock_bh(&chan->lock);
+
+	/* Clean already completed and acked descriptors */
+	xgene_dma_clean_completed_descriptor(chan);
+
+	/* Move all completed descriptors to ld completed queue, in order */
+	list_for_each_entry_safe(desc_sw, _desc_sw, &chan->ld_running, node) {
+		/* Get subsequent hw descriptor from DMA rx ring */
+		desc_hw = &ring->desc_hw[ring->head];
+
+		/* Check if this descriptor has been completed */
+		if (unlikely(le64_to_cpu(desc_hw->m0) ==
+			     XGENE_DMA_DESC_EMPTY_SIGNATURE))
+			break;
+
+		if (++ring->head == ring->slots)
+			ring->head = 0;
+
+		/* Check if we have any error with DMA transactions */
+		status = XGENE_DMA_DESC_STATUS(
+				XGENE_DMA_DESC_ELERR_RD(le64_to_cpu(
+							desc_hw->m0)),
+				XGENE_DMA_DESC_LERR_RD(le64_to_cpu(
+						       desc_hw->m0)));
+		if (status) {
+			/* Print the DMA error type */
+			chan_err(chan, "%s\n", xgene_dma_desc_err[status]);
+
+			/*
+			 * We have a DMA transaction error here. Dump the DMA
+			 * Tx and Rx descriptors for this request.
+			 */
+			XGENE_DMA_DESC_DUMP(&desc_sw->desc1,
+					    "X-Gene DMA TX DESC1: ");
+
+			if (desc_sw->flags & XGENE_DMA_FLAG_64B_DESC)
+				XGENE_DMA_DESC_DUMP(&desc_sw->desc2,
+						    "X-Gene DMA TX DESC2: ");
+
+			XGENE_DMA_DESC_DUMP(desc_hw,
+					    "X-Gene DMA RX ERR DESC: ");
+		}
+
+		/* Notify the hw about this completed descriptor */
+		iowrite32(-1, ring->cmd);
+
+		/* Mark this hw descriptor as processed */
+		desc_hw->m0 = cpu_to_le64(XGENE_DMA_DESC_EMPTY_SIGNATURE);
+
+		/*
+		 * Decrement the pending transaction count
+		 * as we have processed one
+		 */
+		chan->pending -= ((desc_sw->flags &
+				  XGENE_DMA_FLAG_64B_DESC) ? 2 : 1);
+
+		/*
+		 * Delete this node from ld running queue and append it to
+		 * ld completed queue for further processing
+		 */
+		list_move_tail(&desc_sw->node, &ld_completed);
+	}
+
+	/*
+	 * Start any pending transactions automatically. In the ideal case,
+	 * we keep the DMA controller busy while we go ahead and free the
+	 * descriptors below.
+	 */
+	xgene_chan_xfer_ld_pending(chan);
+
+	spin_unlock_bh(&chan->lock);
+
+	/* Run the callback for each descriptor, in order */
+	list_for_each_entry_safe(desc_sw, _desc_sw, &ld_completed, node) {
+		xgene_dma_run_tx_complete_actions(chan, desc_sw);
+		xgene_dma_clean_running_descriptor(chan, desc_sw);
+	}
+}
+
+static int xgene_dma_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct xgene_dma_chan *chan = to_dma_chan(dchan);
+
+	/* Has this channel already been allocated? */
+	if (chan->desc_pool)
+		return 1;
+
+	chan->desc_pool = dma_pool_create(chan->name, chan->dev,
+					  sizeof(struct xgene_dma_desc_sw),
+					  0, 0);
+	if (!chan->desc_pool) {
+		chan_err(chan, "Failed to allocate descriptor pool\n");
+		return -ENOMEM;
+	}
+
+	chan_dbg(chan, "Allocate descripto pool\n");
+
+	return 1;
+}
+
+/**
+ * xgene_dma_free_desc_list - Free all descriptors in a queue
+ * @chan: X-Gene DMA channel
+ * @list: the list to free
+ *
+ * LOCKING: must hold chan->lock
+ */
+static void xgene_dma_free_desc_list(struct xgene_dma_chan *chan,
+				     struct list_head *list)
+{
+	struct xgene_dma_desc_sw *desc, *_desc;
+
+	list_for_each_entry_safe(desc, _desc, list, node)
+		xgene_dma_clean_descriptor(chan, desc);
+}
+
+static void xgene_dma_free_chan_resources(struct dma_chan *dchan)
+{
+	struct xgene_dma_chan *chan = to_dma_chan(dchan);
+
+	chan_dbg(chan, "Free all resources\n");
+
+	if (!chan->desc_pool)
+		return;
+
+	/* Process all running descriptors */
+	xgene_dma_cleanup_descriptors(chan);
+
+	spin_lock_bh(&chan->lock);
+
+	/* Clean all link descriptor queues */
+	xgene_dma_free_desc_list(chan, &chan->ld_pending);
+	xgene_dma_free_desc_list(chan, &chan->ld_running);
+	xgene_dma_free_desc_list(chan, &chan->ld_completed);
+
+	spin_unlock_bh(&chan->lock);
+
+	/* Delete this channel DMA pool */
+	dma_pool_destroy(chan->desc_pool);
+	chan->desc_pool = NULL;
+}
+
+static struct dma_async_tx_descriptor *xgene_dma_prep_xor(
+	struct dma_chan *dchan, dma_addr_t dst,	dma_addr_t *src,
+	u32 src_cnt, size_t len, unsigned long flags)
+{
+	struct xgene_dma_desc_sw *first = NULL, *new;
+	struct xgene_dma_chan *chan;
+	static u8 multi[XGENE_DMA_MAX_XOR_SRC] = {
+				0x01, 0x01, 0x01, 0x01, 0x01};
+
+	if (unlikely(!dchan || !len))
+		return NULL;
+
+	chan = to_dma_chan(dchan);
+
+	do {
+		/* Allocate the link descriptor from DMA pool */
+		new = xgene_dma_alloc_descriptor(chan);
+		if (!new)
+			goto fail;
+
+		/* Prepare xor DMA descriptor */
+		xgene_dma_prep_xor_desc(chan, new, &dst, src,
+					src_cnt, &len, multi);
+
+		if (!first)
+			first = new;
+
+		new->tx.cookie = 0;
+		async_tx_ack(&new->tx);
+
+		/* Insert the link descriptor to the LD ring */
+		list_add_tail(&new->node, &first->tx_list);
+	} while (len);
+
+	new->tx.flags = flags; /* client is in control of this ack */
+	new->tx.cookie = -EBUSY;
+	list_splice(&first->tx_list, &new->tx_list);
+
+	return &new->tx;
+
+fail:
+	if (!first)
+		return NULL;
+
+	xgene_dma_free_desc_list(chan, &first->tx_list);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *xgene_dma_prep_pq(
+	struct dma_chan *dchan, dma_addr_t *dst, dma_addr_t *src,
+	u32 src_cnt, const u8 *scf, size_t len, unsigned long flags)
+{
+	struct xgene_dma_desc_sw *first = NULL, *new;
+	struct xgene_dma_chan *chan;
+	size_t _len = len;
+	dma_addr_t _src[XGENE_DMA_MAX_XOR_SRC];
+	static u8 multi[XGENE_DMA_MAX_XOR_SRC] = {0x01, 0x01, 0x01, 0x01, 0x01};
+
+	if (unlikely(!dchan || !len))
+		return NULL;
+
+	chan = to_dma_chan(dchan);
+
+	/*
+	 * Save the source addresses in a local variable; we may have to
+	 * prepare two descriptors to generate P and Q if both are enabled
+	 * in the flags by the client
+	 */
+	memcpy(_src, src, sizeof(*src) * src_cnt);
+
+	if (flags & DMA_PREP_PQ_DISABLE_P)
+		len = 0;
+
+	if (flags & DMA_PREP_PQ_DISABLE_Q)
+		_len = 0;
+
+	do {
+		/* Allocate the link descriptor from DMA pool */
+		new = xgene_dma_alloc_descriptor(chan);
+		if (!new)
+			goto fail;
+
+		if (!first)
+			first = new;
+
+		new->tx.cookie = 0;
+		async_tx_ack(&new->tx);
+
+		/* Insert the link descriptor to the LD ring */
+		list_add_tail(&new->node, &first->tx_list);
+
+		/*
+		 * Prepare DMA descriptor to generate P,
+		 * if DMA_PREP_PQ_DISABLE_P flag is not set
+		 */
+		if (len) {
+			xgene_dma_prep_xor_desc(chan, new, &dst[0], src,
+						src_cnt, &len, multi);
+			continue;
+		}
+
+		/*
+		 * Prepare DMA descriptor to generate Q,
+		 * if DMA_PREP_PQ_DISABLE_Q flag is not set
+		 */
+		if (_len) {
+			xgene_dma_prep_xor_desc(chan, new, &dst[1], _src,
+						src_cnt, &_len, scf);
+		}
+	} while (len || _len);
+
+	new->tx.flags = flags; /* client is in control of this ack */
+	new->tx.cookie = -EBUSY;
+	list_splice(&first->tx_list, &new->tx_list);
+
+	return &new->tx;
+
+fail:
+	if (!first)
+		return NULL;
+
+	xgene_dma_free_desc_list(chan, &first->tx_list);
+	return NULL;
+}
+
+static void xgene_dma_issue_pending(struct dma_chan *dchan)
+{
+	struct xgene_dma_chan *chan = to_dma_chan(dchan);
+
+	spin_lock_bh(&chan->lock);
+	xgene_chan_xfer_ld_pending(chan);
+	spin_unlock_bh(&chan->lock);
+}
+
+static enum dma_status xgene_dma_tx_status(struct dma_chan *dchan,
+					   dma_cookie_t cookie,
+					   struct dma_tx_state *txstate)
+{
+	return dma_cookie_status(dchan, cookie, txstate);
+}
+
+static void xgene_dma_tasklet_cb(unsigned long data)
+{
+	struct xgene_dma_chan *chan = (struct xgene_dma_chan *)data;
+
+	/* Run all cleanup for descriptors which have been completed */
+	xgene_dma_cleanup_descriptors(chan);
+
+	/* Re-enable DMA channel IRQ */
+	enable_irq(chan->rx_irq);
+}
+
+static irqreturn_t xgene_dma_chan_ring_isr(int irq, void *id)
+{
+	struct xgene_dma_chan *chan = (struct xgene_dma_chan *)id;
+
+	BUG_ON(!chan);
+
+	/*
+	 * Disable DMA channel IRQ until we process completed
+	 * descriptors
+	 */
+	disable_irq_nosync(chan->rx_irq);
+
+	/*
+	 * Schedule the tasklet to handle all cleanup of the current
+	 * transaction. It will start a new transaction if there is
+	 * one pending.
+	 */
+	tasklet_schedule(&chan->tasklet);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t xgene_dma_err_isr(int irq, void *id)
+{
+	struct xgene_dma *pdma = (struct xgene_dma *)id;
+	unsigned long int_mask;
+	u32 val, i;
+
+	val = ioread32(pdma->csr_dma + XGENE_DMA_INT);
+
+	/* Clear DMA interrupts */
+	iowrite32(val, pdma->csr_dma + XGENE_DMA_INT);
+
+	/* Print DMA error info */
+	int_mask = val >> XGENE_DMA_INT_MASK_SHIFT;
+	for_each_set_bit(i, &int_mask, ARRAY_SIZE(xgene_dma_err))
+		dev_err(pdma->dev,
+			"Interrupt status 0x%08X %s\n", val, xgene_dma_err[i]);
+
+	return IRQ_HANDLED;
+}
+
+static void xgene_dma_wr_ring_state(struct xgene_dma_ring *ring)
+{
+	int i;
+
+	iowrite32(ring->num, ring->pdma->csr_ring + XGENE_DMA_RING_STATE);
+
+	for (i = 0; i < XGENE_DMA_RING_NUM_CONFIG; i++)
+		iowrite32(ring->state[i], ring->pdma->csr_ring +
+			  XGENE_DMA_RING_STATE_WR_BASE + (i * 4));
+}
+
+static void xgene_dma_clr_ring_state(struct xgene_dma_ring *ring)
+{
+	memset(ring->state, 0, sizeof(u32) * XGENE_DMA_RING_NUM_CONFIG);
+	xgene_dma_wr_ring_state(ring);
+}
+
+static void xgene_dma_setup_ring(struct xgene_dma_ring *ring)
+{
+	void *ring_cfg = ring->state;
+	u64 addr = ring->desc_paddr;
+	u32 i, val;
+
+	ring->slots = ring->size / XGENE_DMA_RING_WQ_DESC_SIZE;
+
+	/* Clear DMA ring state */
+	xgene_dma_clr_ring_state(ring);
+
+	/* Set DMA ring type */
+	XGENE_DMA_RING_TYPE_SET(ring_cfg, XGENE_DMA_RING_TYPE_REGULAR);
+
+	if (ring->owner == XGENE_DMA_RING_OWNER_DMA) {
+		/* Set recombination buffer and timeout */
+		XGENE_DMA_RING_RECOMBBUF_SET(ring_cfg);
+		XGENE_DMA_RING_RECOMTIMEOUTL_SET(ring_cfg);
+		XGENE_DMA_RING_RECOMTIMEOUTH_SET(ring_cfg);
+	}
+
+	/* Initialize DMA ring state */
+	XGENE_DMA_RING_SELTHRSH_SET(ring_cfg);
+	XGENE_DMA_RING_ACCEPTLERR_SET(ring_cfg);
+	XGENE_DMA_RING_COHERENT_SET(ring_cfg);
+	XGENE_DMA_RING_ADDRL_SET(ring_cfg, addr);
+	XGENE_DMA_RING_ADDRH_SET(ring_cfg, addr);
+	XGENE_DMA_RING_SIZE_SET(ring_cfg, ring->cfgsize);
+
+	/* Write DMA ring configurations */
+	xgene_dma_wr_ring_state(ring);
+
+	/* Set DMA ring id */
+	iowrite32(XGENE_DMA_RING_ID_SETUP(ring->id),
+		  ring->pdma->csr_ring + XGENE_DMA_RING_ID);
+
+	/* Set DMA ring buffer */
+	iowrite32(XGENE_DMA_RING_ID_BUF_SETUP(ring->num),
+		  ring->pdma->csr_ring + XGENE_DMA_RING_ID_BUF);
+
+	if (ring->owner != XGENE_DMA_RING_OWNER_CPU)
+		return;
+
+	/* Set empty signature to DMA Rx ring descriptors */
+	for (i = 0; i < ring->slots; i++) {
+		struct xgene_dma_desc_hw *desc;
+
+		desc = &ring->desc_hw[i];
+		desc->m0 = cpu_to_le64(XGENE_DMA_DESC_EMPTY_SIGNATURE);
+	}
+
+	/* Enable DMA Rx ring interrupt */
+	val = ioread32(ring->pdma->csr_ring + XGENE_DMA_RING_NE_INT_MODE);
+	XGENE_DMA_RING_NE_INT_MODE_SET(val, ring->buf_num);
+	iowrite32(val, ring->pdma->csr_ring + XGENE_DMA_RING_NE_INT_MODE);
+}
+
+static void xgene_dma_clear_ring(struct xgene_dma_ring *ring)
+{
+	u32 ring_id, val;
+
+	if (ring->owner == XGENE_DMA_RING_OWNER_CPU) {
+		/* Disable DMA Rx ring interrupt */
+		val = ioread32(ring->pdma->csr_ring +
+			       XGENE_DMA_RING_NE_INT_MODE);
+		XGENE_DMA_RING_NE_INT_MODE_RESET(val, ring->buf_num);
+		iowrite32(val, ring->pdma->csr_ring +
+			  XGENE_DMA_RING_NE_INT_MODE);
+	}
+
+	/* Clear DMA ring state */
+	ring_id = XGENE_DMA_RING_ID_SETUP(ring->id);
+	iowrite32(ring_id, ring->pdma->csr_ring + XGENE_DMA_RING_ID);
+
+	iowrite32(0, ring->pdma->csr_ring + XGENE_DMA_RING_ID_BUF);
+	xgene_dma_clr_ring_state(ring);
+}
+
+static void xgene_dma_set_ring_cmd(struct xgene_dma_ring *ring)
+{
+	ring->cmd_base = ring->pdma->csr_ring_cmd +
+				XGENE_DMA_RING_CMD_BASE_OFFSET((ring->num -
+							  XGENE_DMA_RING_NUM));
+
+	ring->cmd = ring->cmd_base + XGENE_DMA_RING_CMD_OFFSET;
+}
+
+static int xgene_dma_get_ring_size(struct xgene_dma_chan *chan,
+				   enum xgene_dma_ring_cfgsize cfgsize)
+{
+	int size;
+
+	switch (cfgsize) {
+	case XGENE_DMA_RING_CFG_SIZE_512B:
+		size = 0x200;
+		break;
+	case XGENE_DMA_RING_CFG_SIZE_2KB:
+		size = 0x800;
+		break;
+	case XGENE_DMA_RING_CFG_SIZE_16KB:
+		size = 0x4000;
+		break;
+	case XGENE_DMA_RING_CFG_SIZE_64KB:
+		size = 0x10000;
+		break;
+	case XGENE_DMA_RING_CFG_SIZE_512KB:
+		size = 0x80000;
+		break;
+	default:
+		chan_err(chan, "Unsupported cfg ring size %d\n", cfgsize);
+		return -EINVAL;
+	}
+
+	return size;
+}
+
+static void xgene_dma_delete_ring_one(struct xgene_dma_ring *ring)
+{
+	/* Clear DMA ring configurations */
+	xgene_dma_clear_ring(ring);
+
+	/* De-allocate DMA ring descriptor */
+	if (ring->desc_vaddr) {
+		dma_free_coherent(ring->pdma->dev, ring->size,
+				  ring->desc_vaddr, ring->desc_paddr);
+		ring->desc_vaddr = NULL;
+	}
+}
+
+static void xgene_dma_delete_chan_rings(struct xgene_dma_chan *chan)
+{
+	xgene_dma_delete_ring_one(&chan->rx_ring);
+	xgene_dma_delete_ring_one(&chan->tx_ring);
+}
+
+static int xgene_dma_create_ring_one(struct xgene_dma_chan *chan,
+				     struct xgene_dma_ring *ring,
+				     enum xgene_dma_ring_cfgsize cfgsize)
+{
+	int ret;
+
+	/* Setup DMA ring descriptor variables */
+	ring->pdma = chan->pdma;
+	ring->cfgsize = cfgsize;
+	ring->num = chan->pdma->ring_num++;
+	ring->id = XGENE_DMA_RING_ID_GET(ring->owner, ring->buf_num);
+
+	ret = xgene_dma_get_ring_size(chan, cfgsize);
+	if (ret <= 0)
+		return ret;
+	ring->size = ret;
+
+	/* Allocate memory for DMA ring descriptor */
+	ring->desc_vaddr = dma_zalloc_coherent(chan->dev, ring->size,
+					       &ring->desc_paddr, GFP_KERNEL);
+	if (!ring->desc_vaddr) {
+		chan_err(chan, "Failed to allocate ring desc\n");
+		return -ENOMEM;
+	}
+
+	/* Configure and enable DMA ring */
+	xgene_dma_set_ring_cmd(ring);
+	xgene_dma_setup_ring(ring);
+
+	return 0;
+}
+
+static int xgene_dma_create_chan_rings(struct xgene_dma_chan *chan)
+{
+	struct xgene_dma_ring *rx_ring = &chan->rx_ring;
+	struct xgene_dma_ring *tx_ring = &chan->tx_ring;
+	int ret;
+
+	/* Create DMA Rx ring descriptor */
+	rx_ring->owner = XGENE_DMA_RING_OWNER_CPU;
+	rx_ring->buf_num = XGENE_DMA_CPU_BUFNUM + chan->id;
+
+	ret = xgene_dma_create_ring_one(chan, rx_ring,
+					XGENE_DMA_RING_CFG_SIZE_64KB);
+	if (ret)
+		return ret;
+
+	chan_dbg(chan, "Rx ring id 0x%X num %d desc 0x%p\n",
+		 rx_ring->id, rx_ring->num, rx_ring->desc_vaddr);
+
+	/* Create DMA Tx ring descriptor */
+	tx_ring->owner = XGENE_DMA_RING_OWNER_DMA;
+	tx_ring->buf_num = XGENE_DMA_BUFNUM + chan->id;
+
+	ret = xgene_dma_create_ring_one(chan, tx_ring,
+					XGENE_DMA_RING_CFG_SIZE_64KB);
+	if (ret) {
+		xgene_dma_delete_ring_one(rx_ring);
+		return ret;
+	}
+
+	tx_ring->dst_ring_num = XGENE_DMA_RING_DST_ID(rx_ring->num);
+
+	chan_dbg(chan,
+		 "Tx ring id 0x%X num %d desc 0x%p\n",
+		 tx_ring->id, tx_ring->num, tx_ring->desc_vaddr);
+
+	/* Set the max outstanding request possible to this channel */
+	chan->max_outstanding = tx_ring->slots;
+
+	return ret;
+}
+
+static int xgene_dma_init_rings(struct xgene_dma *pdma)
+{
+	int ret, i, j;
+
+	for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
+		ret = xgene_dma_create_chan_rings(&pdma->chan[i]);
+		if (ret) {
+			for (j = 0; j < i; j++)
+				xgene_dma_delete_chan_rings(&pdma->chan[j]);
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+static void xgene_dma_enable(struct xgene_dma *pdma)
+{
+	u32 val;
+
+	/* Configure and enable DMA engine */
+	val = ioread32(pdma->csr_dma + XGENE_DMA_GCR);
+	XGENE_DMA_CH_SETUP(val);
+	XGENE_DMA_ENABLE(val);
+	iowrite32(val, pdma->csr_dma + XGENE_DMA_GCR);
+}
+
+static void xgene_dma_disable(struct xgene_dma *pdma)
+{
+	u32 val;
+
+	val = ioread32(pdma->csr_dma + XGENE_DMA_GCR);
+	XGENE_DMA_DISABLE(val);
+	iowrite32(val, pdma->csr_dma + XGENE_DMA_GCR);
+}
+
+static void xgene_dma_mask_interrupts(struct xgene_dma *pdma)
+{
+	/*
+	 * Mask DMA ring overflow, underflow and
+	 * AXI write/read error interrupts
+	 */
+	iowrite32(XGENE_DMA_INT_ALL_MASK,
+		  pdma->csr_dma + XGENE_DMA_RING_INT0_MASK);
+	iowrite32(XGENE_DMA_INT_ALL_MASK,
+		  pdma->csr_dma + XGENE_DMA_RING_INT1_MASK);
+	iowrite32(XGENE_DMA_INT_ALL_MASK,
+		  pdma->csr_dma + XGENE_DMA_RING_INT2_MASK);
+	iowrite32(XGENE_DMA_INT_ALL_MASK,
+		  pdma->csr_dma + XGENE_DMA_RING_INT3_MASK);
+	iowrite32(XGENE_DMA_INT_ALL_MASK,
+		  pdma->csr_dma + XGENE_DMA_RING_INT4_MASK);
+
+	/* Mask DMA error interrupts */
+	iowrite32(XGENE_DMA_INT_ALL_MASK, pdma->csr_dma + XGENE_DMA_INT_MASK);
+}
+
+static void xgene_dma_unmask_interrupts(struct xgene_dma *pdma)
+{
+	/*
+	 * Unmask DMA ring overflow, underflow and
+	 * AXI write/read error interrupts
+	 */
+	iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+		  pdma->csr_dma + XGENE_DMA_RING_INT0_MASK);
+	iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+		  pdma->csr_dma + XGENE_DMA_RING_INT1_MASK);
+	iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+		  pdma->csr_dma + XGENE_DMA_RING_INT2_MASK);
+	iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+		  pdma->csr_dma + XGENE_DMA_RING_INT3_MASK);
+	iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+		  pdma->csr_dma + XGENE_DMA_RING_INT4_MASK);
+
+	/* Unmask DMA error interrupts */
+	iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+		  pdma->csr_dma + XGENE_DMA_INT_MASK);
+}
+
+static void xgene_dma_init_hw(struct xgene_dma *pdma)
+{
+	u32 val;
+
+	/* Associate DMA ring to corresponding ring HW */
+	iowrite32(XGENE_DMA_ASSOC_RING_MNGR1,
+		  pdma->csr_dma + XGENE_DMA_CFG_RING_WQ_ASSOC);
+
+	/* Configure RAID6 polynomial control setting */
+	if (is_pq_enabled(pdma))
+		iowrite32(XGENE_DMA_RAID6_MULTI_CTRL(0x1D),
+			  pdma->csr_dma + XGENE_DMA_RAID6_CONT);
+	else
+		dev_info(pdma->dev, "PQ is disabled in HW\n");
+
+	xgene_dma_enable(pdma);
+	xgene_dma_unmask_interrupts(pdma);
+
+	/* Get DMA id and version info */
+	val = ioread32(pdma->csr_dma + XGENE_DMA_IPBRR);
+
+	/* DMA device info */
+	dev_info(pdma->dev,
+		 "X-Gene DMA v%d.%02d.%02d driver registered %d channels",
+		 XGENE_DMA_REV_NO_RD(val), XGENE_DMA_BUS_ID_RD(val),
+		 XGENE_DMA_DEV_ID_RD(val), XGENE_DMA_MAX_CHANNEL);
+}
+
+static int xgene_dma_init_ring_mngr(struct xgene_dma *pdma)
+{
+	if (ioread32(pdma->csr_ring + XGENE_DMA_RING_CLKEN) &&
+	    (!ioread32(pdma->csr_ring + XGENE_DMA_RING_SRST)))
+		return 0;
+
+	iowrite32(0x3, pdma->csr_ring + XGENE_DMA_RING_CLKEN);
+	iowrite32(0x0, pdma->csr_ring + XGENE_DMA_RING_SRST);
+
+	/* Bring up memory */
+	iowrite32(0x0, pdma->csr_ring + XGENE_DMA_RING_MEM_RAM_SHUTDOWN);
+
+	/* Force a barrier */
+	ioread32(pdma->csr_ring + XGENE_DMA_RING_MEM_RAM_SHUTDOWN);
+
+	/* reset may take up to 1ms */
+	usleep_range(1000, 1100);
+
+	if (ioread32(pdma->csr_ring + XGENE_DMA_RING_BLK_MEM_RDY)
+		!= XGENE_DMA_RING_BLK_MEM_RDY_VAL) {
+		dev_err(pdma->dev,
+			"Failed to release ring mngr memory from shutdown\n");
+		return -ENODEV;
+	}
+
+	/* program threshold set 1 and all hysteresis */
+	iowrite32(XGENE_DMA_RING_THRESLD0_SET1_VAL,
+		  pdma->csr_ring + XGENE_DMA_RING_THRESLD0_SET1);
+	iowrite32(XGENE_DMA_RING_THRESLD1_SET1_VAL,
+		  pdma->csr_ring + XGENE_DMA_RING_THRESLD1_SET1);
+	iowrite32(XGENE_DMA_RING_HYSTERESIS_VAL,
+		  pdma->csr_ring + XGENE_DMA_RING_HYSTERESIS);
+
+	/* Enable QPcore and assign error queue */
+	iowrite32(XGENE_DMA_RING_ENABLE,
+		  pdma->csr_ring + XGENE_DMA_RING_CONFIG);
+
+	return 0;
+}
+
+static int xgene_dma_init_mem(struct xgene_dma *pdma)
+{
+	int ret;
+
+	ret = xgene_dma_init_ring_mngr(pdma);
+	if (ret)
+		return ret;
+
+	/* Bring up memory */
+	iowrite32(0x0, pdma->csr_dma + XGENE_DMA_MEM_RAM_SHUTDOWN);
+
+	/* Force a barrier */
+	ioread32(pdma->csr_dma + XGENE_DMA_MEM_RAM_SHUTDOWN);
+
+	/* reset may take up to 1ms */
+	usleep_range(1000, 1100);
+
+	if (ioread32(pdma->csr_dma + XGENE_DMA_BLK_MEM_RDY)
+		!= XGENE_DMA_BLK_MEM_RDY_VAL) {
+		dev_err(pdma->dev,
+			"Failed to release DMA memory from shutdown\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static int xgene_dma_request_irqs(struct xgene_dma *pdma)
+{
+	struct xgene_dma_chan *chan;
+	int ret, i, j;
+
+	/* Register DMA error irq */
+	ret = devm_request_irq(pdma->dev, pdma->err_irq, xgene_dma_err_isr,
+			       0, "dma_error", pdma);
+	if (ret) {
+		dev_err(pdma->dev,
+			"Failed to register error IRQ %d\n", pdma->err_irq);
+		return ret;
+	}
+
+	/* Register DMA channel rx irq */
+	for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
+		chan = &pdma->chan[i];
+		irq_set_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY);
+		ret = devm_request_irq(chan->dev, chan->rx_irq,
+				       xgene_dma_chan_ring_isr,
+				       0, chan->name, chan);
+		if (ret) {
+			chan_err(chan, "Failed to register Rx IRQ %d\n",
+				 chan->rx_irq);
+			devm_free_irq(pdma->dev, pdma->err_irq, pdma);
+
+			for (j = 0; j < i; j++) {
+				chan = &pdma->chan[j];
+				irq_clear_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY);
+				devm_free_irq(chan->dev, chan->rx_irq, chan);
+			}
+
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static void xgene_dma_free_irqs(struct xgene_dma *pdma)
+{
+	struct xgene_dma_chan *chan;
+	int i;
+
+	/* Free DMA device error irq */
+	devm_free_irq(pdma->dev, pdma->err_irq, pdma);
+
+	for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
+		chan = &pdma->chan[i];
+		irq_clear_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY);
+		devm_free_irq(chan->dev, chan->rx_irq, chan);
+	}
+}
+
+static void xgene_dma_set_caps(struct xgene_dma_chan *chan,
+			       struct dma_device *dma_dev)
+{
+	/* Initialize DMA device capability mask */
+	dma_cap_zero(dma_dev->cap_mask);
+
+	/* Set DMA device capability */
+
+	/* The X-Gene SoC DMA engine channel 0 supports XOR, and channel 1
+	 * supports both XOR and PQ. First, the hw provides a mechanism to
+	 * enable/disable PQ/XOR support on channel 1, which we can check by
+	 * reading the SoC efuse register.
+	 * Second, there is a hw erratum: if channel 0 and channel 1 execute
+	 * XOR and PQ requests simultaneously, the DMA engine hangs. So we
+	 * enable XOR on channel 0 only if XOR and PQ support on channel 1
+	 * is disabled.
+	 */
+	if ((chan->id == XGENE_DMA_PQ_CHANNEL) &&
+	    is_pq_enabled(chan->pdma)) {
+		dma_cap_set(DMA_PQ, dma_dev->cap_mask);
+		dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+	} else if ((chan->id == XGENE_DMA_XOR_CHANNEL) &&
+		   !is_pq_enabled(chan->pdma)) {
+		dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+	}
+
+	/* Set base and prep routines */
+	dma_dev->dev = chan->dev;
+	dma_dev->device_alloc_chan_resources = xgene_dma_alloc_chan_resources;
+	dma_dev->device_free_chan_resources = xgene_dma_free_chan_resources;
+	dma_dev->device_issue_pending = xgene_dma_issue_pending;
+	dma_dev->device_tx_status = xgene_dma_tx_status;
+
+	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+		dma_dev->device_prep_dma_xor = xgene_dma_prep_xor;
+		dma_dev->max_xor = XGENE_DMA_MAX_XOR_SRC;
+		dma_dev->xor_align = DMAENGINE_ALIGN_64_BYTES;
+	}
+
+	if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+		dma_dev->device_prep_dma_pq = xgene_dma_prep_pq;
+		dma_dev->max_pq = XGENE_DMA_MAX_XOR_SRC;
+		dma_dev->pq_align = DMAENGINE_ALIGN_64_BYTES;
+	}
+}
+
+static int xgene_dma_async_register(struct xgene_dma *pdma, int id)
+{
+	struct xgene_dma_chan *chan = &pdma->chan[id];
+	struct dma_device *dma_dev = &pdma->dma_dev[id];
+	int ret;
+
+	chan->dma_chan.device = dma_dev;
+
+	spin_lock_init(&chan->lock);
+	INIT_LIST_HEAD(&chan->ld_pending);
+	INIT_LIST_HEAD(&chan->ld_running);
+	INIT_LIST_HEAD(&chan->ld_completed);
+	tasklet_init(&chan->tasklet, xgene_dma_tasklet_cb,
+		     (unsigned long)chan);
+
+	chan->pending = 0;
+	chan->desc_pool = NULL;
+	dma_cookie_init(&chan->dma_chan);
+
+	/* Setup dma device capabilities and prep routines */
+	xgene_dma_set_caps(chan, dma_dev);
+
+	/* Initialize DMA device list head */
+	INIT_LIST_HEAD(&dma_dev->channels);
+	list_add_tail(&chan->dma_chan.device_node, &dma_dev->channels);
+
+	/* Register with the Linux async DMA framework */
+	ret = dma_async_device_register(dma_dev);
+	if (ret) {
+		chan_err(chan, "Failed to register async device %d", ret);
+		tasklet_kill(&chan->tasklet);
+
+		return ret;
+	}
+
+	/* DMA capability info */
+	dev_info(pdma->dev,
+		 "%s: CAPABILITY ( %s%s)\n", dma_chan_name(&chan->dma_chan),
+		 dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "XOR " : "",
+		 dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "PQ " : "");
+
+	return 0;
+}
+
+static int xgene_dma_init_async(struct xgene_dma *pdma)
+{
+	int ret, i, j;
+
+	for (i = 0; i < XGENE_DMA_MAX_CHANNEL ; i++) {
+		ret = xgene_dma_async_register(pdma, i);
+		if (ret) {
+			for (j = 0; j < i; j++) {
+				dma_async_device_unregister(&pdma->dma_dev[j]);
+				tasklet_kill(&pdma->chan[j].tasklet);
+			}
+
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+static void xgene_dma_async_unregister(struct xgene_dma *pdma)
+{
+	int i;
+
+	for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++)
+		dma_async_device_unregister(&pdma->dma_dev[i]);
+}
+
+static void xgene_dma_init_channels(struct xgene_dma *pdma)
+{
+	struct xgene_dma_chan *chan;
+	int i;
+
+	pdma->ring_num = XGENE_DMA_RING_NUM;
+
+	for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
+		chan = &pdma->chan[i];
+		chan->dev = pdma->dev;
+		chan->pdma = pdma;
+		chan->id = i;
+		snprintf(chan->name, sizeof(chan->name), "dmachan%d", chan->id);
+	}
+}
+
+static int xgene_dma_get_resources(struct platform_device *pdev,
+				   struct xgene_dma *pdma)
+{
+	struct resource *res;
+	int irq, i;
+
+	/* Get DMA csr region */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "Failed to get csr region\n");
+		return -ENXIO;
+	}
+
+	pdma->csr_dma = devm_ioremap(&pdev->dev, res->start,
+				     resource_size(res));
+	if (!pdma->csr_dma) {
+		dev_err(&pdev->dev, "Failed to ioremap csr region");
+		return -ENOMEM;
+	}
+
+	/* Get DMA ring csr region */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!res) {
+		dev_err(&pdev->dev, "Failed to get ring csr region\n");
+		return -ENXIO;
+	}
+
+	pdma->csr_ring =  devm_ioremap(&pdev->dev, res->start,
+				       resource_size(res));
+	if (!pdma->csr_ring) {
+		dev_err(&pdev->dev, "Failed to ioremap ring csr region");
+		return -ENOMEM;
+	}
+
+	/* Get DMA ring cmd csr region */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+	if (!res) {
+		dev_err(&pdev->dev, "Failed to get ring cmd csr region\n");
+		return -ENXIO;
+	}
+
+	pdma->csr_ring_cmd = devm_ioremap(&pdev->dev, res->start,
+					  resource_size(res));
+	if (!pdma->csr_ring_cmd) {
+		dev_err(&pdev->dev, "Failed to ioremap ring cmd csr region");
+		return -ENOMEM;
+	}
+
+	pdma->csr_ring_cmd += XGENE_DMA_RING_CMD_SM_OFFSET;
+
+	/* Get efuse csr region */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 3);
+	if (!res) {
+		dev_err(&pdev->dev, "Failed to get efuse csr region\n");
+		return -ENXIO;
+	}
+
+	pdma->csr_efuse = devm_ioremap(&pdev->dev, res->start,
+				       resource_size(res));
+	if (!pdma->csr_efuse) {
+		dev_err(&pdev->dev, "Failed to ioremap efuse csr region\n");
+		return -ENOMEM;
+	}
+
+	/* Get DMA error interrupt */
+	irq = platform_get_irq(pdev, 0);
+	if (irq <= 0) {
+		dev_err(&pdev->dev, "Failed to get Error IRQ\n");
+		return -ENXIO;
+	}
+
+	pdma->err_irq = irq;
+
+	/* Get DMA Rx ring descriptor interrupts for all DMA channels */
+	for (i = 1; i <= XGENE_DMA_MAX_CHANNEL; i++) {
+		irq = platform_get_irq(pdev, i);
+		if (irq <= 0) {
+			dev_err(&pdev->dev, "Failed to get Rx IRQ\n");
+			return -ENXIO;
+		}
+
+		pdma->chan[i - 1].rx_irq = irq;
+	}
+
+	return 0;
+}
+
+static int xgene_dma_probe(struct platform_device *pdev)
+{
+	struct xgene_dma *pdma;
+	int ret, i;
+
+	pdma = devm_kzalloc(&pdev->dev, sizeof(*pdma), GFP_KERNEL);
+	if (!pdma)
+		return -ENOMEM;
+
+	pdma->dev = &pdev->dev;
+	platform_set_drvdata(pdev, pdma);
+
+	ret = xgene_dma_get_resources(pdev, pdma);
+	if (ret)
+		return ret;
+
+	pdma->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(pdma->clk) && !ACPI_COMPANION(&pdev->dev)) {
+		dev_err(&pdev->dev, "Failed to get clk\n");
+		return PTR_ERR(pdma->clk);
+	}
+
+	/* Enable clk before accessing registers */
+	if (!IS_ERR(pdma->clk)) {
+		ret = clk_prepare_enable(pdma->clk);
+		if (ret) {
+			dev_err(&pdev->dev, "Failed to enable clk %d\n", ret);
+			return ret;
+		}
+	}
+
+	/* Bring DMA RAM out of shutdown */
+	ret = xgene_dma_init_mem(pdma);
+	if (ret)
+		goto err_clk_enable;
+
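+	/* Set a 42-bit DMA mask before any DMA memory is allocated */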
+	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(42));
+	if (ret) {
+		dev_err(&pdev->dev, "No usable DMA configuration\n");
+		goto err_dma_mask;
+	}
+
+	/* Initialize DMA channels software state */
+	xgene_dma_init_channels(pdma);
+
+	/* Configure DMA rings */
+	ret = xgene_dma_init_rings(pdma);
+	if (ret)
+		goto err_clk_enable;
+
+	ret = xgene_dma_request_irqs(pdma);
+	if (ret)
+		goto err_request_irq;
+
+	/* Configure and enable DMA engine */
+	xgene_dma_init_hw(pdma);
+
+	/* Register DMA device with the Linux async framework */
+	ret = xgene_dma_init_async(pdma);
+	if (ret)
+		goto err_async_init;
+
+	return 0;
+
+err_async_init:
+	xgene_dma_free_irqs(pdma);
+
+err_request_irq:
+	for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++)
+		xgene_dma_delete_chan_rings(&pdma->chan[i]);
+
+err_dma_mask:
+err_clk_enable:
+	if (!IS_ERR(pdma->clk))
+		clk_disable_unprepare(pdma->clk);
+
+	return ret;
+}
+
+static int xgene_dma_remove(struct platform_device *pdev)
+{
+	struct xgene_dma *pdma = platform_get_drvdata(pdev);
+	struct xgene_dma_chan *chan;
+	int i;
+
+	xgene_dma_async_unregister(pdma);
+
+	/* Mask interrupts and disable DMA engine */
+	xgene_dma_mask_interrupts(pdma);
+	xgene_dma_disable(pdma);
+	xgene_dma_free_irqs(pdma);
+
+	for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
+		chan = &pdma->chan[i];
+		tasklet_kill(&chan->tasklet);
+		xgene_dma_delete_chan_rings(chan);
+	}
+
+	if (!IS_ERR(pdma->clk))
+		clk_disable_unprepare(pdma->clk);
+
+	return 0;
+}
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id xgene_dma_acpi_match_ptr[] = {
+	{"APMC0D43", 0},
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, xgene_dma_acpi_match_ptr);
+#endif
+
+static const struct of_device_id xgene_dma_of_match_ptr[] = {
+	{.compatible = "apm,xgene-storm-dma",},
+	{},
+};
+MODULE_DEVICE_TABLE(of, xgene_dma_of_match_ptr);
+
+static struct platform_driver xgene_dma_driver = {
+	.probe = xgene_dma_probe,
+	.remove = xgene_dma_remove,
+	.driver = {
+		.name = "X-Gene-DMA",
+		.of_match_table = xgene_dma_of_match_ptr,
+		.acpi_match_table = ACPI_PTR(xgene_dma_acpi_match_ptr),
+	},
+};
+
+module_platform_driver(xgene_dma_driver);
+
+MODULE_DESCRIPTION("APM X-Gene SoC DMA driver");
+MODULE_AUTHOR("Rameshwar Prasad Sahu <rsahu@apm.com>");
+MODULE_AUTHOR("Loc Ho <lho@apm.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0");
diff --git a/drivers/dma/xilinx/Makefile b/drivers/dma/xilinx/Makefile
new file mode 100644
index 0000000..9e91f8f
--- /dev/null
+++ b/drivers/dma/xilinx/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_XILINX_DMA) += xilinx_dma.o
+obj-$(CONFIG_XILINX_ZYNQMP_DMA) += zynqmp_dma.o
diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
new file mode 100644
index 0000000..c124423
--- /dev/null
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -0,0 +1,2774 @@
+/*
+ * DMA driver for Xilinx Video DMA Engine
+ *
+ * Copyright (C) 2010-2014 Xilinx, Inc. All rights reserved.
+ *
+ * Based on the Freescale DMA driver.
+ *
+ * Description:
+ * The AXI Video Direct Memory Access (AXI VDMA) core is a soft Xilinx IP
+ * core that provides high-bandwidth direct memory access between memory
+ * and AXI4-Stream type video target peripherals. The core provides efficient
+ * two dimensional DMA operations with independent asynchronous read (S2MM)
+ * and write (MM2S) channel operation. It can be configured to have either
+ * one channel or two channels. If configured as two channels, one is to
+ * transmit to the video device (MM2S) and another is to receive from the
+ * video device (S2MM). Initialization, status, interrupt and management
+ * registers are accessed through an AXI4-Lite slave interface.
+ *
+ * The AXI Direct Memory Access (AXI DMA) core is a soft Xilinx IP core that
+ * provides high-bandwidth one dimensional direct memory access between memory
+ * and AXI4-Stream target peripherals. It supports one receive and one
+ * transmit channel, both of them optional at synthesis time.
+ *
+ * The AXI CDMA is a soft IP core that provides high-bandwidth Direct Memory
+ * Access (DMA) between a memory-mapped source address and a memory-mapped
+ * destination address.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/bitops.h>
+#include <linux/dmapool.h>
+#include <linux/dma/xilinx_dma.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+#include <linux/of_irq.h>
+#include <linux/slab.h>
+#include <linux/clk.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+
+#include "../dmaengine.h"
+
+/* Register/Descriptor Offsets */
+#define XILINX_DMA_MM2S_CTRL_OFFSET		0x0000
+#define XILINX_DMA_S2MM_CTRL_OFFSET		0x0030
+#define XILINX_VDMA_MM2S_DESC_OFFSET		0x0050
+#define XILINX_VDMA_S2MM_DESC_OFFSET		0x00a0
+
+/* Control Registers */
+#define XILINX_DMA_REG_DMACR			0x0000
+#define XILINX_DMA_DMACR_DELAY_MAX		0xff
+#define XILINX_DMA_DMACR_DELAY_SHIFT		24
+#define XILINX_DMA_DMACR_FRAME_COUNT_MAX	0xff
+#define XILINX_DMA_DMACR_FRAME_COUNT_SHIFT	16
+#define XILINX_DMA_DMACR_ERR_IRQ		BIT(14)
+#define XILINX_DMA_DMACR_DLY_CNT_IRQ		BIT(13)
+#define XILINX_DMA_DMACR_FRM_CNT_IRQ		BIT(12)
+#define XILINX_DMA_DMACR_MASTER_SHIFT		8
+#define XILINX_DMA_DMACR_FSYNCSRC_SHIFT	5
+#define XILINX_DMA_DMACR_FRAMECNT_EN		BIT(4)
+#define XILINX_DMA_DMACR_GENLOCK_EN		BIT(3)
+#define XILINX_DMA_DMACR_RESET			BIT(2)
+#define XILINX_DMA_DMACR_CIRC_EN		BIT(1)
+#define XILINX_DMA_DMACR_RUNSTOP		BIT(0)
+#define XILINX_DMA_DMACR_FSYNCSRC_MASK		GENMASK(6, 5)
+
+#define XILINX_DMA_REG_DMASR			0x0004
+#define XILINX_DMA_DMASR_EOL_LATE_ERR		BIT(15)
+#define XILINX_DMA_DMASR_ERR_IRQ		BIT(14)
+#define XILINX_DMA_DMASR_DLY_CNT_IRQ		BIT(13)
+#define XILINX_DMA_DMASR_FRM_CNT_IRQ		BIT(12)
+#define XILINX_DMA_DMASR_SOF_LATE_ERR		BIT(11)
+#define XILINX_DMA_DMASR_SG_DEC_ERR		BIT(10)
+#define XILINX_DMA_DMASR_SG_SLV_ERR		BIT(9)
+#define XILINX_DMA_DMASR_EOF_EARLY_ERR		BIT(8)
+#define XILINX_DMA_DMASR_SOF_EARLY_ERR		BIT(7)
+#define XILINX_DMA_DMASR_DMA_DEC_ERR		BIT(6)
+#define XILINX_DMA_DMASR_DMA_SLAVE_ERR		BIT(5)
+#define XILINX_DMA_DMASR_DMA_INT_ERR		BIT(4)
+#define XILINX_DMA_DMASR_IDLE			BIT(1)
+#define XILINX_DMA_DMASR_HALTED		BIT(0)
+#define XILINX_DMA_DMASR_DELAY_MASK		GENMASK(31, 24)
+#define XILINX_DMA_DMASR_FRAME_COUNT_MASK	GENMASK(23, 16)
+
+#define XILINX_DMA_REG_CURDESC			0x0008
+#define XILINX_DMA_REG_TAILDESC		0x0010
+#define XILINX_DMA_REG_REG_INDEX		0x0014
+#define XILINX_DMA_REG_FRMSTORE		0x0018
+#define XILINX_DMA_REG_THRESHOLD		0x001c
+#define XILINX_DMA_REG_FRMPTR_STS		0x0024
+#define XILINX_DMA_REG_PARK_PTR		0x0028
+#define XILINX_DMA_PARK_PTR_WR_REF_SHIFT	8
+#define XILINX_DMA_PARK_PTR_WR_REF_MASK		GENMASK(12, 8)
+#define XILINX_DMA_PARK_PTR_RD_REF_SHIFT	0
+#define XILINX_DMA_PARK_PTR_RD_REF_MASK		GENMASK(4, 0)
+#define XILINX_DMA_REG_VDMA_VERSION		0x002c
+
+/* Register Direct Mode Registers */
+#define XILINX_DMA_REG_VSIZE			0x0000
+#define XILINX_DMA_REG_HSIZE			0x0004
+
+#define XILINX_DMA_REG_FRMDLY_STRIDE		0x0008
+#define XILINX_DMA_FRMDLY_STRIDE_FRMDLY_SHIFT	24
+#define XILINX_DMA_FRMDLY_STRIDE_STRIDE_SHIFT	0
+
+#define XILINX_VDMA_REG_START_ADDRESS(n)	(0x000c + 4 * (n))
+#define XILINX_VDMA_REG_START_ADDRESS_64(n)	(0x000c + 8 * (n))
+
+#define XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP	0x00ec
+#define XILINX_VDMA_ENABLE_VERTICAL_FLIP	BIT(0)
+
+/* HW specific definitions */
+#define XILINX_DMA_MAX_CHANS_PER_DEVICE	0x20
+
+#define XILINX_DMA_DMAXR_ALL_IRQ_MASK	\
+		(XILINX_DMA_DMASR_FRM_CNT_IRQ | \
+		 XILINX_DMA_DMASR_DLY_CNT_IRQ | \
+		 XILINX_DMA_DMASR_ERR_IRQ)
+
+#define XILINX_DMA_DMASR_ALL_ERR_MASK	\
+		(XILINX_DMA_DMASR_EOL_LATE_ERR | \
+		 XILINX_DMA_DMASR_SOF_LATE_ERR | \
+		 XILINX_DMA_DMASR_SG_DEC_ERR | \
+		 XILINX_DMA_DMASR_SG_SLV_ERR | \
+		 XILINX_DMA_DMASR_EOF_EARLY_ERR | \
+		 XILINX_DMA_DMASR_SOF_EARLY_ERR | \
+		 XILINX_DMA_DMASR_DMA_DEC_ERR | \
+		 XILINX_DMA_DMASR_DMA_SLAVE_ERR | \
+		 XILINX_DMA_DMASR_DMA_INT_ERR)
+
+/*
+ * Recoverable errors are DMA Internal error, SOF Early, EOF Early
+ * and SOF Late. They are only recoverable when C_FLUSH_ON_FSYNC
+ * is enabled in the h/w system.
+ */
+#define XILINX_DMA_DMASR_ERR_RECOVER_MASK	\
+		(XILINX_DMA_DMASR_SOF_LATE_ERR | \
+		 XILINX_DMA_DMASR_EOF_EARLY_ERR | \
+		 XILINX_DMA_DMASR_SOF_EARLY_ERR | \
+		 XILINX_DMA_DMASR_DMA_INT_ERR)
+
+/* Axi VDMA Flush on Fsync bits */
+#define XILINX_DMA_FLUSH_S2MM		3
+#define XILINX_DMA_FLUSH_MM2S		2
+#define XILINX_DMA_FLUSH_BOTH		1
+
+/* Delay loop counter to prevent hardware failure */
+#define XILINX_DMA_LOOP_COUNT		1000000
+
+/* AXI DMA Specific Registers/Offsets */
+#define XILINX_DMA_REG_SRCDSTADDR	0x18
+#define XILINX_DMA_REG_BTT		0x28
+
+/* AXI DMA Specific Masks/Bit fields */
+#define XILINX_DMA_MAX_TRANS_LEN	GENMASK(22, 0)
+#define XILINX_DMA_CR_COALESCE_MAX	GENMASK(23, 16)
+#define XILINX_DMA_CR_CYCLIC_BD_EN_MASK	BIT(4)
+#define XILINX_DMA_CR_COALESCE_SHIFT	16
+#define XILINX_DMA_BD_SOP		BIT(27)
+#define XILINX_DMA_BD_EOP		BIT(26)
+#define XILINX_DMA_COALESCE_MAX		255
+#define XILINX_DMA_NUM_DESCS		255
+#define XILINX_DMA_NUM_APP_WORDS	5
+
+/* Multi-Channel DMA Descriptor offsets */
+#define XILINX_DMA_MCRX_CDESC(x)	(0x40 + (x-1) * 0x20)
+#define XILINX_DMA_MCRX_TDESC(x)	(0x48 + (x-1) * 0x20)
+
+/* Multi-Channel DMA Masks/Shifts */
+#define XILINX_DMA_BD_HSIZE_MASK	GENMASK(15, 0)
+#define XILINX_DMA_BD_STRIDE_MASK	GENMASK(15, 0)
+#define XILINX_DMA_BD_VSIZE_MASK	GENMASK(31, 19)
+#define XILINX_DMA_BD_TDEST_MASK	GENMASK(4, 0)
+#define XILINX_DMA_BD_STRIDE_SHIFT	0
+#define XILINX_DMA_BD_VSIZE_SHIFT	19
+
+/* AXI CDMA Specific Registers/Offsets */
+#define XILINX_CDMA_REG_SRCADDR		0x18
+#define XILINX_CDMA_REG_DSTADDR		0x20
+
+/* AXI CDMA Specific Masks */
+#define XILINX_CDMA_CR_SGMODE          BIT(3)
+
+/**
+ * struct xilinx_vdma_desc_hw - Hardware Descriptor
+ * @next_desc: Next Descriptor Pointer @0x00
+ * @pad1: Reserved @0x04
+ * @buf_addr: Buffer address @0x08
+ * @buf_addr_msb: MSB of Buffer address @0x0C
+ * @vsize: Vertical Size @0x10
+ * @hsize: Horizontal Size @0x14
+ * @stride: Number of bytes between the first
+ *	    pixels of each horizontal line @0x18
+ */
+struct xilinx_vdma_desc_hw {
+	u32 next_desc;
+	u32 pad1;
+	u32 buf_addr;
+	u32 buf_addr_msb;
+	u32 vsize;
+	u32 hsize;
+	u32 stride;
+} __aligned(64);
+
+/**
+ * struct xilinx_axidma_desc_hw - Hardware Descriptor for AXI DMA
+ * @next_desc: Next Descriptor Pointer @0x00
+ * @next_desc_msb: MSB of Next Descriptor Pointer @0x04
+ * @buf_addr: Buffer address @0x08
+ * @buf_addr_msb: MSB of Buffer address @0x0C
+ * @mcdma_control: Control field for mcdma @0x10
+ * @vsize_stride: Vsize and Stride field for mcdma @0x14
+ * @control: Control field @0x18
+ * @status: Status field @0x1C
+ * @app: APP Fields @0x20 - 0x30
+ */
+struct xilinx_axidma_desc_hw {
+	u32 next_desc;
+	u32 next_desc_msb;
+	u32 buf_addr;
+	u32 buf_addr_msb;
+	u32 mcdma_control;
+	u32 vsize_stride;
+	u32 control;
+	u32 status;
+	u32 app[XILINX_DMA_NUM_APP_WORDS];
+} __aligned(64);
+
+/**
+ * struct xilinx_cdma_desc_hw - Hardware Descriptor
+ * @next_desc: Next Descriptor Pointer @0x00
+ * @next_desc_msb: Next Descriptor Pointer MSB @0x04
+ * @src_addr: Source address @0x08
+ * @src_addr_msb: Source address MSB @0x0C
+ * @dest_addr: Destination address @0x10
+ * @dest_addr_msb: Destination address MSB @0x14
+ * @control: Control field @0x18
+ * @status: Status field @0x1C
+ */
+struct xilinx_cdma_desc_hw {
+	u32 next_desc;
+	u32 next_desc_msb;
+	u32 src_addr;
+	u32 src_addr_msb;
+	u32 dest_addr;
+	u32 dest_addr_msb;
+	u32 control;
+	u32 status;
+} __aligned(64);
+
+/**
+ * struct xilinx_vdma_tx_segment - Descriptor segment
+ * @hw: Hardware descriptor
+ * @node: Node in the descriptor segments list
+ * @phys: Physical address of segment
+ */
+struct xilinx_vdma_tx_segment {
+	struct xilinx_vdma_desc_hw hw;
+	struct list_head node;
+	dma_addr_t phys;
+} __aligned(64);
+
+/**
+ * struct xilinx_axidma_tx_segment - Descriptor segment
+ * @hw: Hardware descriptor
+ * @node: Node in the descriptor segments list
+ * @phys: Physical address of segment
+ */
+struct xilinx_axidma_tx_segment {
+	struct xilinx_axidma_desc_hw hw;
+	struct list_head node;
+	dma_addr_t phys;
+} __aligned(64);
+
+/**
+ * struct xilinx_cdma_tx_segment - Descriptor segment
+ * @hw: Hardware descriptor
+ * @node: Node in the descriptor segments list
+ * @phys: Physical address of segment
+ */
+struct xilinx_cdma_tx_segment {
+	struct xilinx_cdma_desc_hw hw;
+	struct list_head node;
+	dma_addr_t phys;
+} __aligned(64);
+
+/**
+ * struct xilinx_dma_tx_descriptor - Per Transaction structure
+ * @async_tx: Async transaction descriptor
+ * @segments: TX segments list
+ * @node: Node in the channel descriptors list
+ * @cyclic: Whether this descriptor describes a cyclic transfer
+ */
+struct xilinx_dma_tx_descriptor {
+	struct dma_async_tx_descriptor async_tx;
+	struct list_head segments;
+	struct list_head node;
+	bool cyclic;
+};
+
+/**
+ * struct xilinx_dma_chan - Driver specific DMA channel structure
+ * @xdev: Driver specific device structure
+ * @ctrl_offset: Control registers offset
+ * @desc_offset: TX descriptor registers offset
+ * @lock: Descriptor operation lock
+ * @pending_list: Descriptors waiting
+ * @active_list: Descriptors ready to submit
+ * @done_list: Complete descriptors
+ * @free_seg_list: Free descriptors
+ * @common: DMA common channel
+ * @desc_pool: Descriptors pool
+ * @dev: The dma device
+ * @irq: Channel IRQ
+ * @id: Channel ID
+ * @direction: Transfer direction
+ * @num_frms: Number of frames
+ * @has_sg: Support scatter transfers
+ * @cyclic: Whether a cyclic transfer is in progress
+ * @genlock: Support genlock mode
+ * @err: Channel has errors
+ * @idle: Whether the channel is idle
+ * @tasklet: Cleanup work after irq
+ * @config: Device configuration info
+ * @flush_on_fsync: Flush on Frame sync
+ * @desc_pendingcount: Descriptor pending count
+ * @ext_addr: Indicates 64 bit addressing is supported by dma channel
+ * @desc_submitcount: Descriptor h/w submitted count
+ * @residue: Residue for AXI DMA
+ * @seg_v: Statically allocated segments base
+ * @seg_p: Physical allocated segments base
+ * @cyclic_seg_v: Statically allocated segment base for cyclic transfers
+ * @cyclic_seg_p: Physical allocated segments base for cyclic dma
+ * @start_transfer: Per DMA IP callback to start a transfer
+ * @stop_transfer: Per DMA IP callback to quiesce a transfer
+ * @tdest: TDEST value for mcdma
+ * @has_vflip: S2MM vertical flip
+ */
+struct xilinx_dma_chan {
+	struct xilinx_dma_device *xdev;
+	u32 ctrl_offset;
+	u32 desc_offset;
+	spinlock_t lock;
+	struct list_head pending_list;
+	struct list_head active_list;
+	struct list_head done_list;
+	struct list_head free_seg_list;
+	struct dma_chan common;
+	struct dma_pool *desc_pool;
+	struct device *dev;
+	int irq;
+	int id;
+	enum dma_transfer_direction direction;
+	int num_frms;
+	bool has_sg;
+	bool cyclic;
+	bool genlock;
+	bool err;
+	bool idle;
+	struct tasklet_struct tasklet;
+	struct xilinx_vdma_config config;
+	bool flush_on_fsync;
+	u32 desc_pendingcount;
+	bool ext_addr;
+	u32 desc_submitcount;
+	u32 residue;
+	struct xilinx_axidma_tx_segment *seg_v;
+	dma_addr_t seg_p;
+	struct xilinx_axidma_tx_segment *cyclic_seg_v;
+	dma_addr_t cyclic_seg_p;
+	void (*start_transfer)(struct xilinx_dma_chan *chan);
+	int (*stop_transfer)(struct xilinx_dma_chan *chan);
+	u16 tdest;
+	bool has_vflip;
+};
+
+/**
+ * enum xdma_ip_type - DMA IP type.
+ *
+ * @XDMA_TYPE_AXIDMA: Axi dma ip.
+ * @XDMA_TYPE_CDMA: Axi cdma ip.
+ * @XDMA_TYPE_VDMA: Axi vdma ip.
+ *
+ */
+enum xdma_ip_type {
+	XDMA_TYPE_AXIDMA = 0,
+	XDMA_TYPE_CDMA,
+	XDMA_TYPE_VDMA,
+};
+
+struct xilinx_dma_config {
+	enum xdma_ip_type dmatype;
+	int (*clk_init)(struct platform_device *pdev, struct clk **axi_clk,
+			struct clk **tx_clk, struct clk **txs_clk,
+			struct clk **rx_clk, struct clk **rxs_clk);
+};
+
+/**
+ * struct xilinx_dma_device - DMA device structure
+ * @regs: I/O mapped base address
+ * @dev: Device Structure
+ * @common: DMA device structure
+ * @chan: Driver specific DMA channel
+ * @has_sg: Specifies whether Scatter-Gather is present or not
+ * @mcdma: Specifies whether Multi-Channel is present or not
+ * @flush_on_fsync: Flush on frame sync
+ * @ext_addr: Indicates 64 bit addressing is supported by dma device
+ * @pdev: Platform device structure pointer
+ * @dma_config: DMA config structure
+ * @axi_clk: DMA Axi4-lite interface clock
+ * @tx_clk: DMA mm2s clock
+ * @txs_clk: DMA mm2s stream clock
+ * @rx_clk: DMA s2mm clock
+ * @rxs_clk: DMA s2mm stream clock
+ * @nr_channels: Number of channels DMA device supports
+ * @chan_id: DMA channel identifier
+ */
+struct xilinx_dma_device {
+	void __iomem *regs;
+	struct device *dev;
+	struct dma_device common;
+	struct xilinx_dma_chan *chan[XILINX_DMA_MAX_CHANS_PER_DEVICE];
+	bool has_sg;
+	bool mcdma;
+	u32 flush_on_fsync;
+	bool ext_addr;
+	struct platform_device  *pdev;
+	const struct xilinx_dma_config *dma_config;
+	struct clk *axi_clk;
+	struct clk *tx_clk;
+	struct clk *txs_clk;
+	struct clk *rx_clk;
+	struct clk *rxs_clk;
+	u32 nr_channels;
+	u32 chan_id;
+};
+
+/* Macros */
+#define to_xilinx_chan(chan) \
+	container_of(chan, struct xilinx_dma_chan, common)
+#define to_dma_tx_descriptor(tx) \
+	container_of(tx, struct xilinx_dma_tx_descriptor, async_tx)
+#define xilinx_dma_poll_timeout(chan, reg, val, cond, delay_us, timeout_us) \
+	readl_poll_timeout(chan->xdev->regs + chan->ctrl_offset + reg, val, \
+			   cond, delay_us, timeout_us)
+
+/* IO accessors */
+static inline u32 dma_read(struct xilinx_dma_chan *chan, u32 reg)
+{
+	return ioread32(chan->xdev->regs + reg);
+}
+
+static inline void dma_write(struct xilinx_dma_chan *chan, u32 reg, u32 value)
+{
+	iowrite32(value, chan->xdev->regs + reg);
+}
+
+static inline void vdma_desc_write(struct xilinx_dma_chan *chan, u32 reg,
+				   u32 value)
+{
+	dma_write(chan, chan->desc_offset + reg, value);
+}
+
+static inline u32 dma_ctrl_read(struct xilinx_dma_chan *chan, u32 reg)
+{
+	return dma_read(chan, chan->ctrl_offset + reg);
+}
+
+static inline void dma_ctrl_write(struct xilinx_dma_chan *chan, u32 reg,
+				   u32 value)
+{
+	dma_write(chan, chan->ctrl_offset + reg, value);
+}
+
+static inline void dma_ctrl_clr(struct xilinx_dma_chan *chan, u32 reg,
+				 u32 clr)
+{
+	dma_ctrl_write(chan, reg, dma_ctrl_read(chan, reg) & ~clr);
+}
+
+static inline void dma_ctrl_set(struct xilinx_dma_chan *chan, u32 reg,
+				 u32 set)
+{
+	dma_ctrl_write(chan, reg, dma_ctrl_read(chan, reg) | set);
+}
+
+/**
+ * vdma_desc_write_64 - 64-bit descriptor write
+ * @chan: Driver specific VDMA channel
+ * @reg: Register to write
+ * @value_lsb: lower address of the descriptor.
+ * @value_msb: upper address of the descriptor.
+ *
+ * Since the VDMA driver writes to register offsets that are not 64-bit
+ * aligned (e.g. 0x5c), the descriptor address is written as two separate
+ * 32-bit writes instead of a single 64-bit register write.
+ */
+static inline void vdma_desc_write_64(struct xilinx_dma_chan *chan, u32 reg,
+				      u32 value_lsb, u32 value_msb)
+{
+	/* Write the lsb 32 bits */
+	writel(value_lsb, chan->xdev->regs + chan->desc_offset + reg);
+
+	/* Write the msb 32 bits */
+	writel(value_msb, chan->xdev->regs + chan->desc_offset + reg + 4);
+}
+
+static inline void dma_writeq(struct xilinx_dma_chan *chan, u32 reg, u64 value)
+{
+	lo_hi_writeq(value, chan->xdev->regs + chan->ctrl_offset + reg);
+}
+
+static inline void xilinx_write(struct xilinx_dma_chan *chan, u32 reg,
+				dma_addr_t addr)
+{
+	if (chan->ext_addr)
+		dma_writeq(chan, reg, addr);
+	else
+		dma_ctrl_write(chan, reg, addr);
+}
+
+static inline void xilinx_axidma_buf(struct xilinx_dma_chan *chan,
+				     struct xilinx_axidma_desc_hw *hw,
+				     dma_addr_t buf_addr, size_t sg_used,
+				     size_t period_len)
+{
+	if (chan->ext_addr) {
+		hw->buf_addr = lower_32_bits(buf_addr + sg_used + period_len);
+		hw->buf_addr_msb = upper_32_bits(buf_addr + sg_used +
+						 period_len);
+	} else {
+		hw->buf_addr = buf_addr + sg_used + period_len;
+	}
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptors and segments alloc and free
+ */
+
+/**
+ * xilinx_vdma_alloc_tx_segment - Allocate transaction segment
+ * @chan: Driver specific DMA channel
+ *
+ * Return: The allocated segment on success and NULL on failure.
+ */
+static struct xilinx_vdma_tx_segment *
+xilinx_vdma_alloc_tx_segment(struct xilinx_dma_chan *chan)
+{
+	struct xilinx_vdma_tx_segment *segment;
+	dma_addr_t phys;
+
+	segment = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &phys);
+	if (!segment)
+		return NULL;
+
+	segment->phys = phys;
+
+	return segment;
+}
+
+/**
+ * xilinx_cdma_alloc_tx_segment - Allocate transaction segment
+ * @chan: Driver specific DMA channel
+ *
+ * Return: The allocated segment on success and NULL on failure.
+ */
+static struct xilinx_cdma_tx_segment *
+xilinx_cdma_alloc_tx_segment(struct xilinx_dma_chan *chan)
+{
+	struct xilinx_cdma_tx_segment *segment;
+	dma_addr_t phys;
+
+	segment = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &phys);
+	if (!segment)
+		return NULL;
+
+	segment->phys = phys;
+
+	return segment;
+}
+
+/**
+ * xilinx_axidma_alloc_tx_segment - Allocate transaction segment
+ * @chan: Driver specific DMA channel
+ *
+ * Return: The allocated segment on success and NULL on failure.
+ */
+static struct xilinx_axidma_tx_segment *
+xilinx_axidma_alloc_tx_segment(struct xilinx_dma_chan *chan)
+{
+	struct xilinx_axidma_tx_segment *segment = NULL;
+	unsigned long flags;
+
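+	/*
+	 * AXI DMA segments are preallocated when the channel is set up;
+	 * take the next one from the channel's free list.
+	 */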
+	spin_lock_irqsave(&chan->lock, flags);
+	if (!list_empty(&chan->free_seg_list)) {
+		segment = list_first_entry(&chan->free_seg_list,
+					   struct xilinx_axidma_tx_segment,
+					   node);
+		list_del(&segment->node);
+	}
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	return segment;
+}
+
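+/**
+ * xilinx_dma_clean_hw_desc - Clean an AXI DMA hardware descriptor
+ * @hw: Hardware descriptor to clean
+ *
+ * Zero the descriptor contents while preserving the next-descriptor
+ * pointers, since the preallocated descriptors stay linked in a ring.
+ */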
+static void xilinx_dma_clean_hw_desc(struct xilinx_axidma_desc_hw *hw)
+{
+	u32 next_desc = hw->next_desc;
+	u32 next_desc_msb = hw->next_desc_msb;
+
+	memset(hw, 0, sizeof(struct xilinx_axidma_desc_hw));
+
+	hw->next_desc = next_desc;
+	hw->next_desc_msb = next_desc_msb;
+}
+
+/**
+ * xilinx_dma_free_tx_segment - Free transaction segment
+ * @chan: Driver specific DMA channel
+ * @segment: DMA transaction segment
+ */
+static void xilinx_dma_free_tx_segment(struct xilinx_dma_chan *chan,
+				struct xilinx_axidma_tx_segment *segment)
+{
+	xilinx_dma_clean_hw_desc(&segment->hw);
+
+	list_add_tail(&segment->node, &chan->free_seg_list);
+}
+
+/**
+ * xilinx_cdma_free_tx_segment - Free transaction segment
+ * @chan: Driver specific DMA channel
+ * @segment: DMA transaction segment
+ */
+static void xilinx_cdma_free_tx_segment(struct xilinx_dma_chan *chan,
+				struct xilinx_cdma_tx_segment *segment)
+{
+	dma_pool_free(chan->desc_pool, segment, segment->phys);
+}
+
+/**
+ * xilinx_vdma_free_tx_segment - Free transaction segment
+ * @chan: Driver specific DMA channel
+ * @segment: DMA transaction segment
+ */
+static void xilinx_vdma_free_tx_segment(struct xilinx_dma_chan *chan,
+					struct xilinx_vdma_tx_segment *segment)
+{
+	dma_pool_free(chan->desc_pool, segment, segment->phys);
+}
+
+/**
+ * xilinx_dma_alloc_tx_descriptor - Allocate transaction descriptor
+ * @chan: Driver specific DMA channel
+ *
+ * Return: The allocated descriptor on success and NULL on failure.
+ */
+static struct xilinx_dma_tx_descriptor *
+xilinx_dma_alloc_tx_descriptor(struct xilinx_dma_chan *chan)
+{
+	struct xilinx_dma_tx_descriptor *desc;
+
+	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+	if (!desc)
+		return NULL;
+
+	INIT_LIST_HEAD(&desc->segments);
+
+	return desc;
+}
+
+/**
+ * xilinx_dma_free_tx_descriptor - Free transaction descriptor
+ * @chan: Driver specific DMA channel
+ * @desc: DMA transaction descriptor
+ */
+static void
+xilinx_dma_free_tx_descriptor(struct xilinx_dma_chan *chan,
+			       struct xilinx_dma_tx_descriptor *desc)
+{
+	struct xilinx_vdma_tx_segment *segment, *next;
+	struct xilinx_cdma_tx_segment *cdma_segment, *cdma_next;
+	struct xilinx_axidma_tx_segment *axidma_segment, *axidma_next;
+
+	if (!desc)
+		return;
+
+	if (chan->xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
+		list_for_each_entry_safe(segment, next, &desc->segments, node) {
+			list_del(&segment->node);
+			xilinx_vdma_free_tx_segment(chan, segment);
+		}
+	} else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
+		list_for_each_entry_safe(cdma_segment, cdma_next,
+					 &desc->segments, node) {
+			list_del(&cdma_segment->node);
+			xilinx_cdma_free_tx_segment(chan, cdma_segment);
+		}
+	} else {
+		list_for_each_entry_safe(axidma_segment, axidma_next,
+					 &desc->segments, node) {
+			list_del(&axidma_segment->node);
+			xilinx_dma_free_tx_segment(chan, axidma_segment);
+		}
+	}
+
+	kfree(desc);
+}
+
+/* Required functions */
+
+/**
+ * xilinx_dma_free_desc_list - Free descriptors list
+ * @chan: Driver specific DMA channel
+ * @list: List to parse and delete the descriptor
+ */
+static void xilinx_dma_free_desc_list(struct xilinx_dma_chan *chan,
+					struct list_head *list)
+{
+	struct xilinx_dma_tx_descriptor *desc, *next;
+
+	list_for_each_entry_safe(desc, next, list, node) {
+		list_del(&desc->node);
+		xilinx_dma_free_tx_descriptor(chan, desc);
+	}
+}
+
+/**
+ * xilinx_dma_free_descriptors - Free channel descriptors
+ * @chan: Driver specific DMA channel
+ */
+static void xilinx_dma_free_descriptors(struct xilinx_dma_chan *chan)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	xilinx_dma_free_desc_list(chan, &chan->pending_list);
+	xilinx_dma_free_desc_list(chan, &chan->done_list);
+	xilinx_dma_free_desc_list(chan, &chan->active_list);
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/**
+ * xilinx_dma_free_chan_resources - Free channel resources
+ * @dchan: DMA channel
+ */
+static void xilinx_dma_free_chan_resources(struct dma_chan *dchan)
+{
+	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+	unsigned long flags;
+
+	dev_dbg(chan->dev, "Free all channel resources.\n");
+
+	xilinx_dma_free_descriptors(chan);
+
+	if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
+		spin_lock_irqsave(&chan->lock, flags);
+		INIT_LIST_HEAD(&chan->free_seg_list);
+		spin_unlock_irqrestore(&chan->lock, flags);
+
+		/* Free memory that is allocated for BD */
+		dma_free_coherent(chan->dev, sizeof(*chan->seg_v) *
+				  XILINX_DMA_NUM_DESCS, chan->seg_v,
+				  chan->seg_p);
+
+		/* Free Memory that is allocated for cyclic DMA Mode */
+		dma_free_coherent(chan->dev, sizeof(*chan->cyclic_seg_v),
+				  chan->cyclic_seg_v, chan->cyclic_seg_p);
+	}
+
+	if (chan->xdev->dma_config->dmatype != XDMA_TYPE_AXIDMA) {
+		dma_pool_destroy(chan->desc_pool);
+		chan->desc_pool = NULL;
+	}
+}
+
+/**
+ * xilinx_dma_chan_handle_cyclic - Cyclic dma callback
+ * @chan: Driver specific dma channel
+ * @desc: dma transaction descriptor
+ * @flags: flags for spin lock
+ */
+static void xilinx_dma_chan_handle_cyclic(struct xilinx_dma_chan *chan,
+					  struct xilinx_dma_tx_descriptor *desc,
+					  unsigned long *flags)
+{
+	dma_async_tx_callback callback;
+	void *callback_param;
+
+	callback = desc->async_tx.callback;
+	callback_param = desc->async_tx.callback_param;
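+	/*
+	 * Drop the channel lock around the client callback so that the
+	 * callback may safely submit further descriptors.
+	 */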
+	if (callback) {
+		spin_unlock_irqrestore(&chan->lock, *flags);
+		callback(callback_param);
+		spin_lock_irqsave(&chan->lock, *flags);
+	}
+}
+
+/**
+ * xilinx_dma_chan_desc_cleanup - Clean channel descriptors
+ * @chan: Driver specific DMA channel
+ */
+static void xilinx_dma_chan_desc_cleanup(struct xilinx_dma_chan *chan)
+{
+	struct xilinx_dma_tx_descriptor *desc, *next;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	list_for_each_entry_safe(desc, next, &chan->done_list, node) {
+		struct dmaengine_desc_callback cb;
+
+		if (desc->cyclic) {
+			xilinx_dma_chan_handle_cyclic(chan, desc, &flags);
+			break;
+		}
+
+		/* Remove from the list of running transactions */
+		list_del(&desc->node);
+
+		/* Run the link descriptor callback function */
+		dmaengine_desc_get_callback(&desc->async_tx, &cb);
+		if (dmaengine_desc_callback_valid(&cb)) {
+			spin_unlock_irqrestore(&chan->lock, flags);
+			dmaengine_desc_callback_invoke(&cb, NULL);
+			spin_lock_irqsave(&chan->lock, flags);
+		}
+
+		/* Run any dependencies, then free the descriptor */
+		dma_run_dependencies(&desc->async_tx);
+		xilinx_dma_free_tx_descriptor(chan, desc);
+	}
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/**
+ * xilinx_dma_do_tasklet - Schedule completion tasklet
+ * @data: Pointer to the Xilinx DMA channel structure
+ */
+static void xilinx_dma_do_tasklet(unsigned long data)
+{
+	struct xilinx_dma_chan *chan = (struct xilinx_dma_chan *)data;
+
+	xilinx_dma_chan_desc_cleanup(chan);
+}
+
+/**
+ * xilinx_dma_alloc_chan_resources - Allocate channel resources
+ * @dchan: DMA channel
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_dma_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+	int i;
+
+	/* Has this channel already been allocated? */
+	if (chan->desc_pool)
+		return 0;
+
+	/*
+	 * The descriptors need to be 64-byte aligned to meet
+	 * the Xilinx VDMA specification requirement.
+	 */
+	if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
+		/* Allocate the buffer descriptors. */
+		chan->seg_v = dma_zalloc_coherent(chan->dev,
+						  sizeof(*chan->seg_v) *
+						  XILINX_DMA_NUM_DESCS,
+						  &chan->seg_p, GFP_KERNEL);
+		if (!chan->seg_v) {
+			dev_err(chan->dev,
+				"unable to allocate channel %d descriptors\n",
+				chan->id);
+			return -ENOMEM;
+		}
+
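+		/*
+		 * Link the preallocated segments into a circular chain:
+		 * segment i points at segment (i + 1) % XILINX_DMA_NUM_DESCS.
+		 */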
+		for (i = 0; i < XILINX_DMA_NUM_DESCS; i++) {
+			chan->seg_v[i].hw.next_desc =
+			lower_32_bits(chan->seg_p + sizeof(*chan->seg_v) *
+				((i + 1) % XILINX_DMA_NUM_DESCS));
+			chan->seg_v[i].hw.next_desc_msb =
+			upper_32_bits(chan->seg_p + sizeof(*chan->seg_v) *
+				((i + 1) % XILINX_DMA_NUM_DESCS));
+			chan->seg_v[i].phys = chan->seg_p +
+				sizeof(*chan->seg_v) * i;
+			list_add_tail(&chan->seg_v[i].node,
+				      &chan->free_seg_list);
+		}
+	} else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
+		chan->desc_pool = dma_pool_create("xilinx_cdma_desc_pool",
+				   chan->dev,
+				   sizeof(struct xilinx_cdma_tx_segment),
+				   __alignof__(struct xilinx_cdma_tx_segment),
+				   0);
+	} else {
+		chan->desc_pool = dma_pool_create("xilinx_vdma_desc_pool",
+				     chan->dev,
+				     sizeof(struct xilinx_vdma_tx_segment),
+				     __alignof__(struct xilinx_vdma_tx_segment),
+				     0);
+	}
+
+	if (!chan->desc_pool &&
+	    (chan->xdev->dma_config->dmatype != XDMA_TYPE_AXIDMA)) {
+		dev_err(chan->dev,
+			"unable to allocate channel %d descriptor pool\n",
+			chan->id);
+		return -ENOMEM;
+	}
+
+	if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
+		/*
+		 * For cyclic DMA mode the tail descriptor register must be
+		 * programmed with a value that is not part of the BD chain,
+		 * so allocate a descriptor segment at channel allocation time
+		 * for programming the tail descriptor.
+		 */
+		chan->cyclic_seg_v = dma_zalloc_coherent(chan->dev,
+					sizeof(*chan->cyclic_seg_v),
+					&chan->cyclic_seg_p, GFP_KERNEL);
+		if (!chan->cyclic_seg_v) {
+			dev_err(chan->dev,
+				"unable to allocate desc segment for cyclic DMA\n");
+			return -ENOMEM;
+		}
+		chan->cyclic_seg_v->phys = chan->cyclic_seg_p;
+	}
+
+	dma_cookie_init(dchan);
+
+	if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
+		/* For AXI DMA, resetting one channel also resets the
+		 * other channel, so enable the interrupts here.
+		 */
+		dma_ctrl_set(chan, XILINX_DMA_REG_DMACR,
+			      XILINX_DMA_DMAXR_ALL_IRQ_MASK);
+	}
+
+	if ((chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) && chan->has_sg)
+		dma_ctrl_set(chan, XILINX_DMA_REG_DMACR,
+			     XILINX_CDMA_CR_SGMODE);
+
+	return 0;
+}
+
+/**
+ * xilinx_dma_tx_status - Get DMA transaction status
+ * @dchan: DMA channel
+ * @cookie: Transaction identifier
+ * @txstate: Transaction state
+ *
+ * Return: DMA transaction status
+ */
+static enum dma_status xilinx_dma_tx_status(struct dma_chan *dchan,
+					dma_cookie_t cookie,
+					struct dma_tx_state *txstate)
+{
+	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+	struct xilinx_dma_tx_descriptor *desc;
+	struct xilinx_axidma_tx_segment *segment;
+	struct xilinx_axidma_desc_hw *hw;
+	enum dma_status ret;
+	unsigned long flags;
+	u32 residue = 0;
+
+	ret = dma_cookie_status(dchan, cookie, txstate);
+	if (ret == DMA_COMPLETE || !txstate)
+		return ret;
+
+	if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
+		spin_lock_irqsave(&chan->lock, flags);
+
+		desc = list_last_entry(&chan->active_list,
+				       struct xilinx_dma_tx_descriptor, node);
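+		/*
+		 * Accumulate the residue per BD as programmed length (control)
+		 * minus the transferred count reported in the status field.
+		 */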
+		if (chan->has_sg) {
+			list_for_each_entry(segment, &desc->segments, node) {
+				hw = &segment->hw;
+				residue += (hw->control - hw->status) &
+					   XILINX_DMA_MAX_TRANS_LEN;
+			}
+		}
+		spin_unlock_irqrestore(&chan->lock, flags);
+
+		chan->residue = residue;
+		dma_set_residue(txstate, chan->residue);
+	}
+
+	return ret;
+}
+
+/**
+ * xilinx_dma_stop_transfer - Halt DMA channel
+ * @chan: Driver specific DMA channel
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_dma_stop_transfer(struct xilinx_dma_chan *chan)
+{
+	u32 val;
+
+	dma_ctrl_clr(chan, XILINX_DMA_REG_DMACR, XILINX_DMA_DMACR_RUNSTOP);
+
+	/* Wait for the hardware to halt */
+	return xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
+				       val & XILINX_DMA_DMASR_HALTED, 0,
+				       XILINX_DMA_LOOP_COUNT);
+}
+
+/**
+ * xilinx_cdma_stop_transfer - Wait for the current transfer to complete
+ * @chan: Driver specific DMA channel
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_cdma_stop_transfer(struct xilinx_dma_chan *chan)
+{
+	u32 val;
+
+	return xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
+				       val & XILINX_DMA_DMASR_IDLE, 0,
+				       XILINX_DMA_LOOP_COUNT);
+}
+
+/**
+ * xilinx_dma_start - Start DMA channel
+ * @chan: Driver specific DMA channel
+ */
+static void xilinx_dma_start(struct xilinx_dma_chan *chan)
+{
+	int err;
+	u32 val;
+
+	dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, XILINX_DMA_DMACR_RUNSTOP);
+
+	/* Wait for the hardware to start */
+	err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMASR, val,
+				      !(val & XILINX_DMA_DMASR_HALTED), 0,
+				      XILINX_DMA_LOOP_COUNT);
+
+	if (err) {
+		dev_err(chan->dev, "Cannot start channel %p: %x\n",
+			chan, dma_ctrl_read(chan, XILINX_DMA_REG_DMASR));
+
+		chan->err = true;
+	}
+}
+
+/**
+ * xilinx_vdma_start_transfer - Starts VDMA transfer
+ * @chan: Driver specific channel struct pointer
+ */
+static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan)
+{
+	struct xilinx_vdma_config *config = &chan->config;
+	struct xilinx_dma_tx_descriptor *desc, *tail_desc;
+	u32 reg, j;
+	struct xilinx_vdma_tx_segment *tail_segment;
+
+	/* This function was invoked with lock held */
+	if (chan->err)
+		return;
+
+	if (!chan->idle)
+		return;
+
+	if (list_empty(&chan->pending_list))
+		return;
+
+	desc = list_first_entry(&chan->pending_list,
+				struct xilinx_dma_tx_descriptor, node);
+	tail_desc = list_last_entry(&chan->pending_list,
+				    struct xilinx_dma_tx_descriptor, node);
+
+	tail_segment = list_last_entry(&tail_desc->segments,
+				       struct xilinx_vdma_tx_segment, node);
+
+	/*
+	 * If the hardware is idle, all descriptors on the running lists are
+	 * done, so start new transfers
+	 */
+	if (chan->has_sg)
+		dma_ctrl_write(chan, XILINX_DMA_REG_CURDESC,
+				desc->async_tx.phys);
+
+	/* Configure the hardware using info in the config structure */
+	if (chan->has_vflip) {
+		reg = dma_read(chan, XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP);
+		reg &= ~XILINX_VDMA_ENABLE_VERTICAL_FLIP;
+		reg |= config->vflip_en;
+		dma_write(chan, XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP,
+			  reg);
+	}
+
+	reg = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR);
+
+	if (config->frm_cnt_en)
+		reg |= XILINX_DMA_DMACR_FRAMECNT_EN;
+	else
+		reg &= ~XILINX_DMA_DMACR_FRAMECNT_EN;
+
+	/*
+	 * With SG, start with circular mode, so that BDs can be fetched.
+	 * In direct register mode, if not parking, enable circular mode
+	 */
+	if (chan->has_sg || !config->park)
+		reg |= XILINX_DMA_DMACR_CIRC_EN;
+
+	if (config->park)
+		reg &= ~XILINX_DMA_DMACR_CIRC_EN;
+
+	dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg);
+
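+	/*
+	 * Program the park pointer: the frame reference selects the frame
+	 * store to use next (read reference for MM2S, write reference for
+	 * S2MM).
+	 */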
+	j = chan->desc_submitcount;
+	reg = dma_read(chan, XILINX_DMA_REG_PARK_PTR);
+	if (chan->direction == DMA_MEM_TO_DEV) {
+		reg &= ~XILINX_DMA_PARK_PTR_RD_REF_MASK;
+		reg |= j << XILINX_DMA_PARK_PTR_RD_REF_SHIFT;
+	} else {
+		reg &= ~XILINX_DMA_PARK_PTR_WR_REF_MASK;
+		reg |= j << XILINX_DMA_PARK_PTR_WR_REF_SHIFT;
+	}
+	dma_write(chan, XILINX_DMA_REG_PARK_PTR, reg);
+
+	/* Start the hardware */
+	xilinx_dma_start(chan);
+
+	if (chan->err)
+		return;
+
+	/* Start the transfer */
+	if (chan->has_sg) {
+		dma_ctrl_write(chan, XILINX_DMA_REG_TAILDESC,
+				tail_segment->phys);
+		list_splice_tail_init(&chan->pending_list, &chan->active_list);
+		chan->desc_pendingcount = 0;
+	} else {
+		struct xilinx_vdma_tx_segment *segment, *last = NULL;
+		int i = 0;
+
+		if (chan->desc_submitcount < chan->num_frms)
+			i = chan->desc_submitcount;
+
+		list_for_each_entry(segment, &desc->segments, node) {
+			if (chan->ext_addr)
+				vdma_desc_write_64(chan,
+					XILINX_VDMA_REG_START_ADDRESS_64(i++),
+					segment->hw.buf_addr,
+					segment->hw.buf_addr_msb);
+			else
+				vdma_desc_write(chan,
+					XILINX_VDMA_REG_START_ADDRESS(i++),
+					segment->hw.buf_addr);
+
+			last = segment;
+		}
+
+		if (!last)
+			return;
+
+		/* HW expects these parameters to be the same for one transaction */
+		vdma_desc_write(chan, XILINX_DMA_REG_HSIZE, last->hw.hsize);
+		vdma_desc_write(chan, XILINX_DMA_REG_FRMDLY_STRIDE,
+				last->hw.stride);
+		vdma_desc_write(chan, XILINX_DMA_REG_VSIZE, last->hw.vsize);
+
+		chan->desc_submitcount++;
+		chan->desc_pendingcount--;
+		list_del(&desc->node);
+		list_add_tail(&desc->node, &chan->active_list);
+		if (chan->desc_submitcount == chan->num_frms)
+			chan->desc_submitcount = 0;
+	}
+
+	chan->idle = false;
+}
+
+/**
+ * xilinx_cdma_start_transfer - Starts cdma transfer
+ * @chan: Driver specific channel struct pointer
+ */
+static void xilinx_cdma_start_transfer(struct xilinx_dma_chan *chan)
+{
+	struct xilinx_dma_tx_descriptor *head_desc, *tail_desc;
+	struct xilinx_cdma_tx_segment *tail_segment;
+	u32 ctrl_reg = dma_read(chan, XILINX_DMA_REG_DMACR);
+
+	if (chan->err)
+		return;
+
+	if (!chan->idle)
+		return;
+
+	if (list_empty(&chan->pending_list))
+		return;
+
+	head_desc = list_first_entry(&chan->pending_list,
+				     struct xilinx_dma_tx_descriptor, node);
+	tail_desc = list_last_entry(&chan->pending_list,
+				    struct xilinx_dma_tx_descriptor, node);
+	tail_segment = list_last_entry(&tail_desc->segments,
+				       struct xilinx_cdma_tx_segment, node);
+
+	if (chan->desc_pendingcount <= XILINX_DMA_COALESCE_MAX) {
+		ctrl_reg &= ~XILINX_DMA_CR_COALESCE_MAX;
+		ctrl_reg |= chan->desc_pendingcount <<
+				XILINX_DMA_CR_COALESCE_SHIFT;
+		dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, ctrl_reg);
+	}
+
+	if (chan->has_sg) {
+		dma_ctrl_clr(chan, XILINX_DMA_REG_DMACR,
+			     XILINX_CDMA_CR_SGMODE);
+
+		dma_ctrl_set(chan, XILINX_DMA_REG_DMACR,
+			     XILINX_CDMA_CR_SGMODE);
+
+		xilinx_write(chan, XILINX_DMA_REG_CURDESC,
+			     head_desc->async_tx.phys);
+
+		/* Update tail ptr register which will start the transfer */
+		xilinx_write(chan, XILINX_DMA_REG_TAILDESC,
+			     tail_segment->phys);
+	} else {
+		/* In simple mode */
+		struct xilinx_cdma_tx_segment *segment;
+		struct xilinx_cdma_desc_hw *hw;
+
+		segment = list_first_entry(&head_desc->segments,
+					   struct xilinx_cdma_tx_segment,
+					   node);
+
+		hw = &segment->hw;
+
+		xilinx_write(chan, XILINX_CDMA_REG_SRCADDR, hw->src_addr);
+		xilinx_write(chan, XILINX_CDMA_REG_DSTADDR, hw->dest_addr);
+
+		/* Start the transfer */
+		dma_ctrl_write(chan, XILINX_DMA_REG_BTT,
+				hw->control & XILINX_DMA_MAX_TRANS_LEN);
+	}
+
+	list_splice_tail_init(&chan->pending_list, &chan->active_list);
+	chan->desc_pendingcount = 0;
+	chan->idle = false;
+}
+
+/**
+ * xilinx_dma_start_transfer - Starts DMA transfer
+ * @chan: Driver specific channel struct pointer
+ */
+static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan)
+{
+	struct xilinx_dma_tx_descriptor *head_desc, *tail_desc;
+	struct xilinx_axidma_tx_segment *tail_segment;
+	u32 reg;
+
+	if (chan->err)
+		return;
+
+	if (list_empty(&chan->pending_list))
+		return;
+
+	if (!chan->idle)
+		return;
+
+	head_desc = list_first_entry(&chan->pending_list,
+				     struct xilinx_dma_tx_descriptor, node);
+	tail_desc = list_last_entry(&chan->pending_list,
+				    struct xilinx_dma_tx_descriptor, node);
+	tail_segment = list_last_entry(&tail_desc->segments,
+				       struct xilinx_axidma_tx_segment, node);
+
+	reg = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR);
+
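+	/*
+	 * Match the interrupt coalescing threshold to the number of
+	 * pending descriptors.
+	 */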
+	if (chan->desc_pendingcount <= XILINX_DMA_COALESCE_MAX) {
+		reg &= ~XILINX_DMA_CR_COALESCE_MAX;
+		reg |= chan->desc_pendingcount <<
+				  XILINX_DMA_CR_COALESCE_SHIFT;
+		dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg);
+	}
+
+	if (chan->has_sg && !chan->xdev->mcdma)
+		xilinx_write(chan, XILINX_DMA_REG_CURDESC,
+			     head_desc->async_tx.phys);
+
+	if (chan->has_sg && chan->xdev->mcdma) {
+		if (chan->direction == DMA_MEM_TO_DEV) {
+			dma_ctrl_write(chan, XILINX_DMA_REG_CURDESC,
+				       head_desc->async_tx.phys);
+		} else {
+			if (!chan->tdest) {
+				dma_ctrl_write(chan, XILINX_DMA_REG_CURDESC,
+				       head_desc->async_tx.phys);
+			} else {
+				dma_ctrl_write(chan,
+					XILINX_DMA_MCRX_CDESC(chan->tdest),
+				       head_desc->async_tx.phys);
+			}
+		}
+	}
+
+	xilinx_dma_start(chan);
+
+	if (chan->err)
+		return;
+
+	/* Start the transfer */
+	if (chan->has_sg && !chan->xdev->mcdma) {
+		if (chan->cyclic)
+			xilinx_write(chan, XILINX_DMA_REG_TAILDESC,
+				     chan->cyclic_seg_v->phys);
+		else
+			xilinx_write(chan, XILINX_DMA_REG_TAILDESC,
+				     tail_segment->phys);
+	} else if (chan->has_sg && chan->xdev->mcdma) {
+		if (chan->direction == DMA_MEM_TO_DEV) {
+			dma_ctrl_write(chan, XILINX_DMA_REG_TAILDESC,
+			       tail_segment->phys);
+		} else {
+			if (!chan->tdest) {
+				dma_ctrl_write(chan, XILINX_DMA_REG_TAILDESC,
+					       tail_segment->phys);
+			} else {
+				dma_ctrl_write(chan,
+					XILINX_DMA_MCRX_TDESC(chan->tdest),
+					tail_segment->phys);
+			}
+		}
+	} else {
+		struct xilinx_axidma_tx_segment *segment;
+		struct xilinx_axidma_desc_hw *hw;
+
+		segment = list_first_entry(&head_desc->segments,
+					   struct xilinx_axidma_tx_segment,
+					   node);
+		hw = &segment->hw;
+
+		xilinx_write(chan, XILINX_DMA_REG_SRCDSTADDR, hw->buf_addr);
+
+		/* Start the transfer */
+		dma_ctrl_write(chan, XILINX_DMA_REG_BTT,
+			       hw->control & XILINX_DMA_MAX_TRANS_LEN);
+	}
+
+	list_splice_tail_init(&chan->pending_list, &chan->active_list);
+	chan->desc_pendingcount = 0;
+	chan->idle = false;
+}
+
+/**
+ * xilinx_dma_issue_pending - Issue pending transactions
+ * @dchan: DMA channel
+ */
+static void xilinx_dma_issue_pending(struct dma_chan *dchan)
+{
+	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	chan->start_transfer(chan);
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/**
+ * xilinx_dma_complete_descriptor - Mark active descriptors as complete
+ * @chan: Xilinx DMA channel
+ *
+ * CONTEXT: hardirq
+ */
+static void xilinx_dma_complete_descriptor(struct xilinx_dma_chan *chan)
+{
+	struct xilinx_dma_tx_descriptor *desc, *next;
+
+	/* This function was invoked with lock held */
+	if (list_empty(&chan->active_list))
+		return;
+
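+	/*
+	 * Move all active descriptors to the done list; cyclic descriptors
+	 * keep their cookie pending until the transfer is terminated.
+	 */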
+	list_for_each_entry_safe(desc, next, &chan->active_list, node) {
+		list_del(&desc->node);
+		if (!desc->cyclic)
+			dma_cookie_complete(&desc->async_tx);
+		list_add_tail(&desc->node, &chan->done_list);
+	}
+}
+
+/**
+ * xilinx_dma_reset - Reset DMA channel
+ * @chan: Driver specific DMA channel
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_dma_reset(struct xilinx_dma_chan *chan)
+{
+	int err;
+	u32 tmp;
+
+	dma_ctrl_set(chan, XILINX_DMA_REG_DMACR, XILINX_DMA_DMACR_RESET);
+
+	/* Wait for the hardware to finish reset */
+	err = xilinx_dma_poll_timeout(chan, XILINX_DMA_REG_DMACR, tmp,
+				      !(tmp & XILINX_DMA_DMACR_RESET), 0,
+				      XILINX_DMA_LOOP_COUNT);
+
+	if (err) {
+		dev_err(chan->dev, "reset timeout, cr %x, sr %x\n",
+			dma_ctrl_read(chan, XILINX_DMA_REG_DMACR),
+			dma_ctrl_read(chan, XILINX_DMA_REG_DMASR));
+		return -ETIMEDOUT;
+	}
+
+	chan->err = false;
+	chan->idle = true;
+	chan->desc_submitcount = 0;
+
+	return err;
+}
+
+/**
+ * xilinx_dma_chan_reset - Reset DMA channel and enable interrupts
+ * @chan: Driver specific DMA channel
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_dma_chan_reset(struct xilinx_dma_chan *chan)
+{
+	int err;
+
+	/* Reset VDMA */
+	err = xilinx_dma_reset(chan);
+	if (err)
+		return err;
+
+	/* Enable interrupts */
+	dma_ctrl_set(chan, XILINX_DMA_REG_DMACR,
+		      XILINX_DMA_DMAXR_ALL_IRQ_MASK);
+
+	return 0;
+}
+
+/**
+ * xilinx_dma_irq_handler - DMA Interrupt handler
+ * @irq: IRQ number
+ * @data: Pointer to the Xilinx DMA channel structure
+ *
+ * Return: IRQ_HANDLED/IRQ_NONE
+ */
+static irqreturn_t xilinx_dma_irq_handler(int irq, void *data)
+{
+	struct xilinx_dma_chan *chan = data;
+	u32 status;
+
+	/* Read the status and ack the interrupts. */
+	status = dma_ctrl_read(chan, XILINX_DMA_REG_DMASR);
+	if (!(status & XILINX_DMA_DMAXR_ALL_IRQ_MASK))
+		return IRQ_NONE;
+
+	dma_ctrl_write(chan, XILINX_DMA_REG_DMASR,
+			status & XILINX_DMA_DMAXR_ALL_IRQ_MASK);
+
+	if (status & XILINX_DMA_DMASR_ERR_IRQ) {
+		/*
+		 * An error occurred. If C_FLUSH_ON_FSYNC is enabled and the
+		 * error is recoverable, ignore it. Otherwise flag the error.
+		 *
+		 * Only recoverable errors can be cleared in the DMASR register;
+		 * make sure not to write 1 to any other error bits.
+		 */
+		u32 errors = status & XILINX_DMA_DMASR_ALL_ERR_MASK;
+
+		dma_ctrl_write(chan, XILINX_DMA_REG_DMASR,
+				errors & XILINX_DMA_DMASR_ERR_RECOVER_MASK);
+
+		if (!chan->flush_on_fsync ||
+		    (errors & ~XILINX_DMA_DMASR_ERR_RECOVER_MASK)) {
+			dev_err(chan->dev,
+				"Channel %p has errors %x, cdr %x tdr %x\n",
+				chan, errors,
+				dma_ctrl_read(chan, XILINX_DMA_REG_CURDESC),
+				dma_ctrl_read(chan, XILINX_DMA_REG_TAILDESC));
+			chan->err = true;
+		}
+	}
+
+	if (status & XILINX_DMA_DMASR_DLY_CNT_IRQ) {
+		/*
+		 * The device is taking too long to complete the transfer for
+		 * the responsiveness the user requires.
+		 */
+		dev_dbg(chan->dev, "Inter-packet latency too long\n");
+	}
+
+	if (status & XILINX_DMA_DMASR_FRM_CNT_IRQ) {
+		spin_lock(&chan->lock);
+		xilinx_dma_complete_descriptor(chan);
+		chan->idle = true;
+		chan->start_transfer(chan);
+		spin_unlock(&chan->lock);
+	}
+
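+	/* Defer descriptor cleanup and client callbacks to the tasklet */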
+	tasklet_schedule(&chan->tasklet);
+	return IRQ_HANDLED;
+}
+
+/**
+ * append_desc_queue - Append a descriptor to the pending queue
+ * @chan: Driver specific dma channel
+ * @desc: dma transaction descriptor
+ */
+static void append_desc_queue(struct xilinx_dma_chan *chan,
+			      struct xilinx_dma_tx_descriptor *desc)
+{
+	struct xilinx_vdma_tx_segment *tail_segment;
+	struct xilinx_dma_tx_descriptor *tail_desc;
+	struct xilinx_axidma_tx_segment *axidma_tail_segment;
+	struct xilinx_cdma_tx_segment *cdma_tail_segment;
+
+	if (list_empty(&chan->pending_list))
+		goto append;
+
+	/*
+	 * Add the hardware descriptor to the chain of hardware descriptors
+	 * that already exists in memory.
+	 */
+	tail_desc = list_last_entry(&chan->pending_list,
+				    struct xilinx_dma_tx_descriptor, node);
+	if (chan->xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
+		tail_segment = list_last_entry(&tail_desc->segments,
+					       struct xilinx_vdma_tx_segment,
+					       node);
+		tail_segment->hw.next_desc = (u32)desc->async_tx.phys;
+	} else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
+		cdma_tail_segment = list_last_entry(&tail_desc->segments,
+						struct xilinx_cdma_tx_segment,
+						node);
+		cdma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys;
+	} else {
+		axidma_tail_segment = list_last_entry(&tail_desc->segments,
+					       struct xilinx_axidma_tx_segment,
+					       node);
+		axidma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys;
+	}
+
+	/*
+	 * Add the software descriptor and all children to the list
+	 * of pending transactions
+	 */
+append:
+	list_add_tail(&desc->node, &chan->pending_list);
+	chan->desc_pendingcount++;
+
+	if (chan->has_sg && (chan->xdev->dma_config->dmatype == XDMA_TYPE_VDMA)
+	    && unlikely(chan->desc_pendingcount > chan->num_frms)) {
+		dev_dbg(chan->dev, "desc pendingcount is too high\n");
+		chan->desc_pendingcount = chan->num_frms;
+	}
+}
+
+/**
+ * xilinx_dma_tx_submit - Submit DMA transaction
+ * @tx: Async transaction descriptor
+ *
+ * Return: cookie value on success and failure value on error
+ */
+static dma_cookie_t xilinx_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct xilinx_dma_tx_descriptor *desc = to_dma_tx_descriptor(tx);
+	struct xilinx_dma_chan *chan = to_xilinx_chan(tx->chan);
+	dma_cookie_t cookie;
+	unsigned long flags;
+	int err;
+
+	if (chan->cyclic) {
+		xilinx_dma_free_tx_descriptor(chan, desc);
+		return -EBUSY;
+	}
+
+	if (chan->err) {
+		/*
+		 * The channel is in an error state; try to reset it. If the
+		 * reset fails, the system needs a hard reset and the channel
+		 * is no longer functional.
+		 */
+		err = xilinx_dma_chan_reset(chan);
+		if (err < 0)
+			return err;
+	}
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	cookie = dma_cookie_assign(tx);
+
+	/* Put this transaction onto the tail of the pending queue */
+	append_desc_queue(chan, desc);
+
+	if (desc->cyclic)
+		chan->cyclic = true;
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	return cookie;
+}
+
+/**
+ * xilinx_vdma_dma_prep_interleaved - prepare a descriptor for an
+ *	interleaved DMA transaction
+ * @dchan: DMA channel
+ * @xt: Interleaved template pointer
+ * @flags: transfer ack flags
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *
+xilinx_vdma_dma_prep_interleaved(struct dma_chan *dchan,
+				 struct dma_interleaved_template *xt,
+				 unsigned long flags)
+{
+	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+	struct xilinx_dma_tx_descriptor *desc;
+	struct xilinx_vdma_tx_segment *segment;
+	struct xilinx_vdma_desc_hw *hw;
+
+	if (!is_slave_direction(xt->dir))
+		return NULL;
+
+	if (!xt->numf || !xt->sgl[0].size)
+		return NULL;
+
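+	/* Only a single chunk per frame is supported */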
+	if (xt->frame_size != 1)
+		return NULL;
+
+	/* Allocate a transaction descriptor. */
+	desc = xilinx_dma_alloc_tx_descriptor(chan);
+	if (!desc)
+		return NULL;
+
+	dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+	desc->async_tx.tx_submit = xilinx_dma_tx_submit;
+	async_tx_ack(&desc->async_tx);
+
+	/* Allocate the link descriptor from DMA pool */
+	segment = xilinx_vdma_alloc_tx_segment(chan);
+	if (!segment)
+		goto error;
+
+	/* Fill in the hardware descriptor */
+	hw = &segment->hw;
+	hw->vsize = xt->numf;
+	hw->hsize = xt->sgl[0].size;
+	hw->stride = (xt->sgl[0].icg + xt->sgl[0].size) <<
+			XILINX_DMA_FRMDLY_STRIDE_STRIDE_SHIFT;
+	hw->stride |= chan->config.frm_dly <<
+			XILINX_DMA_FRMDLY_STRIDE_FRMDLY_SHIFT;
+
+	if (xt->dir != DMA_MEM_TO_DEV) {
+		if (chan->ext_addr) {
+			hw->buf_addr = lower_32_bits(xt->dst_start);
+			hw->buf_addr_msb = upper_32_bits(xt->dst_start);
+		} else {
+			hw->buf_addr = xt->dst_start;
+		}
+	} else {
+		if (chan->ext_addr) {
+			hw->buf_addr = lower_32_bits(xt->src_start);
+			hw->buf_addr_msb = upper_32_bits(xt->src_start);
+		} else {
+			hw->buf_addr = xt->src_start;
+		}
+	}
+
+	/* Insert the segment into the descriptor segments list. */
+	list_add_tail(&segment->node, &desc->segments);
+
+	/* Link the last hardware descriptor with the first. */
+	segment = list_first_entry(&desc->segments,
+				   struct xilinx_vdma_tx_segment, node);
+	desc->async_tx.phys = segment->phys;
+
+	return &desc->async_tx;
+
+error:
+	xilinx_dma_free_tx_descriptor(chan, desc);
+	return NULL;
+}
+
+/**
+ * xilinx_cdma_prep_memcpy - prepare descriptors for a memcpy transaction
+ * @dchan: DMA channel
+ * @dma_dst: destination address
+ * @dma_src: source address
+ * @len: transfer length
+ * @flags: transfer ack flags
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *
+xilinx_cdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst,
+			dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+	struct xilinx_dma_tx_descriptor *desc;
+	struct xilinx_cdma_tx_segment *segment;
+	struct xilinx_cdma_desc_hw *hw;
+
+	if (!len || len > XILINX_DMA_MAX_TRANS_LEN)
+		return NULL;
+
+	desc = xilinx_dma_alloc_tx_descriptor(chan);
+	if (!desc)
+		return NULL;
+
+	dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+	desc->async_tx.tx_submit = xilinx_dma_tx_submit;
+
+	/* Allocate the link descriptor from DMA pool */
+	segment = xilinx_cdma_alloc_tx_segment(chan);
+	if (!segment)
+		goto error;
+
+	hw = &segment->hw;
+	hw->control = len;
+	hw->src_addr = dma_src;
+	hw->dest_addr = dma_dst;
+	if (chan->ext_addr) {
+		hw->src_addr_msb = upper_32_bits(dma_src);
+		hw->dest_addr_msb = upper_32_bits(dma_dst);
+	}
+
+	/* Insert the segment into the descriptor segments list. */
+	list_add_tail(&segment->node, &desc->segments);
+
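+	/* A memcpy uses a single segment, so its next pointer refers to itself */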
+	desc->async_tx.phys = segment->phys;
+	hw->next_desc = segment->phys;
+
+	return &desc->async_tx;
+
+error:
+	xilinx_dma_free_tx_descriptor(chan, desc);
+	return NULL;
+}
+
+/**
+ * xilinx_dma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
+ * @dchan: DMA channel
+ * @sgl: scatterlist to transfer to/from
+ * @sg_len: number of entries in @sgl
+ * @direction: DMA direction
+ * @flags: transfer ack flags
+ * @context: APP words of the descriptor
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *xilinx_dma_prep_slave_sg(
+	struct dma_chan *dchan, struct scatterlist *sgl, unsigned int sg_len,
+	enum dma_transfer_direction direction, unsigned long flags,
+	void *context)
+{
+	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+	struct xilinx_dma_tx_descriptor *desc;
+	struct xilinx_axidma_tx_segment *segment = NULL;
+	u32 *app_w = (u32 *)context;
+	struct scatterlist *sg;
+	size_t copy;
+	size_t sg_used;
+	unsigned int i;
+
+	if (!is_slave_direction(direction))
+		return NULL;
+
+	/* Allocate a transaction descriptor. */
+	desc = xilinx_dma_alloc_tx_descriptor(chan);
+	if (!desc)
+		return NULL;
+
+	dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+	desc->async_tx.tx_submit = xilinx_dma_tx_submit;
+
+	/* Build transactions using information in the scatter gather list */
+	for_each_sg(sgl, sg, sg_len, i) {
+		sg_used = 0;
+
+		/* Loop until the entire scatterlist entry is used */
+		while (sg_used < sg_dma_len(sg)) {
+			struct xilinx_axidma_desc_hw *hw;
+
+			/* Get a free segment */
+			segment = xilinx_axidma_alloc_tx_segment(chan);
+			if (!segment)
+				goto error;
+
+			/*
+			 * Calculate the maximum number of bytes to transfer,
+			 * making sure it is less than the hw limit
+			 */
+			copy = min_t(size_t, sg_dma_len(sg) - sg_used,
+				     XILINX_DMA_MAX_TRANS_LEN);
+			hw = &segment->hw;
+
+			/* Fill in the descriptor */
+			xilinx_axidma_buf(chan, hw, sg_dma_address(sg),
+					  sg_used, 0);
+
+			hw->control = copy;
+
+			if (chan->direction == DMA_MEM_TO_DEV) {
+				if (app_w)
+					memcpy(hw->app, app_w, sizeof(u32) *
+					       XILINX_DMA_NUM_APP_WORDS);
+			}
+
+			sg_used += copy;
+
+			/*
+			 * Insert the segment into the descriptor segments
+			 * list.
+			 */
+			list_add_tail(&segment->node, &desc->segments);
+		}
+	}
+
+	segment = list_first_entry(&desc->segments,
+				   struct xilinx_axidma_tx_segment, node);
+	desc->async_tx.phys = segment->phys;
+
+	/* For DMA_MEM_TO_DEV, set SOP on the first BD and EOP on the last */
+	if (chan->direction == DMA_MEM_TO_DEV) {
+		segment->hw.control |= XILINX_DMA_BD_SOP;
+		segment = list_last_entry(&desc->segments,
+					  struct xilinx_axidma_tx_segment,
+					  node);
+		segment->hw.control |= XILINX_DMA_BD_EOP;
+	}
+
+	return &desc->async_tx;
+
+error:
+	xilinx_dma_free_tx_descriptor(chan, desc);
+	return NULL;
+}
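+
+/*
+ * Illustrative note: clients reach the slave_sg prep callback above via
+ * dmaengine_slave_config() followed by dmaengine_prep_slave_sg(); for the
+ * AXI DMA MM2S path the optional context argument may carry the
+ * XILINX_DMA_NUM_APP_WORDS user/APP words copied into each descriptor.
+ */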
+
+/**
+ * xilinx_dma_prep_dma_cyclic - prepare descriptors for a cyclic DMA transfer
+ * @dchan: DMA channel
+ * @buf_addr: Physical address of the buffer
+ * @buf_len: Total length of the cyclic buffers
+ * @period_len: length of individual cyclic buffer
+ * @direction: DMA direction
+ * @flags: transfer ack flags
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *xilinx_dma_prep_dma_cyclic(
+	struct dma_chan *dchan, dma_addr_t buf_addr, size_t buf_len,
+	size_t period_len, enum dma_transfer_direction direction,
+	unsigned long flags)
+{
+	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+	struct xilinx_dma_tx_descriptor *desc;
+	struct xilinx_axidma_tx_segment *segment, *head_segment, *prev = NULL;
+	size_t copy, sg_used;
+	unsigned int num_periods;
+	int i;
+	u32 reg;
+
+	if (!period_len)
+		return NULL;
+
+	num_periods = buf_len / period_len;
+
+	if (!num_periods)
+		return NULL;
+
+	if (!is_slave_direction(direction))
+		return NULL;
+
+	/* Allocate a transaction descriptor. */
+	desc = xilinx_dma_alloc_tx_descriptor(chan);
+	if (!desc)
+		return NULL;
+
+	chan->direction = direction;
+	dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+	desc->async_tx.tx_submit = xilinx_dma_tx_submit;
+
+	for (i = 0; i < num_periods; ++i) {
+		sg_used = 0;
+
+		while (sg_used < period_len) {
+			struct xilinx_axidma_desc_hw *hw;
+
+			/* Get a free segment */
+			segment = xilinx_axidma_alloc_tx_segment(chan);
+			if (!segment)
+				goto error;
+
+			/*
+			 * Calculate the maximum number of bytes to transfer,
+			 * making sure it is less than the hw limit
+			 */
+			copy = min_t(size_t, period_len - sg_used,
+				     XILINX_DMA_MAX_TRANS_LEN);
+			hw = &segment->hw;
+			xilinx_axidma_buf(chan, hw, buf_addr, sg_used,
+					  period_len * i);
+			hw->control = copy;
+
+			if (prev)
+				prev->hw.next_desc = segment->phys;
+
+			prev = segment;
+			sg_used += copy;
+
+			/*
+			 * Insert the segment into the descriptor segments
+			 * list.
+			 */
+			list_add_tail(&segment->node, &desc->segments);
+		}
+	}
+
+	head_segment = list_first_entry(&desc->segments,
+				   struct xilinx_axidma_tx_segment, node);
+	desc->async_tx.phys = head_segment->phys;
+
+	desc->cyclic = true;
+	reg = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR);
+	reg |= XILINX_DMA_CR_CYCLIC_BD_EN_MASK;
+	dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg);
+
+	segment = list_last_entry(&desc->segments,
+				  struct xilinx_axidma_tx_segment,
+				  node);
+	segment->hw.next_desc = (u32) head_segment->phys;
+
+	/* For DMA_MEM_TO_DEV, set SOP on the first BD and EOP on the last */
+	if (direction == DMA_MEM_TO_DEV) {
+		head_segment->hw.control |= XILINX_DMA_BD_SOP;
+		segment->hw.control |= XILINX_DMA_BD_EOP;
+	}
+
+	return &desc->async_tx;
+
+error:
+	xilinx_dma_free_tx_descriptor(chan, desc);
+	return NULL;
+}
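+
+/*
+ * Illustrative note: the cyclic prep callback above backs
+ * dmaengine_prep_dma_cyclic(). Each of the buf_len / period_len periods is
+ * split into segments of at most XILINX_DMA_MAX_TRANS_LEN bytes, and the
+ * last segment is linked back to the head descriptor to form the ring.
+ */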
+
+/**
+ * xilinx_dma_prep_interleaved - prepare a descriptor for an interleaved
+ *	DMA transaction
+ * @dchan: DMA channel
+ * @xt: Interleaved template pointer
+ * @flags: transfer ack flags
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *
+xilinx_dma_prep_interleaved(struct dma_chan *dchan,
+				 struct dma_interleaved_template *xt,
+				 unsigned long flags)
+{
+	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+	struct xilinx_dma_tx_descriptor *desc;
+	struct xilinx_axidma_tx_segment *segment;
+	struct xilinx_axidma_desc_hw *hw;
+
+	if (!is_slave_direction(xt->dir))
+		return NULL;
+
+	if (!xt->numf || !xt->sgl[0].size)
+		return NULL;
+
+	if (xt->frame_size != 1)
+		return NULL;
+
+	/* Allocate a transaction descriptor. */
+	desc = xilinx_dma_alloc_tx_descriptor(chan);
+	if (!desc)
+		return NULL;
+
+	chan->direction = xt->dir;
+	dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+	desc->async_tx.tx_submit = xilinx_dma_tx_submit;
+
+	/* Get a free segment */
+	segment = xilinx_axidma_alloc_tx_segment(chan);
+	if (!segment)
+		goto error;
+
+	hw = &segment->hw;
+
+	/* Fill in the descriptor */
+	if (xt->dir != DMA_MEM_TO_DEV)
+		hw->buf_addr = xt->dst_start;
+	else
+		hw->buf_addr = xt->src_start;
+
+	hw->mcdma_control = chan->tdest & XILINX_DMA_BD_TDEST_MASK;
+	hw->vsize_stride = (xt->numf << XILINX_DMA_BD_VSIZE_SHIFT) &
+			    XILINX_DMA_BD_VSIZE_MASK;
+	hw->vsize_stride |= (xt->sgl[0].icg + xt->sgl[0].size) &
+			    XILINX_DMA_BD_STRIDE_MASK;
+	hw->control = xt->sgl[0].size & XILINX_DMA_BD_HSIZE_MASK;
+
+	/*
+	 * Insert the segment into the descriptor segments
+	 * list.
+	 */
+	list_add_tail(&segment->node, &desc->segments);
+
+	segment = list_first_entry(&desc->segments,
+				   struct xilinx_axidma_tx_segment, node);
+	desc->async_tx.phys = segment->phys;
+
+	/* For DMA_MEM_TO_DEV, set SOP on the first BD and EOP on the last */
+	if (xt->dir == DMA_MEM_TO_DEV) {
+		segment->hw.control |= XILINX_DMA_BD_SOP;
+		segment = list_last_entry(&desc->segments,
+					  struct xilinx_axidma_tx_segment,
+					  node);
+		segment->hw.control |= XILINX_DMA_BD_EOP;
+	}
+
+	return &desc->async_tx;
+
+error:
+	xilinx_dma_free_tx_descriptor(chan, desc);
+	return NULL;
+}
+
+/**
+ * xilinx_dma_terminate_all - Halt the channel and free descriptors
+ * @dchan: Driver specific DMA Channel pointer
+ *
+ * Return: '0' always.
+ */
+static int xilinx_dma_terminate_all(struct dma_chan *dchan)
+{
+	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+	u32 reg;
+	int err;
+
+	if (chan->cyclic)
+		xilinx_dma_chan_reset(chan);
+
+	err = chan->stop_transfer(chan);
+	if (err) {
+		dev_err(chan->dev, "Cannot stop channel %p: %x\n",
+			chan, dma_ctrl_read(chan, XILINX_DMA_REG_DMASR));
+		chan->err = true;
+	}
+
+	/* Remove and free all of the descriptors in the lists */
+	xilinx_dma_free_descriptors(chan);
+	chan->idle = true;
+
+	if (chan->cyclic) {
+		reg = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR);
+		reg &= ~XILINX_DMA_CR_CYCLIC_BD_EN_MASK;
+		dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg);
+		chan->cyclic = false;
+	}
+
+	if ((chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) && chan->has_sg)
+		dma_ctrl_clr(chan, XILINX_DMA_REG_DMACR,
+			     XILINX_CDMA_CR_SGMODE);
+
+	return 0;
+}
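+
+/*
+ * Note: this is wired up as .device_terminate_all below, so it is normally
+ * reached through the dmaengine_terminate_all()/dmaengine_terminate_sync()
+ * client helpers rather than called directly.
+ */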
+
+/**
+ * xilinx_vdma_channel_set_config - Configure VDMA channel
+ * Run-time configuration for Axi VDMA, supports:
+ * . halt the channel
+ * . configure interrupt coalescing and inter-packet delay threshold
+ * . start/stop parking
+ * . enable genlock
+ *
+ * @dchan: DMA channel
+ * @cfg: VDMA device configuration pointer
+ *
+ * Return: '0' on success and failure value on error
+ */
+int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
+					struct xilinx_vdma_config *cfg)
+{
+	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+	u32 dmacr;
+
+	if (cfg->reset)
+		return xilinx_dma_chan_reset(chan);
+
+	dmacr = dma_ctrl_read(chan, XILINX_DMA_REG_DMACR);
+
+	chan->config.frm_dly = cfg->frm_dly;
+	chan->config.park = cfg->park;
+
+	/* genlock settings */
+	chan->config.gen_lock = cfg->gen_lock;
+	chan->config.master = cfg->master;
+
+	if (cfg->gen_lock && chan->genlock) {
+		dmacr |= XILINX_DMA_DMACR_GENLOCK_EN;
+		dmacr |= cfg->master << XILINX_DMA_DMACR_MASTER_SHIFT;
+	}
+
+	chan->config.frm_cnt_en = cfg->frm_cnt_en;
+	chan->config.vflip_en = cfg->vflip_en;
+
+	if (cfg->park)
+		chan->config.park_frm = cfg->park_frm;
+	else
+		chan->config.park_frm = -1;
+
+	chan->config.coalesc = cfg->coalesc;
+	chan->config.delay = cfg->delay;
+
+	if (cfg->coalesc <= XILINX_DMA_DMACR_FRAME_COUNT_MAX) {
+		dmacr |= cfg->coalesc << XILINX_DMA_DMACR_FRAME_COUNT_SHIFT;
+		chan->config.coalesc = cfg->coalesc;
+	}
+
+	if (cfg->delay <= XILINX_DMA_DMACR_DELAY_MAX) {
+		dmacr |= cfg->delay << XILINX_DMA_DMACR_DELAY_SHIFT;
+		chan->config.delay = cfg->delay;
+	}
+
+	/* FSync Source selection */
+	dmacr &= ~XILINX_DMA_DMACR_FSYNCSRC_MASK;
+	dmacr |= cfg->ext_fsync << XILINX_DMA_DMACR_FSYNCSRC_SHIFT;
+
+	dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, dmacr);
+
+	return 0;
+}
+EXPORT_SYMBOL(xilinx_vdma_channel_set_config);
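+
+/*
+ * Illustrative sketch (hypothetical client code, not part of this driver):
+ * a VDMA client fills a struct xilinx_vdma_config and applies it to a
+ * previously requested channel, for example:
+ *
+ *	struct xilinx_vdma_config cfg = {
+ *		.park = 1,
+ *		.park_frm = 0,
+ *		.coalesc = 1,
+ *	};
+ *
+ *	xilinx_vdma_channel_set_config(chan, &cfg);
+ */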
+
+/* -----------------------------------------------------------------------------
+ * Probe and remove
+ */
+
+/**
+ * xilinx_dma_chan_remove - Per Channel remove function
+ * @chan: Driver specific DMA channel
+ */
+static void xilinx_dma_chan_remove(struct xilinx_dma_chan *chan)
+{
+	/* Disable all interrupts */
+	dma_ctrl_clr(chan, XILINX_DMA_REG_DMACR,
+		      XILINX_DMA_DMAXR_ALL_IRQ_MASK);
+
+	if (chan->irq > 0)
+		free_irq(chan->irq, chan);
+
+	tasklet_kill(&chan->tasklet);
+
+	list_del(&chan->common.device_node);
+}
+
+static int axidma_clk_init(struct platform_device *pdev, struct clk **axi_clk,
+			    struct clk **tx_clk, struct clk **rx_clk,
+			    struct clk **sg_clk, struct clk **tmp_clk)
+{
+	int err;
+
+	*tmp_clk = NULL;
+
+	*axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk");
+	if (IS_ERR(*axi_clk)) {
+		err = PTR_ERR(*axi_clk);
+		dev_err(&pdev->dev, "failed to get axi_aclk (%d)\n", err);
+		return err;
+	}
+
+	*tx_clk = devm_clk_get(&pdev->dev, "m_axi_mm2s_aclk");
+	if (IS_ERR(*tx_clk))
+		*tx_clk = NULL;
+
+	*rx_clk = devm_clk_get(&pdev->dev, "m_axi_s2mm_aclk");
+	if (IS_ERR(*rx_clk))
+		*rx_clk = NULL;
+
+	*sg_clk = devm_clk_get(&pdev->dev, "m_axi_sg_aclk");
+	if (IS_ERR(*sg_clk))
+		*sg_clk = NULL;
+
+	err = clk_prepare_enable(*axi_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable axi_clk (%d)\n", err);
+		return err;
+	}
+
+	err = clk_prepare_enable(*tx_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable tx_clk (%d)\n", err);
+		goto err_disable_axiclk;
+	}
+
+	err = clk_prepare_enable(*rx_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable rx_clk (%d)\n", err);
+		goto err_disable_txclk;
+	}
+
+	err = clk_prepare_enable(*sg_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable sg_clk (%d)\n", err);
+		goto err_disable_rxclk;
+	}
+
+	return 0;
+
+err_disable_rxclk:
+	clk_disable_unprepare(*rx_clk);
+err_disable_txclk:
+	clk_disable_unprepare(*tx_clk);
+err_disable_axiclk:
+	clk_disable_unprepare(*axi_clk);
+
+	return err;
+}
+
+static int axicdma_clk_init(struct platform_device *pdev, struct clk **axi_clk,
+			    struct clk **dev_clk, struct clk **tmp_clk,
+			    struct clk **tmp1_clk, struct clk **tmp2_clk)
+{
+	int err;
+
+	*tmp_clk = NULL;
+	*tmp1_clk = NULL;
+	*tmp2_clk = NULL;
+
+	*axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk");
+	if (IS_ERR(*axi_clk)) {
+		err = PTR_ERR(*axi_clk);
+		dev_err(&pdev->dev, "failed to get axi_clk (%d)\n", err);
+		return err;
+	}
+
+	*dev_clk = devm_clk_get(&pdev->dev, "m_axi_aclk");
+	if (IS_ERR(*dev_clk)) {
+		err = PTR_ERR(*dev_clk);
+		dev_err(&pdev->dev, "failed to get dev_clk (%d)\n", err);
+		return err;
+	}
+
+	err = clk_prepare_enable(*axi_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable axi_clk (%d)\n", err);
+		return err;
+	}
+
+	err = clk_prepare_enable(*dev_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable dev_clk (%d)\n", err);
+		goto err_disable_axiclk;
+	}
+
+	return 0;
+
+err_disable_axiclk:
+	clk_disable_unprepare(*axi_clk);
+
+	return err;
+}
+
+static int axivdma_clk_init(struct platform_device *pdev, struct clk **axi_clk,
+			    struct clk **tx_clk, struct clk **txs_clk,
+			    struct clk **rx_clk, struct clk **rxs_clk)
+{
+	int err;
+
+	*axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk");
+	if (IS_ERR(*axi_clk)) {
+		err = PTR_ERR(*axi_clk);
+		dev_err(&pdev->dev, "failed to get axi_aclk (%d)\n", err);
+		return err;
+	}
+
+	*tx_clk = devm_clk_get(&pdev->dev, "m_axi_mm2s_aclk");
+	if (IS_ERR(*tx_clk))
+		*tx_clk = NULL;
+
+	*txs_clk = devm_clk_get(&pdev->dev, "m_axis_mm2s_aclk");
+	if (IS_ERR(*txs_clk))
+		*txs_clk = NULL;
+
+	*rx_clk = devm_clk_get(&pdev->dev, "m_axi_s2mm_aclk");
+	if (IS_ERR(*rx_clk))
+		*rx_clk = NULL;
+
+	*rxs_clk = devm_clk_get(&pdev->dev, "s_axis_s2mm_aclk");
+	if (IS_ERR(*rxs_clk))
+		*rxs_clk = NULL;
+
+	err = clk_prepare_enable(*axi_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable axi_clk (%d)\n", err);
+		return err;
+	}
+
+	err = clk_prepare_enable(*tx_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable tx_clk (%d)\n", err);
+		goto err_disable_axiclk;
+	}
+
+	err = clk_prepare_enable(*txs_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable txs_clk (%d)\n", err);
+		goto err_disable_txclk;
+	}
+
+	err = clk_prepare_enable(*rx_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable rx_clk (%d)\n", err);
+		goto err_disable_txsclk;
+	}
+
+	err = clk_prepare_enable(*rxs_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable rxs_clk (%d)\n", err);
+		goto err_disable_rxclk;
+	}
+
+	return 0;
+
+err_disable_rxclk:
+	clk_disable_unprepare(*rx_clk);
+err_disable_txsclk:
+	clk_disable_unprepare(*txs_clk);
+err_disable_txclk:
+	clk_disable_unprepare(*tx_clk);
+err_disable_axiclk:
+	clk_disable_unprepare(*axi_clk);
+
+	return err;
+}
+
+static void xdma_disable_allclks(struct xilinx_dma_device *xdev)
+{
+	clk_disable_unprepare(xdev->rxs_clk);
+	clk_disable_unprepare(xdev->rx_clk);
+	clk_disable_unprepare(xdev->txs_clk);
+	clk_disable_unprepare(xdev->tx_clk);
+	clk_disable_unprepare(xdev->axi_clk);
+}
+
+/**
+ * xilinx_dma_chan_probe - Per Channel Probing
+ * It gets the channel features from the device tree entry and
+ * initializes the special channel handling routines.
+ *
+ * @xdev: Driver specific device structure
+ * @node: Device node
+ * @chan_id: DMA Channel id
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
+				  struct device_node *node, int chan_id)
+{
+	struct xilinx_dma_chan *chan;
+	bool has_dre = false;
+	u32 value, width;
+	int err;
+
+	/* Allocate and initialize the channel structure */
+	chan = devm_kzalloc(xdev->dev, sizeof(*chan), GFP_KERNEL);
+	if (!chan)
+		return -ENOMEM;
+
+	chan->dev = xdev->dev;
+	chan->xdev = xdev;
+	chan->has_sg = xdev->has_sg;
+	chan->desc_pendingcount = 0x0;
+	chan->ext_addr = xdev->ext_addr;
+	/*
+	 * This flag ensures that descriptors are not submitted while the
+	 * DMA engine is busy. It avoids polling a bit in the status
+	 * register to learn the DMA state in the driver hot path.
+	 */
+	chan->idle = true;
+
+	spin_lock_init(&chan->lock);
+	INIT_LIST_HEAD(&chan->pending_list);
+	INIT_LIST_HEAD(&chan->done_list);
+	INIT_LIST_HEAD(&chan->active_list);
+	INIT_LIST_HEAD(&chan->free_seg_list);
+
+	/* Retrieve the channel properties from the device tree */
+	has_dre = of_property_read_bool(node, "xlnx,include-dre");
+
+	chan->genlock = of_property_read_bool(node, "xlnx,genlock-mode");
+
+	err = of_property_read_u32(node, "xlnx,datawidth", &value);
+	if (err) {
+		dev_err(xdev->dev, "missing xlnx,datawidth property\n");
+		return err;
+	}
+	width = value >> 3; /* Convert bits to bytes */
+
+	/* If data width is greater than 8 bytes, DRE is not in hw */
+	if (width > 8)
+		has_dre = false;
+
+	if (!has_dre)
+		xdev->common.copy_align = fls(width - 1);
+
+	if (of_device_is_compatible(node, "xlnx,axi-vdma-mm2s-channel") ||
+	    of_device_is_compatible(node, "xlnx,axi-dma-mm2s-channel") ||
+	    of_device_is_compatible(node, "xlnx,axi-cdma-channel")) {
+		chan->direction = DMA_MEM_TO_DEV;
+		chan->id = chan_id;
+		chan->tdest = chan_id;
+
+		chan->ctrl_offset = XILINX_DMA_MM2S_CTRL_OFFSET;
+		if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
+			chan->desc_offset = XILINX_VDMA_MM2S_DESC_OFFSET;
+			chan->config.park = 1;
+
+			if (xdev->flush_on_fsync == XILINX_DMA_FLUSH_BOTH ||
+			    xdev->flush_on_fsync == XILINX_DMA_FLUSH_MM2S)
+				chan->flush_on_fsync = true;
+		}
+	} else if (of_device_is_compatible(node,
+					   "xlnx,axi-vdma-s2mm-channel") ||
+		   of_device_is_compatible(node,
+					   "xlnx,axi-dma-s2mm-channel")) {
+		chan->direction = DMA_DEV_TO_MEM;
+		chan->id = chan_id;
+		chan->tdest = chan_id - xdev->nr_channels;
+		chan->has_vflip = of_property_read_bool(node,
+					"xlnx,enable-vert-flip");
+		if (chan->has_vflip) {
+			chan->config.vflip_en = dma_read(chan,
+				XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP) &
+				XILINX_VDMA_ENABLE_VERTICAL_FLIP;
+		}
+
+		chan->ctrl_offset = XILINX_DMA_S2MM_CTRL_OFFSET;
+		if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
+			chan->desc_offset = XILINX_VDMA_S2MM_DESC_OFFSET;
+			chan->config.park = 1;
+
+			if (xdev->flush_on_fsync == XILINX_DMA_FLUSH_BOTH ||
+			    xdev->flush_on_fsync == XILINX_DMA_FLUSH_S2MM)
+				chan->flush_on_fsync = true;
+		}
+	} else {
+		dev_err(xdev->dev, "Invalid channel compatible node\n");
+		return -EINVAL;
+	}
+
+	/* Request the interrupt */
+	chan->irq = irq_of_parse_and_map(node, 0);
+	err = request_irq(chan->irq, xilinx_dma_irq_handler, IRQF_SHARED,
+			  "xilinx-dma-controller", chan);
+	if (err) {
+		dev_err(xdev->dev, "unable to request IRQ %d\n", chan->irq);
+		return err;
+	}
+
+	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
+		chan->start_transfer = xilinx_dma_start_transfer;
+		chan->stop_transfer = xilinx_dma_stop_transfer;
+	} else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
+		chan->start_transfer = xilinx_cdma_start_transfer;
+		chan->stop_transfer = xilinx_cdma_stop_transfer;
+	} else {
+		chan->start_transfer = xilinx_vdma_start_transfer;
+		chan->stop_transfer = xilinx_dma_stop_transfer;
+	}
+
+	/* Initialize the tasklet */
+	tasklet_init(&chan->tasklet, xilinx_dma_do_tasklet,
+			(unsigned long)chan);
+
+	/*
+	 * Initialize the DMA channel and add it to the DMA engine channels
+	 * list.
+	 */
+	chan->common.device = &xdev->common;
+
+	list_add_tail(&chan->common.device_node, &xdev->common.channels);
+	xdev->chan[chan->id] = chan;
+
+	/* Reset the channel */
+	err = xilinx_dma_chan_reset(chan);
+	if (err < 0) {
+		dev_err(xdev->dev, "Reset channel failed\n");
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * xilinx_dma_child_probe - Per child node probe
+ * It gets the number of dma-channels per child node from the
+ * device tree and initializes all the channels.
+ *
+ * @xdev: Driver specific device structure
+ * @node: Device node
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_dma_child_probe(struct xilinx_dma_device *xdev,
+				    struct device_node *node)
+{
+	int ret, i, nr_channels = 1;
+
+	ret = of_property_read_u32(node, "dma-channels", &nr_channels);
+	if ((ret < 0) && xdev->mcdma)
+		dev_warn(xdev->dev, "missing dma-channels property\n");
+
+	for (i = 0; i < nr_channels; i++) {
+		ret = xilinx_dma_chan_probe(xdev, node, xdev->chan_id++);
+		if (ret)
+			return ret;
+	}
+
+	xdev->nr_channels += nr_channels;
+
+	return 0;
+}
+
+/**
+ * of_dma_xilinx_xlate - Translation function
+ * @dma_spec: Pointer to DMA specifier as found in the device tree
+ * @ofdma: Pointer to DMA controller data
+ *
+ * Return: DMA channel pointer on success and NULL on error
+ */
+static struct dma_chan *of_dma_xilinx_xlate(struct of_phandle_args *dma_spec,
+						struct of_dma *ofdma)
+{
+	struct xilinx_dma_device *xdev = ofdma->of_dma_data;
+	int chan_id = dma_spec->args[0];
+
+	if (chan_id >= xdev->nr_channels || !xdev->chan[chan_id])
+		return NULL;
+
+	return dma_get_slave_channel(&xdev->chan[chan_id]->common);
+}
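+
+/*
+ * Illustrative device-tree client usage (node names are hypothetical): a
+ * single cell selects the channel index passed in dma_spec->args[0] above:
+ *
+ *	dmas = <&axi_dma_0 0>, <&axi_dma_0 1>;
+ *	dma-names = "tx", "rx";
+ */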
+
+static const struct xilinx_dma_config axidma_config = {
+	.dmatype = XDMA_TYPE_AXIDMA,
+	.clk_init = axidma_clk_init,
+};
+
+static const struct xilinx_dma_config axicdma_config = {
+	.dmatype = XDMA_TYPE_CDMA,
+	.clk_init = axicdma_clk_init,
+};
+
+static const struct xilinx_dma_config axivdma_config = {
+	.dmatype = XDMA_TYPE_VDMA,
+	.clk_init = axivdma_clk_init,
+};
+
+static const struct of_device_id xilinx_dma_of_ids[] = {
+	{ .compatible = "xlnx,axi-dma-1.00.a", .data = &axidma_config },
+	{ .compatible = "xlnx,axi-cdma-1.00.a", .data = &axicdma_config },
+	{ .compatible = "xlnx,axi-vdma-1.00.a", .data = &axivdma_config },
+	{}
+};
+MODULE_DEVICE_TABLE(of, xilinx_dma_of_ids);
+
+/**
+ * xilinx_dma_probe - Driver probe function
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_dma_probe(struct platform_device *pdev)
+{
+	int (*clk_init)(struct platform_device *, struct clk **, struct clk **,
+			struct clk **, struct clk **, struct clk **)
+					= axivdma_clk_init;
+	struct device_node *node = pdev->dev.of_node;
+	struct xilinx_dma_device *xdev;
+	struct device_node *child, *np = pdev->dev.of_node;
+	struct resource *io;
+	u32 num_frames, addr_width = 32; /* default when DT omits addrwidth */
+	int i, err;
+
+	/* Allocate and initialize the DMA engine structure */
+	xdev = devm_kzalloc(&pdev->dev, sizeof(*xdev), GFP_KERNEL);
+	if (!xdev)
+		return -ENOMEM;
+
+	xdev->dev = &pdev->dev;
+	if (np) {
+		const struct of_device_id *match;
+
+		match = of_match_node(xilinx_dma_of_ids, np);
+		if (match && match->data) {
+			xdev->dma_config = match->data;
+			clk_init = xdev->dma_config->clk_init;
+		}
+	}
+
+	err = clk_init(pdev, &xdev->axi_clk, &xdev->tx_clk, &xdev->txs_clk,
+		       &xdev->rx_clk, &xdev->rxs_clk);
+	if (err)
+		return err;
+
+	/* Request and map I/O memory */
+	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	xdev->regs = devm_ioremap_resource(&pdev->dev, io);
+	if (IS_ERR(xdev->regs))
+		return PTR_ERR(xdev->regs);
+
+	/* Retrieve the DMA engine properties from the device tree */
+	xdev->has_sg = of_property_read_bool(node, "xlnx,include-sg");
+	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA)
+		xdev->mcdma = of_property_read_bool(node, "xlnx,mcdma");
+
+	if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
+		err = of_property_read_u32(node, "xlnx,num-fstores",
+					   &num_frames);
+		if (err < 0) {
+			dev_err(xdev->dev,
+				"missing xlnx,num-fstores property\n");
+			return err;
+		}
+
+		err = of_property_read_u32(node, "xlnx,flush-fsync",
+					   &xdev->flush_on_fsync);
+		if (err < 0)
+			dev_warn(xdev->dev,
+				 "missing xlnx,flush-fsync property\n");
+	}
+
+	err = of_property_read_u32(node, "xlnx,addrwidth", &addr_width);
+	if (err < 0)
+		dev_warn(xdev->dev, "missing xlnx,addrwidth property\n");
+
+	if (addr_width > 32)
+		xdev->ext_addr = true;
+	else
+		xdev->ext_addr = false;
+
+	/* Set the dma mask bits */
+	dma_set_mask(xdev->dev, DMA_BIT_MASK(addr_width));
+
+	/* Initialize the DMA engine */
+	xdev->common.dev = &pdev->dev;
+
+	INIT_LIST_HEAD(&xdev->common.channels);
+	if (!(xdev->dma_config->dmatype == XDMA_TYPE_CDMA)) {
+		dma_cap_set(DMA_SLAVE, xdev->common.cap_mask);
+		dma_cap_set(DMA_PRIVATE, xdev->common.cap_mask);
+	}
+
+	xdev->common.device_alloc_chan_resources =
+				xilinx_dma_alloc_chan_resources;
+	xdev->common.device_free_chan_resources =
+				xilinx_dma_free_chan_resources;
+	xdev->common.device_terminate_all = xilinx_dma_terminate_all;
+	xdev->common.device_tx_status = xilinx_dma_tx_status;
+	xdev->common.device_issue_pending = xilinx_dma_issue_pending;
+	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
+		dma_cap_set(DMA_CYCLIC, xdev->common.cap_mask);
+		xdev->common.device_prep_slave_sg = xilinx_dma_prep_slave_sg;
+		xdev->common.device_prep_dma_cyclic =
+					  xilinx_dma_prep_dma_cyclic;
+		xdev->common.device_prep_interleaved_dma =
+					xilinx_dma_prep_interleaved;
+		/* Residue calculation is supported only by AXI DMA */
+		xdev->common.residue_granularity =
+					  DMA_RESIDUE_GRANULARITY_SEGMENT;
+	} else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
+		dma_cap_set(DMA_MEMCPY, xdev->common.cap_mask);
+		xdev->common.device_prep_dma_memcpy = xilinx_cdma_prep_memcpy;
+	} else {
+		xdev->common.device_prep_interleaved_dma =
+				xilinx_vdma_dma_prep_interleaved;
+	}
+
+	platform_set_drvdata(pdev, xdev);
+
+	/* Initialize the channels */
+	for_each_child_of_node(node, child) {
+		err = xilinx_dma_child_probe(xdev, child);
+		if (err < 0)
+			goto disable_clks;
+	}
+
+	if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
+		for (i = 0; i < xdev->nr_channels; i++)
+			if (xdev->chan[i])
+				xdev->chan[i]->num_frms = num_frames;
+	}
+
+	/* Register the DMA engine with the core */
+	err = dma_async_device_register(&xdev->common);
+	if (err) {
+		dev_err(xdev->dev, "failed to register the dma device\n");
+		goto error;
+	}
+
+	err = of_dma_controller_register(node, of_dma_xilinx_xlate,
+					 xdev);
+	if (err < 0) {
+		dev_err(&pdev->dev, "Unable to register DMA to DT\n");
+		dma_async_device_unregister(&xdev->common);
+		goto error;
+	}
+
+	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA)
+		dev_info(&pdev->dev, "Xilinx AXI DMA Engine Driver Probed!!\n");
+	else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA)
+		dev_info(&pdev->dev, "Xilinx AXI CDMA Engine Driver Probed!!\n");
+	else
+		dev_info(&pdev->dev, "Xilinx AXI VDMA Engine Driver Probed!!\n");
+
+	return 0;
+
+disable_clks:
+	xdma_disable_allclks(xdev);
+error:
+	for (i = 0; i < xdev->nr_channels; i++)
+		if (xdev->chan[i])
+			xilinx_dma_chan_remove(xdev->chan[i]);
+
+	return err;
+}
+
+/**
+ * xilinx_dma_remove - Driver remove function
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Return: Always '0'
+ */
+static int xilinx_dma_remove(struct platform_device *pdev)
+{
+	struct xilinx_dma_device *xdev = platform_get_drvdata(pdev);
+	int i;
+
+	of_dma_controller_free(pdev->dev.of_node);
+
+	dma_async_device_unregister(&xdev->common);
+
+	for (i = 0; i < xdev->nr_channels; i++)
+		if (xdev->chan[i])
+			xilinx_dma_chan_remove(xdev->chan[i]);
+
+	xdma_disable_allclks(xdev);
+
+	return 0;
+}
+
+static struct platform_driver xilinx_vdma_driver = {
+	.driver = {
+		.name = "xilinx-vdma",
+		.of_match_table = xilinx_dma_of_ids,
+	},
+	.probe = xilinx_dma_probe,
+	.remove = xilinx_dma_remove,
+};
+
+module_platform_driver(xilinx_vdma_driver);
+
+MODULE_AUTHOR("Xilinx, Inc.");
+MODULE_DESCRIPTION("Xilinx VDMA driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c
new file mode 100644
index 0000000..c74a88b
--- /dev/null
+++ b/drivers/dma/xilinx/zynqmp_dma.c
@@ -0,0 +1,1155 @@
+/*
+ * DMA driver for Xilinx ZynqMP DMA Engine
+ *
+ * Copyright (C) 2016 Xilinx, Inc. All rights reserved.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/bitops.h>
+#include <linux/dmapool.h>
+#include <linux/dma/xilinx_dma.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_dma.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <linux/clk.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/pm_runtime.h>
+
+#include "../dmaengine.h"
+
+/* Register Offsets */
+#define ZYNQMP_DMA_ISR			0x100
+#define ZYNQMP_DMA_IMR			0x104
+#define ZYNQMP_DMA_IER			0x108
+#define ZYNQMP_DMA_IDS			0x10C
+#define ZYNQMP_DMA_CTRL0		0x110
+#define ZYNQMP_DMA_CTRL1		0x114
+#define ZYNQMP_DMA_DATA_ATTR		0x120
+#define ZYNQMP_DMA_DSCR_ATTR		0x124
+#define ZYNQMP_DMA_SRC_DSCR_WRD0	0x128
+#define ZYNQMP_DMA_SRC_DSCR_WRD1	0x12C
+#define ZYNQMP_DMA_SRC_DSCR_WRD2	0x130
+#define ZYNQMP_DMA_SRC_DSCR_WRD3	0x134
+#define ZYNQMP_DMA_DST_DSCR_WRD0	0x138
+#define ZYNQMP_DMA_DST_DSCR_WRD1	0x13C
+#define ZYNQMP_DMA_DST_DSCR_WRD2	0x140
+#define ZYNQMP_DMA_DST_DSCR_WRD3	0x144
+#define ZYNQMP_DMA_SRC_START_LSB	0x158
+#define ZYNQMP_DMA_SRC_START_MSB	0x15C
+#define ZYNQMP_DMA_DST_START_LSB	0x160
+#define ZYNQMP_DMA_DST_START_MSB	0x164
+#define ZYNQMP_DMA_TOTAL_BYTE		0x188
+#define ZYNQMP_DMA_RATE_CTRL		0x18C
+#define ZYNQMP_DMA_IRQ_SRC_ACCT		0x190
+#define ZYNQMP_DMA_IRQ_DST_ACCT		0x194
+#define ZYNQMP_DMA_CTRL2		0x200
+
+/* Interrupt registers bit field definitions */
+#define ZYNQMP_DMA_DONE			BIT(10)
+#define ZYNQMP_DMA_AXI_WR_DATA		BIT(9)
+#define ZYNQMP_DMA_AXI_RD_DATA		BIT(8)
+#define ZYNQMP_DMA_AXI_RD_DST_DSCR	BIT(7)
+#define ZYNQMP_DMA_AXI_RD_SRC_DSCR	BIT(6)
+#define ZYNQMP_DMA_IRQ_DST_ACCT_ERR	BIT(5)
+#define ZYNQMP_DMA_IRQ_SRC_ACCT_ERR	BIT(4)
+#define ZYNQMP_DMA_BYTE_CNT_OVRFL	BIT(3)
+#define ZYNQMP_DMA_DST_DSCR_DONE	BIT(2)
+#define ZYNQMP_DMA_INV_APB		BIT(0)
+
+/* Control 0 register bit field definitions */
+#define ZYNQMP_DMA_OVR_FETCH		BIT(7)
+#define ZYNQMP_DMA_POINT_TYPE_SG	BIT(6)
+#define ZYNQMP_DMA_RATE_CTRL_EN		BIT(3)
+
+/* Control 1 register bit field definitions */
+#define ZYNQMP_DMA_SRC_ISSUE		GENMASK(4, 0)
+
+/* Data Attribute register bit field definitions */
+#define ZYNQMP_DMA_ARBURST		GENMASK(27, 26)
+#define ZYNQMP_DMA_ARCACHE		GENMASK(25, 22)
+#define ZYNQMP_DMA_ARCACHE_OFST		22
+#define ZYNQMP_DMA_ARQOS		GENMASK(21, 18)
+#define ZYNQMP_DMA_ARQOS_OFST		18
+#define ZYNQMP_DMA_ARLEN		GENMASK(17, 14)
+#define ZYNQMP_DMA_ARLEN_OFST		14
+#define ZYNQMP_DMA_AWBURST		GENMASK(13, 12)
+#define ZYNQMP_DMA_AWCACHE		GENMASK(11, 8)
+#define ZYNQMP_DMA_AWCACHE_OFST		8
+#define ZYNQMP_DMA_AWQOS		GENMASK(7, 4)
+#define ZYNQMP_DMA_AWQOS_OFST		4
+#define ZYNQMP_DMA_AWLEN		GENMASK(3, 0)
+#define ZYNQMP_DMA_AWLEN_OFST		0
+
+/* Descriptor Attribute register bit field definitions */
+#define ZYNQMP_DMA_AXCOHRNT		BIT(8)
+#define ZYNQMP_DMA_AXCACHE		GENMASK(7, 4)
+#define ZYNQMP_DMA_AXCACHE_OFST		4
+#define ZYNQMP_DMA_AXQOS		GENMASK(3, 0)
+#define ZYNQMP_DMA_AXQOS_OFST		0
+
+/* Control register 2 bit field definitions */
+#define ZYNQMP_DMA_ENABLE		BIT(0)
+
+/* Buffer Descriptor definitions */
+#define ZYNQMP_DMA_DESC_CTRL_STOP	0x10
+#define ZYNQMP_DMA_DESC_CTRL_COMP_INT	0x4
+#define ZYNQMP_DMA_DESC_CTRL_SIZE_256	0x2
+#define ZYNQMP_DMA_DESC_CTRL_COHRNT	0x1
+
+/* Interrupt Mask specific definitions */
+#define ZYNQMP_DMA_INT_ERR	(ZYNQMP_DMA_AXI_RD_DATA | \
+				ZYNQMP_DMA_AXI_WR_DATA | \
+				ZYNQMP_DMA_AXI_RD_DST_DSCR | \
+				ZYNQMP_DMA_AXI_RD_SRC_DSCR | \
+				ZYNQMP_DMA_INV_APB)
+#define ZYNQMP_DMA_INT_OVRFL	(ZYNQMP_DMA_BYTE_CNT_OVRFL | \
+				ZYNQMP_DMA_IRQ_SRC_ACCT_ERR | \
+				ZYNQMP_DMA_IRQ_DST_ACCT_ERR)
+#define ZYNQMP_DMA_INT_DONE	(ZYNQMP_DMA_DONE | ZYNQMP_DMA_DST_DSCR_DONE)
+#define ZYNQMP_DMA_INT_EN_DEFAULT_MASK	(ZYNQMP_DMA_INT_DONE | \
+					ZYNQMP_DMA_INT_ERR | \
+					ZYNQMP_DMA_INT_OVRFL | \
+					ZYNQMP_DMA_DST_DSCR_DONE)
+
+/* Max number of descriptors per channel */
+#define ZYNQMP_DMA_NUM_DESCS	32
+
+/* Max transfer size per descriptor */
+#define ZYNQMP_DMA_MAX_TRANS_LEN	0x40000000
+
+/* Reset values for data attributes */
+#define ZYNQMP_DMA_AXCACHE_VAL		0xF
+#define ZYNQMP_DMA_ARLEN_RST_VAL	0xF
+#define ZYNQMP_DMA_AWLEN_RST_VAL	0xF
+
+#define ZYNQMP_DMA_SRC_ISSUE_RST_VAL	0x1F
+
+#define ZYNQMP_DMA_IDS_DEFAULT_MASK	0xFFF
+
+/* Bus width in bits */
+#define ZYNQMP_DMA_BUS_WIDTH_64		64
+#define ZYNQMP_DMA_BUS_WIDTH_128	128
+
+#define ZDMA_PM_TIMEOUT			100
+
+#define ZYNQMP_DMA_DESC_SIZE(chan)	(chan->desc_size)
+
+#define to_chan(chan)		container_of(chan, struct zynqmp_dma_chan, \
+					     common)
+#define tx_to_desc(tx)		container_of(tx, struct zynqmp_dma_desc_sw, \
+					     async_tx)
+
+/**
+ * struct zynqmp_dma_desc_ll - Hw linked list descriptor
+ * @addr: Buffer address
+ * @size: Size of the buffer
+ * @ctrl: Control word
+ * @nxtdscraddr: Next descriptor base address
+ * @rsvd: Reserved field for HW internal use.
+ */
+struct zynqmp_dma_desc_ll {
+	u64 addr;
+	u32 size;
+	u32 ctrl;
+	u64 nxtdscraddr;
+	u64 rsvd;
+} __aligned(64);
+
+/**
+ * struct zynqmp_dma_desc_sw - Per Transaction structure
+ * @src: Source address for simple mode dma
+ * @dst: Destination address for simple mode dma
+ * @len: Transfer length for simple mode dma
+ * @node: Node in the channel descriptor list
+ * @tx_list: List head for the current transfer
+ * @async_tx: Async transaction descriptor
+ * @src_v: Virtual address of the src descriptor
+ * @src_p: Physical address of the src descriptor
+ * @dst_v: Virtual address of the dst descriptor
+ * @dst_p: Physical address of the dst descriptor
+ */
+struct zynqmp_dma_desc_sw {
+	u64 src;
+	u64 dst;
+	u32 len;
+	struct list_head node;
+	struct list_head tx_list;
+	struct dma_async_tx_descriptor async_tx;
+	struct zynqmp_dma_desc_ll *src_v;
+	dma_addr_t src_p;
+	struct zynqmp_dma_desc_ll *dst_v;
+	dma_addr_t dst_p;
+};
+
+/**
+ * struct zynqmp_dma_chan - Driver specific DMA channel structure
+ * @zdev: Driver specific device structure
+ * @regs: Control registers base address
+ * @lock: Descriptor operation lock
+ * @pending_list: Descriptors waiting
+ * @free_list: Descriptors free
+ * @active_list: Descriptors active
+ * @sw_desc_pool: SW descriptor pool
+ * @done_list: Complete descriptors
+ * @common: DMA common channel
+ * @desc_pool_v: Statically allocated descriptor base
+ * @desc_pool_p: Physical allocated descriptor base
+ * @desc_free_cnt: Descriptor available count
+ * @dev: The dma device
+ * @irq: Channel IRQ
+ * @is_dmacoherent: Tells whether dma operations are coherent or not
+ * @tasklet: Cleanup work after irq
+ * @idle: Channel status
+ * @desc_size: Size of the low level descriptor
+ * @err: Channel has errors
+ * @bus_width: Bus width
+ * @src_burst_len: Source burst length
+ * @dst_burst_len: Dest burst length
+ */
+struct zynqmp_dma_chan {
+	struct zynqmp_dma_device *zdev;
+	void __iomem *regs;
+	spinlock_t lock;
+	struct list_head pending_list;
+	struct list_head free_list;
+	struct list_head active_list;
+	struct zynqmp_dma_desc_sw *sw_desc_pool;
+	struct list_head done_list;
+	struct dma_chan common;
+	void *desc_pool_v;
+	dma_addr_t desc_pool_p;
+	u32 desc_free_cnt;
+	struct device *dev;
+	int irq;
+	bool is_dmacoherent;
+	struct tasklet_struct tasklet;
+	bool idle;
+	u32 desc_size;
+	bool err;
+	u32 bus_width;
+	u32 src_burst_len;
+	u32 dst_burst_len;
+};
+
+/**
+ * struct zynqmp_dma_device - DMA device structure
+ * @dev: Device Structure
+ * @common: DMA device structure
+ * @chan: Driver specific DMA channel
+ * @clk_main: Pointer to main clock
+ * @clk_apb: Pointer to apb clock
+ */
+struct zynqmp_dma_device {
+	struct device *dev;
+	struct dma_device common;
+	struct zynqmp_dma_chan *chan;
+	struct clk *clk_main;
+	struct clk *clk_apb;
+};
+
+static inline void zynqmp_dma_writeq(struct zynqmp_dma_chan *chan, u32 reg,
+				     u64 value)
+{
+	lo_hi_writeq(value, chan->regs + reg);
+}
+
+/**
+ * zynqmp_dma_update_desc_to_ctrlr - Updates descriptor to the controller
+ * @chan: ZynqMP DMA channel pointer
+ * @desc: Transaction descriptor pointer
+ */
+static void zynqmp_dma_update_desc_to_ctrlr(struct zynqmp_dma_chan *chan,
+				      struct zynqmp_dma_desc_sw *desc)
+{
+	dma_addr_t addr;
+
+	addr = desc->src_p;
+	zynqmp_dma_writeq(chan, ZYNQMP_DMA_SRC_START_LSB, addr);
+	addr = desc->dst_p;
+	zynqmp_dma_writeq(chan, ZYNQMP_DMA_DST_START_LSB, addr);
+}
+
+/**
+ * zynqmp_dma_desc_config_eod - Mark the descriptor as end descriptor
+ * @chan: ZynqMP DMA channel pointer
+ * @desc: Hw descriptor pointer
+ */
+static void zynqmp_dma_desc_config_eod(struct zynqmp_dma_chan *chan,
+				       void *desc)
+{
+	struct zynqmp_dma_desc_ll *hw = (struct zynqmp_dma_desc_ll *)desc;
+
+	hw->ctrl |= ZYNQMP_DMA_DESC_CTRL_STOP;
+	hw++;
+	hw->ctrl |= ZYNQMP_DMA_DESC_CTRL_COMP_INT | ZYNQMP_DMA_DESC_CTRL_STOP;
+}
+
+/**
+ * zynqmp_dma_config_sg_ll_desc - Configure the linked list descriptor
+ * @chan: ZynqMP DMA channel pointer
+ * @sdesc: Hw descriptor pointer
+ * @src: Source buffer address
+ * @dst: Destination buffer address
+ * @len: Transfer length
+ * @prev: Previous hw descriptor pointer
+ */
+static void zynqmp_dma_config_sg_ll_desc(struct zynqmp_dma_chan *chan,
+				   struct zynqmp_dma_desc_ll *sdesc,
+				   dma_addr_t src, dma_addr_t dst, size_t len,
+				   struct zynqmp_dma_desc_ll *prev)
+{
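+	/*
+	 * HW descriptors are laid out in src/dst pairs: the destination
+	 * descriptor immediately follows its source descriptor in the
+	 * coherent pool, which is what the "sdesc + 1" arithmetic relies on.
+	 */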
+	struct zynqmp_dma_desc_ll *ddesc = sdesc + 1;
+
+	sdesc->size = ddesc->size = len;
+	sdesc->addr = src;
+	ddesc->addr = dst;
+
+	sdesc->ctrl = ddesc->ctrl = ZYNQMP_DMA_DESC_CTRL_SIZE_256;
+	if (chan->is_dmacoherent) {
+		sdesc->ctrl |= ZYNQMP_DMA_DESC_CTRL_COHRNT;
+		ddesc->ctrl |= ZYNQMP_DMA_DESC_CTRL_COHRNT;
+	}
+
+	if (prev) {
+		dma_addr_t addr = chan->desc_pool_p +
+			    ((uintptr_t)sdesc - (uintptr_t)chan->desc_pool_v);
+		ddesc = prev + 1;
+		prev->nxtdscraddr = addr;
+		ddesc->nxtdscraddr = addr + ZYNQMP_DMA_DESC_SIZE(chan);
+	}
+}
+
+/**
+ * zynqmp_dma_init - Initialize the channel
+ * @chan: ZynqMP DMA channel pointer
+ */
+static void zynqmp_dma_init(struct zynqmp_dma_chan *chan)
+{
+	u32 val;
+
+	writel(ZYNQMP_DMA_IDS_DEFAULT_MASK, chan->regs + ZYNQMP_DMA_IDS);
+	val = readl(chan->regs + ZYNQMP_DMA_ISR);
+	writel(val, chan->regs + ZYNQMP_DMA_ISR);
+
+	if (chan->is_dmacoherent) {
+		val = ZYNQMP_DMA_AXCOHRNT;
+		val = (val & ~ZYNQMP_DMA_AXCACHE) |
+			(ZYNQMP_DMA_AXCACHE_VAL << ZYNQMP_DMA_AXCACHE_OFST);
+		writel(val, chan->regs + ZYNQMP_DMA_DSCR_ATTR);
+	}
+
+	val = readl(chan->regs + ZYNQMP_DMA_DATA_ATTR);
+	if (chan->is_dmacoherent) {
+		val = (val & ~ZYNQMP_DMA_ARCACHE) |
+			(ZYNQMP_DMA_AXCACHE_VAL << ZYNQMP_DMA_ARCACHE_OFST);
+		val = (val & ~ZYNQMP_DMA_AWCACHE) |
+			(ZYNQMP_DMA_AXCACHE_VAL << ZYNQMP_DMA_AWCACHE_OFST);
+	}
+	writel(val, chan->regs + ZYNQMP_DMA_DATA_ATTR);
+
+	/* Clear the interrupt accounting registers */
+	val = readl(chan->regs + ZYNQMP_DMA_IRQ_SRC_ACCT);
+	val = readl(chan->regs + ZYNQMP_DMA_IRQ_DST_ACCT);
+
+	chan->idle = true;
+}
+
+/**
+ * zynqmp_dma_tx_submit - Submit DMA transaction
+ * @tx: Async transaction descriptor pointer
+ *
+ * Return: cookie value
+ */
+static dma_cookie_t zynqmp_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct zynqmp_dma_chan *chan = to_chan(tx->chan);
+	struct zynqmp_dma_desc_sw *desc, *new;
+	dma_cookie_t cookie;
+
+	new = tx_to_desc(tx);
+	spin_lock_bh(&chan->lock);
+	cookie = dma_cookie_assign(tx);
+
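+	/*
+	 * Chain the new transaction onto the tail of the pending list by
+	 * patching the last descriptor's next-descriptor pointers and
+	 * clearing its STOP bit, so the hardware walks straight into it.
+	 */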
+	if (!list_empty(&chan->pending_list)) {
+		desc = list_last_entry(&chan->pending_list,
+				     struct zynqmp_dma_desc_sw, node);
+		if (!list_empty(&desc->tx_list))
+			desc = list_last_entry(&desc->tx_list,
+					       struct zynqmp_dma_desc_sw, node);
+		desc->src_v->nxtdscraddr = new->src_p;
+		desc->src_v->ctrl &= ~ZYNQMP_DMA_DESC_CTRL_STOP;
+		desc->dst_v->nxtdscraddr = new->dst_p;
+		desc->dst_v->ctrl &= ~ZYNQMP_DMA_DESC_CTRL_STOP;
+	}
+
+	list_add_tail(&new->node, &chan->pending_list);
+	spin_unlock_bh(&chan->lock);
+
+	return cookie;
+}
+
+/**
+ * zynqmp_dma_get_descriptor - Get the sw descriptor from the pool
+ * @chan: ZynqMP DMA channel pointer
+ *
+ * Return: The sw descriptor
+ */
+static struct zynqmp_dma_desc_sw *
+zynqmp_dma_get_descriptor(struct zynqmp_dma_chan *chan)
+{
+	struct zynqmp_dma_desc_sw *desc;
+
+	spin_lock_bh(&chan->lock);
+	desc = list_first_entry(&chan->free_list,
+				struct zynqmp_dma_desc_sw, node);
+	list_del(&desc->node);
+	spin_unlock_bh(&chan->lock);
+
+	INIT_LIST_HEAD(&desc->tx_list);
+	/* Clear the src and dst descriptor memory */
+	memset((void *)desc->src_v, 0, ZYNQMP_DMA_DESC_SIZE(chan));
+	memset((void *)desc->dst_v, 0, ZYNQMP_DMA_DESC_SIZE(chan));
+
+	return desc;
+}
+
+/**
+ * zynqmp_dma_free_descriptor - Release the descriptor back to the free list
+ * @chan: ZynqMP DMA channel pointer
+ * @sdesc: Transaction descriptor pointer
+ */
+static void zynqmp_dma_free_descriptor(struct zynqmp_dma_chan *chan,
+				 struct zynqmp_dma_desc_sw *sdesc)
+{
+	struct zynqmp_dma_desc_sw *child, *next;
+
+	chan->desc_free_cnt++;
+	list_add_tail(&sdesc->node, &chan->free_list);
+	list_for_each_entry_safe(child, next, &sdesc->tx_list, node) {
+		chan->desc_free_cnt++;
+		list_move_tail(&child->node, &chan->free_list);
+	}
+}
+
+/**
+ * zynqmp_dma_free_desc_list - Free descriptors list
+ * @chan: ZynqMP DMA channel pointer
+ * @list: List to parse and delete the descriptor
+ */
+static void zynqmp_dma_free_desc_list(struct zynqmp_dma_chan *chan,
+				      struct list_head *list)
+{
+	struct zynqmp_dma_desc_sw *desc, *next;
+
+	list_for_each_entry_safe(desc, next, list, node)
+		zynqmp_dma_free_descriptor(chan, desc);
+}
+
+/**
+ * zynqmp_dma_alloc_chan_resources - Allocate channel resources
+ * @dchan: DMA channel
+ *
+ * Return: Number of descriptors on success and failure value on error
+ */
+static int zynqmp_dma_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct zynqmp_dma_chan *chan = to_chan(dchan);
+	struct zynqmp_dma_desc_sw *desc;
+	int i, ret;
+
+	ret = pm_runtime_get_sync(chan->dev);
+	if (ret < 0)
+		return ret;
+
+	chan->sw_desc_pool = kcalloc(ZYNQMP_DMA_NUM_DESCS, sizeof(*desc),
+				     GFP_KERNEL);
+	if (!chan->sw_desc_pool)
+		return -ENOMEM;
+
+	chan->idle = true;
+	chan->desc_free_cnt = ZYNQMP_DMA_NUM_DESCS;
+
+	INIT_LIST_HEAD(&chan->free_list);
+
+	for (i = 0; i < ZYNQMP_DMA_NUM_DESCS; i++) {
+		desc = chan->sw_desc_pool + i;
+		dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+		desc->async_tx.tx_submit = zynqmp_dma_tx_submit;
+		list_add_tail(&desc->node, &chan->free_list);
+	}
+
+	chan->desc_pool_v = dma_zalloc_coherent(chan->dev,
+				(2 * chan->desc_size * ZYNQMP_DMA_NUM_DESCS),
+				&chan->desc_pool_p, GFP_KERNEL);
+	if (!chan->desc_pool_v)
+		return -ENOMEM;
+
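+	/*
+	 * Carve the coherent pool into per-transaction pairs of HW
+	 * descriptors: SW descriptor i owns the source descriptor at
+	 * offset 2 * i and the destination descriptor right after it.
+	 */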
+	for (i = 0; i < ZYNQMP_DMA_NUM_DESCS; i++) {
+		desc = chan->sw_desc_pool + i;
+		desc->src_v = (struct zynqmp_dma_desc_ll *) (chan->desc_pool_v +
+					(i * ZYNQMP_DMA_DESC_SIZE(chan) * 2));
+		desc->dst_v = (struct zynqmp_dma_desc_ll *) (desc->src_v + 1);
+		desc->src_p = chan->desc_pool_p +
+				(i * ZYNQMP_DMA_DESC_SIZE(chan) * 2);
+		desc->dst_p = desc->src_p + ZYNQMP_DMA_DESC_SIZE(chan);
+	}
+
+	return ZYNQMP_DMA_NUM_DESCS;
+}
+
+/**
+ * zynqmp_dma_start - Start DMA channel
+ * @chan: ZynqMP DMA channel pointer
+ */
+static void zynqmp_dma_start(struct zynqmp_dma_chan *chan)
+{
+	writel(ZYNQMP_DMA_INT_EN_DEFAULT_MASK, chan->regs + ZYNQMP_DMA_IER);
+	writel(0, chan->regs + ZYNQMP_DMA_TOTAL_BYTE);
+	chan->idle = false;
+	writel(ZYNQMP_DMA_ENABLE, chan->regs + ZYNQMP_DMA_CTRL2);
+}
+
+/**
+ * zynqmp_dma_handle_ovfl_int - Process the overflow interrupt
+ * @chan: ZynqMP DMA channel pointer
+ * @status: Interrupt status value
+ */
+static void zynqmp_dma_handle_ovfl_int(struct zynqmp_dma_chan *chan, u32 status)
+{
+	if (status & ZYNQMP_DMA_BYTE_CNT_OVRFL)
+		writel(0, chan->regs + ZYNQMP_DMA_TOTAL_BYTE);
+	if (status & ZYNQMP_DMA_IRQ_DST_ACCT_ERR)
+		readl(chan->regs + ZYNQMP_DMA_IRQ_DST_ACCT);
+	if (status & ZYNQMP_DMA_IRQ_SRC_ACCT_ERR)
+		readl(chan->regs + ZYNQMP_DMA_IRQ_SRC_ACCT);
+}
+
+static void zynqmp_dma_config(struct zynqmp_dma_chan *chan)
+{
+	u32 val;
+
+	val = readl(chan->regs + ZYNQMP_DMA_CTRL0);
+	val |= ZYNQMP_DMA_POINT_TYPE_SG;
+	writel(val, chan->regs + ZYNQMP_DMA_CTRL0);
+
+	val = readl(chan->regs + ZYNQMP_DMA_DATA_ATTR);
+	val = (val & ~ZYNQMP_DMA_ARLEN) |
+		(chan->src_burst_len << ZYNQMP_DMA_ARLEN_OFST);
+	val = (val & ~ZYNQMP_DMA_AWLEN) |
+		(chan->dst_burst_len << ZYNQMP_DMA_AWLEN_OFST);
+	writel(val, chan->regs + ZYNQMP_DMA_DATA_ATTR);
+}
+
+/**
+ * zynqmp_dma_device_config - Zynqmp dma device configuration
+ * @dchan: DMA channel
+ * @config: DMA device config
+ *
+ * Return: 0 always
+ */
+static int zynqmp_dma_device_config(struct dma_chan *dchan,
+				    struct dma_slave_config *config)
+{
+	struct zynqmp_dma_chan *chan = to_chan(dchan);
+
+	chan->src_burst_len = config->src_maxburst;
+	chan->dst_burst_len = config->dst_maxburst;
+
+	return 0;
+}
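+
+/*
+ * Note: the burst lengths cached here are written into the ARLEN/AWLEN
+ * fields of ZYNQMP_DMA_DATA_ATTR by zynqmp_dma_config() just before each
+ * transfer is started.
+ */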
+
+/**
+ * zynqmp_dma_start_transfer - Initiate the new transfer
+ * @chan: ZynqMP DMA channel pointer
+ */
+static void zynqmp_dma_start_transfer(struct zynqmp_dma_chan *chan)
+{
+	struct zynqmp_dma_desc_sw *desc;
+
+	if (!chan->idle)
+		return;
+
+	zynqmp_dma_config(chan);
+
+	desc = list_first_entry_or_null(&chan->pending_list,
+					struct zynqmp_dma_desc_sw, node);
+	if (!desc)
+		return;
+
+	list_splice_tail_init(&chan->pending_list, &chan->active_list);
+	zynqmp_dma_update_desc_to_ctrlr(chan, desc);
+	zynqmp_dma_start(chan);
+}
+
+/**
+ * zynqmp_dma_chan_desc_cleanup - Cleanup the completed descriptors
+ * @chan: ZynqMP DMA channel
+ */
+static void zynqmp_dma_chan_desc_cleanup(struct zynqmp_dma_chan *chan)
+{
+	struct zynqmp_dma_desc_sw *desc, *next;
+
+	list_for_each_entry_safe(desc, next, &chan->done_list, node) {
+		dma_async_tx_callback callback;
+		void *callback_param;
+
+		list_del(&desc->node);
+
+		callback = desc->async_tx.callback;
+		callback_param = desc->async_tx.callback_param;
+		if (callback) {
+			spin_unlock(&chan->lock);
+			callback(callback_param);
+			spin_lock(&chan->lock);
+		}
+
+		/* Run any dependencies, then free the descriptor */
+		zynqmp_dma_free_descriptor(chan, desc);
+	}
+}
+
+/**
+ * zynqmp_dma_complete_descriptor - Mark the active descriptor as complete
+ * @chan: ZynqMP DMA channel pointer
+ */
+static void zynqmp_dma_complete_descriptor(struct zynqmp_dma_chan *chan)
+{
+	struct zynqmp_dma_desc_sw *desc;
+
+	desc = list_first_entry_or_null(&chan->active_list,
+					struct zynqmp_dma_desc_sw, node);
+	if (!desc)
+		return;
+	list_del(&desc->node);
+	dma_cookie_complete(&desc->async_tx);
+	list_add_tail(&desc->node, &chan->done_list);
+}
+
+/**
+ * zynqmp_dma_issue_pending - Issue pending transactions
+ * @dchan: DMA channel pointer
+ */
+static void zynqmp_dma_issue_pending(struct dma_chan *dchan)
+{
+	struct zynqmp_dma_chan *chan = to_chan(dchan);
+
+	spin_lock_bh(&chan->lock);
+	zynqmp_dma_start_transfer(chan);
+	spin_unlock_bh(&chan->lock);
+}
+
+/**
+ * zynqmp_dma_free_descriptors - Free channel descriptors
+ * @chan: ZynqMP DMA channel pointer
+ */
+static void zynqmp_dma_free_descriptors(struct zynqmp_dma_chan *chan)
+{
+	zynqmp_dma_free_desc_list(chan, &chan->active_list);
+	zynqmp_dma_free_desc_list(chan, &chan->pending_list);
+	zynqmp_dma_free_desc_list(chan, &chan->done_list);
+}
+
+/**
+ * zynqmp_dma_free_chan_resources - Free channel resources
+ * @dchan: DMA channel pointer
+ */
+static void zynqmp_dma_free_chan_resources(struct dma_chan *dchan)
+{
+	struct zynqmp_dma_chan *chan = to_chan(dchan);
+
+	spin_lock_bh(&chan->lock);
+	zynqmp_dma_free_descriptors(chan);
+	spin_unlock_bh(&chan->lock);
+	dma_free_coherent(chan->dev,
+		(2 * ZYNQMP_DMA_DESC_SIZE(chan) * ZYNQMP_DMA_NUM_DESCS),
+		chan->desc_pool_v, chan->desc_pool_p);
+	kfree(chan->sw_desc_pool);
+	pm_runtime_mark_last_busy(chan->dev);
+	pm_runtime_put_autosuspend(chan->dev);
+}
+
+/**
+ * zynqmp_dma_reset - Reset the channel
+ * @chan: ZynqMP DMA channel pointer
+ */
+static void zynqmp_dma_reset(struct zynqmp_dma_chan *chan)
+{
+	writel(ZYNQMP_DMA_IDS_DEFAULT_MASK, chan->regs + ZYNQMP_DMA_IDS);
+
+	zynqmp_dma_complete_descriptor(chan);
+	zynqmp_dma_chan_desc_cleanup(chan);
+	zynqmp_dma_free_descriptors(chan);
+	zynqmp_dma_init(chan);
+}
+
+/**
+ * zynqmp_dma_irq_handler - ZynqMP DMA Interrupt handler
+ * @irq: IRQ number
+ * @data: Pointer to the ZynqMP DMA channel structure
+ *
+ * Return: IRQ_HANDLED/IRQ_NONE
+ */
+static irqreturn_t zynqmp_dma_irq_handler(int irq, void *data)
+{
+	struct zynqmp_dma_chan *chan = (struct zynqmp_dma_chan *)data;
+	u32 isr, imr, status;
+	irqreturn_t ret = IRQ_NONE;
+
+	isr = readl(chan->regs + ZYNQMP_DMA_ISR);
+	imr = readl(chan->regs + ZYNQMP_DMA_IMR);
+	status = isr & ~imr;
+
+	writel(isr, chan->regs + ZYNQMP_DMA_ISR);
+	if (status & ZYNQMP_DMA_INT_DONE) {
+		tasklet_schedule(&chan->tasklet);
+		ret = IRQ_HANDLED;
+	}
+
+	if (status & ZYNQMP_DMA_DONE)
+		chan->idle = true;
+
+	if (status & ZYNQMP_DMA_INT_ERR) {
+		chan->err = true;
+		tasklet_schedule(&chan->tasklet);
+		dev_err(chan->dev, "Channel %p has errors\n", chan);
+		ret = IRQ_HANDLED;
+	}
+
+	if (status & ZYNQMP_DMA_INT_OVRFL) {
+		zynqmp_dma_handle_ovfl_int(chan, status);
+		dev_dbg(chan->dev, "Channel %p overflow interrupt\n", chan);
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
+
+/**
+ * zynqmp_dma_do_tasklet - Completion tasklet handler
+ * @data: Pointer to the ZynqMP DMA channel structure
+ */
+static void zynqmp_dma_do_tasklet(unsigned long data)
+{
+	struct zynqmp_dma_chan *chan = (struct zynqmp_dma_chan *)data;
+	u32 count;
+
+	spin_lock(&chan->lock);
+
+	if (chan->err) {
+		zynqmp_dma_reset(chan);
+		chan->err = false;
+		goto unlock;
+	}
+
+	count = readl(chan->regs + ZYNQMP_DMA_IRQ_DST_ACCT);
+
+	while (count) {
+		zynqmp_dma_complete_descriptor(chan);
+		zynqmp_dma_chan_desc_cleanup(chan);
+		count--;
+	}
+
+	if (chan->idle)
+		zynqmp_dma_start_transfer(chan);
+
+unlock:
+	spin_unlock(&chan->lock);
+}
+
+/**
+ * zynqmp_dma_device_terminate_all - Aborts all transfers on a channel
+ * @dchan: DMA channel pointer
+ *
+ * Return: Always '0'
+ */
+static int zynqmp_dma_device_terminate_all(struct dma_chan *dchan)
+{
+	struct zynqmp_dma_chan *chan = to_chan(dchan);
+
+	spin_lock_bh(&chan->lock);
+	writel(ZYNQMP_DMA_IDS_DEFAULT_MASK, chan->regs + ZYNQMP_DMA_IDS);
+	zynqmp_dma_free_descriptors(chan);
+	spin_unlock_bh(&chan->lock);
+
+	return 0;
+}
+
+/**
+ * zynqmp_dma_prep_memcpy - prepare descriptors for memcpy transaction
+ * @dchan: DMA channel
+ * @dma_dst: Destination buffer address
+ * @dma_src: Source buffer address
+ * @len: Transfer length
+ * @flags: transfer ack flags
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *zynqmp_dma_prep_memcpy(
+				struct dma_chan *dchan, dma_addr_t dma_dst,
+				dma_addr_t dma_src, size_t len, ulong flags)
+{
+	struct zynqmp_dma_chan *chan;
+	struct zynqmp_dma_desc_sw *new, *first = NULL;
+	void *desc = NULL, *prev = NULL;
+	size_t copy;
+	u32 desc_cnt;
+
+	chan = to_chan(dchan);
+
+	desc_cnt = DIV_ROUND_UP(len, ZYNQMP_DMA_MAX_TRANS_LEN);
+
+	spin_lock_bh(&chan->lock);
+	if (desc_cnt > chan->desc_free_cnt) {
+		spin_unlock_bh(&chan->lock);
+		dev_dbg(chan->dev, "chan %p descs are not available\n", chan);
+		return NULL;
+	}
+	chan->desc_free_cnt = chan->desc_free_cnt - desc_cnt;
+	spin_unlock_bh(&chan->lock);
+
+	do {
+		/* Allocate and populate the descriptor */
+		new = zynqmp_dma_get_descriptor(chan);
+
+		copy = min_t(size_t, len, ZYNQMP_DMA_MAX_TRANS_LEN);
+		desc = (struct zynqmp_dma_desc_ll *)new->src_v;
+		zynqmp_dma_config_sg_ll_desc(chan, desc, dma_src,
+					     dma_dst, copy, prev);
+		prev = desc;
+		len -= copy;
+		dma_src += copy;
+		dma_dst += copy;
+		if (!first)
+			first = new;
+		else
+			list_add_tail(&new->node, &first->tx_list);
+	} while (len);
+
+	zynqmp_dma_desc_config_eod(chan, desc);
+	async_tx_ack(&first->async_tx);
+	first->async_tx.flags = flags;
+	return &first->async_tx;
+}
+
+/**
+ * zynqmp_dma_chan_remove - Channel remove function
+ * @chan: ZynqMP DMA channel pointer
+ */
+static void zynqmp_dma_chan_remove(struct zynqmp_dma_chan *chan)
+{
+	if (!chan)
+		return;
+
+	if (chan->irq)
+		devm_free_irq(chan->zdev->dev, chan->irq, chan);
+	tasklet_kill(&chan->tasklet);
+	list_del(&chan->common.device_node);
+}
+
+/**
+ * zynqmp_dma_chan_probe - Per Channel Probing
+ * @zdev: Driver specific device structure
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int zynqmp_dma_chan_probe(struct zynqmp_dma_device *zdev,
+			   struct platform_device *pdev)
+{
+	struct zynqmp_dma_chan *chan;
+	struct resource *res;
+	struct device_node *node = pdev->dev.of_node;
+	int err;
+
+	chan = devm_kzalloc(zdev->dev, sizeof(*chan), GFP_KERNEL);
+	if (!chan)
+		return -ENOMEM;
+	chan->dev = zdev->dev;
+	chan->zdev = zdev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	chan->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(chan->regs))
+		return PTR_ERR(chan->regs);
+
+	chan->bus_width = ZYNQMP_DMA_BUS_WIDTH_64;
+	chan->dst_burst_len = ZYNQMP_DMA_AWLEN_RST_VAL;
+	chan->src_burst_len = ZYNQMP_DMA_ARLEN_RST_VAL;
+	err = of_property_read_u32(node, "xlnx,bus-width", &chan->bus_width);
+	if (err < 0) {
+		dev_err(&pdev->dev, "missing xlnx,bus-width property\n");
+		return err;
+	}
+
+	if (chan->bus_width != ZYNQMP_DMA_BUS_WIDTH_64 &&
+	    chan->bus_width != ZYNQMP_DMA_BUS_WIDTH_128) {
+		dev_err(zdev->dev, "invalid bus-width value\n");
+		return -EINVAL;
+	}
+
+	chan->is_dmacoherent =  of_property_read_bool(node, "dma-coherent");
+	zdev->chan = chan;
+	tasklet_init(&chan->tasklet, zynqmp_dma_do_tasklet, (ulong)chan);
+	spin_lock_init(&chan->lock);
+	INIT_LIST_HEAD(&chan->active_list);
+	INIT_LIST_HEAD(&chan->pending_list);
+	INIT_LIST_HEAD(&chan->done_list);
+	INIT_LIST_HEAD(&chan->free_list);
+
+	dma_cookie_init(&chan->common);
+	chan->common.device = &zdev->common;
+	list_add_tail(&chan->common.device_node, &zdev->common.channels);
+
+	zynqmp_dma_init(chan);
+	chan->irq = platform_get_irq(pdev, 0);
+	if (chan->irq < 0)
+		return -ENXIO;
+	err = devm_request_irq(&pdev->dev, chan->irq, zynqmp_dma_irq_handler, 0,
+			       "zynqmp-dma", chan);
+	if (err)
+		return err;
+
+	chan->desc_size = sizeof(struct zynqmp_dma_desc_ll);
+	chan->idle = true;
+	return 0;
+}
+
+/**
+ * of_zynqmp_dma_xlate - Translation function
+ * @dma_spec: Pointer to DMA specifier as found in the device tree
+ * @ofdma: Pointer to DMA controller data
+ *
+ * Return: DMA channel pointer on success and NULL on error
+ */
+static struct dma_chan *of_zynqmp_dma_xlate(struct of_phandle_args *dma_spec,
+					    struct of_dma *ofdma)
+{
+	struct zynqmp_dma_device *zdev = ofdma->of_dma_data;
+
+	return dma_get_slave_channel(&zdev->chan->common);
+}
+
+/**
+ * zynqmp_dma_suspend - Suspend method for the driver
+ * @dev:	Address of the device structure
+ *
+ * Put the driver into low power mode.
+ * Return: 0 on success and failure value on error
+ */
+static int __maybe_unused zynqmp_dma_suspend(struct device *dev)
+{
+	if (!device_may_wakeup(dev))
+		return pm_runtime_force_suspend(dev);
+
+	return 0;
+}
+
+/**
+ * zynqmp_dma_resume - Resume from suspend
+ * @dev:	Address of the device structure
+ *
+ * Resume operation after suspend.
+ * Return: 0 on success and failure value on error
+ */
+static int __maybe_unused zynqmp_dma_resume(struct device *dev)
+{
+	if (!device_may_wakeup(dev))
+		return pm_runtime_force_resume(dev);
+
+	return 0;
+}
+
+/**
+ * zynqmp_dma_runtime_suspend - Runtime suspend method for the driver
+ * @dev:	Address of the device structure
+ *
+ * Put the driver into low power mode.
+ * Return: 0 always
+ */
+static int __maybe_unused zynqmp_dma_runtime_suspend(struct device *dev)
+{
+	struct zynqmp_dma_device *zdev = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(zdev->clk_main);
+	clk_disable_unprepare(zdev->clk_apb);
+
+	return 0;
+}
+
+/**
+ * zynqmp_dma_runtime_resume - Runtime resume method for the driver
+ * @dev:	Address of the device structure
+ *
+ * Bring the device out of low power mode by re-enabling its clocks.
+ * Return: 0 always
+ */
+static int __maybe_unused zynqmp_dma_runtime_resume(struct device *dev)
+{
+	struct zynqmp_dma_device *zdev = dev_get_drvdata(dev);
+	int err;
+
+	err = clk_prepare_enable(zdev->clk_main);
+	if (err) {
+		dev_err(dev, "Unable to enable main clock.\n");
+		return err;
+	}
+
+	err = clk_prepare_enable(zdev->clk_apb);
+	if (err) {
+		dev_err(dev, "Unable to enable apb clock.\n");
+		clk_disable_unprepare(zdev->clk_main);
+		return err;
+	}
+
+	return 0;
+}
+
+static const struct dev_pm_ops zynqmp_dma_dev_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(zynqmp_dma_suspend, zynqmp_dma_resume)
+	SET_RUNTIME_PM_OPS(zynqmp_dma_runtime_suspend,
+			   zynqmp_dma_runtime_resume, NULL)
+};
+
+/**
+ * zynqmp_dma_probe - Driver probe function
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int zynqmp_dma_probe(struct platform_device *pdev)
+{
+	struct zynqmp_dma_device *zdev;
+	struct dma_device *p;
+	int ret;
+
+	zdev = devm_kzalloc(&pdev->dev, sizeof(*zdev), GFP_KERNEL);
+	if (!zdev)
+		return -ENOMEM;
+
+	zdev->dev = &pdev->dev;
+	INIT_LIST_HEAD(&zdev->common.channels);
+
+	dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
+	dma_cap_set(DMA_MEMCPY, zdev->common.cap_mask);
+
+	p = &zdev->common;
+	p->device_prep_dma_memcpy = zynqmp_dma_prep_memcpy;
+	p->device_terminate_all = zynqmp_dma_device_terminate_all;
+	p->device_issue_pending = zynqmp_dma_issue_pending;
+	p->device_alloc_chan_resources = zynqmp_dma_alloc_chan_resources;
+	p->device_free_chan_resources = zynqmp_dma_free_chan_resources;
+	p->device_tx_status = dma_cookie_status;
+	p->device_config = zynqmp_dma_device_config;
+	p->dev = &pdev->dev;
+
+	zdev->clk_main = devm_clk_get(&pdev->dev, "clk_main");
+	if (IS_ERR(zdev->clk_main)) {
+		dev_err(&pdev->dev, "main clock not found.\n");
+		return PTR_ERR(zdev->clk_main);
+	}
+
+	zdev->clk_apb = devm_clk_get(&pdev->dev, "clk_apb");
+	if (IS_ERR(zdev->clk_apb)) {
+		dev_err(&pdev->dev, "apb clock not found.\n");
+		return PTR_ERR(zdev->clk_apb);
+	}
+
+	platform_set_drvdata(pdev, zdev);
+	pm_runtime_set_autosuspend_delay(zdev->dev, ZDMA_PM_TIMEOUT);
+	pm_runtime_use_autosuspend(zdev->dev);
+	pm_runtime_enable(zdev->dev);
+	pm_runtime_get_sync(zdev->dev);
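+	/*
+	 * If runtime PM is disabled, pm_runtime_get_sync() will not invoke
+	 * the runtime resume handler, so enable the clocks here by hand.
+	 */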
+	if (!pm_runtime_enabled(zdev->dev)) {
+		ret = zynqmp_dma_runtime_resume(zdev->dev);
+		if (ret)
+			return ret;
+	}
+
+	ret = zynqmp_dma_chan_probe(zdev, pdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Probing channel failed\n");
+		goto err_disable_pm;
+	}
+
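+	/* Advertise only the channel's configured bus width (bits -> bytes) */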
+	p->dst_addr_widths = BIT(zdev->chan->bus_width / 8);
+	p->src_addr_widths = BIT(zdev->chan->bus_width / 8);
+
+	dma_async_device_register(&zdev->common);
+
+	ret = of_dma_controller_register(pdev->dev.of_node,
+					 of_zynqmp_dma_xlate, zdev);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to register DMA to DT\n");
+		dma_async_device_unregister(&zdev->common);
+		goto free_chan_resources;
+	}
+
+	pm_runtime_mark_last_busy(zdev->dev);
+	pm_runtime_put_sync_autosuspend(zdev->dev);
+
+	dev_info(&pdev->dev, "ZynqMP DMA driver probed successfully\n");
+
+	return 0;
+
+free_chan_resources:
+	zynqmp_dma_chan_remove(zdev->chan);
+err_disable_pm:
+	if (!pm_runtime_enabled(zdev->dev))
+		zynqmp_dma_runtime_suspend(zdev->dev);
+	pm_runtime_disable(zdev->dev);
+	return ret;
+}
+
+/**
+ * zynqmp_dma_remove - Driver remove function
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Return: Always '0'
+ */
+static int zynqmp_dma_remove(struct platform_device *pdev)
+{
+	struct zynqmp_dma_device *zdev = platform_get_drvdata(pdev);
+
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&zdev->common);
+
+	zynqmp_dma_chan_remove(zdev->chan);
+	pm_runtime_disable(zdev->dev);
+	if (!pm_runtime_enabled(zdev->dev))
+		zynqmp_dma_runtime_suspend(zdev->dev);
+
+	return 0;
+}
+
+static const struct of_device_id zynqmp_dma_of_match[] = {
+	{ .compatible = "xlnx,zynqmp-dma-1.0", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, zynqmp_dma_of_match);
+
+static struct platform_driver zynqmp_dma_driver = {
+	.driver = {
+		.name = "xilinx-zynqmp-dma",
+		.of_match_table = zynqmp_dma_of_match,
+		.pm = &zynqmp_dma_dev_pm_ops,
+	},
+	.probe = zynqmp_dma_probe,
+	.remove = zynqmp_dma_remove,
+};
+
+module_platform_driver(zynqmp_dma_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Xilinx, Inc.");
+MODULE_DESCRIPTION("Xilinx ZynqMP DMA driver");
diff --git a/drivers/dma/zx_dma.c b/drivers/dma/zx_dma.c
new file mode 100644
index 0000000..2571bc7
--- /dev/null
+++ b/drivers/dma/zx_dma.c
@@ -0,0 +1,952 @@
+/*
+ * Copyright 2015 Linaro.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/sched.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/clk.h>
+#include <linux/of_dma.h>
+
+#include "virt-dma.h"
+
+#define DRIVER_NAME		"zx-dma"
+#define DMA_ALIGN		4
+#define DMA_MAX_SIZE		(0x10000 - 512)
+#define LLI_BLOCK_SIZE		(4 * PAGE_SIZE)
+
+#define REG_ZX_SRC_ADDR			0x00
+#define REG_ZX_DST_ADDR			0x04
+#define REG_ZX_TX_X_COUNT		0x08
+#define REG_ZX_TX_ZY_COUNT		0x0c
+#define REG_ZX_SRC_ZY_STEP		0x10
+#define REG_ZX_DST_ZY_STEP		0x14
+#define REG_ZX_LLI_ADDR			0x1c
+#define REG_ZX_CTRL			0x20
+#define REG_ZX_TC_IRQ			0x800
+#define REG_ZX_SRC_ERR_IRQ		0x804
+#define REG_ZX_DST_ERR_IRQ		0x808
+#define REG_ZX_CFG_ERR_IRQ		0x80c
+#define REG_ZX_TC_IRQ_RAW		0x810
+#define REG_ZX_SRC_ERR_IRQ_RAW		0x814
+#define REG_ZX_DST_ERR_IRQ_RAW		0x818
+#define REG_ZX_CFG_ERR_IRQ_RAW		0x81c
+#define REG_ZX_STATUS			0x820
+#define REG_ZX_DMA_GRP_PRIO		0x824
+#define REG_ZX_DMA_ARB			0x828
+
+#define ZX_FORCE_CLOSE			BIT(31)
+#define ZX_DST_BURST_WIDTH(x)		(((x) & 0x7) << 13)
+#define ZX_MAX_BURST_LEN		16
+#define ZX_SRC_BURST_LEN(x)		(((x) & 0xf) << 9)
+#define ZX_SRC_BURST_WIDTH(x)		(((x) & 0x7) << 6)
+#define ZX_IRQ_ENABLE_ALL		(3 << 4)
+#define ZX_DST_FIFO_MODE		BIT(3)
+#define ZX_SRC_FIFO_MODE		BIT(2)
+#define ZX_SOFT_REQ			BIT(1)
+#define ZX_CH_ENABLE			BIT(0)
+
+#define ZX_DMA_BUSWIDTHS \
+	(BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \
+	BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+	BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+	BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
+	BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
+
+enum zx_dma_burst_width {
+	ZX_DMA_WIDTH_8BIT	= 0,
+	ZX_DMA_WIDTH_16BIT	= 1,
+	ZX_DMA_WIDTH_32BIT	= 2,
+	ZX_DMA_WIDTH_64BIT	= 3,
+};
+
+struct zx_desc_hw {
+	u32 saddr;
+	u32 daddr;
+	u32 src_x;
+	u32 src_zy;
+	u32 src_zy_step;
+	u32 dst_zy_step;
+	u32 reserved1;
+	u32 lli;
+	u32 ctr;
+	u32 reserved[7]; /* pad to the hardware register region size */
+} __aligned(32);
+
+struct zx_dma_desc_sw {
+	struct virt_dma_desc	vd;
+	dma_addr_t		desc_hw_lli;
+	size_t			desc_num;
+	size_t			size;
+	struct zx_desc_hw	*desc_hw;
+};
+
+struct zx_dma_phy;
+
+struct zx_dma_chan {
+	struct dma_slave_config slave_cfg;
+	int			id; /* Request phy chan id */
+	u32			ccfg;
+	u32			cyclic;
+	struct virt_dma_chan	vc;
+	struct zx_dma_phy	*phy;
+	struct list_head	node;
+	dma_addr_t		dev_addr;
+	enum dma_status		status;
+};
+
+struct zx_dma_phy {
+	u32			idx;
+	void __iomem		*base;
+	struct zx_dma_chan	*vchan;
+	struct zx_dma_desc_sw	*ds_run;
+	struct zx_dma_desc_sw	*ds_done;
+};
+
+struct zx_dma_dev {
+	struct dma_device	slave;
+	void __iomem		*base;
+	spinlock_t		lock; /* lock for ch and phy */
+	struct list_head	chan_pending;
+	struct zx_dma_phy	*phy;
+	struct zx_dma_chan	*chans;
+	struct clk		*clk;
+	struct dma_pool		*pool;
+	u32			dma_channels;
+	u32			dma_requests;
+	int			irq;
+};
+
+#define to_zx_dma(dmadev) container_of(dmadev, struct zx_dma_dev, slave)
+
+static struct zx_dma_chan *to_zx_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct zx_dma_chan, vc.chan);
+}
+
+static void zx_dma_terminate_chan(struct zx_dma_phy *phy, struct zx_dma_dev *d)
+{
+	u32 val = 0;
+
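+	/* Disable and force-close the channel, then clear its raw IRQ status */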
+	val = readl_relaxed(phy->base + REG_ZX_CTRL);
+	val &= ~ZX_CH_ENABLE;
+	val |= ZX_FORCE_CLOSE;
+	writel_relaxed(val, phy->base + REG_ZX_CTRL);
+
+	val = 0x1 << phy->idx;
+	writel_relaxed(val, d->base + REG_ZX_TC_IRQ_RAW);
+	writel_relaxed(val, d->base + REG_ZX_SRC_ERR_IRQ_RAW);
+	writel_relaxed(val, d->base + REG_ZX_DST_ERR_IRQ_RAW);
+	writel_relaxed(val, d->base + REG_ZX_CFG_ERR_IRQ_RAW);
+}
+
+static void zx_dma_set_desc(struct zx_dma_phy *phy, struct zx_desc_hw *hw)
+{
+	writel_relaxed(hw->saddr, phy->base + REG_ZX_SRC_ADDR);
+	writel_relaxed(hw->daddr, phy->base + REG_ZX_DST_ADDR);
+	writel_relaxed(hw->src_x, phy->base + REG_ZX_TX_X_COUNT);
+	writel_relaxed(0, phy->base + REG_ZX_TX_ZY_COUNT);
+	writel_relaxed(0, phy->base + REG_ZX_SRC_ZY_STEP);
+	writel_relaxed(0, phy->base + REG_ZX_DST_ZY_STEP);
+	writel_relaxed(hw->lli, phy->base + REG_ZX_LLI_ADDR);
+	writel_relaxed(hw->ctr, phy->base + REG_ZX_CTRL);
+}
+
+static u32 zx_dma_get_curr_lli(struct zx_dma_phy *phy)
+{
+	return readl_relaxed(phy->base + REG_ZX_LLI_ADDR);
+}
+
+static u32 zx_dma_get_chan_stat(struct zx_dma_dev *d)
+{
+	return readl_relaxed(d->base + REG_ZX_STATUS);
+}
+
+static void zx_dma_init_state(struct zx_dma_dev *d)
+{
+	/* set same priority */
+	writel_relaxed(0x0, d->base + REG_ZX_DMA_ARB);
+	/* clear all irq */
+	writel_relaxed(0xffffffff, d->base + REG_ZX_TC_IRQ_RAW);
+	writel_relaxed(0xffffffff, d->base + REG_ZX_SRC_ERR_IRQ_RAW);
+	writel_relaxed(0xffffffff, d->base + REG_ZX_DST_ERR_IRQ_RAW);
+	writel_relaxed(0xffffffff, d->base + REG_ZX_CFG_ERR_IRQ_RAW);
+}
+
+static int zx_dma_start_txd(struct zx_dma_chan *c)
+{
+	struct zx_dma_dev *d = to_zx_dma(c->vc.chan.device);
+	struct virt_dma_desc *vd = vchan_next_desc(&c->vc);
+
+	if (!c->phy)
+		return -EAGAIN;
+
+	if (BIT(c->phy->idx) & zx_dma_get_chan_stat(d))
+		return -EAGAIN;
+
+	if (vd) {
+		struct zx_dma_desc_sw *ds =
+			container_of(vd, struct zx_dma_desc_sw, vd);
+		/*
+		 * Fetch and remove the request from vc->desc_issued so that
+		 * vc->desc_issued only contains pending descriptors.
+		 */
+		list_del(&ds->vd.node);
+		c->phy->ds_run = ds;
+		c->phy->ds_done = NULL;
+		/* start dma */
+		zx_dma_set_desc(c->phy, ds->desc_hw);
+		return 0;
+	}
+	c->phy->ds_done = NULL;
+	c->phy->ds_run = NULL;
+	return -EAGAIN;
+}
+
+static void zx_dma_task(struct zx_dma_dev *d)
+{
+	struct zx_dma_phy *p;
+	struct zx_dma_chan *c, *cn;
+	unsigned pch, pch_alloc = 0;
+	unsigned long flags;
+
+	/* check for new DMA requests on running channels in vc->desc_issued */
+	list_for_each_entry_safe(c, cn, &d->slave.channels,
+				 vc.chan.device_node) {
+		spin_lock_irqsave(&c->vc.lock, flags);
+		p = c->phy;
+		if (p && p->ds_done && zx_dma_start_txd(c)) {
+			/* No current txd associated with this channel */
+			dev_dbg(d->slave.dev, "pchan %u: free\n", p->idx);
+			/* Mark this channel free */
+			c->phy = NULL;
+			p->vchan = NULL;
+		}
+		spin_unlock_irqrestore(&c->vc.lock, flags);
+	}
+
+	/* check for new channel requests in d->chan_pending */
+	spin_lock_irqsave(&d->lock, flags);
+	while (!list_empty(&d->chan_pending)) {
+		c = list_first_entry(&d->chan_pending,
+				     struct zx_dma_chan, node);
+		p = &d->phy[c->id];
+		if (!p->vchan) {
+			/* remove from d->chan_pending */
+			list_del_init(&c->node);
+			pch_alloc |= 1 << c->id;
+			/* Mark this channel allocated */
+			p->vchan = c;
+			c->phy = p;
+		} else {
+			dev_dbg(d->slave.dev, "pchan %u: busy!\n", c->id);
+		}
+	}
+	spin_unlock_irqrestore(&d->lock, flags);
+
+	for (pch = 0; pch < d->dma_channels; pch++) {
+		if (pch_alloc & (1 << pch)) {
+			p = &d->phy[pch];
+			c = p->vchan;
+			if (c) {
+				spin_lock_irqsave(&c->vc.lock, flags);
+				zx_dma_start_txd(c);
+				spin_unlock_irqrestore(&c->vc.lock, flags);
+			}
+		}
+	}
+}
+
+static irqreturn_t zx_dma_int_handler(int irq, void *dev_id)
+{
+	struct zx_dma_dev *d = (struct zx_dma_dev *)dev_id;
+	struct zx_dma_phy *p;
+	struct zx_dma_chan *c;
+	u32 tc = readl_relaxed(d->base + REG_ZX_TC_IRQ);
+	u32 serr = readl_relaxed(d->base + REG_ZX_SRC_ERR_IRQ);
+	u32 derr = readl_relaxed(d->base + REG_ZX_DST_ERR_IRQ);
+	u32 cfg = readl_relaxed(d->base + REG_ZX_CFG_ERR_IRQ);
+	u32 i, irq_chan = 0, task = 0;
+
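+	/* Handle the transfer-complete bits one physical channel at a time */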
+	while (tc) {
+		i = __ffs(tc);
+		tc &= ~BIT(i);
+		p = &d->phy[i];
+		c = p->vchan;
+		if (c) {
+			unsigned long flags;
+
+			spin_lock_irqsave(&c->vc.lock, flags);
+			if (c->cyclic) {
+				vchan_cyclic_callback(&p->ds_run->vd);
+			} else {
+				vchan_cookie_complete(&p->ds_run->vd);
+				p->ds_done = p->ds_run;
+				task = 1;
+			}
+			spin_unlock_irqrestore(&c->vc.lock, flags);
+			irq_chan |= BIT(i);
+		}
+	}
+
+	if (serr || derr || cfg)
+		dev_warn(d->slave.dev, "DMA ERR src 0x%x, dst 0x%x, cfg 0x%x\n",
+			 serr, derr, cfg);
+
+	writel_relaxed(irq_chan, d->base + REG_ZX_TC_IRQ_RAW);
+	writel_relaxed(serr, d->base + REG_ZX_SRC_ERR_IRQ_RAW);
+	writel_relaxed(derr, d->base + REG_ZX_DST_ERR_IRQ_RAW);
+	writel_relaxed(cfg, d->base + REG_ZX_CFG_ERR_IRQ_RAW);
+
+	if (task)
+		zx_dma_task(d);
+	return IRQ_HANDLED;
+}
+
+static void zx_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct zx_dma_chan *c = to_zx_chan(chan);
+	struct zx_dma_dev *d = to_zx_dma(chan->device);
+	unsigned long flags;
+
+	spin_lock_irqsave(&d->lock, flags);
+	list_del_init(&c->node);
+	spin_unlock_irqrestore(&d->lock, flags);
+
+	vchan_free_chan_resources(&c->vc);
+	c->ccfg = 0;
+}
+
+static enum dma_status zx_dma_tx_status(struct dma_chan *chan,
+					dma_cookie_t cookie,
+					struct dma_tx_state *state)
+{
+	struct zx_dma_chan *c = to_zx_chan(chan);
+	struct zx_dma_phy *p;
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+	enum dma_status ret;
+	size_t bytes = 0;
+
+	ret = dma_cookie_status(&c->vc.chan, cookie, state);
+	if (ret == DMA_COMPLETE || !state)
+		return ret;
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+	p = c->phy;
+	ret = c->status;
+
+	/*
+	 * If the cookie is on our issue queue, then the residue is
+	 * its total size.
+	 */
+	vd = vchan_find_desc(&c->vc, cookie);
+	if (vd) {
+		bytes = container_of(vd, struct zx_dma_desc_sw, vd)->size;
+	} else if ((!p) || (!p->ds_run)) {
+		bytes = 0;
+	} else {
+		struct zx_dma_desc_sw *ds = p->ds_run;
+		u32 clli = 0, index = 0;
+
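+		/*
+		 * Estimate the residue by summing the src_x byte counts of
+		 * the descriptors that follow the one referenced by the
+		 * current LLI register.
+		 */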
+		bytes = 0;
+		clli = zx_dma_get_curr_lli(p);
+		index = (clli - ds->desc_hw_lli) /
+				sizeof(struct zx_desc_hw) + 1;
+		for (; index < ds->desc_num; index++) {
+			bytes += ds->desc_hw[index].src_x;
+			/* end of lli */
+			if (!ds->desc_hw[index].lli)
+				break;
+		}
+	}
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+	dma_set_residue(state, bytes);
+	return ret;
+}
+
+static void zx_dma_issue_pending(struct dma_chan *chan)
+{
+	struct zx_dma_chan *c = to_zx_chan(chan);
+	struct zx_dma_dev *d = to_zx_dma(chan->device);
+	unsigned long flags;
+	int issue = 0;
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+	/* add request to vc->desc_issued */
+	if (vchan_issue_pending(&c->vc)) {
+		spin_lock(&d->lock);
+		if (!c->phy && list_empty(&c->node)) {
+			/* if new channel, add chan_pending */
+			list_add_tail(&c->node, &d->chan_pending);
+			issue = 1;
+			dev_dbg(d->slave.dev, "vchan %p: issued\n", &c->vc);
+		}
+		spin_unlock(&d->lock);
+	} else {
+		dev_dbg(d->slave.dev, "vchan %p: nothing to issue\n", &c->vc);
+	}
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+
+	if (issue)
+		zx_dma_task(d);
+}
+
+static void zx_dma_fill_desc(struct zx_dma_desc_sw *ds, dma_addr_t dst,
+			     dma_addr_t src, size_t len, u32 num, u32 ccfg)
+{
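+	/* Link this descriptor to the next one unless it is the last */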
+	if ((num + 1) < ds->desc_num)
+		ds->desc_hw[num].lli = ds->desc_hw_lli + (num + 1) *
+			sizeof(struct zx_desc_hw);
+	ds->desc_hw[num].saddr = src;
+	ds->desc_hw[num].daddr = dst;
+	ds->desc_hw[num].src_x = len;
+	ds->desc_hw[num].ctr = ccfg;
+}
+
+static struct zx_dma_desc_sw *zx_alloc_desc_resource(int num,
+						     struct dma_chan *chan)
+{
+	struct zx_dma_chan *c = to_zx_chan(chan);
+	struct zx_dma_desc_sw *ds;
+	struct zx_dma_dev *d = to_zx_dma(chan->device);
+	int lli_limit = LLI_BLOCK_SIZE / sizeof(struct zx_desc_hw);
+
+	if (num > lli_limit) {
+		dev_dbg(chan->device->dev, "vch %p: sg num %d exceeds max %d\n",
+			&c->vc, num, lli_limit);
+		return NULL;
+	}
+
+	ds = kzalloc(sizeof(*ds), GFP_ATOMIC);
+	if (!ds)
+		return NULL;
+
+	ds->desc_hw = dma_pool_zalloc(d->pool, GFP_NOWAIT, &ds->desc_hw_lli);
+	if (!ds->desc_hw) {
+		dev_dbg(chan->device->dev, "vch %p: dma alloc failed\n", &c->vc);
+		kfree(ds);
+		return NULL;
+	}
+	ds->desc_num = num;
+	return ds;
+}
+
+static enum zx_dma_burst_width zx_dma_burst_width(enum dma_slave_buswidth width)
+{
+	switch (width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+	case DMA_SLAVE_BUSWIDTH_8_BYTES:
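+		/*
+		 * The bus width is a power-of-two byte count, so ffs() - 1
+		 * gives log2(bytes), which matches the ZX_DMA_WIDTH_*
+		 * encoding (8/16/32/64 bit -> 0/1/2/3).
+		 */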
+		return ffs(width) - 1;
+	default:
+		return ZX_DMA_WIDTH_32BIT;
+	}
+}
+
+static int zx_pre_config(struct zx_dma_chan *c, enum dma_transfer_direction dir)
+{
+	struct dma_slave_config *cfg = &c->slave_cfg;
+	enum zx_dma_burst_width src_width;
+	enum zx_dma_burst_width dst_width;
+	u32 maxburst = 0;
+
+	switch (dir) {
+	case DMA_MEM_TO_MEM:
+		c->ccfg = ZX_CH_ENABLE | ZX_SOFT_REQ
+			| ZX_SRC_BURST_LEN(ZX_MAX_BURST_LEN - 1)
+			| ZX_SRC_BURST_WIDTH(ZX_DMA_WIDTH_32BIT)
+			| ZX_DST_BURST_WIDTH(ZX_DMA_WIDTH_32BIT);
+		break;
+	case DMA_MEM_TO_DEV:
+		c->dev_addr = cfg->dst_addr;
+		/*
+		 * The dst len is calculated from the src width, len and dst
+		 * width.  We need to make sure the dst len does not exceed
+		 * MAX LEN.  A trailing single transaction that does not fill
+		 * a full burst also requires identical src/dst data widths.
+		 */
+		dst_width = zx_dma_burst_width(cfg->dst_addr_width);
+		maxburst = cfg->dst_maxburst;
+		maxburst = maxburst < ZX_MAX_BURST_LEN ?
+				maxburst : ZX_MAX_BURST_LEN;
+		c->ccfg = ZX_DST_FIFO_MODE | ZX_CH_ENABLE
+			| ZX_SRC_BURST_LEN(maxburst - 1)
+			| ZX_SRC_BURST_WIDTH(dst_width)
+			| ZX_DST_BURST_WIDTH(dst_width);
+		break;
+	case DMA_DEV_TO_MEM:
+		c->dev_addr = cfg->src_addr;
+		src_width = zx_dma_burst_width(cfg->src_addr_width);
+		maxburst = cfg->src_maxburst;
+		maxburst = maxburst < ZX_MAX_BURST_LEN ?
+				maxburst : ZX_MAX_BURST_LEN;
+		c->ccfg = ZX_SRC_FIFO_MODE | ZX_CH_ENABLE
+			| ZX_SRC_BURST_LEN(maxburst - 1)
+			| ZX_SRC_BURST_WIDTH(src_width)
+			| ZX_DST_BURST_WIDTH(src_width);
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static struct dma_async_tx_descriptor *zx_dma_prep_memcpy(
+	struct dma_chan *chan,	dma_addr_t dst, dma_addr_t src,
+	size_t len, unsigned long flags)
+{
+	struct zx_dma_chan *c = to_zx_chan(chan);
+	struct zx_dma_desc_sw *ds;
+	size_t copy = 0;
+	int num = 0;
+
+	if (!len)
+		return NULL;
+
+	if (zx_pre_config(c, DMA_MEM_TO_MEM))
+		return NULL;
+
+	num = DIV_ROUND_UP(len, DMA_MAX_SIZE);
+
+	ds = zx_alloc_desc_resource(num, chan);
+	if (!ds)
+		return NULL;
+
+	ds->size = len;
+	num = 0;
+
+	do {
+		copy = min_t(size_t, len, DMA_MAX_SIZE);
+		zx_dma_fill_desc(ds, dst, src, copy, num++, c->ccfg);
+
+		src += copy;
+		dst += copy;
+		len -= copy;
+	} while (len);
+
+	c->cyclic = 0;
+	ds->desc_hw[num - 1].lli = 0;	/* end of link */
+	ds->desc_hw[num - 1].ctr |= ZX_IRQ_ENABLE_ALL;
+	return vchan_tx_prep(&c->vc, &ds->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *zx_dma_prep_slave_sg(
+	struct dma_chan *chan, struct scatterlist *sgl, unsigned int sglen,
+	enum dma_transfer_direction dir, unsigned long flags, void *context)
+{
+	struct zx_dma_chan *c = to_zx_chan(chan);
+	struct zx_dma_desc_sw *ds;
+	size_t len, avail, total = 0;
+	struct scatterlist *sg;
+	dma_addr_t addr, src = 0, dst = 0;
+	int num = sglen, i;
+
+	if (!sgl)
+		return NULL;
+
+	if (zx_pre_config(c, dir))
+		return NULL;
+
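+	/* sg entries larger than DMA_MAX_SIZE need extra descriptors */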
+	for_each_sg(sgl, sg, sglen, i) {
+		avail = sg_dma_len(sg);
+		if (avail > DMA_MAX_SIZE)
+			num += DIV_ROUND_UP(avail, DMA_MAX_SIZE) - 1;
+	}
+
+	ds = zx_alloc_desc_resource(num, chan);
+	if (!ds)
+		return NULL;
+
+	c->cyclic = 0;
+	num = 0;
+	for_each_sg(sgl, sg, sglen, i) {
+		addr = sg_dma_address(sg);
+		avail = sg_dma_len(sg);
+		total += avail;
+
+		do {
+			len = min_t(size_t, avail, DMA_MAX_SIZE);
+
+			if (dir == DMA_MEM_TO_DEV) {
+				src = addr;
+				dst = c->dev_addr;
+			} else if (dir == DMA_DEV_TO_MEM) {
+				src = c->dev_addr;
+				dst = addr;
+			}
+
+			zx_dma_fill_desc(ds, dst, src, len, num++, c->ccfg);
+
+			addr += len;
+			avail -= len;
+		} while (avail);
+	}
+
+	ds->desc_hw[num - 1].lli = 0;	/* end of link */
+	ds->desc_hw[num - 1].ctr |= ZX_IRQ_ENABLE_ALL;
+	ds->size = total;
+	return vchan_tx_prep(&c->vc, &ds->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *zx_dma_prep_dma_cyclic(
+		struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
+		size_t period_len, enum dma_transfer_direction dir,
+		unsigned long flags)
+{
+	struct zx_dma_chan *c = to_zx_chan(chan);
+	struct zx_dma_desc_sw *ds;
+	dma_addr_t src = 0, dst = 0;
+	int num_periods = buf_len / period_len;
+	int buf = 0, num = 0;
+
+	if (period_len > DMA_MAX_SIZE) {
+		dev_err(chan->device->dev, "maximum period size exceeded\n");
+		return NULL;
+	}
+
+	if (zx_pre_config(c, dir))
+		return NULL;
+
+	ds = zx_alloc_desc_resource(num_periods, chan);
+	if (!ds)
+		return NULL;
+	c->cyclic = 1;
+
+	while (buf < buf_len) {
+		if (dir == DMA_MEM_TO_DEV) {
+			src = dma_addr;
+			dst = c->dev_addr;
+		} else if (dir == DMA_DEV_TO_MEM) {
+			src = c->dev_addr;
+			dst = dma_addr;
+		}
+		zx_dma_fill_desc(ds, dst, src, period_len, num++,
+				 c->ccfg | ZX_IRQ_ENABLE_ALL);
+		dma_addr += period_len;
+		buf += period_len;
+	}
+
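+	/* Link the last descriptor back to the first to form the cyclic ring */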
+	ds->desc_hw[num - 1].lli = ds->desc_hw_lli;
+	ds->size = buf_len;
+	return vchan_tx_prep(&c->vc, &ds->vd, flags);
+}
+
+static int zx_dma_config(struct dma_chan *chan,
+			 struct dma_slave_config *cfg)
+{
+	struct zx_dma_chan *c = to_zx_chan(chan);
+
+	if (!cfg)
+		return -EINVAL;
+
+	memcpy(&c->slave_cfg, cfg, sizeof(*cfg));
+
+	return 0;
+}
+
+static int zx_dma_terminate_all(struct dma_chan *chan)
+{
+	struct zx_dma_chan *c = to_zx_chan(chan);
+	struct zx_dma_dev *d = to_zx_dma(chan->device);
+	struct zx_dma_phy *p = c->phy;
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	dev_dbg(d->slave.dev, "vchan %p: terminate all\n", &c->vc);
+
+	/* Prevent this channel from being scheduled */
+	spin_lock(&d->lock);
+	list_del_init(&c->node);
+	spin_unlock(&d->lock);
+
+	/* Clear the tx descriptor lists */
+	spin_lock_irqsave(&c->vc.lock, flags);
+	vchan_get_all_descriptors(&c->vc, &head);
+	if (p) {
+		/* vchan is assigned to a pchan - stop the channel */
+		zx_dma_terminate_chan(p, d);
+		c->phy = NULL;
+		p->vchan = NULL;
+		p->ds_run = NULL;
+		p->ds_done = NULL;
+	}
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+	vchan_dma_desc_free_list(&c->vc, &head);
+
+	return 0;
+}
+
+static int zx_dma_transfer_pause(struct dma_chan *chan)
+{
+	struct zx_dma_chan *c = to_zx_chan(chan);
+	u32 val = 0;
+
+	val = readl_relaxed(c->phy->base + REG_ZX_CTRL);
+	val &= ~ZX_CH_ENABLE;
+	writel_relaxed(val, c->phy->base + REG_ZX_CTRL);
+
+	return 0;
+}
+
+static int zx_dma_transfer_resume(struct dma_chan *chan)
+{
+	struct zx_dma_chan *c = to_zx_chan(chan);
+	u32 val = 0;
+
+	val = readl_relaxed(c->phy->base + REG_ZX_CTRL);
+	val |= ZX_CH_ENABLE;
+	writel_relaxed(val, c->phy->base + REG_ZX_CTRL);
+
+	return 0;
+}
+
+static void zx_dma_free_desc(struct virt_dma_desc *vd)
+{
+	struct zx_dma_desc_sw *ds =
+		container_of(vd, struct zx_dma_desc_sw, vd);
+	struct zx_dma_dev *d = to_zx_dma(vd->tx.chan->device);
+
+	dma_pool_free(d->pool, ds->desc_hw, ds->desc_hw_lli);
+	kfree(ds);
+}
+
+static const struct of_device_id zx6702_dma_dt_ids[] = {
+	{ .compatible = "zte,zx296702-dma", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, zx6702_dma_dt_ids);
+
+static struct dma_chan *zx_of_dma_simple_xlate(struct of_phandle_args *dma_spec,
+					       struct of_dma *ofdma)
+{
+	struct zx_dma_dev *d = ofdma->of_dma_data;
+	unsigned int request = dma_spec->args[0];
+	struct dma_chan *chan;
+	struct zx_dma_chan *c;
+
+	if (request >= d->dma_requests)
+		return NULL;
+
+	chan = dma_get_any_slave_channel(&d->slave);
+	if (!chan) {
+		dev_err(d->slave.dev, "failed to get a channel in %s\n", __func__);
+		return NULL;
+	}
+	c = to_zx_chan(chan);
+	c->id = request;
+	dev_info(d->slave.dev, "zx_dma: pchan %u: alloc vchan %p\n",
+		 c->id, &c->vc);
+	return chan;
+}
+
+static int zx_dma_probe(struct platform_device *op)
+{
+	struct zx_dma_dev *d;
+	struct resource *iores;
+	int i, ret = 0;
+
+	iores = platform_get_resource(op, IORESOURCE_MEM, 0);
+	if (!iores)
+		return -EINVAL;
+
+	d = devm_kzalloc(&op->dev, sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	d->base = devm_ioremap_resource(&op->dev, iores);
+	if (IS_ERR(d->base))
+		return PTR_ERR(d->base);
+
+	of_property_read_u32((&op->dev)->of_node,
+			     "dma-channels", &d->dma_channels);
+	of_property_read_u32((&op->dev)->of_node,
+			     "dma-requests", &d->dma_requests);
+	if (!d->dma_requests || !d->dma_channels)
+		return -EINVAL;
+
+	d->clk = devm_clk_get(&op->dev, NULL);
+	if (IS_ERR(d->clk)) {
+		dev_err(&op->dev, "no dma clk\n");
+		return PTR_ERR(d->clk);
+	}
+
+	d->irq = platform_get_irq(op, 0);
+	ret = devm_request_irq(&op->dev, d->irq, zx_dma_int_handler,
+			       0, DRIVER_NAME, d);
+	if (ret)
+		return ret;
+
+	/* A DMA memory pool for LLIs, align on 32-byte boundary */
+	d->pool = dmam_pool_create(DRIVER_NAME, &op->dev,
+			LLI_BLOCK_SIZE, 32, 0);
+	if (!d->pool)
+		return -ENOMEM;
+
+	/* init phy channel */
+	d->phy = devm_kcalloc(&op->dev,
+		d->dma_channels, sizeof(struct zx_dma_phy), GFP_KERNEL);
+	if (!d->phy)
+		return -ENOMEM;
+
+	for (i = 0; i < d->dma_channels; i++) {
+		struct zx_dma_phy *p = &d->phy[i];
+
+		p->idx = i;
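+		/* each physical channel has a 0x40-byte register window */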
+		p->base = d->base + i * 0x40;
+	}
+
+	INIT_LIST_HEAD(&d->slave.channels);
+	dma_cap_set(DMA_SLAVE, d->slave.cap_mask);
+	dma_cap_set(DMA_MEMCPY, d->slave.cap_mask);
+	dma_cap_set(DMA_CYCLIC, d->slave.cap_mask);
+	dma_cap_set(DMA_PRIVATE, d->slave.cap_mask);
+	d->slave.dev = &op->dev;
+	d->slave.device_free_chan_resources = zx_dma_free_chan_resources;
+	d->slave.device_tx_status = zx_dma_tx_status;
+	d->slave.device_prep_dma_memcpy = zx_dma_prep_memcpy;
+	d->slave.device_prep_slave_sg = zx_dma_prep_slave_sg;
+	d->slave.device_prep_dma_cyclic = zx_dma_prep_dma_cyclic;
+	d->slave.device_issue_pending = zx_dma_issue_pending;
+	d->slave.device_config = zx_dma_config;
+	d->slave.device_terminate_all = zx_dma_terminate_all;
+	d->slave.device_pause = zx_dma_transfer_pause;
+	d->slave.device_resume = zx_dma_transfer_resume;
+	d->slave.copy_align = DMA_ALIGN;
+	d->slave.src_addr_widths = ZX_DMA_BUSWIDTHS;
+	d->slave.dst_addr_widths = ZX_DMA_BUSWIDTHS;
+	d->slave.directions = BIT(DMA_MEM_TO_MEM) | BIT(DMA_MEM_TO_DEV)
+			| BIT(DMA_DEV_TO_MEM);
+	d->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+
+	/* init virtual channel */
+	d->chans = devm_kcalloc(&op->dev,
+		d->dma_requests, sizeof(struct zx_dma_chan), GFP_KERNEL);
+	if (!d->chans)
+		return -ENOMEM;
+
+	for (i = 0; i < d->dma_requests; i++) {
+		struct zx_dma_chan *c = &d->chans[i];
+
+		c->status = DMA_IN_PROGRESS;
+		INIT_LIST_HEAD(&c->node);
+		c->vc.desc_free = zx_dma_free_desc;
+		vchan_init(&c->vc, &d->slave);
+	}
+
+	/* Enable clock before accessing registers */
+	ret = clk_prepare_enable(d->clk);
+	if (ret < 0) {
+		dev_err(&op->dev, "clk_prepare_enable failed: %d\n", ret);
+		goto zx_dma_out;
+	}
+
+	zx_dma_init_state(d);
+
+	spin_lock_init(&d->lock);
+	INIT_LIST_HEAD(&d->chan_pending);
+	platform_set_drvdata(op, d);
+
+	ret = dma_async_device_register(&d->slave);
+	if (ret)
+		goto clk_dis;
+
+	ret = of_dma_controller_register((&op->dev)->of_node,
+					 zx_of_dma_simple_xlate, d);
+	if (ret)
+		goto of_dma_register_fail;
+
+	dev_info(&op->dev, "initialized\n");
+	return 0;
+
+of_dma_register_fail:
+	dma_async_device_unregister(&d->slave);
+clk_dis:
+	clk_disable_unprepare(d->clk);
+zx_dma_out:
+	return ret;
+}
+
+static int zx_dma_remove(struct platform_device *op)
+{
+	struct zx_dma_chan *c, *cn;
+	struct zx_dma_dev *d = platform_get_drvdata(op);
+
+	/* explicitly free the irq */
+	devm_free_irq(&op->dev, d->irq, d);
+
+	dma_async_device_unregister(&d->slave);
+	of_dma_controller_free((&op->dev)->of_node);
+
+	list_for_each_entry_safe(c, cn, &d->slave.channels,
+				 vc.chan.device_node) {
+		list_del(&c->vc.chan.device_node);
+	}
+	clk_disable_unprepare(d->clk);
+	dmam_pool_destroy(d->pool);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int zx_dma_suspend_dev(struct device *dev)
+{
+	struct zx_dma_dev *d = dev_get_drvdata(dev);
+	u32 stat = 0;
+
+	stat = zx_dma_get_chan_stat(d);
+	if (stat) {
+		dev_warn(d->slave.dev,
+			 "channel(s) still running (0x%x), cannot suspend\n",
+			 stat);
+		return -EBUSY;
+	}
+	clk_disable_unprepare(d->clk);
+	return 0;
+}
+
+static int zx_dma_resume_dev(struct device *dev)
+{
+	struct zx_dma_dev *d = dev_get_drvdata(dev);
+	int ret = 0;
+
+	ret = clk_prepare_enable(d->clk);
+	if (ret < 0) {
+		dev_err(d->slave.dev, "clk_prepare_enable failed: %d\n", ret);
+		return ret;
+	}
+	zx_dma_init_state(d);
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(zx_dma_pmops, zx_dma_suspend_dev, zx_dma_resume_dev);
+
+static struct platform_driver zx_pdma_driver = {
+	.driver		= {
+		.name	= DRIVER_NAME,
+		.pm	= &zx_dma_pmops,
+		.of_match_table = zx6702_dma_dt_ids,
+	},
+	.probe		= zx_dma_probe,
+	.remove		= zx_dma_remove,
+};
+
+module_platform_driver(zx_pdma_driver);
+
+MODULE_DESCRIPTION("ZTE ZX296702 DMA Driver");
+MODULE_AUTHOR("Jun Nie jun.nie@linaro.org");
+MODULE_LICENSE("GPL v2");