Update Linux to v5.10.109

Sourced from [1]

[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz

Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/drivers/dma/xilinx/Makefile b/drivers/dma/xilinx/Makefile
index e921de5..767bb45 100644
--- a/drivers/dma/xilinx/Makefile
+++ b/drivers/dma/xilinx/Makefile
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_XILINX_DMA) += xilinx_dma.o
 obj-$(CONFIG_XILINX_ZYNQMP_DMA) += zynqmp_dma.o
+obj-$(CONFIG_XILINX_ZYNQMP_DPDMA) += xilinx_dpdma.o
diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index ce18bca..cab4719 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -25,6 +25,12 @@
  * The AXI CDMA, is a soft IP, which provides high-bandwidth Direct Memory
  * Access (DMA) between a memory-mapped source address and a memory-mapped
  * destination address.
+ *
+ * The AXI Multichannel Direct Memory Access (AXI MCDMA) core is a soft
+ * Xilinx IP that provides high-bandwidth direct memory access between
+ * memory and AXI4-Stream target peripherals. It provides a scatter-gather
+ * (SG) interface with independent per-channel configuration support.
+ *
  */
 
 #include <linux/bitops.h>
@@ -119,7 +125,9 @@
 #define XILINX_VDMA_ENABLE_VERTICAL_FLIP	BIT(0)
 
 /* HW specific definitions */
-#define XILINX_DMA_MAX_CHANS_PER_DEVICE	0x20
+#define XILINX_MCDMA_MAX_CHANS_PER_DEVICE	0x20
+#define XILINX_DMA_MAX_CHANS_PER_DEVICE		0x2
+#define XILINX_CDMA_MAX_CHANS_PER_DEVICE	0x1
 
 #define XILINX_DMA_DMAXR_ALL_IRQ_MASK	\
 		(XILINX_DMA_DMASR_FRM_CNT_IRQ | \
@@ -173,18 +181,6 @@
 #define XILINX_DMA_NUM_DESCS		255
 #define XILINX_DMA_NUM_APP_WORDS	5
 
-/* Multi-Channel DMA Descriptor offsets*/
-#define XILINX_DMA_MCRX_CDESC(x)	(0x40 + (x-1) * 0x20)
-#define XILINX_DMA_MCRX_TDESC(x)	(0x48 + (x-1) * 0x20)
-
-/* Multi-Channel DMA Masks/Shifts */
-#define XILINX_DMA_BD_HSIZE_MASK	GENMASK(15, 0)
-#define XILINX_DMA_BD_STRIDE_MASK	GENMASK(15, 0)
-#define XILINX_DMA_BD_VSIZE_MASK	GENMASK(31, 19)
-#define XILINX_DMA_BD_TDEST_MASK	GENMASK(4, 0)
-#define XILINX_DMA_BD_STRIDE_SHIFT	0
-#define XILINX_DMA_BD_VSIZE_SHIFT	19
-
 /* AXI CDMA Specific Registers/Offsets */
 #define XILINX_CDMA_REG_SRCADDR		0x18
 #define XILINX_CDMA_REG_DSTADDR		0x20
@@ -194,6 +190,31 @@
 
 #define xilinx_prep_dma_addr_t(addr)	\
 	((dma_addr_t)((u64)addr##_##msb << 32 | (addr)))
+
+/* AXI MCDMA Specific Registers/Offsets */
+#define XILINX_MCDMA_MM2S_CTRL_OFFSET		0x0000
+#define XILINX_MCDMA_S2MM_CTRL_OFFSET		0x0500
+#define XILINX_MCDMA_CHEN_OFFSET		0x0008
+#define XILINX_MCDMA_CH_ERR_OFFSET		0x0010
+#define XILINX_MCDMA_RXINT_SER_OFFSET		0x0020
+#define XILINX_MCDMA_TXINT_SER_OFFSET		0x0028
+#define XILINX_MCDMA_CHAN_CR_OFFSET(x)		(0x40 + (x) * 0x40)
+#define XILINX_MCDMA_CHAN_SR_OFFSET(x)		(0x44 + (x) * 0x40)
+#define XILINX_MCDMA_CHAN_CDESC_OFFSET(x)	(0x48 + (x) * 0x40)
+#define XILINX_MCDMA_CHAN_TDESC_OFFSET(x)	(0x50 + (x) * 0x40)
+
+/* AXI MCDMA Specific Masks/Shifts */
+#define XILINX_MCDMA_COALESCE_SHIFT		16
+#define XILINX_MCDMA_COALESCE_MAX		24
+#define XILINX_MCDMA_IRQ_ALL_MASK		GENMASK(7, 5)
+#define XILINX_MCDMA_COALESCE_MASK		GENMASK(23, 16)
+#define XILINX_MCDMA_CR_RUNSTOP_MASK		BIT(0)
+#define XILINX_MCDMA_IRQ_IOC_MASK		BIT(5)
+#define XILINX_MCDMA_IRQ_DELAY_MASK		BIT(6)
+#define XILINX_MCDMA_IRQ_ERR_MASK		BIT(7)
+#define XILINX_MCDMA_BD_EOP			BIT(30)
+#define XILINX_MCDMA_BD_SOP			BIT(31)
+
 /**
  * struct xilinx_vdma_desc_hw - Hardware Descriptor
  * @next_desc: Next Descriptor Pointer @0x00
@@ -221,8 +242,8 @@
  * @next_desc_msb: MSB of Next Descriptor Pointer @0x04
  * @buf_addr: Buffer address @0x08
  * @buf_addr_msb: MSB of Buffer address @0x0C
- * @mcdma_control: Control field for mcdma @0x10
- * @vsize_stride: Vsize and Stride field for mcdma @0x14
+ * @reserved1: Reserved @0x10
+ * @reserved2: Reserved @0x14
  * @control: Control field @0x18
  * @status: Status field @0x1C
  * @app: APP Fields @0x20 - 0x30
@@ -232,14 +253,38 @@
 	u32 next_desc_msb;
 	u32 buf_addr;
 	u32 buf_addr_msb;
-	u32 mcdma_control;
-	u32 vsize_stride;
+	u32 reserved1;
+	u32 reserved2;
 	u32 control;
 	u32 status;
 	u32 app[XILINX_DMA_NUM_APP_WORDS];
 } __aligned(64);
 
 /**
+ * struct xilinx_aximcdma_desc_hw - Hardware Descriptor for AXI MCDMA
+ * @next_desc: Next Descriptor Pointer @0x00
+ * @next_desc_msb: MSB of Next Descriptor Pointer @0x04
+ * @buf_addr: Buffer address @0x08
+ * @buf_addr_msb: MSB of Buffer address @0x0C
+ * @rsvd: Reserved field @0x10
+ * @control: Control Information field @0x14
+ * @status: Status field @0x18
+ * @sideband_status: Status of sideband signals @0x1C
+ * @app: APP Fields @0x20 - 0x30
+ */
+struct xilinx_aximcdma_desc_hw {
+	u32 next_desc;
+	u32 next_desc_msb;
+	u32 buf_addr;
+	u32 buf_addr_msb;
+	u32 rsvd;
+	u32 control;
+	u32 status;
+	u32 sideband_status;
+	u32 app[XILINX_DMA_NUM_APP_WORDS];
+} __aligned(64);
+
+/**
  * struct xilinx_cdma_desc_hw - Hardware Descriptor
  * @next_desc: Next Descriptor Pointer @0x00
  * @next_desc_msb: Next Descriptor Pointer MSB @0x04
@@ -286,6 +331,18 @@
 } __aligned(64);
 
 /**
+ * struct xilinx_aximcdma_tx_segment - Descriptor segment
+ * @hw: Hardware descriptor
+ * @node: Node in the descriptor segments list
+ * @phys: Physical address of segment
+ */
+struct xilinx_aximcdma_tx_segment {
+	struct xilinx_aximcdma_desc_hw hw;
+	struct list_head node;
+	dma_addr_t phys;
+} __aligned(64);
+
+/**
  * struct xilinx_cdma_tx_segment - Descriptor segment
  * @hw: Hardware descriptor
  * @node: Node in the descriptor segments list
@@ -303,12 +360,16 @@
  * @segments: TX segments list
  * @node: Node in the channel descriptors list
  * @cyclic: Check for cyclic transfers.
+ * @err: Whether the descriptor has an error.
+ * @residue: Residue of the completed descriptor
  */
 struct xilinx_dma_tx_descriptor {
 	struct dma_async_tx_descriptor async_tx;
 	struct list_head segments;
 	struct list_head node;
 	bool cyclic;
+	bool err;
+	u32 residue;
 };
 
 /**
@@ -340,8 +401,8 @@
  * @desc_pendingcount: Descriptor pending count
  * @ext_addr: Indicates 64 bit addressing is supported by dma channel
  * @desc_submitcount: Descriptor h/w submitted count
- * @residue: Residue for AXI DMA
  * @seg_v: Statically allocated segments base
+ * @seg_mv: Statically allocated segments base for MCDMA
  * @seg_p: Physical allocated segments base
  * @cyclic_seg_v: Statically allocated segment base for cyclic transfers
  * @cyclic_seg_p: Physical allocated segments base for cyclic dma
@@ -378,8 +439,8 @@
 	u32 desc_pendingcount;
 	bool ext_addr;
 	u32 desc_submitcount;
-	u32 residue;
 	struct xilinx_axidma_tx_segment *seg_v;
+	struct xilinx_aximcdma_tx_segment *seg_mv;
 	dma_addr_t seg_p;
 	struct xilinx_axidma_tx_segment *cyclic_seg_v;
 	dma_addr_t cyclic_seg_p;
@@ -395,12 +456,14 @@
  * @XDMA_TYPE_AXIDMA: Axi dma ip.
  * @XDMA_TYPE_CDMA: Axi cdma ip.
  * @XDMA_TYPE_VDMA: Axi vdma ip.
+ * @XDMA_TYPE_AXIMCDMA: Axi MCDMA ip.
  *
  */
 enum xdma_ip_type {
 	XDMA_TYPE_AXIDMA = 0,
 	XDMA_TYPE_CDMA,
 	XDMA_TYPE_VDMA,
+	XDMA_TYPE_AXIMCDMA
 };
 
 struct xilinx_dma_config {
@@ -408,6 +471,8 @@
 	int (*clk_init)(struct platform_device *pdev, struct clk **axi_clk,
 			struct clk **tx_clk, struct clk **txs_clk,
 			struct clk **rx_clk, struct clk **rxs_clk);
+	irqreturn_t (*irq_handler)(int irq, void *data);
+	const int max_channels;
 };
 
 /**
@@ -416,7 +481,6 @@
  * @dev: Device Structure
  * @common: DMA device structure
  * @chan: Driver specific DMA channel
- * @mcdma: Specifies whether Multi-Channel is present or not
  * @flush_on_fsync: Flush on frame sync
  * @ext_addr: Indicates 64 bit addressing is supported by dma device
  * @pdev: Platform device structure pointer
@@ -426,16 +490,15 @@
  * @txs_clk: DMA mm2s stream clock
  * @rx_clk: DMA s2mm clock
  * @rxs_clk: DMA s2mm stream clock
- * @nr_channels: Number of channels DMA device supports
- * @chan_id: DMA channel identifier
+ * @s2mm_chan_id: DMA s2mm channel identifier
+ * @mm2s_chan_id: DMA mm2s channel identifier
  * @max_buffer_len: Max buffer length
  */
 struct xilinx_dma_device {
 	void __iomem *regs;
 	struct device *dev;
 	struct dma_device common;
-	struct xilinx_dma_chan *chan[XILINX_DMA_MAX_CHANS_PER_DEVICE];
-	bool mcdma;
+	struct xilinx_dma_chan *chan[XILINX_MCDMA_MAX_CHANS_PER_DEVICE];
 	u32 flush_on_fsync;
 	bool ext_addr;
 	struct platform_device  *pdev;
@@ -445,8 +508,8 @@
 	struct clk *txs_clk;
 	struct clk *rx_clk;
 	struct clk *rxs_clk;
-	u32 nr_channels;
-	u32 chan_id;
+	u32 s2mm_chan_id;
+	u32 mm2s_chan_id;
 	u32 max_buffer_len;
 };
 
@@ -548,6 +611,18 @@
 	}
 }
 
+static inline void xilinx_aximcdma_buf(struct xilinx_dma_chan *chan,
+				       struct xilinx_aximcdma_desc_hw *hw,
+				       dma_addr_t buf_addr, size_t sg_used)
+{
+	if (chan->ext_addr) {
+		hw->buf_addr = lower_32_bits(buf_addr + sg_used);
+		hw->buf_addr_msb = upper_32_bits(buf_addr + sg_used);
+	} else {
+		hw->buf_addr = buf_addr + sg_used;
+	}
+}
+
 /* -----------------------------------------------------------------------------
  * Descriptors and segments alloc and free
  */
@@ -615,6 +690,33 @@
 	}
 	spin_unlock_irqrestore(&chan->lock, flags);
 
+	if (!segment)
+		dev_dbg(chan->dev, "Could not find free tx segment\n");
+
+	return segment;
+}
+
+/**
+ * xilinx_aximcdma_alloc_tx_segment - Allocate transaction segment
+ * @chan: Driver specific DMA channel
+ *
+ * Return: The allocated segment on success and NULL on failure.
+ */
+static struct xilinx_aximcdma_tx_segment *
+xilinx_aximcdma_alloc_tx_segment(struct xilinx_dma_chan *chan)
+{
+	struct xilinx_aximcdma_tx_segment *segment = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	if (!list_empty(&chan->free_seg_list)) {
+		segment = list_first_entry(&chan->free_seg_list,
+					   struct xilinx_aximcdma_tx_segment,
+					   node);
+		list_del(&segment->node);
+	}
+	spin_unlock_irqrestore(&chan->lock, flags);
+
 	return segment;
 }
 
@@ -629,6 +731,17 @@
 	hw->next_desc_msb = next_desc_msb;
 }
 
+static void xilinx_mcdma_clean_hw_desc(struct xilinx_aximcdma_desc_hw *hw)
+{
+	u32 next_desc = hw->next_desc;
+	u32 next_desc_msb = hw->next_desc_msb;
+
+	memset(hw, 0, sizeof(struct xilinx_aximcdma_desc_hw));
+
+	hw->next_desc = next_desc;
+	hw->next_desc_msb = next_desc_msb;
+}
+
 /**
  * xilinx_dma_free_tx_segment - Free transaction segment
  * @chan: Driver specific DMA channel
@@ -643,6 +756,20 @@
 }
 
 /**
+ * xilinx_mcdma_free_tx_segment - Free transaction segment
+ * @chan: Driver specific DMA channel
+ * @segment: DMA transaction segment
+ */
+static void xilinx_mcdma_free_tx_segment(struct xilinx_dma_chan *chan,
+					 struct xilinx_aximcdma_tx_segment *
+					 segment)
+{
+	xilinx_mcdma_clean_hw_desc(&segment->hw);
+
+	list_add_tail(&segment->node, &chan->free_seg_list);
+}
+
+/**
  * xilinx_cdma_free_tx_segment - Free transaction segment
  * @chan: Driver specific DMA channel
  * @segment: DMA transaction segment
@@ -696,6 +823,7 @@
 	struct xilinx_vdma_tx_segment *segment, *next;
 	struct xilinx_cdma_tx_segment *cdma_segment, *cdma_next;
 	struct xilinx_axidma_tx_segment *axidma_segment, *axidma_next;
+	struct xilinx_aximcdma_tx_segment *aximcdma_segment, *aximcdma_next;
 
 	if (!desc)
 		return;
@@ -711,12 +839,18 @@
 			list_del(&cdma_segment->node);
 			xilinx_cdma_free_tx_segment(chan, cdma_segment);
 		}
-	} else {
+	} else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
 		list_for_each_entry_safe(axidma_segment, axidma_next,
 					 &desc->segments, node) {
 			list_del(&axidma_segment->node);
 			xilinx_dma_free_tx_segment(chan, axidma_segment);
 		}
+	} else {
+		list_for_each_entry_safe(aximcdma_segment, aximcdma_next,
+					 &desc->segments, node) {
+			list_del(&aximcdma_segment->node);
+			xilinx_mcdma_free_tx_segment(chan, aximcdma_segment);
+		}
 	}
 
 	kfree(desc);
@@ -785,10 +919,73 @@
 				  chan->cyclic_seg_v, chan->cyclic_seg_p);
 	}
 
-	if (chan->xdev->dma_config->dmatype != XDMA_TYPE_AXIDMA) {
+	if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA) {
+		spin_lock_irqsave(&chan->lock, flags);
+		INIT_LIST_HEAD(&chan->free_seg_list);
+		spin_unlock_irqrestore(&chan->lock, flags);
+
+		/* Free memory that is allocated for BD */
+		dma_free_coherent(chan->dev, sizeof(*chan->seg_mv) *
+				  XILINX_DMA_NUM_DESCS, chan->seg_mv,
+				  chan->seg_p);
+	}
+
+	if (chan->xdev->dma_config->dmatype != XDMA_TYPE_AXIDMA &&
+	    chan->xdev->dma_config->dmatype != XDMA_TYPE_AXIMCDMA) {
 		dma_pool_destroy(chan->desc_pool);
 		chan->desc_pool = NULL;
 	}
+
+}
+
+/**
+ * xilinx_dma_get_residue - Compute residue for a given descriptor
+ * @chan: Driver specific dma channel
+ * @desc: dma transaction descriptor
+ *
+ * Return: The number of residue bytes for the descriptor.
+ */
+static u32 xilinx_dma_get_residue(struct xilinx_dma_chan *chan,
+				  struct xilinx_dma_tx_descriptor *desc)
+{
+	struct xilinx_cdma_tx_segment *cdma_seg;
+	struct xilinx_axidma_tx_segment *axidma_seg;
+	struct xilinx_aximcdma_tx_segment *aximcdma_seg;
+	struct xilinx_cdma_desc_hw *cdma_hw;
+	struct xilinx_axidma_desc_hw *axidma_hw;
+	struct xilinx_aximcdma_desc_hw *aximcdma_hw;
+	struct list_head *entry;
+	u32 residue = 0;
+
+	list_for_each(entry, &desc->segments) {
+		if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
+			cdma_seg = list_entry(entry,
+					      struct xilinx_cdma_tx_segment,
+					      node);
+			cdma_hw = &cdma_seg->hw;
+			residue += (cdma_hw->control - cdma_hw->status) &
+				   chan->xdev->max_buffer_len;
+		} else if (chan->xdev->dma_config->dmatype ==
+			   XDMA_TYPE_AXIDMA) {
+			axidma_seg = list_entry(entry,
+						struct xilinx_axidma_tx_segment,
+						node);
+			axidma_hw = &axidma_seg->hw;
+			residue += (axidma_hw->control - axidma_hw->status) &
+				   chan->xdev->max_buffer_len;
+		} else {
+			aximcdma_seg =
+				list_entry(entry,
+					   struct xilinx_aximcdma_tx_segment,
+					   node);
+			aximcdma_hw = &aximcdma_seg->hw;
+			residue +=
+				(aximcdma_hw->control - aximcdma_hw->status) &
+				chan->xdev->max_buffer_len;
+		}
+	}
+
+	return residue;
 }
 
 /**
@@ -825,7 +1022,7 @@
 	spin_lock_irqsave(&chan->lock, flags);
 
 	list_for_each_entry_safe(desc, next, &chan->done_list, node) {
-		struct dmaengine_desc_callback cb;
+		struct dmaengine_result result;
 
 		if (desc->cyclic) {
 			xilinx_dma_chan_handle_cyclic(chan, desc, &flags);
@@ -835,14 +1032,22 @@
 		/* Remove from the list of running transactions */
 		list_del(&desc->node);
 
-		/* Run the link descriptor callback function */
-		dmaengine_desc_get_callback(&desc->async_tx, &cb);
-		if (dmaengine_desc_callback_valid(&cb)) {
-			spin_unlock_irqrestore(&chan->lock, flags);
-			dmaengine_desc_callback_invoke(&cb, NULL);
-			spin_lock_irqsave(&chan->lock, flags);
+		if (unlikely(desc->err)) {
+			if (chan->direction == DMA_DEV_TO_MEM)
+				result.result = DMA_TRANS_READ_FAILED;
+			else
+				result.result = DMA_TRANS_WRITE_FAILED;
+		} else {
+			result.result = DMA_TRANS_NOERROR;
 		}
 
+		result.residue = desc->residue;
+
+		/* Run the link descriptor callback function */
+		spin_unlock_irqrestore(&chan->lock, flags);
+		dmaengine_desc_get_callback_invoke(&desc->async_tx, &result);
+		spin_lock_irqsave(&chan->lock, flags);
+
 		/* Run any dependencies, then free the descriptor */
 		dma_run_dependencies(&desc->async_tx);
 		xilinx_dma_free_tx_descriptor(chan, desc);
@@ -860,11 +1065,11 @@
 
 /**
  * xilinx_dma_do_tasklet - Schedule completion tasklet
- * @data: Pointer to the Xilinx DMA channel structure
+ * @t: Pointer to the Xilinx DMA channel structure
  */
-static void xilinx_dma_do_tasklet(unsigned long data)
+static void xilinx_dma_do_tasklet(struct tasklet_struct *t)
 {
-	struct xilinx_dma_chan *chan = (struct xilinx_dma_chan *)data;
+	struct xilinx_dma_chan *chan = from_tasklet(chan, t, tasklet);
 
 	xilinx_dma_chan_desc_cleanup(chan);
 }
@@ -931,6 +1136,30 @@
 			list_add_tail(&chan->seg_v[i].node,
 				      &chan->free_seg_list);
 		}
+	} else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA) {
+		/* Allocate the buffer descriptors. */
+		chan->seg_mv = dma_alloc_coherent(chan->dev,
+						  sizeof(*chan->seg_mv) *
+						  XILINX_DMA_NUM_DESCS,
+						  &chan->seg_p, GFP_KERNEL);
+		if (!chan->seg_mv) {
+			dev_err(chan->dev,
+				"unable to allocate channel %d descriptors\n",
+				chan->id);
+			return -ENOMEM;
+		}
+		for (i = 0; i < XILINX_DMA_NUM_DESCS; i++) {
+			chan->seg_mv[i].hw.next_desc =
+			lower_32_bits(chan->seg_p + sizeof(*chan->seg_mv) *
+				((i + 1) % XILINX_DMA_NUM_DESCS));
+			chan->seg_mv[i].hw.next_desc_msb =
+			upper_32_bits(chan->seg_p + sizeof(*chan->seg_mv) *
+				((i + 1) % XILINX_DMA_NUM_DESCS));
+			chan->seg_mv[i].phys = chan->seg_p +
+				sizeof(*chan->seg_mv) * i;
+			list_add_tail(&chan->seg_mv[i].node,
+				      &chan->free_seg_list);
+		}
 	} else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
 		chan->desc_pool = dma_pool_create("xilinx_cdma_desc_pool",
 				   chan->dev,
@@ -946,7 +1175,8 @@
 	}
 
 	if (!chan->desc_pool &&
-	    (chan->xdev->dma_config->dmatype != XDMA_TYPE_AXIDMA)) {
+	    ((chan->xdev->dma_config->dmatype != XDMA_TYPE_AXIDMA) &&
+		chan->xdev->dma_config->dmatype != XDMA_TYPE_AXIMCDMA)) {
 		dev_err(chan->dev,
 			"unable to allocate channel %d descriptor pool\n",
 			chan->id);
@@ -1012,8 +1242,6 @@
 {
 	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
 	struct xilinx_dma_tx_descriptor *desc;
-	struct xilinx_axidma_tx_segment *segment;
-	struct xilinx_axidma_desc_hw *hw;
 	enum dma_status ret;
 	unsigned long flags;
 	u32 residue = 0;
@@ -1022,23 +1250,20 @@
 	if (ret == DMA_COMPLETE || !txstate)
 		return ret;
 
-	if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
-		spin_lock_irqsave(&chan->lock, flags);
-
+	spin_lock_irqsave(&chan->lock, flags);
+	if (!list_empty(&chan->active_list)) {
 		desc = list_last_entry(&chan->active_list,
 				       struct xilinx_dma_tx_descriptor, node);
-		if (chan->has_sg) {
-			list_for_each_entry(segment, &desc->segments, node) {
-				hw = &segment->hw;
-				residue += (hw->control - hw->status) &
-					   chan->xdev->max_buffer_len;
-			}
-		}
-		spin_unlock_irqrestore(&chan->lock, flags);
-
-		chan->residue = residue;
-		dma_set_residue(txstate, chan->residue);
+		/*
+		 * VDMA and simple mode do not support residue reporting, so the
+		 * residue field will always be 0.
+		 */
+		if (chan->has_sg && chan->xdev->dma_config->dmatype != XDMA_TYPE_VDMA)
+			residue = xilinx_dma_get_residue(chan, desc);
 	}
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	dma_set_residue(txstate, residue);
 
 	return ret;
 }
@@ -1310,53 +1535,23 @@
 		dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg);
 	}
 
-	if (chan->has_sg && !chan->xdev->mcdma)
+	if (chan->has_sg)
 		xilinx_write(chan, XILINX_DMA_REG_CURDESC,
 			     head_desc->async_tx.phys);
 
-	if (chan->has_sg && chan->xdev->mcdma) {
-		if (chan->direction == DMA_MEM_TO_DEV) {
-			dma_ctrl_write(chan, XILINX_DMA_REG_CURDESC,
-				       head_desc->async_tx.phys);
-		} else {
-			if (!chan->tdest) {
-				dma_ctrl_write(chan, XILINX_DMA_REG_CURDESC,
-				       head_desc->async_tx.phys);
-			} else {
-				dma_ctrl_write(chan,
-					XILINX_DMA_MCRX_CDESC(chan->tdest),
-				       head_desc->async_tx.phys);
-			}
-		}
-	}
-
 	xilinx_dma_start(chan);
 
 	if (chan->err)
 		return;
 
 	/* Start the transfer */
-	if (chan->has_sg && !chan->xdev->mcdma) {
+	if (chan->has_sg) {
 		if (chan->cyclic)
 			xilinx_write(chan, XILINX_DMA_REG_TAILDESC,
 				     chan->cyclic_seg_v->phys);
 		else
 			xilinx_write(chan, XILINX_DMA_REG_TAILDESC,
 				     tail_segment->phys);
-	} else if (chan->has_sg && chan->xdev->mcdma) {
-		if (chan->direction == DMA_MEM_TO_DEV) {
-			dma_ctrl_write(chan, XILINX_DMA_REG_TAILDESC,
-			       tail_segment->phys);
-		} else {
-			if (!chan->tdest) {
-				dma_ctrl_write(chan, XILINX_DMA_REG_TAILDESC,
-					       tail_segment->phys);
-			} else {
-				dma_ctrl_write(chan,
-					XILINX_DMA_MCRX_TDESC(chan->tdest),
-					tail_segment->phys);
-			}
-		}
 	} else {
 		struct xilinx_axidma_tx_segment *segment;
 		struct xilinx_axidma_desc_hw *hw;
@@ -1380,6 +1575,76 @@
 }
 
 /**
+ * xilinx_mcdma_start_transfer - Starts MCDMA transfer
+ * @chan: Driver specific channel struct pointer
+ */
+static void xilinx_mcdma_start_transfer(struct xilinx_dma_chan *chan)
+{
+	struct xilinx_dma_tx_descriptor *head_desc, *tail_desc;
+	struct xilinx_aximcdma_tx_segment *tail_segment;
+	u32 reg;
+
+	/*
+	 * The lock has already been taken by the calling functions, so we
+	 * don't need to take it here again.
+	 */
+
+	if (chan->err)
+		return;
+
+	if (!chan->idle)
+		return;
+
+	if (list_empty(&chan->pending_list))
+		return;
+
+	head_desc = list_first_entry(&chan->pending_list,
+				     struct xilinx_dma_tx_descriptor, node);
+	tail_desc = list_last_entry(&chan->pending_list,
+				    struct xilinx_dma_tx_descriptor, node);
+	tail_segment = list_last_entry(&tail_desc->segments,
+				       struct xilinx_aximcdma_tx_segment, node);
+
+	reg = dma_ctrl_read(chan, XILINX_MCDMA_CHAN_CR_OFFSET(chan->tdest));
+
+	if (chan->desc_pendingcount <= XILINX_MCDMA_COALESCE_MAX) {
+		reg &= ~XILINX_MCDMA_COALESCE_MASK;
+		reg |= chan->desc_pendingcount <<
+			XILINX_MCDMA_COALESCE_SHIFT;
+	}
+
+	reg |= XILINX_MCDMA_IRQ_ALL_MASK;
+	dma_ctrl_write(chan, XILINX_MCDMA_CHAN_CR_OFFSET(chan->tdest), reg);
+
+	/* Program current descriptor */
+	xilinx_write(chan, XILINX_MCDMA_CHAN_CDESC_OFFSET(chan->tdest),
+		     head_desc->async_tx.phys);
+
+	/* Program channel enable register */
+	reg = dma_ctrl_read(chan, XILINX_MCDMA_CHEN_OFFSET);
+	reg |= BIT(chan->tdest);
+	dma_ctrl_write(chan, XILINX_MCDMA_CHEN_OFFSET, reg);
+
+	/* Start the fetch of BDs for the channel */
+	reg = dma_ctrl_read(chan, XILINX_MCDMA_CHAN_CR_OFFSET(chan->tdest));
+	reg |= XILINX_MCDMA_CR_RUNSTOP_MASK;
+	dma_ctrl_write(chan, XILINX_MCDMA_CHAN_CR_OFFSET(chan->tdest), reg);
+
+	xilinx_dma_start(chan);
+
+	if (chan->err)
+		return;
+
+	/* Start the transfer */
+	xilinx_write(chan, XILINX_MCDMA_CHAN_TDESC_OFFSET(chan->tdest),
+		     tail_segment->phys);
+
+	list_splice_tail_init(&chan->pending_list, &chan->active_list);
+	chan->desc_pendingcount = 0;
+	chan->idle = false;
+}
+
+/**
  * xilinx_dma_issue_pending - Issue pending transactions
  * @dchan: DMA channel
  */
@@ -1408,6 +1673,13 @@
 		return;
 
 	list_for_each_entry_safe(desc, next, &chan->active_list, node) {
+		if (chan->has_sg && chan->xdev->dma_config->dmatype !=
+		    XDMA_TYPE_VDMA)
+			desc->residue = xilinx_dma_get_residue(chan, desc);
+		else
+			desc->residue = 0;
+		desc->err = chan->err;
+
 		list_del(&desc->node);
 		if (!desc->cyclic)
 			dma_cookie_complete(&desc->async_tx);
@@ -1471,6 +1743,74 @@
 }
 
 /**
+ * xilinx_mcdma_irq_handler - MCDMA Interrupt handler
+ * @irq: IRQ number
+ * @data: Pointer to the Xilinx MCDMA channel structure
+ *
+ * Return: IRQ_HANDLED/IRQ_NONE
+ */
+static irqreturn_t xilinx_mcdma_irq_handler(int irq, void *data)
+{
+	struct xilinx_dma_chan *chan = data;
+	u32 status, ser_offset, chan_sermask, chan_offset = 0, chan_id;
+
+	if (chan->direction == DMA_DEV_TO_MEM)
+		ser_offset = XILINX_MCDMA_RXINT_SER_OFFSET;
+	else
+		ser_offset = XILINX_MCDMA_TXINT_SER_OFFSET;
+
+	/* Read the channel ID raising the interrupt */
+	chan_sermask = dma_ctrl_read(chan, ser_offset);
+	chan_id = ffs(chan_sermask);
+
+	if (!chan_id)
+		return IRQ_NONE;
+
+	if (chan->direction == DMA_DEV_TO_MEM)
+		chan_offset = chan->xdev->dma_config->max_channels / 2;
+
+	chan_offset = chan_offset + (chan_id - 1);
+	chan = chan->xdev->chan[chan_offset];
+	/* Read the status and ack the interrupts. */
+	status = dma_ctrl_read(chan, XILINX_MCDMA_CHAN_SR_OFFSET(chan->tdest));
+	if (!(status & XILINX_MCDMA_IRQ_ALL_MASK))
+		return IRQ_NONE;
+
+	dma_ctrl_write(chan, XILINX_MCDMA_CHAN_SR_OFFSET(chan->tdest),
+		       status & XILINX_MCDMA_IRQ_ALL_MASK);
+
+	if (status & XILINX_MCDMA_IRQ_ERR_MASK) {
+		dev_err(chan->dev, "Channel %p has errors %x cdr %x tdr %x\n",
+			chan,
+			dma_ctrl_read(chan, XILINX_MCDMA_CH_ERR_OFFSET),
+			dma_ctrl_read(chan, XILINX_MCDMA_CHAN_CDESC_OFFSET
+				      (chan->tdest)),
+			dma_ctrl_read(chan, XILINX_MCDMA_CHAN_TDESC_OFFSET
+				      (chan->tdest)));
+		chan->err = true;
+	}
+
+	if (status & XILINX_MCDMA_IRQ_DELAY_MASK) {
+		/*
+		 * The device is taking too long to complete the transfer when
+		 * the user requires responsiveness.
+		 */
+		dev_dbg(chan->dev, "Inter-packet latency too long\n");
+	}
+
+	if (status & XILINX_MCDMA_IRQ_IOC_MASK) {
+		spin_lock(&chan->lock);
+		xilinx_dma_complete_descriptor(chan);
+		chan->idle = true;
+		chan->start_transfer(chan);
+		spin_unlock(&chan->lock);
+	}
+
+	tasklet_schedule(&chan->tasklet);
+	return IRQ_HANDLED;
+}
+
+/**
  * xilinx_dma_irq_handler - DMA Interrupt handler
  * @irq: IRQ number
  * @data: Pointer to the Xilinx DMA channel structure
@@ -1545,6 +1885,7 @@
 	struct xilinx_vdma_tx_segment *tail_segment;
 	struct xilinx_dma_tx_descriptor *tail_desc;
 	struct xilinx_axidma_tx_segment *axidma_tail_segment;
+	struct xilinx_aximcdma_tx_segment *aximcdma_tail_segment;
 	struct xilinx_cdma_tx_segment *cdma_tail_segment;
 
 	if (list_empty(&chan->pending_list))
@@ -1566,11 +1907,17 @@
 						struct xilinx_cdma_tx_segment,
 						node);
 		cdma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys;
-	} else {
+	} else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
 		axidma_tail_segment = list_last_entry(&tail_desc->segments,
 					       struct xilinx_axidma_tx_segment,
 					       node);
 		axidma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys;
+	} else {
+		aximcdma_tail_segment =
+			list_last_entry(&tail_desc->segments,
+					struct xilinx_aximcdma_tx_segment,
+					node);
+		aximcdma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys;
 	}
 
 	/*
@@ -1979,31 +2326,32 @@
 }
 
 /**
- * xilinx_dma_prep_interleaved - prepare a descriptor for a
- *	DMA_SLAVE transaction
+ * xilinx_mcdma_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction
  * @dchan: DMA channel
- * @xt: Interleaved template pointer
+ * @sgl: scatterlist to transfer to/from
+ * @sg_len: number of entries in @scatterlist
+ * @direction: DMA direction
  * @flags: transfer ack flags
+ * @context: APP words of the descriptor
  *
  * Return: Async transaction descriptor on success and NULL on failure
  */
 static struct dma_async_tx_descriptor *
-xilinx_dma_prep_interleaved(struct dma_chan *dchan,
-				 struct dma_interleaved_template *xt,
-				 unsigned long flags)
+xilinx_mcdma_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
+			   unsigned int sg_len,
+			   enum dma_transfer_direction direction,
+			   unsigned long flags, void *context)
 {
 	struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
 	struct xilinx_dma_tx_descriptor *desc;
-	struct xilinx_axidma_tx_segment *segment;
-	struct xilinx_axidma_desc_hw *hw;
+	struct xilinx_aximcdma_tx_segment *segment = NULL;
+	u32 *app_w = (u32 *)context;
+	struct scatterlist *sg;
+	size_t copy;
+	size_t sg_used;
+	unsigned int i;
 
-	if (!is_slave_direction(xt->dir))
-		return NULL;
-
-	if (!xt->numf || !xt->sgl[0].size)
-		return NULL;
-
-	if (xt->frame_size != 1)
+	if (!is_slave_direction(direction))
 		return NULL;
 
 	/* Allocate a transaction descriptor. */
@@ -2011,54 +2359,67 @@
 	if (!desc)
 		return NULL;
 
-	chan->direction = xt->dir;
 	dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
 	desc->async_tx.tx_submit = xilinx_dma_tx_submit;
 
-	/* Get a free segment */
-	segment = xilinx_axidma_alloc_tx_segment(chan);
-	if (!segment)
-		goto error;
+	/* Build transactions using information in the scatter gather list */
+	for_each_sg(sgl, sg, sg_len, i) {
+		sg_used = 0;
 
-	hw = &segment->hw;
+		/* Loop until the entire scatterlist entry is used */
+		while (sg_used < sg_dma_len(sg)) {
+			struct xilinx_aximcdma_desc_hw *hw;
 
-	/* Fill in the descriptor */
-	if (xt->dir != DMA_MEM_TO_DEV)
-		hw->buf_addr = xt->dst_start;
-	else
-		hw->buf_addr = xt->src_start;
+			/* Get a free segment */
+			segment = xilinx_aximcdma_alloc_tx_segment(chan);
+			if (!segment)
+				goto error;
 
-	hw->mcdma_control = chan->tdest & XILINX_DMA_BD_TDEST_MASK;
-	hw->vsize_stride = (xt->numf << XILINX_DMA_BD_VSIZE_SHIFT) &
-			    XILINX_DMA_BD_VSIZE_MASK;
-	hw->vsize_stride |= (xt->sgl[0].icg + xt->sgl[0].size) &
-			    XILINX_DMA_BD_STRIDE_MASK;
-	hw->control = xt->sgl[0].size & XILINX_DMA_BD_HSIZE_MASK;
+			/*
+			 * Calculate the maximum number of bytes to transfer,
+			 * making sure it is less than the hw limit
+			 */
+			copy = min_t(size_t, sg_dma_len(sg) - sg_used,
+				     chan->xdev->max_buffer_len);
+			hw = &segment->hw;
 
-	/*
-	 * Insert the segment into the descriptor segments
-	 * list.
-	 */
-	list_add_tail(&segment->node, &desc->segments);
+			/* Fill in the descriptor */
+			xilinx_aximcdma_buf(chan, hw, sg_dma_address(sg),
+					    sg_used);
+			hw->control = copy;
 
+			if (chan->direction == DMA_MEM_TO_DEV && app_w) {
+				memcpy(hw->app, app_w, sizeof(u32) *
+				       XILINX_DMA_NUM_APP_WORDS);
+			}
+
+			sg_used += copy;
+			/*
+			 * Insert the segment into the descriptor segments
+			 * list.
+			 */
+			list_add_tail(&segment->node, &desc->segments);
+		}
+	}
 
 	segment = list_first_entry(&desc->segments,
-				   struct xilinx_axidma_tx_segment, node);
+				   struct xilinx_aximcdma_tx_segment, node);
 	desc->async_tx.phys = segment->phys;
 
 	/* For the last DMA_MEM_TO_DEV transfer, set EOP */
-	if (xt->dir == DMA_MEM_TO_DEV) {
-		segment->hw.control |= XILINX_DMA_BD_SOP;
+	if (chan->direction == DMA_MEM_TO_DEV) {
+		segment->hw.control |= XILINX_MCDMA_BD_SOP;
 		segment = list_last_entry(&desc->segments,
-					  struct xilinx_axidma_tx_segment,
+					  struct xilinx_aximcdma_tx_segment,
 					  node);
-		segment->hw.control |= XILINX_DMA_BD_EOP;
+		segment->hw.control |= XILINX_MCDMA_BD_EOP;
 	}
 
 	return &desc->async_tx;
 
 error:
 	xilinx_dma_free_tx_descriptor(chan, desc);
+
 	return NULL;
 }
 
@@ -2074,16 +2435,17 @@
 	u32 reg;
 	int err;
 
-	if (chan->cyclic)
-		xilinx_dma_chan_reset(chan);
-
-	err = chan->stop_transfer(chan);
-	if (err) {
-		dev_err(chan->dev, "Cannot stop channel %p: %x\n",
-			chan, dma_ctrl_read(chan, XILINX_DMA_REG_DMASR));
-		chan->err = true;
+	if (!chan->cyclic) {
+		err = chan->stop_transfer(chan);
+		if (err) {
+			dev_err(chan->dev, "Cannot stop channel %p: %x\n",
+				chan, dma_ctrl_read(chan,
+				XILINX_DMA_REG_DMASR));
+			chan->err = true;
+		}
 	}
 
+	xilinx_dma_chan_reset(chan);
 	/* Remove and free all of the descriptors in the lists */
 	chan->terminating = true;
 	xilinx_dma_free_descriptors(chan);
@@ -2205,11 +2567,8 @@
 	*tmp_clk = NULL;
 
 	*axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk");
-	if (IS_ERR(*axi_clk)) {
-		err = PTR_ERR(*axi_clk);
-		dev_err(&pdev->dev, "failed to get axi_aclk (%d)\n", err);
-		return err;
-	}
+	if (IS_ERR(*axi_clk))
+		return dev_err_probe(&pdev->dev, PTR_ERR(*axi_clk), "failed to get axi_aclk\n");
 
 	*tx_clk = devm_clk_get(&pdev->dev, "m_axi_mm2s_aclk");
 	if (IS_ERR(*tx_clk))
@@ -2270,18 +2629,12 @@
 	*tmp2_clk = NULL;
 
 	*axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk");
-	if (IS_ERR(*axi_clk)) {
-		err = PTR_ERR(*axi_clk);
-		dev_err(&pdev->dev, "failed to get axi_clk (%d)\n", err);
-		return err;
-	}
+	if (IS_ERR(*axi_clk))
+		return dev_err_probe(&pdev->dev, PTR_ERR(*axi_clk), "failed to get axi_aclk\n");
 
 	*dev_clk = devm_clk_get(&pdev->dev, "m_axi_aclk");
-	if (IS_ERR(*dev_clk)) {
-		err = PTR_ERR(*dev_clk);
-		dev_err(&pdev->dev, "failed to get dev_clk (%d)\n", err);
-		return err;
-	}
+	if (IS_ERR(*dev_clk))
+		return dev_err_probe(&pdev->dev, PTR_ERR(*dev_clk), "failed to get dev_clk\n");
 
 	err = clk_prepare_enable(*axi_clk);
 	if (err) {
@@ -2310,11 +2663,8 @@
 	int err;
 
 	*axi_clk = devm_clk_get(&pdev->dev, "s_axi_lite_aclk");
-	if (IS_ERR(*axi_clk)) {
-		err = PTR_ERR(*axi_clk);
-		dev_err(&pdev->dev, "failed to get axi_aclk (%d)\n", err);
-		return err;
-	}
+	if (IS_ERR(*axi_clk))
+		return dev_err_probe(&pdev->dev, PTR_ERR(*axi_clk), "failed to get axi_aclk\n");
 
 	*tx_clk = devm_clk_get(&pdev->dev, "m_axi_mm2s_aclk");
 	if (IS_ERR(*tx_clk))
@@ -2334,7 +2684,8 @@
 
 	err = clk_prepare_enable(*axi_clk);
 	if (err) {
-		dev_err(&pdev->dev, "failed to enable axi_clk (%d)\n", err);
+		dev_err(&pdev->dev, "failed to enable axi_clk (%d)\n",
+			err);
 		return err;
 	}
 
@@ -2392,12 +2743,11 @@
  *
  * @xdev: Driver specific device structure
  * @node: Device node
- * @chan_id: DMA Channel id
  *
  * Return: '0' on success and failure value on error
  */
 static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
-				  struct device_node *node, int chan_id)
+				  struct device_node *node)
 {
 	struct xilinx_dma_chan *chan;
 	bool has_dre = false;
@@ -2449,8 +2799,8 @@
 	    of_device_is_compatible(node, "xlnx,axi-dma-mm2s-channel") ||
 	    of_device_is_compatible(node, "xlnx,axi-cdma-channel")) {
 		chan->direction = DMA_MEM_TO_DEV;
-		chan->id = chan_id;
-		chan->tdest = chan_id;
+		chan->id = xdev->mm2s_chan_id++;
+		chan->tdest = chan->id;
 
 		chan->ctrl_offset = XILINX_DMA_MM2S_CTRL_OFFSET;
 		if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
@@ -2466,8 +2816,8 @@
 		   of_device_is_compatible(node,
 					   "xlnx,axi-dma-s2mm-channel")) {
 		chan->direction = DMA_DEV_TO_MEM;
-		chan->id = chan_id;
-		chan->tdest = chan_id - xdev->nr_channels;
+		chan->id = xdev->s2mm_chan_id++;
+		chan->tdest = chan->id - xdev->dma_config->max_channels / 2;
 		chan->has_vflip = of_property_read_bool(node,
 					"xlnx,enable-vert-flip");
 		if (chan->has_vflip) {
@@ -2476,7 +2826,11 @@
 				XILINX_VDMA_ENABLE_VERTICAL_FLIP;
 		}
 
-		chan->ctrl_offset = XILINX_DMA_S2MM_CTRL_OFFSET;
+		if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA)
+			chan->ctrl_offset = XILINX_MCDMA_S2MM_CTRL_OFFSET;
+		else
+			chan->ctrl_offset = XILINX_DMA_S2MM_CTRL_OFFSET;
+
 		if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
 			chan->desc_offset = XILINX_VDMA_S2MM_DESC_OFFSET;
 			chan->config.park = 1;
@@ -2491,9 +2845,9 @@
 	}
 
 	/* Request the interrupt */
-	chan->irq = irq_of_parse_and_map(node, 0);
-	err = request_irq(chan->irq, xilinx_dma_irq_handler, IRQF_SHARED,
-			  "xilinx-dma-controller", chan);
+	chan->irq = irq_of_parse_and_map(node, chan->tdest);
+	err = request_irq(chan->irq, xdev->dma_config->irq_handler,
+			  IRQF_SHARED, "xilinx-dma-controller", chan);
 	if (err) {
 		dev_err(xdev->dev, "unable to request IRQ %d\n", chan->irq);
 		return err;
@@ -2502,6 +2856,9 @@
 	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
 		chan->start_transfer = xilinx_dma_start_transfer;
 		chan->stop_transfer = xilinx_dma_stop_transfer;
+	} else if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA) {
+		chan->start_transfer = xilinx_mcdma_start_transfer;
+		chan->stop_transfer = xilinx_dma_stop_transfer;
 	} else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
 		chan->start_transfer = xilinx_cdma_start_transfer;
 		chan->stop_transfer = xilinx_cdma_stop_transfer;
@@ -2510,18 +2867,18 @@
 		chan->stop_transfer = xilinx_dma_stop_transfer;
 	}
 
-	/* check if SG is enabled (only for AXIDMA and CDMA) */
+	/* check if SG is enabled (only for AXIDMA, AXIMCDMA, and CDMA) */
 	if (xdev->dma_config->dmatype != XDMA_TYPE_VDMA) {
-		if (dma_ctrl_read(chan, XILINX_DMA_REG_DMASR) &
-		    XILINX_DMA_DMASR_SG_MASK)
+		if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA ||
+		    dma_ctrl_read(chan, XILINX_DMA_REG_DMASR) &
+			    XILINX_DMA_DMASR_SG_MASK)
 			chan->has_sg = true;
 		dev_dbg(chan->dev, "ch %d: SG %s\n", chan->id,
 			chan->has_sg ? "enabled" : "disabled");
 	}
 
 	/* Initialize the tasklet */
-	tasklet_init(&chan->tasklet, xilinx_dma_do_tasklet,
-			(unsigned long)chan);
+	tasklet_setup(&chan->tasklet, xilinx_dma_do_tasklet);
 
 	/*
 	 * Initialize the DMA channel and add it to the DMA engine channels
@@ -2559,13 +2916,11 @@
 	u32 nr_channels = 1;
 
 	ret = of_property_read_u32(node, "dma-channels", &nr_channels);
-	if ((ret < 0) && xdev->mcdma)
+	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA && ret < 0)
 		dev_warn(xdev->dev, "missing dma-channels property\n");
 
 	for (i = 0; i < nr_channels; i++)
-		xilinx_dma_chan_probe(xdev, node, xdev->chan_id++);
-
-	xdev->nr_channels += nr_channels;
+		xilinx_dma_chan_probe(xdev, node);
 
 	return 0;
 }
@@ -2583,7 +2938,7 @@
 	struct xilinx_dma_device *xdev = ofdma->of_dma_data;
 	int chan_id = dma_spec->args[0];
 
-	if (chan_id >= xdev->nr_channels || !xdev->chan[chan_id])
+	if (chan_id >= xdev->dma_config->max_channels || !xdev->chan[chan_id])
 		return NULL;
 
 	return dma_get_slave_channel(&xdev->chan[chan_id]->common);
@@ -2592,22 +2947,35 @@
 static const struct xilinx_dma_config axidma_config = {
 	.dmatype = XDMA_TYPE_AXIDMA,
 	.clk_init = axidma_clk_init,
+	.irq_handler = xilinx_dma_irq_handler,
+	.max_channels = XILINX_DMA_MAX_CHANS_PER_DEVICE,
 };
 
+static const struct xilinx_dma_config aximcdma_config = {
+	.dmatype = XDMA_TYPE_AXIMCDMA,
+	.clk_init = axidma_clk_init,
+	.irq_handler = xilinx_mcdma_irq_handler,
+	.max_channels = XILINX_MCDMA_MAX_CHANS_PER_DEVICE,
+};
 static const struct xilinx_dma_config axicdma_config = {
 	.dmatype = XDMA_TYPE_CDMA,
 	.clk_init = axicdma_clk_init,
+	.irq_handler = xilinx_dma_irq_handler,
+	.max_channels = XILINX_CDMA_MAX_CHANS_PER_DEVICE,
 };
 
 static const struct xilinx_dma_config axivdma_config = {
 	.dmatype = XDMA_TYPE_VDMA,
 	.clk_init = axivdma_clk_init,
+	.irq_handler = xilinx_dma_irq_handler,
+	.max_channels = XILINX_DMA_MAX_CHANS_PER_DEVICE,
 };
 
 static const struct of_device_id xilinx_dma_of_ids[] = {
 	{ .compatible = "xlnx,axi-dma-1.00.a", .data = &axidma_config },
 	{ .compatible = "xlnx,axi-cdma-1.00.a", .data = &axicdma_config },
 	{ .compatible = "xlnx,axi-vdma-1.00.a", .data = &axivdma_config },
+	{ .compatible = "xlnx,axi-mcdma-1.00.a", .data = &aximcdma_config },
 	{}
 };
 MODULE_DEVICE_TABLE(of, xilinx_dma_of_ids);
@@ -2626,7 +2994,6 @@
 	struct device_node *node = pdev->dev.of_node;
 	struct xilinx_dma_device *xdev;
 	struct device_node *child, *np = pdev->dev.of_node;
-	struct resource *io;
 	u32 num_frames, addr_width, len_width;
 	int i, err;
 
@@ -2652,16 +3019,16 @@
 		return err;
 
 	/* Request and map I/O memory */
-	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	xdev->regs = devm_ioremap_resource(&pdev->dev, io);
+	xdev->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(xdev->regs))
 		return PTR_ERR(xdev->regs);
 
 	/* Retrieve the DMA engine properties from the device tree */
 	xdev->max_buffer_len = GENMASK(XILINX_DMA_MAX_TRANS_LEN_MAX - 1, 0);
+	xdev->s2mm_chan_id = xdev->dma_config->max_channels / 2;
 
-	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
-		xdev->mcdma = of_property_read_bool(node, "xlnx,mcdma");
+	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA ||
+	    xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA) {
 		if (!of_property_read_u32(node, "xlnx,sg-length-width",
 					  &len_width)) {
 			if (len_width < XILINX_DMA_MAX_TRANS_LEN_MIN ||
@@ -2703,7 +3070,7 @@
 		xdev->ext_addr = false;
 
 	/* Set the dma mask bits */
-	dma_set_mask(xdev->dev, DMA_BIT_MASK(addr_width));
+	dma_set_mask_and_coherent(xdev->dev, DMA_BIT_MASK(addr_width));
 
 	/* Initialize the DMA engine */
 	xdev->common.dev = &pdev->dev;
@@ -2726,14 +3093,17 @@
 		xdev->common.device_prep_slave_sg = xilinx_dma_prep_slave_sg;
 		xdev->common.device_prep_dma_cyclic =
 					  xilinx_dma_prep_dma_cyclic;
-		xdev->common.device_prep_interleaved_dma =
-					xilinx_dma_prep_interleaved;
-		/* Residue calculation is supported by only AXI DMA */
+		/* Residue calculation is supported by only AXI DMA and CDMA */
 		xdev->common.residue_granularity =
 					  DMA_RESIDUE_GRANULARITY_SEGMENT;
 	} else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA) {
 		dma_cap_set(DMA_MEMCPY, xdev->common.cap_mask);
 		xdev->common.device_prep_dma_memcpy = xilinx_cdma_prep_memcpy;
+		/* Residue calculation is supported by only AXI DMA and CDMA */
+		xdev->common.residue_granularity =
+					  DMA_RESIDUE_GRANULARITY_SEGMENT;
+	} else if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA) {
+		xdev->common.device_prep_slave_sg = xilinx_mcdma_prep_slave_sg;
 	} else {
 		xdev->common.device_prep_interleaved_dma =
 				xilinx_vdma_dma_prep_interleaved;
@@ -2749,7 +3119,7 @@
 	}
 
 	if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
-		for (i = 0; i < xdev->nr_channels; i++)
+		for (i = 0; i < xdev->dma_config->max_channels; i++)
 			if (xdev->chan[i])
 				xdev->chan[i]->num_frms = num_frames;
 	}
@@ -2773,6 +3143,8 @@
 		dev_info(&pdev->dev, "Xilinx AXI DMA Engine Driver Probed!!\n");
 	else if (xdev->dma_config->dmatype == XDMA_TYPE_CDMA)
 		dev_info(&pdev->dev, "Xilinx AXI CDMA Engine Driver Probed!!\n");
+	else if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA)
+		dev_info(&pdev->dev, "Xilinx AXI MCDMA Engine Driver Probed!!\n");
 	else
 		dev_info(&pdev->dev, "Xilinx AXI VDMA Engine Driver Probed!!\n");
 
@@ -2781,7 +3153,7 @@
 disable_clks:
 	xdma_disable_allclks(xdev);
 error:
-	for (i = 0; i < xdev->nr_channels; i++)
+	for (i = 0; i < xdev->dma_config->max_channels; i++)
 		if (xdev->chan[i])
 			xilinx_dma_chan_remove(xdev->chan[i]);
 
@@ -2803,7 +3175,7 @@
 
 	dma_async_device_unregister(&xdev->common);
 
-	for (i = 0; i < xdev->nr_channels; i++)
+	for (i = 0; i < xdev->dma_config->max_channels; i++)
 		if (xdev->chan[i])
 			xilinx_dma_chan_remove(xdev->chan[i]);
 
diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c
new file mode 100644
index 0000000..6c70980
--- /dev/null
+++ b/drivers/dma/xilinx/xilinx_dpdma.c
@@ -0,0 +1,1775 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Xilinx ZynqMP DPDMA Engine driver
+ *
+ * Copyright (C) 2015 - 2020 Xilinx, Inc.
+ *
+ * Author: Hyun Woo Kwon <hyun.kwon@xilinx.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+
+#include <dt-bindings/dma/xlnx-zynqmp-dpdma.h>
+
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+/* DPDMA registers */
+#define XILINX_DPDMA_ERR_CTRL				0x000
+#define XILINX_DPDMA_ISR				0x004
+#define XILINX_DPDMA_IMR				0x008
+#define XILINX_DPDMA_IEN				0x00c
+#define XILINX_DPDMA_IDS				0x010
+#define XILINX_DPDMA_INTR_DESC_DONE(n)			BIT((n) + 0)
+#define XILINX_DPDMA_INTR_DESC_DONE_MASK		GENMASK(5, 0)
+#define XILINX_DPDMA_INTR_NO_OSTAND(n)			BIT((n) + 6)
+#define XILINX_DPDMA_INTR_NO_OSTAND_MASK		GENMASK(11, 6)
+#define XILINX_DPDMA_INTR_AXI_ERR(n)			BIT((n) + 12)
+#define XILINX_DPDMA_INTR_AXI_ERR_MASK			GENMASK(17, 12)
+#define XILINX_DPDMA_INTR_DESC_ERR(n)			BIT((n) + 16)
+#define XILINX_DPDMA_INTR_DESC_ERR_MASK			GENMASK(23, 18)
+#define XILINX_DPDMA_INTR_WR_CMD_FIFO_FULL		BIT(24)
+#define XILINX_DPDMA_INTR_WR_DATA_FIFO_FULL		BIT(25)
+#define XILINX_DPDMA_INTR_AXI_4K_CROSS			BIT(26)
+#define XILINX_DPDMA_INTR_VSYNC				BIT(27)
+#define XILINX_DPDMA_INTR_CHAN_ERR_MASK			0x00041000
+#define XILINX_DPDMA_INTR_CHAN_ERR			0x00fff000
+#define XILINX_DPDMA_INTR_GLOBAL_ERR			0x07000000
+#define XILINX_DPDMA_INTR_ERR_ALL			0x07fff000
+#define XILINX_DPDMA_INTR_CHAN_MASK			0x00041041
+#define XILINX_DPDMA_INTR_GLOBAL_MASK			0x0f000000
+#define XILINX_DPDMA_INTR_ALL				0x0fffffff
+#define XILINX_DPDMA_EISR				0x014
+#define XILINX_DPDMA_EIMR				0x018
+#define XILINX_DPDMA_EIEN				0x01c
+#define XILINX_DPDMA_EIDS				0x020
+#define XILINX_DPDMA_EINTR_INV_APB			BIT(0)
+#define XILINX_DPDMA_EINTR_RD_AXI_ERR(n)		BIT((n) + 1)
+#define XILINX_DPDMA_EINTR_RD_AXI_ERR_MASK		GENMASK(6, 1)
+#define XILINX_DPDMA_EINTR_PRE_ERR(n)			BIT((n) + 7)
+#define XILINX_DPDMA_EINTR_PRE_ERR_MASK			GENMASK(12, 7)
+#define XILINX_DPDMA_EINTR_CRC_ERR(n)			BIT((n) + 13)
+#define XILINX_DPDMA_EINTR_CRC_ERR_MASK			GENMASK(18, 13)
+#define XILINX_DPDMA_EINTR_WR_AXI_ERR(n)		BIT((n) + 19)
+#define XILINX_DPDMA_EINTR_WR_AXI_ERR_MASK		GENMASK(24, 19)
+#define XILINX_DPDMA_EINTR_DESC_DONE_ERR(n)		BIT((n) + 25)
+#define XILINX_DPDMA_EINTR_DESC_DONE_ERR_MASK		GENMASK(30, 25)
+#define XILINX_DPDMA_EINTR_RD_CMD_FIFO_FULL		BIT(32)
+#define XILINX_DPDMA_EINTR_CHAN_ERR_MASK		0x02082082
+#define XILINX_DPDMA_EINTR_CHAN_ERR			0x7ffffffe
+#define XILINX_DPDMA_EINTR_GLOBAL_ERR			0x80000001
+#define XILINX_DPDMA_EINTR_ALL				0xffffffff
+#define XILINX_DPDMA_CNTL				0x100
+#define XILINX_DPDMA_GBL				0x104
+#define XILINX_DPDMA_GBL_TRIG_MASK(n)			((n) << 0)
+#define XILINX_DPDMA_GBL_RETRIG_MASK(n)			((n) << 6)
+#define XILINX_DPDMA_ALC0_CNTL				0x108
+#define XILINX_DPDMA_ALC0_STATUS			0x10c
+#define XILINX_DPDMA_ALC0_MAX				0x110
+#define XILINX_DPDMA_ALC0_MIN				0x114
+#define XILINX_DPDMA_ALC0_ACC				0x118
+#define XILINX_DPDMA_ALC0_ACC_TRAN			0x11c
+#define XILINX_DPDMA_ALC1_CNTL				0x120
+#define XILINX_DPDMA_ALC1_STATUS			0x124
+#define XILINX_DPDMA_ALC1_MAX				0x128
+#define XILINX_DPDMA_ALC1_MIN				0x12c
+#define XILINX_DPDMA_ALC1_ACC				0x130
+#define XILINX_DPDMA_ALC1_ACC_TRAN			0x134
+
+/* Channel register */
+#define XILINX_DPDMA_CH_BASE				0x200
+#define XILINX_DPDMA_CH_OFFSET				0x100
+#define XILINX_DPDMA_CH_DESC_START_ADDRE		0x000
+#define XILINX_DPDMA_CH_DESC_START_ADDRE_MASK		GENMASK(15, 0)
+#define XILINX_DPDMA_CH_DESC_START_ADDR			0x004
+#define XILINX_DPDMA_CH_DESC_NEXT_ADDRE			0x008
+#define XILINX_DPDMA_CH_DESC_NEXT_ADDR			0x00c
+#define XILINX_DPDMA_CH_PYLD_CUR_ADDRE			0x010
+#define XILINX_DPDMA_CH_PYLD_CUR_ADDR			0x014
+#define XILINX_DPDMA_CH_CNTL				0x018
+#define XILINX_DPDMA_CH_CNTL_ENABLE			BIT(0)
+#define XILINX_DPDMA_CH_CNTL_PAUSE			BIT(1)
+#define XILINX_DPDMA_CH_CNTL_QOS_DSCR_WR_MASK		GENMASK(5, 2)
+#define XILINX_DPDMA_CH_CNTL_QOS_DSCR_RD_MASK		GENMASK(9, 6)
+#define XILINX_DPDMA_CH_CNTL_QOS_DATA_RD_MASK		GENMASK(13, 10)
+#define XILINX_DPDMA_CH_CNTL_QOS_VID_CLASS		11
+#define XILINX_DPDMA_CH_STATUS				0x01c
+#define XILINX_DPDMA_CH_STATUS_OTRAN_CNT_MASK		GENMASK(24, 21)
+#define XILINX_DPDMA_CH_VDO				0x020
+#define XILINX_DPDMA_CH_PYLD_SZ				0x024
+#define XILINX_DPDMA_CH_DESC_ID				0x028
+#define XILINX_DPDMA_CH_DESC_ID_MASK			GENMASK(15, 0)
+
+/* DPDMA descriptor fields */
+#define XILINX_DPDMA_DESC_CONTROL_PREEMBLE		0xa5
+#define XILINX_DPDMA_DESC_CONTROL_COMPLETE_INTR		BIT(8)
+#define XILINX_DPDMA_DESC_CONTROL_DESC_UPDATE		BIT(9)
+#define XILINX_DPDMA_DESC_CONTROL_IGNORE_DONE		BIT(10)
+#define XILINX_DPDMA_DESC_CONTROL_FRAG_MODE		BIT(18)
+#define XILINX_DPDMA_DESC_CONTROL_LAST			BIT(19)
+#define XILINX_DPDMA_DESC_CONTROL_ENABLE_CRC		BIT(20)
+#define XILINX_DPDMA_DESC_CONTROL_LAST_OF_FRAME		BIT(21)
+#define XILINX_DPDMA_DESC_ID_MASK			GENMASK(15, 0)
+#define XILINX_DPDMA_DESC_HSIZE_STRIDE_HSIZE_MASK	GENMASK(17, 0)
+#define XILINX_DPDMA_DESC_HSIZE_STRIDE_STRIDE_MASK	GENMASK(31, 18)
+#define XILINX_DPDMA_DESC_ADDR_EXT_NEXT_ADDR_MASK	GENMASK(15, 0)
+#define XILINX_DPDMA_DESC_ADDR_EXT_SRC_ADDR_MASK	GENMASK(31, 16)
+
+#define XILINX_DPDMA_ALIGN_BYTES			256
+#define XILINX_DPDMA_LINESIZE_ALIGN_BITS		128
+
+#define XILINX_DPDMA_NUM_CHAN				6
+
+struct xilinx_dpdma_chan;
+
+/**
+ * struct xilinx_dpdma_hw_desc - DPDMA hardware descriptor
+ * @control: control configuration field
+ * @desc_id: descriptor ID
+ * @xfer_size: transfer size
+ * @hsize_stride: horizontal size and stride
+ * @timestamp_lsb: LSB of time stamp
+ * @timestamp_msb: MSB of time stamp
+ * @addr_ext: upper 16 bit of 48 bit address (next_desc and src_addr)
+ * @next_desc: next descriptor 32 bit address
+ * @src_addr: payload source address (1st page, 32 LSB)
+ * @addr_ext_23: payload source address (2nd and 3rd pages, 16 LSBs)
+ * @addr_ext_45: payload source address (4th and 5th pages, 16 LSBs)
+ * @src_addr2: payload source address (2nd page, 32 LSB)
+ * @src_addr3: payload source address (3rd page, 32 LSB)
+ * @src_addr4: payload source address (4th page, 32 LSB)
+ * @src_addr5: payload source address (5th page, 32 LSB)
+ * @crc: descriptor CRC
+ */
+struct xilinx_dpdma_hw_desc {
+	u32 control;
+	u32 desc_id;
+	u32 xfer_size;
+	u32 hsize_stride;
+	u32 timestamp_lsb;
+	u32 timestamp_msb;
+	u32 addr_ext;
+	u32 next_desc;
+	u32 src_addr;
+	u32 addr_ext_23;
+	u32 addr_ext_45;
+	u32 src_addr2;
+	u32 src_addr3;
+	u32 src_addr4;
+	u32 src_addr5;
+	u32 crc;
+} __aligned(XILINX_DPDMA_ALIGN_BYTES);
+
+/**
+ * struct xilinx_dpdma_sw_desc - DPDMA software descriptor
+ * @hw: DPDMA hardware descriptor
+ * @node: list node for software descriptors
+ * @dma_addr: DMA address of the software descriptor
+ */
+struct xilinx_dpdma_sw_desc {
+	struct xilinx_dpdma_hw_desc hw;
+	struct list_head node;
+	dma_addr_t dma_addr;
+};
+
+/**
+ * struct xilinx_dpdma_tx_desc - DPDMA transaction descriptor
+ * @vdesc: virtual DMA descriptor
+ * @chan: DMA channel
+ * @descriptors: list of software descriptors
+ * @error: an error has been detected with this descriptor
+ */
+struct xilinx_dpdma_tx_desc {
+	struct virt_dma_desc vdesc;
+	struct xilinx_dpdma_chan *chan;
+	struct list_head descriptors;
+	bool error;
+};
+
+#define to_dpdma_tx_desc(_desc) \
+	container_of(_desc, struct xilinx_dpdma_tx_desc, vdesc)
+
+/**
+ * struct xilinx_dpdma_chan - DPDMA channel
+ * @vchan: virtual DMA channel
+ * @reg: register base address
+ * @id: channel ID
+ * @wait_to_stop: queue to wait for outstanding transactions before stopping
+ * @running: true if the channel is running
+ * @first_frame: flag for the first frame of stream
+ * @video_group: flag if multi-channel operation is needed for video channels
+ * @lock: lock to access struct xilinx_dpdma_chan
+ * @desc_pool: descriptor allocation pool
+ * @err_task: error IRQ bottom half handler
+ * @desc: References to descriptors being processed
+ * @desc.pending: Descriptor schedule to the hardware, pending execution
+ * @desc.active: Descriptor being executed by the hardware
+ * @xdev: DPDMA device
+ */
+struct xilinx_dpdma_chan {
+	struct virt_dma_chan vchan;
+	void __iomem *reg;
+	unsigned int id;
+
+	wait_queue_head_t wait_to_stop;
+	bool running;
+	bool first_frame;
+	bool video_group;
+
+	spinlock_t lock; /* lock to access struct xilinx_dpdma_chan */
+	struct dma_pool *desc_pool;
+	struct tasklet_struct err_task;
+
+	struct {
+		struct xilinx_dpdma_tx_desc *pending;
+		struct xilinx_dpdma_tx_desc *active;
+	} desc;
+
+	struct xilinx_dpdma_device *xdev;
+};
+
+#define to_xilinx_chan(_chan) \
+	container_of(_chan, struct xilinx_dpdma_chan, vchan.chan)
+
+/**
+ * struct xilinx_dpdma_device - DPDMA device
+ * @common: generic dma device structure
+ * @reg: register base address
+ * @dev: generic device structure
+ * @irq: the interrupt number
+ * @axi_clk: axi clock
+ * @chan: DPDMA channels
+ * @ext_addr: flag for 64 bit system (48 bit addressing)
+ */
+struct xilinx_dpdma_device {
+	struct dma_device common;
+	void __iomem *reg;
+	struct device *dev;
+	int irq;
+
+	struct clk *axi_clk;
+	struct xilinx_dpdma_chan *chan[XILINX_DPDMA_NUM_CHAN];
+
+	bool ext_addr;
+};
+
+/* -----------------------------------------------------------------------------
+ * DebugFS
+ */
+
+#ifdef CONFIG_DEBUG_FS
+
+#define XILINX_DPDMA_DEBUGFS_READ_MAX_SIZE	32
+#define XILINX_DPDMA_DEBUGFS_UINT16_MAX_STR	"65535"
+
+/* Match xilinx_dpdma_testcases vs dpdma_debugfs_reqs[] entry */
+enum xilinx_dpdma_testcases {
+	DPDMA_TC_INTR_DONE,
+	DPDMA_TC_NONE
+};
+
+struct xilinx_dpdma_debugfs {
+	enum xilinx_dpdma_testcases testcase;
+	u16 xilinx_dpdma_irq_done_count;
+	unsigned int chan_id;
+};
+
+static struct xilinx_dpdma_debugfs dpdma_debugfs;
+struct xilinx_dpdma_debugfs_request {
+	const char *name;
+	enum xilinx_dpdma_testcases tc;
+	ssize_t (*read)(char *buf);
+	int (*write)(char *args);
+};
+
+static void xilinx_dpdma_debugfs_desc_done_irq(struct xilinx_dpdma_chan *chan)
+{
+	if (chan->id == dpdma_debugfs.chan_id)
+		dpdma_debugfs.xilinx_dpdma_irq_done_count++;
+}
+
+static ssize_t xilinx_dpdma_debugfs_desc_done_irq_read(char *buf)
+{
+	size_t out_str_len;
+
+	dpdma_debugfs.testcase = DPDMA_TC_NONE;
+
+	out_str_len = strlen(XILINX_DPDMA_DEBUGFS_UINT16_MAX_STR);
+	out_str_len = min_t(size_t, XILINX_DPDMA_DEBUGFS_READ_MAX_SIZE,
+			    out_str_len);
+	snprintf(buf, out_str_len, "%d",
+		 dpdma_debugfs.xilinx_dpdma_irq_done_count);
+
+	return 0;
+}
+
+static int xilinx_dpdma_debugfs_desc_done_irq_write(char *args)
+{
+	char *arg;
+	int ret;
+	u32 id;
+
+	arg = strsep(&args, " ");
+	if (!arg || strncasecmp(arg, "start", 5))
+		return -EINVAL;
+
+	arg = strsep(&args, " ");
+	if (!arg)
+		return -EINVAL;
+
+	ret = kstrtou32(arg, 0, &id);
+	if (ret < 0)
+		return ret;
+
+	if (id < ZYNQMP_DPDMA_VIDEO0 || id > ZYNQMP_DPDMA_AUDIO1)
+		return -EINVAL;
+
+	dpdma_debugfs.testcase = DPDMA_TC_INTR_DONE;
+	dpdma_debugfs.xilinx_dpdma_irq_done_count = 0;
+	dpdma_debugfs.chan_id = id;
+
+	return 0;
+}
+
+/* Match xilinx_dpdma_testcases vs dpdma_debugfs_reqs[] entry */
+static struct xilinx_dpdma_debugfs_request dpdma_debugfs_reqs[] = {
+	{
+		.name = "DESCRIPTOR_DONE_INTR",
+		.tc = DPDMA_TC_INTR_DONE,
+		.read = xilinx_dpdma_debugfs_desc_done_irq_read,
+		.write = xilinx_dpdma_debugfs_desc_done_irq_write,
+	},
+};
+
+static ssize_t xilinx_dpdma_debugfs_read(struct file *f, char __user *buf,
+					 size_t size, loff_t *pos)
+{
+	enum xilinx_dpdma_testcases testcase;
+	char *kern_buff;
+	int ret = 0;
+
+	if (*pos != 0 || size <= 0)
+		return -EINVAL;
+
+	kern_buff = kzalloc(XILINX_DPDMA_DEBUGFS_READ_MAX_SIZE, GFP_KERNEL);
+	if (!kern_buff) {
+		dpdma_debugfs.testcase = DPDMA_TC_NONE;
+		return -ENOMEM;
+	}
+
+	testcase = READ_ONCE(dpdma_debugfs.testcase);
+	if (testcase != DPDMA_TC_NONE) {
+		ret = dpdma_debugfs_reqs[testcase].read(kern_buff);
+		if (ret < 0)
+			goto done;
+	} else {
+		strlcpy(kern_buff, "No testcase executed",
+			XILINX_DPDMA_DEBUGFS_READ_MAX_SIZE);
+	}
+
+	size = min(size, strlen(kern_buff));
+	if (copy_to_user(buf, kern_buff, size))
+		ret = -EFAULT;
+
+done:
+	kfree(kern_buff);
+	if (ret)
+		return ret;
+
+	*pos = size + 1;
+	return size;
+}
+
+static ssize_t xilinx_dpdma_debugfs_write(struct file *f,
+					  const char __user *buf, size_t size,
+					  loff_t *pos)
+{
+	char *kern_buff, *kern_buff_start;
+	char *testcase;
+	unsigned int i;
+	int ret;
+
+	if (*pos != 0 || size <= 0)
+		return -EINVAL;
+
+	/* Only a single test instance is supported at a time for now. */
+	if (dpdma_debugfs.testcase != DPDMA_TC_NONE)
+		return -EBUSY;
+
+	kern_buff = kzalloc(size, GFP_KERNEL);
+	if (!kern_buff)
+		return -ENOMEM;
+	kern_buff_start = kern_buff;
+
+	ret = strncpy_from_user(kern_buff, buf, size);
+	if (ret < 0)
+		goto done;
+
+	/* Read the testcase name from a user request. */
+	testcase = strsep(&kern_buff, " ");
+
+	for (i = 0; i < ARRAY_SIZE(dpdma_debugfs_reqs); i++) {
+		if (!strcasecmp(testcase, dpdma_debugfs_reqs[i].name))
+			break;
+	}
+
+	if (i == ARRAY_SIZE(dpdma_debugfs_reqs)) {
+		ret = -EINVAL;
+		goto done;
+	}
+
+	ret = dpdma_debugfs_reqs[i].write(kern_buff);
+	if (ret < 0)
+		goto done;
+
+	ret = size;
+
+done:
+	kfree(kern_buff_start);
+	return ret;
+}
+
+static const struct file_operations fops_xilinx_dpdma_dbgfs = {
+	.owner = THIS_MODULE,
+	.read = xilinx_dpdma_debugfs_read,
+	.write = xilinx_dpdma_debugfs_write,
+};
+
+static void xilinx_dpdma_debugfs_init(struct xilinx_dpdma_device *xdev)
+{
+	struct dentry *dent;
+
+	dpdma_debugfs.testcase = DPDMA_TC_NONE;
+
+	dent = debugfs_create_file("testcase", 0444, xdev->common.dbg_dev_root,
+				   NULL, &fops_xilinx_dpdma_dbgfs);
+	if (IS_ERR(dent))
+		dev_err(xdev->dev, "Failed to create debugfs testcase file\n");
+}
+
+#else
+static void xilinx_dpdma_debugfs_init(struct xilinx_dpdma_device *xdev)
+{
+}
+
+static void xilinx_dpdma_debugfs_desc_done_irq(struct xilinx_dpdma_chan *chan)
+{
+}
+#endif /* CONFIG_DEBUG_FS */
+
+/* -----------------------------------------------------------------------------
+ * I/O Accessors
+ */
+
+static inline u32 dpdma_read(void __iomem *base, u32 offset)
+{
+	return ioread32(base + offset);
+}
+
+static inline void dpdma_write(void __iomem *base, u32 offset, u32 val)
+{
+	iowrite32(val, base + offset);
+}
+
+static inline void dpdma_clr(void __iomem *base, u32 offset, u32 clr)
+{
+	dpdma_write(base, offset, dpdma_read(base, offset) & ~clr);
+}
+
+static inline void dpdma_set(void __iomem *base, u32 offset, u32 set)
+{
+	dpdma_write(base, offset, dpdma_read(base, offset) | set);
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptor Operations
+ */
+
+/**
+ * xilinx_dpdma_sw_desc_set_dma_addrs - Set DMA addresses in the descriptor
+ * @xdev: DPDMA device
+ * @sw_desc: The software descriptor in which to set DMA addresses
+ * @prev: The previous descriptor
+ * @dma_addr: array of dma addresses
+ * @num_src_addr: number of addresses in @dma_addr
+ *
+ * Set all the DMA addresses in the hardware descriptor corresponding to @sw_desc
+ * from @dma_addr. If a previous descriptor is specified in @prev, its next
+ * descriptor DMA address is set to the DMA address of @sw_desc. @prev may be
+ * identical to @sw_desc for cyclic transfers.
+ */
+static void xilinx_dpdma_sw_desc_set_dma_addrs(struct xilinx_dpdma_device *xdev,
+					       struct xilinx_dpdma_sw_desc *sw_desc,
+					       struct xilinx_dpdma_sw_desc *prev,
+					       dma_addr_t dma_addr[],
+					       unsigned int num_src_addr)
+{
+	struct xilinx_dpdma_hw_desc *hw_desc = &sw_desc->hw;
+	unsigned int i;
+
+	hw_desc->src_addr = lower_32_bits(dma_addr[0]);
+	if (xdev->ext_addr)
+		hw_desc->addr_ext |=
+			FIELD_PREP(XILINX_DPDMA_DESC_ADDR_EXT_SRC_ADDR_MASK,
+				   upper_32_bits(dma_addr[0]));
+
+	for (i = 1; i < num_src_addr; i++) {
+		u32 *addr = &hw_desc->src_addr2;
+
+		addr[i-1] = lower_32_bits(dma_addr[i]);
+
+		if (xdev->ext_addr) {
+			u32 *addr_ext = &hw_desc->addr_ext_23;
+			u32 addr_msb;
+
+			addr_msb = upper_32_bits(dma_addr[i]) & GENMASK(15, 0);
+			addr_msb <<= 16 * ((i - 1) % 2);
+			addr_ext[(i - 1) / 2] |= addr_msb;
+		}
+	}
+
+	if (!prev)
+		return;
+
+	prev->hw.next_desc = lower_32_bits(sw_desc->dma_addr);
+	if (xdev->ext_addr)
+		prev->hw.addr_ext |=
+			FIELD_PREP(XILINX_DPDMA_DESC_ADDR_EXT_NEXT_ADDR_MASK,
+				   upper_32_bits(sw_desc->dma_addr));
+}
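As a quick illustration of the packing performed above when ext_addr is set, the standalone sketch below (not driver code, with arbitrary example values) splits a 36-bit DMA address into a 32-bit source address field and a 16-bit extension field.

/* Standalone illustration only; values and variable names are not from the driver. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t dma_addr = 0x876543210ULL;	/* example 36-bit address */
	uint32_t src_addr = (uint32_t)dma_addr;	/* lower 32 bits */
	uint32_t addr_ext = (uint32_t)(dma_addr >> 32) & 0xffff; /* upper 16 bits */

	/* Prints: src_addr=0x76543210 addr_ext=0x0008 */
	printf("src_addr=0x%08" PRIx32 " addr_ext=0x%04" PRIx32 "\n",
	       src_addr, addr_ext);
	return 0;
}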
+
+/**
+ * xilinx_dpdma_chan_alloc_sw_desc - Allocate a software descriptor
+ * @chan: DPDMA channel
+ *
+ * Allocate a software descriptor from the channel's descriptor pool.
+ *
+ * Return: a software descriptor or NULL.
+ */
+static struct xilinx_dpdma_sw_desc *
+xilinx_dpdma_chan_alloc_sw_desc(struct xilinx_dpdma_chan *chan)
+{
+	struct xilinx_dpdma_sw_desc *sw_desc;
+	dma_addr_t dma_addr;
+
+	sw_desc = dma_pool_zalloc(chan->desc_pool, GFP_ATOMIC, &dma_addr);
+	if (!sw_desc)
+		return NULL;
+
+	sw_desc->dma_addr = dma_addr;
+
+	return sw_desc;
+}
+
+/**
+ * xilinx_dpdma_chan_free_sw_desc - Free a software descriptor
+ * @chan: DPDMA channel
+ * @sw_desc: software descriptor to free
+ *
+ * Free a software descriptor from the channel's descriptor pool.
+ */
+static void
+xilinx_dpdma_chan_free_sw_desc(struct xilinx_dpdma_chan *chan,
+			       struct xilinx_dpdma_sw_desc *sw_desc)
+{
+	dma_pool_free(chan->desc_pool, sw_desc, sw_desc->dma_addr);
+}
+
+/**
+ * xilinx_dpdma_chan_dump_tx_desc - Dump a tx descriptor
+ * @chan: DPDMA channel
+ * @tx_desc: tx descriptor to dump
+ *
+ * Dump contents of a tx descriptor
+ */
+static void xilinx_dpdma_chan_dump_tx_desc(struct xilinx_dpdma_chan *chan,
+					   struct xilinx_dpdma_tx_desc *tx_desc)
+{
+	struct xilinx_dpdma_sw_desc *sw_desc;
+	struct device *dev = chan->xdev->dev;
+	unsigned int i = 0;
+
+	dev_dbg(dev, "------- TX descriptor dump start -------\n");
+	dev_dbg(dev, "------- channel ID = %d -------\n", chan->id);
+
+	list_for_each_entry(sw_desc, &tx_desc->descriptors, node) {
+		struct xilinx_dpdma_hw_desc *hw_desc = &sw_desc->hw;
+
+		dev_dbg(dev, "------- HW descriptor %d -------\n", i++);
+		dev_dbg(dev, "descriptor DMA addr: %pad\n", &sw_desc->dma_addr);
+		dev_dbg(dev, "control: 0x%08x\n", hw_desc->control);
+		dev_dbg(dev, "desc_id: 0x%08x\n", hw_desc->desc_id);
+		dev_dbg(dev, "xfer_size: 0x%08x\n", hw_desc->xfer_size);
+		dev_dbg(dev, "hsize_stride: 0x%08x\n", hw_desc->hsize_stride);
+		dev_dbg(dev, "timestamp_lsb: 0x%08x\n", hw_desc->timestamp_lsb);
+		dev_dbg(dev, "timestamp_msb: 0x%08x\n", hw_desc->timestamp_msb);
+		dev_dbg(dev, "addr_ext: 0x%08x\n", hw_desc->addr_ext);
+		dev_dbg(dev, "next_desc: 0x%08x\n", hw_desc->next_desc);
+		dev_dbg(dev, "src_addr: 0x%08x\n", hw_desc->src_addr);
+		dev_dbg(dev, "addr_ext_23: 0x%08x\n", hw_desc->addr_ext_23);
+		dev_dbg(dev, "addr_ext_45: 0x%08x\n", hw_desc->addr_ext_45);
+		dev_dbg(dev, "src_addr2: 0x%08x\n", hw_desc->src_addr2);
+		dev_dbg(dev, "src_addr3: 0x%08x\n", hw_desc->src_addr3);
+		dev_dbg(dev, "src_addr4: 0x%08x\n", hw_desc->src_addr4);
+		dev_dbg(dev, "src_addr5: 0x%08x\n", hw_desc->src_addr5);
+		dev_dbg(dev, "crc: 0x%08x\n", hw_desc->crc);
+	}
+
+	dev_dbg(dev, "------- TX descriptor dump end -------\n");
+}
+
+/**
+ * xilinx_dpdma_chan_alloc_tx_desc - Allocate a transaction descriptor
+ * @chan: DPDMA channel
+ *
+ * Allocate a tx descriptor.
+ *
+ * Return: a tx descriptor or NULL.
+ */
+static struct xilinx_dpdma_tx_desc *
+xilinx_dpdma_chan_alloc_tx_desc(struct xilinx_dpdma_chan *chan)
+{
+	struct xilinx_dpdma_tx_desc *tx_desc;
+
+	tx_desc = kzalloc(sizeof(*tx_desc), GFP_NOWAIT);
+	if (!tx_desc)
+		return NULL;
+
+	INIT_LIST_HEAD(&tx_desc->descriptors);
+	tx_desc->chan = chan;
+	tx_desc->error = false;
+
+	return tx_desc;
+}
+
+/**
+ * xilinx_dpdma_chan_free_tx_desc - Free a virtual DMA descriptor
+ * @vdesc: virtual DMA descriptor
+ *
+ * Free the virtual DMA descriptor @vdesc including its software descriptors.
+ */
+static void xilinx_dpdma_chan_free_tx_desc(struct virt_dma_desc *vdesc)
+{
+	struct xilinx_dpdma_sw_desc *sw_desc, *next;
+	struct xilinx_dpdma_tx_desc *desc;
+
+	if (!vdesc)
+		return;
+
+	desc = to_dpdma_tx_desc(vdesc);
+
+	list_for_each_entry_safe(sw_desc, next, &desc->descriptors, node) {
+		list_del(&sw_desc->node);
+		xilinx_dpdma_chan_free_sw_desc(desc->chan, sw_desc);
+	}
+
+	kfree(desc);
+}
+
+/**
+ * xilinx_dpdma_chan_prep_interleaved_dma - Prepare an interleaved dma
+ *					    descriptor
+ * @chan: DPDMA channel
+ * @xt: dma interleaved template
+ *
+ * Prepare a tx descriptor including internal software/hardware descriptors
+ * based on @xt.
+ *
+ * Return: A DPDMA TX descriptor on success, or NULL.
+ */
+static struct xilinx_dpdma_tx_desc *
+xilinx_dpdma_chan_prep_interleaved_dma(struct xilinx_dpdma_chan *chan,
+				       struct dma_interleaved_template *xt)
+{
+	struct xilinx_dpdma_tx_desc *tx_desc;
+	struct xilinx_dpdma_sw_desc *sw_desc;
+	struct xilinx_dpdma_hw_desc *hw_desc;
+	size_t hsize = xt->sgl[0].size;
+	size_t stride = hsize + xt->sgl[0].icg;
+
+	if (!IS_ALIGNED(xt->src_start, XILINX_DPDMA_ALIGN_BYTES)) {
+		dev_err(chan->xdev->dev, "buffer should be aligned at %d B\n",
+			XILINX_DPDMA_ALIGN_BYTES);
+		return NULL;
+	}
+
+	tx_desc = xilinx_dpdma_chan_alloc_tx_desc(chan);
+	if (!tx_desc)
+		return NULL;
+
+	sw_desc = xilinx_dpdma_chan_alloc_sw_desc(chan);
+	if (!sw_desc) {
+		xilinx_dpdma_chan_free_tx_desc(&tx_desc->vdesc);
+		return NULL;
+	}
+
+	xilinx_dpdma_sw_desc_set_dma_addrs(chan->xdev, sw_desc, sw_desc,
+					   &xt->src_start, 1);
+
+	hw_desc = &sw_desc->hw;
+	hsize = ALIGN(hsize, XILINX_DPDMA_LINESIZE_ALIGN_BITS / 8);
+	hw_desc->xfer_size = hsize * xt->numf;
+	hw_desc->hsize_stride =
+		FIELD_PREP(XILINX_DPDMA_DESC_HSIZE_STRIDE_HSIZE_MASK, hsize) |
+		FIELD_PREP(XILINX_DPDMA_DESC_HSIZE_STRIDE_STRIDE_MASK,
+			   stride / 16);
+	hw_desc->control |= XILINX_DPDMA_DESC_CONTROL_PREEMBLE;
+	hw_desc->control |= XILINX_DPDMA_DESC_CONTROL_COMPLETE_INTR;
+	hw_desc->control |= XILINX_DPDMA_DESC_CONTROL_IGNORE_DONE;
+	hw_desc->control |= XILINX_DPDMA_DESC_CONTROL_LAST_OF_FRAME;
+
+	list_add_tail(&sw_desc->node, &tx_desc->descriptors);
+
+	return tx_desc;
+}
+
+/* -----------------------------------------------------------------------------
+ * DPDMA Channel Operations
+ */
+
+/**
+ * xilinx_dpdma_chan_enable - Enable the channel
+ * @chan: DPDMA channel
+ *
+ * Enable the channel and its interrupts. Set the QoS values for video class.
+ */
+static void xilinx_dpdma_chan_enable(struct xilinx_dpdma_chan *chan)
+{
+	u32 reg;
+
+	reg = (XILINX_DPDMA_INTR_CHAN_MASK << chan->id)
+	    | XILINX_DPDMA_INTR_GLOBAL_MASK;
+	dpdma_write(chan->xdev->reg, XILINX_DPDMA_IEN, reg);
+	reg = (XILINX_DPDMA_EINTR_CHAN_ERR_MASK << chan->id)
+	    | XILINX_DPDMA_INTR_GLOBAL_ERR;
+	dpdma_write(chan->xdev->reg, XILINX_DPDMA_EIEN, reg);
+
+	reg = XILINX_DPDMA_CH_CNTL_ENABLE
+	    | FIELD_PREP(XILINX_DPDMA_CH_CNTL_QOS_DSCR_WR_MASK,
+			 XILINX_DPDMA_CH_CNTL_QOS_VID_CLASS)
+	    | FIELD_PREP(XILINX_DPDMA_CH_CNTL_QOS_DSCR_RD_MASK,
+			 XILINX_DPDMA_CH_CNTL_QOS_VID_CLASS)
+	    | FIELD_PREP(XILINX_DPDMA_CH_CNTL_QOS_DATA_RD_MASK,
+			 XILINX_DPDMA_CH_CNTL_QOS_VID_CLASS);
+	dpdma_set(chan->reg, XILINX_DPDMA_CH_CNTL, reg);
+}
+
+/**
+ * xilinx_dpdma_chan_disable - Disable the channel
+ * @chan: DPDMA channel
+ *
+ * Disable the channel and its interrupts.
+ */
+static void xilinx_dpdma_chan_disable(struct xilinx_dpdma_chan *chan)
+{
+	u32 reg;
+
+	reg = XILINX_DPDMA_INTR_CHAN_MASK << chan->id;
+	dpdma_write(chan->xdev->reg, XILINX_DPDMA_IEN, reg);
+	reg = XILINX_DPDMA_EINTR_CHAN_ERR_MASK << chan->id;
+	dpdma_write(chan->xdev->reg, XILINX_DPDMA_EIEN, reg);
+
+	dpdma_clr(chan->reg, XILINX_DPDMA_CH_CNTL, XILINX_DPDMA_CH_CNTL_ENABLE);
+}
+
+/**
+ * xilinx_dpdma_chan_pause - Pause the channel
+ * @chan: DPDMA channel
+ *
+ * Pause the channel.
+ */
+static void xilinx_dpdma_chan_pause(struct xilinx_dpdma_chan *chan)
+{
+	dpdma_set(chan->reg, XILINX_DPDMA_CH_CNTL, XILINX_DPDMA_CH_CNTL_PAUSE);
+}
+
+/**
+ * xilinx_dpdma_chan_unpause - Unpause the channel
+ * @chan: DPDMA channel
+ *
+ * Unpause the channel.
+ */
+static void xilinx_dpdma_chan_unpause(struct xilinx_dpdma_chan *chan)
+{
+	dpdma_clr(chan->reg, XILINX_DPDMA_CH_CNTL, XILINX_DPDMA_CH_CNTL_PAUSE);
+}
+
+static u32 xilinx_dpdma_chan_video_group_ready(struct xilinx_dpdma_chan *chan)
+{
+	struct xilinx_dpdma_device *xdev = chan->xdev;
+	u32 channels = 0;
+	unsigned int i;
+
+	for (i = ZYNQMP_DPDMA_VIDEO0; i <= ZYNQMP_DPDMA_VIDEO2; i++) {
+		if (xdev->chan[i]->video_group && !xdev->chan[i]->running)
+			return 0;
+
+		if (xdev->chan[i]->video_group)
+			channels |= BIT(i);
+	}
+
+	return channels;
+}
+
+/**
+ * xilinx_dpdma_chan_queue_transfer - Queue the next transfer
+ * @chan: DPDMA channel
+ *
+ * Queue the next descriptor, if any, to the hardware. If the channel is
+ * stopped, start it first. Otherwise retrigger it with the next descriptor.
+ */
+static void xilinx_dpdma_chan_queue_transfer(struct xilinx_dpdma_chan *chan)
+{
+	struct xilinx_dpdma_device *xdev = chan->xdev;
+	struct xilinx_dpdma_sw_desc *sw_desc;
+	struct xilinx_dpdma_tx_desc *desc;
+	struct virt_dma_desc *vdesc;
+	u32 reg, channels;
+	bool first_frame;
+
+	lockdep_assert_held(&chan->lock);
+
+	if (chan->desc.pending)
+		return;
+
+	if (!chan->running) {
+		xilinx_dpdma_chan_unpause(chan);
+		xilinx_dpdma_chan_enable(chan);
+		chan->first_frame = true;
+		chan->running = true;
+	}
+
+	vdesc = vchan_next_desc(&chan->vchan);
+	if (!vdesc)
+		return;
+
+	desc = to_dpdma_tx_desc(vdesc);
+	chan->desc.pending = desc;
+	list_del(&desc->vdesc.node);
+
+	/*
+	 * Assign the cookie to descriptors in this transaction. Only 16 bits
+	 * will be used, but that should be enough.
+	 */
+	list_for_each_entry(sw_desc, &desc->descriptors, node)
+		sw_desc->hw.desc_id = desc->vdesc.tx.cookie
+				    & XILINX_DPDMA_CH_DESC_ID_MASK;
+
+	sw_desc = list_first_entry(&desc->descriptors,
+				   struct xilinx_dpdma_sw_desc, node);
+	dpdma_write(chan->reg, XILINX_DPDMA_CH_DESC_START_ADDR,
+		    lower_32_bits(sw_desc->dma_addr));
+	if (xdev->ext_addr)
+		dpdma_write(chan->reg, XILINX_DPDMA_CH_DESC_START_ADDRE,
+			    FIELD_PREP(XILINX_DPDMA_CH_DESC_START_ADDRE_MASK,
+				       upper_32_bits(sw_desc->dma_addr)));
+
+	first_frame = chan->first_frame;
+	chan->first_frame = false;
+
+	if (chan->video_group) {
+		channels = xilinx_dpdma_chan_video_group_ready(chan);
+		/*
+		 * Trigger the transfer only when all channels in the group are
+		 * ready.
+		 */
+		if (!channels)
+			return;
+	} else {
+		channels = BIT(chan->id);
+	}
+
+	if (first_frame)
+		reg = XILINX_DPDMA_GBL_TRIG_MASK(channels);
+	else
+		reg = XILINX_DPDMA_GBL_RETRIG_MASK(channels);
+
+	dpdma_write(xdev->reg, XILINX_DPDMA_GBL, reg);
+}
+
+/**
+ * xilinx_dpdma_chan_ostand - Number of outstanding transactions
+ * @chan: DPDMA channel
+ *
+ * Read and return the number of outstanding transactions from the register.
+ *
+ * Return: Number of outstanding transactions from the status register.
+ */
+static u32 xilinx_dpdma_chan_ostand(struct xilinx_dpdma_chan *chan)
+{
+	return FIELD_GET(XILINX_DPDMA_CH_STATUS_OTRAN_CNT_MASK,
+			 dpdma_read(chan->reg, XILINX_DPDMA_CH_STATUS));
+}
+
+/**
+ * xilinx_dpdma_chan_notify_no_ostand - Notify no outstanding transaction event
+ * @chan: DPDMA channel
+ *
+ * Notify waiters of the 'no outstanding transaction' event, so that they can
+ * stop the channel safely. This function should be called when the 'no
+ * outstanding' interrupt is generated. The interrupt is disabled here and
+ * should be re-enabled once the event has been handled. If the channel status
+ * register still shows some number of outstanding transactions, the interrupt
+ * remains enabled.
+ *
+ * Return: 0 on success, or -EWOULDBLOCK if there are still outstanding
+ * transactions.
+ */
+static int xilinx_dpdma_chan_notify_no_ostand(struct xilinx_dpdma_chan *chan)
+{
+	u32 cnt;
+
+	cnt = xilinx_dpdma_chan_ostand(chan);
+	if (cnt) {
+		dev_dbg(chan->xdev->dev, "%d outstanding transactions\n", cnt);
+		return -EWOULDBLOCK;
+	}
+
+	/* Disable 'no outstanding' interrupt */
+	dpdma_write(chan->xdev->reg, XILINX_DPDMA_IDS,
+		    XILINX_DPDMA_INTR_NO_OSTAND(chan->id));
+	wake_up(&chan->wait_to_stop);
+
+	return 0;
+}
+
+/**
+ * xilinx_dpdma_chan_wait_no_ostand - Wait for the no outstanding irq
+ * @chan: DPDMA channel
+ *
+ * Wait for the no outstanding transaction interrupt. This function can sleep
+ * for up to 50 ms.
+ *
+ * Return: 0 on success. On failure, -ETIMEDOUT on timeout, or the error code
+ * from wait_event_interruptible_timeout().
+ */
+static int xilinx_dpdma_chan_wait_no_ostand(struct xilinx_dpdma_chan *chan)
+{
+	int ret;
+
+	/* Wait up to 50 ms for the 'no outstanding transaction' interrupt. */
+	ret = wait_event_interruptible_timeout(chan->wait_to_stop,
+					       !xilinx_dpdma_chan_ostand(chan),
+					       msecs_to_jiffies(50));
+	if (ret > 0) {
+		dpdma_write(chan->xdev->reg, XILINX_DPDMA_IEN,
+			    XILINX_DPDMA_INTR_NO_OSTAND(chan->id));
+		return 0;
+	}
+
+	dev_err(chan->xdev->dev, "not ready to stop: %d trans\n",
+		xilinx_dpdma_chan_ostand(chan));
+
+	if (ret == 0)
+		return -ETIMEDOUT;
+
+	return ret;
+}
+
+/**
+ * xilinx_dpdma_chan_poll_no_ostand - Poll the outstanding transaction status
+ * @chan: DPDMA channel
+ *
+ * Poll the outstanding transaction status, and return when there's no
+ * outstanding transaction. This function can be used in interrupt context or
+ * where atomicity is required. The calling thread may busy-wait for more than
+ * 50 ms.
+ *
+ * Return: 0 on success, or -ETIMEDOUT.
+ */
+static int xilinx_dpdma_chan_poll_no_ostand(struct xilinx_dpdma_chan *chan)
+{
+	u32 cnt, loop = 50000;
+
+	/* Poll for up to 50 ms (one frame period at 20 fps). */
+	do {
+		cnt = xilinx_dpdma_chan_ostand(chan);
+		udelay(1);
+	} while (loop-- > 0 && cnt);
+
+	if (loop) {
+		dpdma_write(chan->xdev->reg, XILINX_DPDMA_IEN,
+			    XILINX_DPDMA_INTR_NO_OSTAND(chan->id));
+		return 0;
+	}
+
+	dev_err(chan->xdev->dev, "not ready to stop: %d trans\n",
+		xilinx_dpdma_chan_ostand(chan));
+
+	return -ETIMEDOUT;
+}
+
+/**
+ * xilinx_dpdma_chan_stop - Stop the channel
+ * @chan: DPDMA channel
+ *
+ * Stop a previously paused channel by first waiting for completion of all
+ * outstanding transactions and then disabling the channel.
+ *
+ * Return: 0 on success, or -ETIMEDOUT if the channel failed to stop.
+ */
+static int xilinx_dpdma_chan_stop(struct xilinx_dpdma_chan *chan)
+{
+	unsigned long flags;
+	int ret;
+
+	ret = xilinx_dpdma_chan_wait_no_ostand(chan);
+	if (ret)
+		return ret;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	xilinx_dpdma_chan_disable(chan);
+	chan->running = false;
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	return 0;
+}
+
+/**
+ * xilinx_dpdma_chan_done_irq - Handle hardware descriptor completion
+ * @chan: DPDMA channel
+ *
+ * Handle completion of the currently active descriptor (@chan->desc.active). As
+ * we currently support cyclic transfers only, this just invokes the cyclic
+ * callback. The descriptor will be completed at the VSYNC interrupt when a new
+ * descriptor replaces it.
+ */
+static void xilinx_dpdma_chan_done_irq(struct xilinx_dpdma_chan *chan)
+{
+	struct xilinx_dpdma_tx_desc *active;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	xilinx_dpdma_debugfs_desc_done_irq(chan);
+
+	active = chan->desc.active;
+	if (active)
+		vchan_cyclic_callback(&active->vdesc);
+	else
+		dev_warn(chan->xdev->dev,
+			 "DONE IRQ with no active descriptor!\n");
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/**
+ * xilinx_dpdma_chan_vsync_irq - Handle hardware descriptor scheduling
+ * @chan: DPDMA channel
+ *
+ * At VSYNC the active descriptor may have been replaced by the pending
+ * descriptor. Detect this through the DESC_ID and perform appropriate
+ * bookkeeping.
+ */
+static void xilinx_dpdma_chan_vsync_irq(struct xilinx_dpdma_chan *chan)
+{
+	struct xilinx_dpdma_tx_desc *pending;
+	struct xilinx_dpdma_sw_desc *sw_desc;
+	unsigned long flags;
+	u32 desc_id;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	pending = chan->desc.pending;
+	if (!chan->running || !pending)
+		goto out;
+
+	desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID)
+		& XILINX_DPDMA_CH_DESC_ID_MASK;
+
+	/* If the retrigger raced with vsync, retry at the next frame. */
+	sw_desc = list_first_entry(&pending->descriptors,
+				   struct xilinx_dpdma_sw_desc, node);
+	if (sw_desc->hw.desc_id != desc_id)
+		goto out;
+
+	/*
+	 * Complete the active descriptor, if any, promote the pending
+	 * descriptor to active, and queue the next transfer, if any.
+	 */
+	if (chan->desc.active)
+		vchan_cookie_complete(&chan->desc.active->vdesc);
+	chan->desc.active = pending;
+	chan->desc.pending = NULL;
+
+	xilinx_dpdma_chan_queue_transfer(chan);
+
+out:
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/**
+ * xilinx_dpdma_chan_err - Detect any channel error
+ * @chan: DPDMA channel
+ * @isr: masked Interrupt Status Register
+ * @eisr: Error Interrupt Status Register
+ *
+ * Return: true if any channel error occurs, or false otherwise.
+ */
+static bool
+xilinx_dpdma_chan_err(struct xilinx_dpdma_chan *chan, u32 isr, u32 eisr)
+{
+	if (!chan)
+		return false;
+
+	if (chan->running &&
+	    ((isr & (XILINX_DPDMA_INTR_CHAN_ERR_MASK << chan->id)) ||
+	    (eisr & (XILINX_DPDMA_EINTR_CHAN_ERR_MASK << chan->id))))
+		return true;
+
+	return false;
+}
+
+/**
+ * xilinx_dpdma_chan_handle_err - DPDMA channel error handling
+ * @chan: DPDMA channel
+ *
+ * This function is called when any channel error or any global error occurs.
+ * The function disables the channel that was paused due to the error and
+ * determines whether the currently active descriptor can be rescheduled,
+ * depending on the descriptor status.
+ */
+static void xilinx_dpdma_chan_handle_err(struct xilinx_dpdma_chan *chan)
+{
+	struct xilinx_dpdma_device *xdev = chan->xdev;
+	struct xilinx_dpdma_tx_desc *active;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	dev_dbg(xdev->dev, "cur desc addr = 0x%04x%08x\n",
+		dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_START_ADDRE),
+		dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_START_ADDR));
+	dev_dbg(xdev->dev, "cur payload addr = 0x%04x%08x\n",
+		dpdma_read(chan->reg, XILINX_DPDMA_CH_PYLD_CUR_ADDRE),
+		dpdma_read(chan->reg, XILINX_DPDMA_CH_PYLD_CUR_ADDR));
+
+	xilinx_dpdma_chan_disable(chan);
+	chan->running = false;
+
+	if (!chan->desc.active)
+		goto out_unlock;
+
+	active = chan->desc.active;
+	chan->desc.active = NULL;
+
+	xilinx_dpdma_chan_dump_tx_desc(chan, active);
+
+	if (active->error)
+		dev_dbg(xdev->dev, "repeated error on desc\n");
+
+	/* Reschedule if there's no new descriptor */
+	if (!chan->desc.pending &&
+	    list_empty(&chan->vchan.desc_issued)) {
+		active->error = true;
+		list_add_tail(&active->vdesc.node,
+			      &chan->vchan.desc_issued);
+	} else {
+		xilinx_dpdma_chan_free_tx_desc(&active->vdesc);
+	}
+
+out_unlock:
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/* -----------------------------------------------------------------------------
+ * DMA Engine Operations
+ */
+
+static struct dma_async_tx_descriptor *
+xilinx_dpdma_prep_interleaved_dma(struct dma_chan *dchan,
+				  struct dma_interleaved_template *xt,
+				  unsigned long flags)
+{
+	struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan);
+	struct xilinx_dpdma_tx_desc *desc;
+
+	if (xt->dir != DMA_MEM_TO_DEV)
+		return NULL;
+
+	if (!xt->numf || !xt->sgl[0].size)
+		return NULL;
+
+	if (!(flags & DMA_PREP_REPEAT) || !(flags & DMA_PREP_LOAD_EOT))
+		return NULL;
+
+	desc = xilinx_dpdma_chan_prep_interleaved_dma(chan, xt);
+	if (!desc)
+		return NULL;
+
+	vchan_tx_prep(&chan->vchan, &desc->vdesc, flags | DMA_CTRL_ACK);
+
+	return &desc->vdesc.tx;
+}
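For context, a hedged client-side sketch of how a consumer (for example a display controller driver) might use this operation through the standard dmaengine API. The channel handle, buffer address and frame geometry are illustrative assumptions; the driver only accepts MEM_TO_DEV interleaved transfers flagged with both DMA_PREP_REPEAT and DMA_PREP_LOAD_EOT, as checked above.

/* Hypothetical dmaengine client sketch, not part of this driver. */
#include <linux/dmaengine.h>
#include <linux/overflow.h>
#include <linux/slab.h>

static int example_queue_frame(struct dma_chan *chan, dma_addr_t paddr,
			       unsigned int width_bytes, unsigned int stride,
			       unsigned int height)
{
	struct dma_async_tx_descriptor *tx;
	struct dma_interleaved_template *xt;

	xt = kzalloc(struct_size(xt, sgl, 1), GFP_KERNEL);
	if (!xt)
		return -ENOMEM;

	xt->dir = DMA_MEM_TO_DEV;
	xt->src_start = paddr;
	xt->src_sgl = true;
	xt->src_inc = true;
	xt->frame_size = 1;
	xt->numf = height;			/* number of lines in the frame */
	xt->sgl[0].size = width_bytes;		/* bytes per line */
	xt->sgl[0].icg = stride - width_bytes;	/* gap between lines */

	tx = dmaengine_prep_interleaved_dma(chan, xt,
					    DMA_CTRL_ACK | DMA_PREP_REPEAT |
					    DMA_PREP_LOAD_EOT);
	kfree(xt);
	if (!tx)
		return -EINVAL;

	dmaengine_submit(tx);
	dma_async_issue_pending(chan);

	return 0;
}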
+
+/**
+ * xilinx_dpdma_alloc_chan_resources - Allocate resources for the channel
+ * @dchan: DMA channel
+ *
+ * Allocate a descriptor pool for the channel.
+ *
+ * Return: 0 on success, or -ENOMEM if failed to allocate a pool.
+ */
+static int xilinx_dpdma_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan);
+	size_t align = __alignof__(struct xilinx_dpdma_sw_desc);
+
+	chan->desc_pool = dma_pool_create(dev_name(chan->xdev->dev),
+					  chan->xdev->dev,
+					  sizeof(struct xilinx_dpdma_sw_desc),
+					  align, 0);
+	if (!chan->desc_pool) {
+		dev_err(chan->xdev->dev,
+			"failed to allocate a descriptor pool\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * xilinx_dpdma_free_chan_resources - Free all resources for the channel
+ * @dchan: DMA channel
+ *
+ * Free resources associated with the virtual DMA channel, and destroy the
+ * descriptor pool.
+ */
+static void xilinx_dpdma_free_chan_resources(struct dma_chan *dchan)
+{
+	struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan);
+
+	vchan_free_chan_resources(&chan->vchan);
+
+	dma_pool_destroy(chan->desc_pool);
+	chan->desc_pool = NULL;
+}
+
+static void xilinx_dpdma_issue_pending(struct dma_chan *dchan)
+{
+	struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+	if (vchan_issue_pending(&chan->vchan))
+		xilinx_dpdma_chan_queue_transfer(chan);
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+}
+
+static int xilinx_dpdma_config(struct dma_chan *dchan,
+			       struct dma_slave_config *config)
+{
+	struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan);
+	unsigned long flags;
+
+	/*
+	 * The destination address doesn't need to be specified as the DPDMA is
+	 * hardwired to the destination (the DP controller). The transfer
+	 * width, burst size and port window size are thus meaningless, as
+	 * they're fixed both on the DPDMA side and on the DP controller side.
+	 */
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	/*
+	 * Abuse the slave_id to indicate that the channel is part of a video
+	 * group.
+	 */
+	if (chan->id <= ZYNQMP_DPDMA_VIDEO2)
+		chan->video_group = config->slave_id != 0;
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	return 0;
+}
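A short hedged sketch of the corresponding client-side call: any non-zero slave_id marks a VIDEO0..VIDEO2 channel as part of the video group, so frames on grouped channels are triggered together. The value 1 below is an arbitrary example.

/* Hypothetical dmaengine client sketch, not part of this driver. */
#include <linux/dmaengine.h>

static int example_join_video_group(struct dma_chan *chan)
{
	struct dma_slave_config config = {
		.direction = DMA_MEM_TO_DEV,
		.slave_id = 1,	/* any non-zero value flags video-group membership */
	};

	return dmaengine_slave_config(chan, &config);
}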
+
+static int xilinx_dpdma_pause(struct dma_chan *dchan)
+{
+	xilinx_dpdma_chan_pause(to_xilinx_chan(dchan));
+
+	return 0;
+}
+
+static int xilinx_dpdma_resume(struct dma_chan *dchan)
+{
+	xilinx_dpdma_chan_unpause(to_xilinx_chan(dchan));
+
+	return 0;
+}
+
+/**
+ * xilinx_dpdma_terminate_all - Terminate the channel and descriptors
+ * @dchan: DMA channel
+ *
+ * Pause the channel without waiting for ongoing transfers to complete. Waiting
+ * for completion is performed by xilinx_dpdma_synchronize() that will disable
+ * the channel to complete the stop.
+ *
+ * All the descriptors associated with the channel that are guaranteed not to
+ * be touched by the hardware are freed. The pending and active descriptors are
+ * not touched, and will be freed either upon completion or by
+ * xilinx_dpdma_synchronize().
+ *
+ * Return: 0 on success, or -ETIMEDOUT if the channel failed to stop.
+ */
+static int xilinx_dpdma_terminate_all(struct dma_chan *dchan)
+{
+	struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan);
+	struct xilinx_dpdma_device *xdev = chan->xdev;
+	LIST_HEAD(descriptors);
+	unsigned long flags;
+	unsigned int i;
+
+	/* Pause the channel (including the whole video group if applicable). */
+	if (chan->video_group) {
+		for (i = ZYNQMP_DPDMA_VIDEO0; i <= ZYNQMP_DPDMA_VIDEO2; i++) {
+			if (xdev->chan[i]->video_group &&
+			    xdev->chan[i]->running) {
+				xilinx_dpdma_chan_pause(xdev->chan[i]);
+				xdev->chan[i]->video_group = false;
+			}
+		}
+	} else {
+		xilinx_dpdma_chan_pause(chan);
+	}
+
+	/* Gather all the descriptors we can free and free them. */
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+	vchan_get_all_descriptors(&chan->vchan, &descriptors);
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+	vchan_dma_desc_free_list(&chan->vchan, &descriptors);
+
+	return 0;
+}
+
+/**
+ * xilinx_dpdma_synchronize - Synchronize callback execution
+ * @dchan: DMA channel
+ *
+ * Synchronizing callback execution ensures that all previously issued
+ * transfers have completed and all associated callbacks have been called and
+ * have returned.
+ *
+ * This function waits for the DMA channel to stop. It assumes it has been
+ * paused by a previous call to dmaengine_terminate_async(), and that no new
+ * pending descriptors have been issued with dma_async_issue_pending(). The
+ * behaviour is undefined otherwise.
+ */
+static void xilinx_dpdma_synchronize(struct dma_chan *dchan)
+{
+	struct xilinx_dpdma_chan *chan = to_xilinx_chan(dchan);
+	unsigned long flags;
+
+	xilinx_dpdma_chan_stop(chan);
+
+	spin_lock_irqsave(&chan->vchan.lock, flags);
+	if (chan->desc.pending) {
+		vchan_terminate_vdesc(&chan->desc.pending->vdesc);
+		chan->desc.pending = NULL;
+	}
+	if (chan->desc.active) {
+		vchan_terminate_vdesc(&chan->desc.active->vdesc);
+		chan->desc.active = NULL;
+	}
+	spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+	vchan_synchronize(&chan->vchan);
+}
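Putting the two operations together, a hedged sketch of the teardown order a client is expected to follow; the channel handle is assumed to have been requested earlier.

/* Hypothetical dmaengine client sketch, not part of this driver. */
#include <linux/dmaengine.h>

static void example_stop_channel(struct dma_chan *chan)
{
	/* Pause the channel and drop queued descriptors; does not sleep. */
	dmaengine_terminate_async(chan);

	/* Wait for the channel to stop and for callbacks to return; may sleep. */
	dmaengine_synchronize(chan);

	dma_release_channel(chan);
}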
+
+/* -----------------------------------------------------------------------------
+ * Interrupt and Tasklet Handling
+ */
+
+/**
+ * xilinx_dpdma_err - Detect any global error
+ * @isr: Interrupt Status Register
+ * @eisr: Error Interrupt Status Register
+ *
+ * Return: True if any global error occurs, or false otherwise.
+ */
+static bool xilinx_dpdma_err(u32 isr, u32 eisr)
+{
+	if (isr & XILINX_DPDMA_INTR_GLOBAL_ERR ||
+	    eisr & XILINX_DPDMA_EINTR_GLOBAL_ERR)
+		return true;
+
+	return false;
+}
+
+/**
+ * xilinx_dpdma_handle_err_irq - Handle DPDMA error interrupt
+ * @xdev: DPDMA device
+ * @isr: masked Interrupt Status Register
+ * @eisr: Error Interrupt Status Register
+ *
+ * Handle errors, if any, based on @isr and @eisr. This function disables the
+ * corresponding error interrupts; they should be re-enabled once handling is
+ * done.
+ */
+static void xilinx_dpdma_handle_err_irq(struct xilinx_dpdma_device *xdev,
+					u32 isr, u32 eisr)
+{
+	bool err = xilinx_dpdma_err(isr, eisr);
+	unsigned int i;
+
+	dev_dbg_ratelimited(xdev->dev,
+			    "error irq: isr = 0x%08x, eisr = 0x%08x\n",
+			    isr, eisr);
+
+	/* Disable channel error interrupts until errors are handled. */
+	dpdma_write(xdev->reg, XILINX_DPDMA_IDS,
+		    isr & ~XILINX_DPDMA_INTR_GLOBAL_ERR);
+	dpdma_write(xdev->reg, XILINX_DPDMA_EIDS,
+		    eisr & ~XILINX_DPDMA_EINTR_GLOBAL_ERR);
+
+	for (i = 0; i < ARRAY_SIZE(xdev->chan); i++)
+		if (err || xilinx_dpdma_chan_err(xdev->chan[i], isr, eisr))
+			tasklet_schedule(&xdev->chan[i]->err_task);
+}
+
+/**
+ * xilinx_dpdma_enable_irq - Enable interrupts
+ * @xdev: DPDMA device
+ *
+ * Enable interrupts.
+ */
+static void xilinx_dpdma_enable_irq(struct xilinx_dpdma_device *xdev)
+{
+	dpdma_write(xdev->reg, XILINX_DPDMA_IEN, XILINX_DPDMA_INTR_ALL);
+	dpdma_write(xdev->reg, XILINX_DPDMA_EIEN, XILINX_DPDMA_EINTR_ALL);
+}
+
+/**
+ * xilinx_dpdma_disable_irq - Disable interrupts
+ * @xdev: DPDMA device
+ *
+ * Disable interrupts.
+ */
+static void xilinx_dpdma_disable_irq(struct xilinx_dpdma_device *xdev)
+{
+	dpdma_write(xdev->reg, XILINX_DPDMA_IDS, XILINX_DPDMA_INTR_ALL);
+	dpdma_write(xdev->reg, XILINX_DPDMA_EIDS, XILINX_DPDMA_EINTR_ALL);
+}
+
+/**
+ * xilinx_dpdma_chan_err_task - Per channel tasklet for error handling
+ * @t: pointer to the tasklet associated with this handler
+ *
+ * Per channel error handling tasklet. This function waits for outstanding
+ * transactions to complete and then triggers error handling. Afterwards, the
+ * channel error interrupts are re-enabled and the channel is restarted if
+ * needed.
+ */
+static void xilinx_dpdma_chan_err_task(struct tasklet_struct *t)
+{
+	struct xilinx_dpdma_chan *chan = from_tasklet(chan, t, err_task);
+	struct xilinx_dpdma_device *xdev = chan->xdev;
+	unsigned long flags;
+
+	/* Proceed error handling even when polling fails. */
+	xilinx_dpdma_chan_poll_no_ostand(chan);
+
+	xilinx_dpdma_chan_handle_err(chan);
+
+	dpdma_write(xdev->reg, XILINX_DPDMA_IEN,
+		    XILINX_DPDMA_INTR_CHAN_ERR_MASK << chan->id);
+	dpdma_write(xdev->reg, XILINX_DPDMA_EIEN,
+		    XILINX_DPDMA_EINTR_CHAN_ERR_MASK << chan->id);
+
+	spin_lock_irqsave(&chan->lock, flags);
+	xilinx_dpdma_chan_queue_transfer(chan);
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+static irqreturn_t xilinx_dpdma_irq_handler(int irq, void *data)
+{
+	struct xilinx_dpdma_device *xdev = data;
+	unsigned long mask;
+	unsigned int i;
+	u32 status;
+	u32 error;
+
+	status = dpdma_read(xdev->reg, XILINX_DPDMA_ISR);
+	error = dpdma_read(xdev->reg, XILINX_DPDMA_EISR);
+	if (!status && !error)
+		return IRQ_NONE;
+
+	dpdma_write(xdev->reg, XILINX_DPDMA_ISR, status);
+	dpdma_write(xdev->reg, XILINX_DPDMA_EISR, error);
+
+	if (status & XILINX_DPDMA_INTR_VSYNC) {
+		/*
+		 * There's a single VSYNC interrupt that needs to be processed
+		 * by each running channel to update the active descriptor.
+		 */
+		for (i = 0; i < ARRAY_SIZE(xdev->chan); i++) {
+			struct xilinx_dpdma_chan *chan = xdev->chan[i];
+
+			if (chan)
+				xilinx_dpdma_chan_vsync_irq(chan);
+		}
+	}
+
+	mask = FIELD_GET(XILINX_DPDMA_INTR_DESC_DONE_MASK, status);
+	if (mask) {
+		for_each_set_bit(i, &mask, ARRAY_SIZE(xdev->chan))
+			xilinx_dpdma_chan_done_irq(xdev->chan[i]);
+	}
+
+	mask = FIELD_GET(XILINX_DPDMA_INTR_NO_OSTAND_MASK, status);
+	if (mask) {
+		for_each_set_bit(i, &mask, ARRAY_SIZE(xdev->chan))
+			xilinx_dpdma_chan_notify_no_ostand(xdev->chan[i]);
+	}
+
+	mask = status & XILINX_DPDMA_INTR_ERR_ALL;
+	if (mask || error)
+		xilinx_dpdma_handle_err_irq(xdev, mask, error);
+
+	return IRQ_HANDLED;
+}
+
+/* -----------------------------------------------------------------------------
+ * Initialization & Cleanup
+ */
+
+static int xilinx_dpdma_chan_init(struct xilinx_dpdma_device *xdev,
+				  unsigned int chan_id)
+{
+	struct xilinx_dpdma_chan *chan;
+
+	chan = devm_kzalloc(xdev->dev, sizeof(*chan), GFP_KERNEL);
+	if (!chan)
+		return -ENOMEM;
+
+	chan->id = chan_id;
+	chan->reg = xdev->reg + XILINX_DPDMA_CH_BASE
+		  + XILINX_DPDMA_CH_OFFSET * chan->id;
+	chan->running = false;
+	chan->xdev = xdev;
+
+	spin_lock_init(&chan->lock);
+	init_waitqueue_head(&chan->wait_to_stop);
+
+	tasklet_setup(&chan->err_task, xilinx_dpdma_chan_err_task);
+
+	chan->vchan.desc_free = xilinx_dpdma_chan_free_tx_desc;
+	vchan_init(&chan->vchan, &xdev->common);
+
+	xdev->chan[chan->id] = chan;
+
+	return 0;
+}
+
+static void xilinx_dpdma_chan_remove(struct xilinx_dpdma_chan *chan)
+{
+	if (!chan)
+		return;
+
+	tasklet_kill(&chan->err_task);
+	list_del(&chan->vchan.chan.device_node);
+}
+
+static struct dma_chan *of_dma_xilinx_xlate(struct of_phandle_args *dma_spec,
+					    struct of_dma *ofdma)
+{
+	struct xilinx_dpdma_device *xdev = ofdma->of_dma_data;
+	uint32_t chan_id = dma_spec->args[0];
+
+	if (chan_id >= ARRAY_SIZE(xdev->chan))
+		return NULL;
+
+	if (!xdev->chan[chan_id])
+		return NULL;
+
+	return dma_get_slave_channel(&xdev->chan[chan_id]->vchan.chan);
+}
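For illustration, a hedged sketch of how a client obtains a channel that goes through this translation: the dmas specifier in the client's device tree node carries the channel index checked above, and the "vid0" name below is a hypothetical dma-names entry.

/* Hypothetical dmaengine client sketch, not part of this driver. */
#include <linux/device.h>
#include <linux/dmaengine.h>

static struct dma_chan *example_request_channel(struct device *dev)
{
	/* Resolves via the client's "dmas"/"dma-names" properties in DT. */
	return dma_request_chan(dev, "vid0");
}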
+
+static void dpdma_hw_init(struct xilinx_dpdma_device *xdev)
+{
+	unsigned int i;
+	void __iomem *reg;
+
+	/* Disable all interrupts */
+	xilinx_dpdma_disable_irq(xdev);
+
+	/* Stop all channels */
+	for (i = 0; i < ARRAY_SIZE(xdev->chan); i++) {
+		reg = xdev->reg + XILINX_DPDMA_CH_BASE
+				+ XILINX_DPDMA_CH_OFFSET * i;
+		dpdma_clr(reg, XILINX_DPDMA_CH_CNTL, XILINX_DPDMA_CH_CNTL_ENABLE);
+	}
+
+	/* Clear the interrupt status registers */
+	dpdma_write(xdev->reg, XILINX_DPDMA_ISR, XILINX_DPDMA_INTR_ALL);
+	dpdma_write(xdev->reg, XILINX_DPDMA_EISR, XILINX_DPDMA_EINTR_ALL);
+}
+
+static int xilinx_dpdma_probe(struct platform_device *pdev)
+{
+	struct xilinx_dpdma_device *xdev;
+	struct dma_device *ddev;
+	unsigned int i;
+	int ret;
+
+	xdev = devm_kzalloc(&pdev->dev, sizeof(*xdev), GFP_KERNEL);
+	if (!xdev)
+		return -ENOMEM;
+
+	xdev->dev = &pdev->dev;
+	xdev->ext_addr = sizeof(dma_addr_t) > 4;
+
+	INIT_LIST_HEAD(&xdev->common.channels);
+
+	platform_set_drvdata(pdev, xdev);
+
+	xdev->axi_clk = devm_clk_get(xdev->dev, "axi_clk");
+	if (IS_ERR(xdev->axi_clk))
+		return PTR_ERR(xdev->axi_clk);
+
+	xdev->reg = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(xdev->reg))
+		return PTR_ERR(xdev->reg);
+
+	dpdma_hw_init(xdev);
+
+	xdev->irq = platform_get_irq(pdev, 0);
+	if (xdev->irq < 0) {
+		dev_err(xdev->dev, "failed to get platform irq\n");
+		return xdev->irq;
+	}
+
+	ret = request_irq(xdev->irq, xilinx_dpdma_irq_handler, IRQF_SHARED,
+			  dev_name(xdev->dev), xdev);
+	if (ret) {
+		dev_err(xdev->dev, "failed to request IRQ\n");
+		return ret;
+	}
+
+	ddev = &xdev->common;
+	ddev->dev = &pdev->dev;
+
+	dma_cap_set(DMA_SLAVE, ddev->cap_mask);
+	dma_cap_set(DMA_PRIVATE, ddev->cap_mask);
+	dma_cap_set(DMA_INTERLEAVE, ddev->cap_mask);
+	dma_cap_set(DMA_REPEAT, ddev->cap_mask);
+	dma_cap_set(DMA_LOAD_EOT, ddev->cap_mask);
+	ddev->copy_align = fls(XILINX_DPDMA_ALIGN_BYTES - 1);
+
+	ddev->device_alloc_chan_resources = xilinx_dpdma_alloc_chan_resources;
+	ddev->device_free_chan_resources = xilinx_dpdma_free_chan_resources;
+	ddev->device_prep_interleaved_dma = xilinx_dpdma_prep_interleaved_dma;
+	/* TODO: Can we achieve better granularity ? */
+	ddev->device_tx_status = dma_cookie_status;
+	ddev->device_issue_pending = xilinx_dpdma_issue_pending;
+	ddev->device_config = xilinx_dpdma_config;
+	ddev->device_pause = xilinx_dpdma_pause;
+	ddev->device_resume = xilinx_dpdma_resume;
+	ddev->device_terminate_all = xilinx_dpdma_terminate_all;
+	ddev->device_synchronize = xilinx_dpdma_synchronize;
+	ddev->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED);
+	ddev->directions = BIT(DMA_MEM_TO_DEV);
+	ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+
+	for (i = 0; i < ARRAY_SIZE(xdev->chan); ++i) {
+		ret = xilinx_dpdma_chan_init(xdev, i);
+		if (ret < 0) {
+			dev_err(xdev->dev, "failed to initialize channel %u\n",
+				i);
+			goto error;
+		}
+	}
+
+	ret = clk_prepare_enable(xdev->axi_clk);
+	if (ret) {
+		dev_err(xdev->dev, "failed to enable the axi clock\n");
+		goto error;
+	}
+
+	ret = dma_async_device_register(ddev);
+	if (ret) {
+		dev_err(xdev->dev, "failed to register the dma device\n");
+		goto error_dma_async;
+	}
+
+	ret = of_dma_controller_register(xdev->dev->of_node,
+					 of_dma_xilinx_xlate, ddev);
+	if (ret) {
+		dev_err(xdev->dev, "failed to register DMA to DT DMA helper\n");
+		goto error_of_dma;
+	}
+
+	xilinx_dpdma_enable_irq(xdev);
+
+	xilinx_dpdma_debugfs_init(xdev);
+
+	dev_info(&pdev->dev, "Xilinx DPDMA engine is probed\n");
+
+	return 0;
+
+error_of_dma:
+	dma_async_device_unregister(ddev);
+error_dma_async:
+	clk_disable_unprepare(xdev->axi_clk);
+error:
+	for (i = 0; i < ARRAY_SIZE(xdev->chan); i++)
+		xilinx_dpdma_chan_remove(xdev->chan[i]);
+
+	free_irq(xdev->irq, xdev);
+
+	return ret;
+}
+
+static int xilinx_dpdma_remove(struct platform_device *pdev)
+{
+	struct xilinx_dpdma_device *xdev = platform_get_drvdata(pdev);
+	unsigned int i;
+
+	/* Start by disabling the IRQ to avoid races during cleanup. */
+	free_irq(xdev->irq, xdev);
+
+	xilinx_dpdma_disable_irq(xdev);
+	of_dma_controller_free(pdev->dev.of_node);
+	dma_async_device_unregister(&xdev->common);
+	clk_disable_unprepare(xdev->axi_clk);
+
+	for (i = 0; i < ARRAY_SIZE(xdev->chan); i++)
+		xilinx_dpdma_chan_remove(xdev->chan[i]);
+
+	return 0;
+}
+
+static const struct of_device_id xilinx_dpdma_of_match[] = {
+	{ .compatible = "xlnx,zynqmp-dpdma",},
+	{ /* end of table */ },
+};
+MODULE_DEVICE_TABLE(of, xilinx_dpdma_of_match);
+
+static struct platform_driver xilinx_dpdma_driver = {
+	.probe			= xilinx_dpdma_probe,
+	.remove			= xilinx_dpdma_remove,
+	.driver			= {
+		.name		= "xilinx-zynqmp-dpdma",
+		.of_match_table	= xilinx_dpdma_of_match,
+	},
+};
+
+module_platform_driver(xilinx_dpdma_driver);
+
+MODULE_AUTHOR("Xilinx, Inc.");
+MODULE_DESCRIPTION("Xilinx ZynqMP DPDMA driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c
index 84009c5..5fecf5a 100644
--- a/drivers/dma/xilinx/zynqmp_dma.c
+++ b/drivers/dma/xilinx/zynqmp_dma.c
@@ -434,6 +434,7 @@
 	struct zynqmp_dma_desc_sw *child, *next;
 
 	chan->desc_free_cnt++;
+	list_del(&sdesc->node);
 	list_add_tail(&sdesc->node, &chan->free_list);
 	list_for_each_entry_safe(child, next, &sdesc->tx_list, node) {
 		chan->desc_free_cnt++;
@@ -608,8 +609,6 @@
 		dma_async_tx_callback callback;
 		void *callback_param;
 
-		list_del(&desc->node);
-
 		callback = desc->async_tx.callback;
 		callback_param = desc->async_tx.callback_param;
 		if (callback) {
@@ -743,11 +742,11 @@
 
 /**
  * zynqmp_dma_do_tasklet - Schedule completion tasklet
- * @data: Pointer to the ZynqMP DMA channel structure
+ * @t: Pointer to the tasklet embedded in the ZynqMP DMA channel structure
  */
-static void zynqmp_dma_do_tasklet(unsigned long data)
+static void zynqmp_dma_do_tasklet(struct tasklet_struct *t)
 {
-	struct zynqmp_dma_chan *chan = (struct zynqmp_dma_chan *)data;
+	struct zynqmp_dma_chan *chan = from_tasklet(chan, t, tasklet);
 	u32 count;
 	unsigned long irqflags;
 
@@ -909,7 +908,7 @@
 
 	chan->is_dmacoherent =  of_property_read_bool(node, "dma-coherent");
 	zdev->chan = chan;
-	tasklet_init(&chan->tasklet, zynqmp_dma_do_tasklet, (ulong)chan);
+	tasklet_setup(&chan->tasklet, zynqmp_dma_do_tasklet);
 	spin_lock_init(&chan->lock);
 	INIT_LIST_HEAD(&chan->active_list);
 	INIT_LIST_HEAD(&chan->pending_list);