Update Linux to v5.10.109

Sourced from [1]

[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz

Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 5bf7542..aadaea0 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -59,9 +59,17 @@
 
 config SPI_ALTERA
 	tristate "Altera SPI Controller"
+	select REGMAP_MMIO
 	help
 	  This is the driver for the Altera SPI Controller.
 
+config SPI_AR934X
+	tristate "Qualcomm Atheros AR934X/QCA95XX SPI controller driver"
+	depends on ATH79 || COMPILE_TEST
+	help
+	  This enables support for the SPI controller present on the
+	  Qualcomm Atheros AR934X/QCA95XX SoCs.
+
 config SPI_ATH79
 	tristate "Atheros AR71XX/AR724X/AR913X SPI controller driver"
 	depends on ATH79 || COMPILE_TEST
@@ -80,6 +88,7 @@
 config SPI_ATMEL
 	tristate "Atmel SPI Controller"
 	depends on ARCH_AT91 || COMPILE_TEST
+	depends on OF
 	help
 	  This selects a driver for the Atmel SPI Controller, present on
 	  many AT91 ARM chips.
@@ -94,7 +103,7 @@
 
 config SPI_ATMEL_QUADSPI
 	tristate "Atmel Quad SPI Controller"
-	depends on ARCH_AT91 || (ARM && COMPILE_TEST && !ARCH_EBSA110)
+	depends on ARCH_AT91 || COMPILE_TEST
 	depends on OF && HAS_IOMEM
 	help
 	  This enables support for the Quad SPI controller in master mode.
@@ -141,13 +150,13 @@
 
 config SPI_BCM63XX
 	tristate "Broadcom BCM63xx SPI controller"
-	depends on BCM63XX || COMPILE_TEST
+	depends on BCM63XX || BMIPS_GENERIC || COMPILE_TEST
 	help
-          Enable support for the SPI controller on the Broadcom BCM63xx SoCs.
+	  Enable support for the SPI controller on the Broadcom BCM63xx SoCs.
 
 config SPI_BCM63XX_HSSPI
 	tristate "Broadcom BCM63XX HS SPI controller driver"
-	depends on BCM63XX || ARCH_BCM_63XX || COMPILE_TEST
+	depends on BCM63XX || BMIPS_GENERIC || ARCH_BCM_63XX || COMPILE_TEST
 	help
 	  This enables support for the High Speed SPI controller present on
 	  newer Broadcom BCM63XX SoCs.
@@ -160,7 +169,7 @@
 	help
 	  Enables support for the Broadcom SPI flash and MSPI controller.
 	  Select this option for any one of BRCMSTB, iProc NSP and NS2 SoCs
-	  based platforms. This driver works for both SPI master for spi-nor
+	  based platforms. This driver works for both SPI master for SPI NOR
 	  flash device as well as MSPI device.
 
 config SPI_BITBANG
@@ -192,6 +201,17 @@
 	  This selects the Cadence SPI controller master driver
 	  used by Xilinx Zynq and ZynqMP.
 
+config SPI_CADENCE_QUADSPI
+	tristate "Cadence Quad SPI controller"
+	depends on OF && (ARM || ARM64 || COMPILE_TEST)
+	help
+	  Enable support for the Cadence Quad SPI Flash controller.
+
+	  Cadence QSPI is a specialized controller for connecting an SPI
+	  Flash over 1/2/4-bit wide bus. Enable this option if you have a
+	  device with a Cadence QSPI controller and want to access the
+	  Flash as an MTD device.
+
 config SPI_CLPS711X
 	tristate "CLPS711X host SPI controller"
 	depends on ARCH_CLPS711X || COMPILE_TEST
@@ -215,30 +235,62 @@
 
 config SPI_DESIGNWARE
 	tristate "DesignWare SPI controller core support"
+	imply SPI_MEM
 	help
 	  general driver for SPI controller core from DesignWare
 
+if SPI_DESIGNWARE
+
+config SPI_DW_DMA
+	bool "DMA support for DW SPI controller"
+
 config SPI_DW_PCI
 	tristate "PCI interface driver for DW SPI core"
-	depends on SPI_DESIGNWARE && PCI
-
-config SPI_DW_MID_DMA
-	bool "DMA support for DW SPI controller on Intel MID platform"
-	depends on SPI_DW_PCI && DW_DMAC_PCI
+	depends on PCI
 
 config SPI_DW_MMIO
 	tristate "Memory-mapped io interface driver for DW SPI core"
-	depends on SPI_DESIGNWARE
+	depends on HAS_IOMEM
+
+config SPI_DW_BT1
+	tristate "Baikal-T1 SPI driver for DW SPI core"
+	depends on MIPS_BAIKAL_T1 || COMPILE_TEST
+	select MULTIPLEXER
+	select MUX_MMIO
+	help
+	  Baikal-T1 SoC is equipped with three DW APB SSI-based MMIO SPI
+	  controllers. Two of them are pretty much normal: with IRQ, DMA,
+	  FIFOs of 64 words depth, 4x CSs, but the third one as being a
+	  part of the Baikal-T1 System Boot Controller has got a very
+	  limited resources: no IRQ, no DMA, only a single native
+	  chip-select and Tx/Rx FIFO with just 8 words depth available.
+	  The later one is normally connected to an external SPI-nor flash
+	  of 128Mb (in general can be of bigger size).
+
+config SPI_DW_BT1_DIRMAP
+	bool "Directly mapped Baikal-T1 Boot SPI flash support"
+	depends on SPI_DW_BT1
+	help
+	  Directly mapped SPI flash memory is an interface specific to the
+	  Baikal-T1 System Boot Controller. It is a 16MB MMIO region, which
+	  can be used to access a peripheral memory device just by
+	  reading/writing data from/to it. Note that the system APB bus
+	  will stall during each IO from/to the dirmap region until the
+	  operation is finished. So try not to use it concurrently with
+	  time-critical tasks (like the SPI memory operations implemented
+	  in this driver).
+
+endif
 
 config SPI_DLN2
        tristate "Diolan DLN-2 USB SPI adapter"
        depends on MFD_DLN2
        help
-         If you say yes to this option, support will be included for Diolan
-         DLN2, a USB to SPI interface.
+	 If you say yes to this option, support will be included for Diolan
+	 DLN2, a USB to SPI interface.
 
-         This driver can also be built as a module.  If so, the module
-         will be called spi-dln2.
+	 This driver can also be built as a module.  If so, the module
+	 will be called spi-dln2.
 
 config SPI_EFM32
 	tristate "EFM32 SPI controller"
@@ -263,6 +315,13 @@
 	  has only been tested with m25p80 type chips. The hardware has no
 	  support for other types of SPI peripherals.
 
+config SPI_FSI
+	tristate "FSI SPI driver"
+	depends on FSI
+	help
+	  This enables support for the driver for FSI bus attached SPI
+	  controllers.
+
 config SPI_FSL_LPSPI
 	tristate "Freescale i.MX LPSPI controller"
 	depends on ARCH_MXC || COMPILE_TEST
@@ -280,6 +339,14 @@
 	  This controller does not support generic SPI messages. It only
 	  supports the high-level SPI memory interface.
 
+config SPI_HISI_SFC_V3XX
+	tristate "HiSilicon SPI NOR Flash Controller for Hi16XX chipsets"
+	depends on (ARM64 && ACPI) || COMPILE_TEST
+	depends on HAS_IOMEM
+	help
+	  This enables support for HiSilicon v3xx SPI NOR flash controller
+	  found in hi16xx chipsets.
+
 config SPI_NXP_FLEXSPI
 	tristate "NXP Flex SPI controller"
 	depends on ARCH_LAYERSCAPE || HAS_IOMEM
@@ -405,6 +472,7 @@
 
 config SPI_MESON_SPICC
 	tristate "Amlogic Meson SPICC controller"
+	depends on COMMON_CLK
 	depends on ARCH_MESON || COMPILE_TEST
 	help
 	  This enables master mode support for the SPICC (SPI communication
@@ -433,6 +501,16 @@
 	help
 	  This selects a driver for the MediaTek MT7621 SPI Controller.
 
+config SPI_MTK_NOR
+	tristate "MediaTek SPI NOR controller"
+	depends on ARCH_MEDIATEK || COMPILE_TEST
+	help
+	  This enables support for SPI NOR controller found on MediaTek
+	  ARM SoCs. This is a controller specifically for SPI NOR flash.
+	  It can perform generic SPI transfers up to 6 bytes via generic
+	  SPI interface as well as several SPI NOR specific instructions
+	  via SPI MEM interface.
+
 config SPI_NPCM_FIU
 	tristate "Nuvoton NPCM FLASH Interface Unit"
 	depends on ARCH_NPCM || COMPILE_TEST
@@ -452,11 +530,11 @@
 
 config SPI_LANTIQ_SSC
 	tristate "Lantiq SSC SPI controller"
-	depends on LANTIQ || COMPILE_TEST
+	depends on LANTIQ || X86 || COMPILE_TEST
 	help
 	  This driver supports the Lantiq SSC SPI controller in master
 	  mode. This controller is found on Intel (former Lantiq) SoCs like
-	  the Danube, Falcon, xRX200, xRX300.
+	  the Danube, Falcon, xRX200, xRX300, Lightning Mountain.
 
 config SPI_OC_TINY
 	tristate "OpenCores tiny SPI"
@@ -541,7 +619,7 @@
 
 config SPI_PXA2XX
 	tristate "PXA2xx SSP SPI master"
-	depends on (ARCH_PXA || ARCH_MMP || PCI || ACPI)
+	depends on ARCH_PXA || ARCH_MMP || PCI || ACPI || COMPILE_TEST
 	select PXA_SSP if ARCH_PXA || ARCH_MMP
 	help
 	  This enables using a PXA2xx or Sodaville SSP port as a SPI master
@@ -568,6 +646,12 @@
 	help
 	  SPI controller driver for the Mikrotik RB4xx series boards.
 
+config SPI_RPCIF
+	tristate "Renesas RPC-IF SPI driver"
+	depends on RENESAS_RPCIF
+	help
+	  SPI driver for Renesas R-Car Gen3 RPC-IF.
+
 config SPI_RSPI
 	tristate "Renesas RSPI/QSPI controller"
 	depends on SUPERH || ARCH_RENESAS || COMPILE_TEST
@@ -582,7 +666,7 @@
 
 config SPI_QUP
 	tristate "Qualcomm SPI controller with QUP interface"
-	depends on ARCH_QCOM || (ARM && COMPILE_TEST)
+	depends on ARCH_QCOM || COMPILE_TEST
 	help
 	  Qualcomm Universal Peripheral (QUP) core is an AHB slave that
 	  provides a common data path (an output FIFO and an input FIFO)
@@ -625,7 +709,7 @@
 
 config SPI_S3C64XX
 	tristate "Samsung S3C64XX series type SPI"
-	depends on (PLAT_SAMSUNG || ARCH_EXYNOS || COMPILE_TEST)
+	depends on (PLAT_SAMSUNG || ARCH_S5PV210 || ARCH_EXYNOS || COMPILE_TEST)
 	help
 	  SPI driver for Samsung S3C64XX and newer SoCs.
 
@@ -747,10 +831,10 @@
 	  It also supports the new dual-bit and quad-bit SPI protocol.
 
 config SPI_MXIC
-        tristate "Macronix MX25F0A SPI controller"
-        depends on SPI_MASTER
-        help
-          This selects the Macronix MX25F0A SPI controller driver.
+	tristate "Macronix MX25F0A SPI controller"
+	depends on SPI_MASTER
+	help
+	  This selects the Macronix MX25F0A SPI controller driver.
 
 config SPI_MXS
 	tristate "Freescale MXS SPI controller"
@@ -810,6 +894,7 @@
 config SPI_UNIPHIER
 	tristate "Socionext UniPhier SPI Controller"
 	depends on (ARCH_UNIPHIER || COMPILE_TEST) && OF
+	depends on HAS_IOMEM
 	help
 	  This enables a driver for the Socionext UniPhier SoC SCSSI SPI controller.
 
@@ -876,10 +961,27 @@
 	help
 	  Enables Xilinx GQSPI controller driver for Zynq UltraScale+ MPSoC.
 
+config SPI_AMD
+	tristate "AMD SPI controller"
+	depends on SPI_MASTER || COMPILE_TEST
+	help
+	  Enables SPI controller driver for AMD SoC.
+
 #
 # Add new SPI master controllers in alphabetical order above this line
 #
 
+comment "SPI Multiplexer support"
+
+config SPI_MUX
+	tristate "SPI multiplexer support"
+	select MULTIPLEXER
+	help
+	  This adds support for SPI multiplexers. Each SPI mux will be
+	  accessible as a SPI controller, the devices behind the mux will appear
+	  to be chip selects on this controller. It is still necessary to
+	  select one or more specific mux-controller drivers.
+
 #
 # There are lots of SPI device types, with sensors and memory
 # being probably the most widely used ones.
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index bb49c9e..6fea582 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -9,11 +9,13 @@
 # config declarations into driver model code
 obj-$(CONFIG_SPI_MASTER)		+= spi.o
 obj-$(CONFIG_SPI_MEM)			+= spi-mem.o
+obj-$(CONFIG_SPI_MUX)			+= spi-mux.o
 obj-$(CONFIG_SPI_SPIDEV)		+= spidev.o
 obj-$(CONFIG_SPI_LOOPBACK_TEST)		+= spi-loopback-test.o
 
 # SPI master controller drivers (bus)
 obj-$(CONFIG_SPI_ALTERA)		+= spi-altera.o
+obj-$(CONFIG_SPI_AR934X)		+= spi-ar934x.o
 obj-$(CONFIG_SPI_ARMADA_3700)		+= spi-armada-3700.o
 obj-$(CONFIG_SPI_ATMEL)			+= spi-atmel.o
 obj-$(CONFIG_SPI_ATMEL_QUADSPI)		+= atmel-quadspi.o
@@ -29,17 +31,21 @@
 obj-$(CONFIG_SPI_BITBANG)		+= spi-bitbang.o
 obj-$(CONFIG_SPI_BUTTERFLY)		+= spi-butterfly.o
 obj-$(CONFIG_SPI_CADENCE)		+= spi-cadence.o
+obj-$(CONFIG_SPI_CADENCE_QUADSPI)	+= spi-cadence-quadspi.o
 obj-$(CONFIG_SPI_CLPS711X)		+= spi-clps711x.o
 obj-$(CONFIG_SPI_COLDFIRE_QSPI)		+= spi-coldfire-qspi.o
 obj-$(CONFIG_SPI_DAVINCI)		+= spi-davinci.o
 obj-$(CONFIG_SPI_DLN2)			+= spi-dln2.o
 obj-$(CONFIG_SPI_DESIGNWARE)		+= spi-dw.o
+spi-dw-y				:= spi-dw-core.o
+spi-dw-$(CONFIG_SPI_DW_DMA)		+= spi-dw-dma.o
+obj-$(CONFIG_SPI_DW_BT1)		+= spi-dw-bt1.o
 obj-$(CONFIG_SPI_DW_MMIO)		+= spi-dw-mmio.o
-obj-$(CONFIG_SPI_DW_PCI)		+= spi-dw-midpci.o
-spi-dw-midpci-objs			:= spi-dw-pci.o spi-dw-mid.o
+obj-$(CONFIG_SPI_DW_PCI)		+= spi-dw-pci.o
 obj-$(CONFIG_SPI_EFM32)			+= spi-efm32.o
 obj-$(CONFIG_SPI_EP93XX)		+= spi-ep93xx.o
 obj-$(CONFIG_SPI_FALCON)		+= spi-falcon.o
+obj-$(CONFIG_SPI_FSI)			+= spi-fsi.o
 obj-$(CONFIG_SPI_FSL_CPM)		+= spi-fsl-cpm.o
 obj-$(CONFIG_SPI_FSL_DSPI)		+= spi-fsl-dspi.o
 obj-$(CONFIG_SPI_FSL_LIB)		+= spi-fsl-lib.o
@@ -48,6 +54,7 @@
 obj-$(CONFIG_SPI_FSL_QUADSPI)		+= spi-fsl-qspi.o
 obj-$(CONFIG_SPI_FSL_SPI)		+= spi-fsl-spi.o
 obj-$(CONFIG_SPI_GPIO)			+= spi-gpio.o
+obj-$(CONFIG_SPI_HISI_SFC_V3XX)		+= spi-hisi-sfc-v3xx.o
 obj-$(CONFIG_SPI_IMG_SPFI)		+= spi-img-spfi.o
 obj-$(CONFIG_SPI_IMX)			+= spi-imx.o
 obj-$(CONFIG_SPI_LANTIQ_SSC)		+= spi-lantiq-ssc.o
@@ -61,6 +68,7 @@
 obj-$(CONFIG_SPI_MPC52xx)		+= spi-mpc52xx.o
 obj-$(CONFIG_SPI_MT65XX)                += spi-mt65xx.o
 obj-$(CONFIG_SPI_MT7621)		+= spi-mt7621.o
+obj-$(CONFIG_SPI_MTK_NOR)		+= spi-mtk-nor.o
 obj-$(CONFIG_SPI_MXIC)			+= spi-mxic.o
 obj-$(CONFIG_SPI_MXS)			+= spi-mxs.o
 obj-$(CONFIG_SPI_NPCM_FIU)		+= spi-npcm-fiu.o
@@ -86,10 +94,10 @@
 obj-$(CONFIG_SPI_QUP)			+= spi-qup.o
 obj-$(CONFIG_SPI_ROCKCHIP)		+= spi-rockchip.o
 obj-$(CONFIG_SPI_RB4XX)			+= spi-rb4xx.o
+obj-$(CONFIG_SPI_RPCIF)			+= spi-rpc-if.o
 obj-$(CONFIG_SPI_RSPI)			+= spi-rspi.o
 obj-$(CONFIG_SPI_S3C24XX)		+= spi-s3c24xx-hw.o
 spi-s3c24xx-hw-y			:= spi-s3c24xx.o
-spi-s3c24xx-hw-$(CONFIG_SPI_S3C24XX_FIQ) += spi-s3c24xx-fiq.o
 obj-$(CONFIG_SPI_S3C64XX)		+= spi-s3c64xx.o
 obj-$(CONFIG_SPI_SC18IS602)		+= spi-sc18is602.o
 obj-$(CONFIG_SPI_SH)			+= spi-sh.o
@@ -122,6 +130,7 @@
 obj-$(CONFIG_SPI_XTENSA_XTFPGA)		+= spi-xtensa-xtfpga.o
 obj-$(CONFIG_SPI_ZYNQ_QSPI)		+= spi-zynq-qspi.o
 obj-$(CONFIG_SPI_ZYNQMP_GQSPI)		+= spi-zynqmp-gqspi.o
+obj-$(CONFIG_SPI_AMD)			+= spi-amd.o
 
 # SPI slave protocol handlers
 obj-$(CONFIG_SPI_SLAVE_TIME)		+= spi-slave-time.o
diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c
index 5fd929e..1e63fd4 100644
--- a/drivers/spi/atmel-quadspi.c
+++ b/drivers/spi/atmel-quadspi.c
@@ -173,6 +173,81 @@
 	{ 4, 4, 4, QSPI_IFR_WIDTH_QUAD_CMD },
 };
 
+#ifdef VERBOSE_DEBUG
+static const char *atmel_qspi_reg_name(u32 offset, char *tmp, size_t sz)
+{
+	switch (offset) {
+	case QSPI_CR:
+		return "CR";
+	case QSPI_MR:
+		return "MR";
+	case QSPI_RD:
+		return "MR";
+	case QSPI_TD:
+		return "TD";
+	case QSPI_SR:
+		return "SR";
+	case QSPI_IER:
+		return "IER";
+	case QSPI_IDR:
+		return "IDR";
+	case QSPI_IMR:
+		return "IMR";
+	case QSPI_SCR:
+		return "SCR";
+	case QSPI_IAR:
+		return "IAR";
+	case QSPI_ICR:
+		return "ICR/WICR";
+	case QSPI_IFR:
+		return "IFR";
+	case QSPI_RICR:
+		return "RICR";
+	case QSPI_SMR:
+		return "SMR";
+	case QSPI_SKR:
+		return "SKR";
+	case QSPI_WPMR:
+		return "WPMR";
+	case QSPI_WPSR:
+		return "WPSR";
+	case QSPI_VERSION:
+		return "VERSION";
+	default:
+		snprintf(tmp, sz, "0x%02x", offset);
+		break;
+	}
+
+	return tmp;
+}
+#endif /* VERBOSE_DEBUG */
+
+static u32 atmel_qspi_read(struct atmel_qspi *aq, u32 offset)
+{
+	u32 value = readl_relaxed(aq->regs + offset);
+
+#ifdef VERBOSE_DEBUG
+	char tmp[8];
+
+	dev_vdbg(&aq->pdev->dev, "read 0x%08x from %s\n", value,
+		 atmel_qspi_reg_name(offset, tmp, sizeof(tmp)));
+#endif /* VERBOSE_DEBUG */
+
+	return value;
+}
+
+static void atmel_qspi_write(u32 value, struct atmel_qspi *aq, u32 offset)
+{
+#ifdef VERBOSE_DEBUG
+	char tmp[8];
+
+	dev_vdbg(&aq->pdev->dev, "write 0x%08x into %s\n", value,
+		 atmel_qspi_reg_name(offset, tmp, sizeof(tmp)));
+#endif /* VERBOSE_DEBUG */
+
+	writel_relaxed(value, aq->regs + offset);
+}
+
 static inline bool atmel_qspi_is_compatible(const struct spi_mem_op *op,
 					    const struct atmel_qspi_mode *mode)
 {
@@ -210,6 +285,12 @@
 		op->dummy.nbytes == 0)
 		return false;
 
+	/* DTR ops not supported. */
+	if (op->cmd.dtr || op->addr.dtr || op->dummy.dtr || op->data.dtr)
+		return false;
+	if (op->cmd.nbytes != 1)
+		return false;
+
 	return true;
 }
 
@@ -297,32 +378,32 @@
 	 * Serial Memory Mode (SMM).
 	 */
 	if (aq->mr != QSPI_MR_SMM) {
-		writel_relaxed(QSPI_MR_SMM, aq->regs + QSPI_MR);
+		atmel_qspi_write(QSPI_MR_SMM, aq, QSPI_MR);
 		aq->mr = QSPI_MR_SMM;
 	}
 
 	/* Clear pending interrupts */
-	(void)readl_relaxed(aq->regs + QSPI_SR);
+	(void)atmel_qspi_read(aq, QSPI_SR);
 
 	if (aq->caps->has_ricr) {
 		if (!op->addr.nbytes && op->data.dir == SPI_MEM_DATA_IN)
 			ifr |= QSPI_IFR_APBTFRTYP_READ;
 
 		/* Set QSPI Instruction Frame registers */
-		writel_relaxed(iar, aq->regs + QSPI_IAR);
+		atmel_qspi_write(iar, aq, QSPI_IAR);
 		if (op->data.dir == SPI_MEM_DATA_IN)
-			writel_relaxed(icr, aq->regs + QSPI_RICR);
+			atmel_qspi_write(icr, aq, QSPI_RICR);
 		else
-			writel_relaxed(icr, aq->regs + QSPI_WICR);
-		writel_relaxed(ifr, aq->regs + QSPI_IFR);
+			atmel_qspi_write(icr, aq, QSPI_WICR);
+		atmel_qspi_write(ifr, aq, QSPI_IFR);
 	} else {
 		if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_OUT)
 			ifr |= QSPI_IFR_SAMA5D2_WRITE_TRSFR;
 
 		/* Set QSPI Instruction Frame registers */
-		writel_relaxed(iar, aq->regs + QSPI_IAR);
-		writel_relaxed(icr, aq->regs + QSPI_ICR);
-		writel_relaxed(ifr, aq->regs + QSPI_IFR);
+		atmel_qspi_write(iar, aq, QSPI_IAR);
+		atmel_qspi_write(icr, aq, QSPI_ICR);
+		atmel_qspi_write(ifr, aq, QSPI_IFR);
 	}
 
 	return 0;
@@ -349,33 +430,33 @@
 	/* Skip to the final steps if there is no data */
 	if (op->data.nbytes) {
 		/* Dummy read of QSPI_IFR to synchronize APB and AHB accesses */
-		(void)readl_relaxed(aq->regs + QSPI_IFR);
+		(void)atmel_qspi_read(aq, QSPI_IFR);
 
 		/* Send/Receive data */
 		if (op->data.dir == SPI_MEM_DATA_IN)
-			_memcpy_fromio(op->data.buf.in, aq->mem + offset,
-				       op->data.nbytes);
+			memcpy_fromio(op->data.buf.in, aq->mem + offset,
+				      op->data.nbytes);
 		else
-			_memcpy_toio(aq->mem + offset, op->data.buf.out,
-				     op->data.nbytes);
+			memcpy_toio(aq->mem + offset, op->data.buf.out,
+				    op->data.nbytes);
 
 		/* Release the chip-select */
-		writel_relaxed(QSPI_CR_LASTXFER, aq->regs + QSPI_CR);
+		atmel_qspi_write(QSPI_CR_LASTXFER, aq, QSPI_CR);
 	}
 
 	/* Poll INSTRuction End status */
-	sr = readl_relaxed(aq->regs + QSPI_SR);
+	sr = atmel_qspi_read(aq, QSPI_SR);
 	if ((sr & QSPI_SR_CMD_COMPLETED) == QSPI_SR_CMD_COMPLETED)
 		return err;
 
 	/* Wait for INSTRuction End interrupt */
 	reinit_completion(&aq->cmd_completion);
 	aq->pending = sr & QSPI_SR_CMD_COMPLETED;
-	writel_relaxed(QSPI_SR_CMD_COMPLETED, aq->regs + QSPI_IER);
+	atmel_qspi_write(QSPI_SR_CMD_COMPLETED, aq, QSPI_IER);
 	if (!wait_for_completion_timeout(&aq->cmd_completion,
 					 msecs_to_jiffies(1000)))
 		err = -ETIMEDOUT;
-	writel_relaxed(QSPI_SR_CMD_COMPLETED, aq->regs + QSPI_IDR);
+	atmel_qspi_write(QSPI_SR_CMD_COMPLETED, aq, QSPI_IDR);
 
 	return err;
 }
@@ -414,7 +495,7 @@
 		scbr--;
 
 	aq->scr = QSPI_SCR_SCBR(scbr);
-	writel_relaxed(aq->scr, aq->regs + QSPI_SCR);
+	atmel_qspi_write(aq->scr, aq, QSPI_SCR);
 
 	return 0;
 }
@@ -422,14 +503,14 @@
 static void atmel_qspi_init(struct atmel_qspi *aq)
 {
 	/* Reset the QSPI controller */
-	writel_relaxed(QSPI_CR_SWRST, aq->regs + QSPI_CR);
+	atmel_qspi_write(QSPI_CR_SWRST, aq, QSPI_CR);
 
 	/* Set the QSPI controller by default in Serial Memory Mode */
-	writel_relaxed(QSPI_MR_SMM, aq->regs + QSPI_MR);
+	atmel_qspi_write(QSPI_MR_SMM, aq, QSPI_MR);
 	aq->mr = QSPI_MR_SMM;
 
 	/* Enable the QSPI controller */
-	writel_relaxed(QSPI_CR_QSPIEN, aq->regs + QSPI_CR);
+	atmel_qspi_write(QSPI_CR_QSPIEN, aq, QSPI_CR);
 }
 
 static irqreturn_t atmel_qspi_interrupt(int irq, void *dev_id)
@@ -437,8 +518,8 @@
 	struct atmel_qspi *aq = dev_id;
 	u32 status, mask, pending;
 
-	status = readl_relaxed(aq->regs + QSPI_SR);
-	mask = readl_relaxed(aq->regs + QSPI_IMR);
+	status = atmel_qspi_read(aq, QSPI_SR);
+	mask = atmel_qspi_read(aq, QSPI_IMR);
 	pending = status & mask;
 
 	if (!pending)
@@ -568,7 +649,7 @@
 	struct atmel_qspi *aq = spi_controller_get_devdata(ctrl);
 
 	spi_unregister_controller(ctrl);
-	writel_relaxed(QSPI_CR_QSPIDIS, aq->regs + QSPI_CR);
+	atmel_qspi_write(QSPI_CR_QSPIDIS, aq, QSPI_CR);
 	clk_disable_unprepare(aq->qspick);
 	clk_disable_unprepare(aq->pclk);
 	return 0;
@@ -595,7 +676,7 @@
 
 	atmel_qspi_init(aq);
 
-	writel_relaxed(aq->scr, aq->regs + QSPI_SCR);
+	atmel_qspi_write(aq->scr, aq, QSPI_SCR);
 
 	return 0;
 }
diff --git a/drivers/spi/spi-altera.c b/drivers/spi/spi-altera.c
index 41d71ba..62ea0c9 100644
--- a/drivers/spi/spi-altera.c
+++ b/drivers/spi/spi-altera.c
@@ -14,6 +14,7 @@
 #include <linux/errno.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/spi/altera.h>
 #include <linux/spi/spi.h>
 #include <linux/io.h>
 #include <linux/of.h>
@@ -40,19 +41,61 @@
 #define ALTERA_SPI_CONTROL_IE_MSK	0x100
 #define ALTERA_SPI_CONTROL_SSO_MSK	0x400
 
+#define ALTERA_SPI_MAX_CS		32
+
+enum altera_spi_type {
+	ALTERA_SPI_TYPE_UNKNOWN,
+	ALTERA_SPI_TYPE_SUBDEV,
+};
+
 struct altera_spi {
-	void __iomem *base;
 	int irq;
 	int len;
 	int count;
 	int bytes_per_word;
-	unsigned long imr;
+	u32 imr;
 
 	/* data buffers */
 	const unsigned char *tx;
 	unsigned char *rx;
+
+	struct regmap *regmap;
+	u32 regoff;
+	struct device *dev;
 };
 
+static const struct regmap_config spi_altera_config = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.fast_io = true,
+};
+
+static int altr_spi_writel(struct altera_spi *hw, unsigned int reg,
+			   unsigned int val)
+{
+	int ret;
+
+	ret = regmap_write(hw->regmap, hw->regoff + reg, val);
+	if (ret)
+		dev_err(hw->dev, "fail to write reg 0x%x val 0x%x: %d\n",
+			reg, val, ret);
+
+	return ret;
+}
+
+static int altr_spi_readl(struct altera_spi *hw, unsigned int reg,
+			  unsigned int *val)
+{
+	int ret;
+
+	ret = regmap_read(hw->regmap, hw->regoff + reg, val);
+	if (ret)
+		dev_err(hw->dev, "fail to read reg 0x%x: %d\n", reg, ret);
+
+	return ret;
+}
+
 static inline struct altera_spi *altera_spi_to_hw(struct spi_device *sdev)
 {
 	return spi_master_get_devdata(sdev->master);
@@ -64,12 +107,13 @@
 
 	if (is_high) {
 		hw->imr &= ~ALTERA_SPI_CONTROL_SSO_MSK;
-		writel(hw->imr, hw->base + ALTERA_SPI_CONTROL);
-		writel(0, hw->base + ALTERA_SPI_SLAVE_SEL);
+		altr_spi_writel(hw, ALTERA_SPI_CONTROL, hw->imr);
+		altr_spi_writel(hw, ALTERA_SPI_SLAVE_SEL, 0);
 	} else {
-		writel(BIT(spi->chip_select), hw->base + ALTERA_SPI_SLAVE_SEL);
+		altr_spi_writel(hw, ALTERA_SPI_SLAVE_SEL,
+				BIT(spi->chip_select));
 		hw->imr |= ALTERA_SPI_CONTROL_SSO_MSK;
-		writel(hw->imr, hw->base + ALTERA_SPI_CONTROL);
+		altr_spi_writel(hw, ALTERA_SPI_CONTROL, hw->imr);
 	}
 }
 
@@ -86,17 +130,24 @@
 			txd = (hw->tx[hw->count * 2]
 				| (hw->tx[hw->count * 2 + 1] << 8));
 			break;
+		case 4:
+			txd = (hw->tx[hw->count * 4]
+				| (hw->tx[hw->count * 4 + 1] << 8)
+				| (hw->tx[hw->count * 4 + 2] << 16)
+				| (hw->tx[hw->count * 4 + 3] << 24));
+			break;
+
 		}
 	}
 
-	writel(txd, hw->base + ALTERA_SPI_TXDATA);
+	altr_spi_writel(hw, ALTERA_SPI_TXDATA, txd);
 }
 
 static void altera_spi_rx_word(struct altera_spi *hw)
 {
 	unsigned int rxd;
 
-	rxd = readl(hw->base + ALTERA_SPI_RXDATA);
+	altr_spi_readl(hw, ALTERA_SPI_RXDATA, &rxd);
 	if (hw->rx) {
 		switch (hw->bytes_per_word) {
 		case 1:
@@ -106,6 +157,13 @@
 			hw->rx[hw->count * 2] = rxd;
 			hw->rx[hw->count * 2 + 1] = rxd >> 8;
 			break;
+		case 4:
+			hw->rx[hw->count * 4] = rxd;
+			hw->rx[hw->count * 4 + 1] = rxd >> 8;
+			hw->rx[hw->count * 4 + 2] = rxd >> 16;
+			hw->rx[hw->count * 4 + 3] = rxd >> 24;
+			break;
+
 		}
 	}
 
@@ -116,6 +174,7 @@
 	struct spi_device *spi, struct spi_transfer *t)
 {
 	struct altera_spi *hw = spi_master_get_devdata(master);
+	u32 val;
 
 	hw->tx = t->tx_buf;
 	hw->rx = t->rx_buf;
@@ -126,24 +185,30 @@
 	if (hw->irq >= 0) {
 		/* enable receive interrupt */
 		hw->imr |= ALTERA_SPI_CONTROL_IRRDY_MSK;
-		writel(hw->imr, hw->base + ALTERA_SPI_CONTROL);
+		altr_spi_writel(hw, ALTERA_SPI_CONTROL, hw->imr);
 
 		/* send the first byte */
 		altera_spi_tx_word(hw);
-	} else {
-		while (hw->count < hw->len) {
-			altera_spi_tx_word(hw);
 
-			while (!(readl(hw->base + ALTERA_SPI_STATUS) &
-				 ALTERA_SPI_STATUS_RRDY_MSK))
-				cpu_relax();
-
-			altera_spi_rx_word(hw);
-		}
-		spi_finalize_current_transfer(master);
+		return 1;
 	}
 
-	return t->len;
+	while (hw->count < hw->len) {
+		altera_spi_tx_word(hw);
+
+		for (;;) {
+			altr_spi_readl(hw, ALTERA_SPI_STATUS, &val);
+			if (val & ALTERA_SPI_STATUS_RRDY_MSK)
+				break;
+
+			cpu_relax();
+		}
+
+		altera_spi_rx_word(hw);
+	}
+	spi_finalize_current_transfer(master);
+
+	return 0;
 }
 
 static irqreturn_t altera_spi_irq(int irq, void *dev)
@@ -158,7 +223,7 @@
 	} else {
 		/* disable receive interrupt */
 		hw->imr &= ~ALTERA_SPI_CONTROL_IRRDY_MSK;
-		writel(hw->imr, hw->base + ALTERA_SPI_CONTROL);
+		altr_spi_writel(hw, ALTERA_SPI_CONTROL, hw->imr);
 
 		spi_finalize_current_transfer(master);
 	}
@@ -168,9 +233,14 @@
 
 static int altera_spi_probe(struct platform_device *pdev)
 {
+	const struct platform_device_id *platid = platform_get_device_id(pdev);
+	struct altera_spi_platform_data *pdata = dev_get_platdata(&pdev->dev);
+	enum altera_spi_type type = ALTERA_SPI_TYPE_UNKNOWN;
 	struct altera_spi *hw;
 	struct spi_master *master;
 	int err = -ENODEV;
+	u32 val;
+	u16 i;
 
 	master = spi_alloc_master(&pdev->dev, sizeof(struct altera_spi));
 	if (!master)
@@ -178,27 +248,73 @@
 
 	/* setup the master state. */
 	master->bus_num = pdev->id;
-	master->num_chipselect = 16;
-	master->mode_bits = SPI_CS_HIGH;
-	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 16);
+
+	if (pdata) {
+		if (pdata->num_chipselect > ALTERA_SPI_MAX_CS) {
+			dev_err(&pdev->dev,
+				"Invalid number of chipselect: %hu\n",
+				pdata->num_chipselect);
+			err = -EINVAL;
+			goto exit;
+		}
+
+		master->num_chipselect = pdata->num_chipselect;
+		master->mode_bits = pdata->mode_bits;
+		master->bits_per_word_mask = pdata->bits_per_word_mask;
+	} else {
+		master->num_chipselect = 16;
+		master->mode_bits = SPI_CS_HIGH;
+		master->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 16);
+	}
+
 	master->dev.of_node = pdev->dev.of_node;
 	master->transfer_one = altera_spi_txrx;
 	master->set_cs = altera_spi_set_cs;
 
 	hw = spi_master_get_devdata(master);
+	hw->dev = &pdev->dev;
+
+	if (platid)
+		type = platid->driver_data;
 
 	/* find and map our resources */
-	hw->base = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(hw->base)) {
-		err = PTR_ERR(hw->base);
-		goto exit;
+	if (type == ALTERA_SPI_TYPE_SUBDEV) {
+		struct resource *regoff;
+
+		hw->regmap = dev_get_regmap(pdev->dev.parent, NULL);
+		if (!hw->regmap) {
+			dev_err(&pdev->dev, "get regmap failed\n");
+			goto exit;
+		}
+
+		regoff = platform_get_resource(pdev, IORESOURCE_REG, 0);
+		if (regoff)
+			hw->regoff = regoff->start;
+	} else {
+		void __iomem *res;
+
+		res = devm_platform_ioremap_resource(pdev, 0);
+		if (IS_ERR(res)) {
+			err = PTR_ERR(res);
+			goto exit;
+		}
+
+		hw->regmap = devm_regmap_init_mmio(&pdev->dev, res,
+						   &spi_altera_config);
+		if (IS_ERR(hw->regmap)) {
+			dev_err(&pdev->dev, "regmap mmio init failed\n");
+			err = PTR_ERR(hw->regmap);
+			goto exit;
+		}
 	}
+
 	/* program defaults into the registers */
 	hw->imr = 0;		/* disable spi interrupts */
-	writel(hw->imr, hw->base + ALTERA_SPI_CONTROL);
-	writel(0, hw->base + ALTERA_SPI_STATUS);	/* clear status reg */
-	if (readl(hw->base + ALTERA_SPI_STATUS) & ALTERA_SPI_STATUS_RRDY_MSK)
-		readl(hw->base + ALTERA_SPI_RXDATA);	/* flush rxdata */
+	altr_spi_writel(hw, ALTERA_SPI_CONTROL, hw->imr);
+	altr_spi_writel(hw, ALTERA_SPI_STATUS, 0);	/* clear status reg */
+	altr_spi_readl(hw, ALTERA_SPI_STATUS, &val);
+	if (val & ALTERA_SPI_STATUS_RRDY_MSK)
+		altr_spi_readl(hw, ALTERA_SPI_RXDATA, &val); /* flush rxdata */
 	/* irq is optional */
 	hw->irq = platform_get_irq(pdev, 0);
 	if (hw->irq >= 0) {
@@ -211,7 +327,17 @@
 	err = devm_spi_register_master(&pdev->dev, master);
 	if (err)
 		goto exit;
-	dev_info(&pdev->dev, "base %p, irq %d\n", hw->base, hw->irq);
+
+	if (pdata) {
+		for (i = 0; i < pdata->num_devices; i++) {
+			if (!spi_new_device(master, pdata->devices + i))
+				dev_warn(&pdev->dev,
+					 "unable to create SPI device: %s\n",
+					 pdata->devices[i].modalias);
+		}
+	}
+
+	dev_info(&pdev->dev, "regoff %u, irq %d\n", hw->regoff, hw->irq);
 
 	return 0;
 exit:
@@ -228,6 +354,13 @@
 MODULE_DEVICE_TABLE(of, altera_spi_match);
 #endif /* CONFIG_OF */
 
+static const struct platform_device_id altera_spi_ids[] = {
+	{ DRV_NAME,		ALTERA_SPI_TYPE_UNKNOWN },
+	{ "subdev_spi_altera",	ALTERA_SPI_TYPE_SUBDEV },
+	{ }
+};
+MODULE_DEVICE_TABLE(platform, altera_spi_ids);
+
 static struct platform_driver altera_spi_driver = {
 	.probe = altera_spi_probe,
 	.driver = {
@@ -235,6 +368,7 @@
 		.pm = NULL,
 		.of_match_table = of_match_ptr(altera_spi_match),
 	},
+	.id_table	= altera_spi_ids,
 };
 module_platform_driver(altera_spi_driver);
 
diff --git a/drivers/spi/spi-amd.c b/drivers/spi/spi-amd.c
new file mode 100644
index 0000000..7f62954
--- /dev/null
+++ b/drivers/spi/spi-amd.c
@@ -0,0 +1,317 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+//
+// AMD SPI controller driver
+//
+// Copyright (c) 2020, Advanced Micro Devices, Inc.
+//
+// Author: Sanjay R Mehta <sanju.mehta@amd.com>
+
+#include <linux/acpi.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/spi/spi.h>
+
+#define AMD_SPI_CTRL0_REG	0x00
+#define AMD_SPI_EXEC_CMD	BIT(16)
+#define AMD_SPI_FIFO_CLEAR	BIT(20)
+#define AMD_SPI_BUSY		BIT(31)
+
+#define AMD_SPI_OPCODE_MASK	0xFF
+
+#define AMD_SPI_ALT_CS_REG	0x1D
+#define AMD_SPI_ALT_CS_MASK	0x3
+
+#define AMD_SPI_FIFO_BASE	0x80
+#define AMD_SPI_TX_COUNT_REG	0x48
+#define AMD_SPI_RX_COUNT_REG	0x4B
+#define AMD_SPI_STATUS_REG	0x4C
+
+#define AMD_SPI_MEM_SIZE	200
+
+/* M_CMD OP codes for SPI */
+#define AMD_SPI_XFER_TX		1
+#define AMD_SPI_XFER_RX		2
+
+struct amd_spi {
+	void __iomem *io_remap_addr;
+	unsigned long io_base_addr;
+	u32 rom_addr;
+	u8 chip_select;
+};
+
+static inline u8 amd_spi_readreg8(struct spi_master *master, int idx)
+{
+	struct amd_spi *amd_spi = spi_master_get_devdata(master);
+
+	return ioread8((u8 __iomem *)amd_spi->io_remap_addr + idx);
+}
+
+static inline void amd_spi_writereg8(struct spi_master *master, int idx,
+				     u8 val)
+{
+	struct amd_spi *amd_spi = spi_master_get_devdata(master);
+
+	iowrite8(val, ((u8 __iomem *)amd_spi->io_remap_addr + idx));
+}
+
+static inline void amd_spi_setclear_reg8(struct spi_master *master, int idx,
+					 u8 set, u8 clear)
+{
+	u8 tmp = amd_spi_readreg8(master, idx);
+
+	tmp = (tmp & ~clear) | set;
+	amd_spi_writereg8(master, idx, tmp);
+}
+
+static inline u32 amd_spi_readreg32(struct spi_master *master, int idx)
+{
+	struct amd_spi *amd_spi = spi_master_get_devdata(master);
+
+	return ioread32((u8 __iomem *)amd_spi->io_remap_addr + idx);
+}
+
+static inline void amd_spi_writereg32(struct spi_master *master, int idx,
+				      u32 val)
+{
+	struct amd_spi *amd_spi = spi_master_get_devdata(master);
+
+	iowrite32(val, ((u8 __iomem *)amd_spi->io_remap_addr + idx));
+}
+
+static inline void amd_spi_setclear_reg32(struct spi_master *master, int idx,
+					  u32 set, u32 clear)
+{
+	u32 tmp = amd_spi_readreg32(master, idx);
+
+	tmp = (tmp & ~clear) | set;
+	amd_spi_writereg32(master, idx, tmp);
+}
+
+static void amd_spi_select_chip(struct spi_master *master)
+{
+	struct amd_spi *amd_spi = spi_master_get_devdata(master);
+	u8 chip_select = amd_spi->chip_select;
+
+	amd_spi_setclear_reg8(master, AMD_SPI_ALT_CS_REG, chip_select,
+			      AMD_SPI_ALT_CS_MASK);
+}
+
+static void amd_spi_clear_fifo_ptr(struct spi_master *master)
+{
+	amd_spi_setclear_reg32(master, AMD_SPI_CTRL0_REG, AMD_SPI_FIFO_CLEAR,
+			       AMD_SPI_FIFO_CLEAR);
+}
+
+static void amd_spi_set_opcode(struct spi_master *master, u8 cmd_opcode)
+{
+	amd_spi_setclear_reg32(master, AMD_SPI_CTRL0_REG, cmd_opcode,
+			       AMD_SPI_OPCODE_MASK);
+}
+
+static inline void amd_spi_set_rx_count(struct spi_master *master,
+					u8 rx_count)
+{
+	amd_spi_setclear_reg8(master, AMD_SPI_RX_COUNT_REG, rx_count, 0xff);
+}
+
+static inline void amd_spi_set_tx_count(struct spi_master *master,
+					u8 tx_count)
+{
+	amd_spi_setclear_reg8(master, AMD_SPI_TX_COUNT_REG, tx_count, 0xff);
+}
+
+static inline int amd_spi_busy_wait(struct amd_spi *amd_spi)
+{
+	bool spi_busy;
+	int timeout = 100000;
+
+	/* poll for SPI bus to become idle */
+	spi_busy = (ioread32((u8 __iomem *)amd_spi->io_remap_addr +
+		    AMD_SPI_CTRL0_REG) & AMD_SPI_BUSY) == AMD_SPI_BUSY;
+	while (spi_busy) {
+		usleep_range(10, 20);
+		if (timeout-- < 0)
+			return -ETIMEDOUT;
+
+		spi_busy = (ioread32((u8 __iomem *)amd_spi->io_remap_addr +
+			    AMD_SPI_CTRL0_REG) & AMD_SPI_BUSY) == AMD_SPI_BUSY;
+	}
+
+	return 0;
+}
+
+static void amd_spi_execute_opcode(struct spi_master *master)
+{
+	struct amd_spi *amd_spi = spi_master_get_devdata(master);
+
+	/* Set ExecuteOpCode bit in the CTRL0 register */
+	amd_spi_setclear_reg32(master, AMD_SPI_CTRL0_REG, AMD_SPI_EXEC_CMD,
+			       AMD_SPI_EXEC_CMD);
+
+	amd_spi_busy_wait(amd_spi);
+}
+
+static int amd_spi_master_setup(struct spi_device *spi)
+{
+	struct spi_master *master = spi->master;
+
+	amd_spi_clear_fifo_ptr(master);
+
+	return 0;
+}
+
+static inline int amd_spi_fifo_xfer(struct amd_spi *amd_spi,
+				    struct spi_master *master,
+				    struct spi_message *message)
+{
+	struct spi_transfer *xfer = NULL;
+	u8 cmd_opcode;
+	u8 *buf = NULL;
+	u32 m_cmd = 0;
+	u32 i = 0;
+	u32 tx_len = 0, rx_len = 0;
+
+	list_for_each_entry(xfer, &message->transfers,
+			    transfer_list) {
+		if (xfer->rx_buf)
+			m_cmd = AMD_SPI_XFER_RX;
+		if (xfer->tx_buf)
+			m_cmd = AMD_SPI_XFER_TX;
+
+		if (m_cmd & AMD_SPI_XFER_TX) {
+			buf = (u8 *)xfer->tx_buf;
+			tx_len = xfer->len - 1;
+			cmd_opcode = *(u8 *)xfer->tx_buf;
+			buf++;
+			amd_spi_set_opcode(master, cmd_opcode);
+
+			/* Write data into the FIFO. */
+			for (i = 0; i < tx_len; i++) {
+				iowrite8(buf[i],
+					 ((u8 __iomem *)amd_spi->io_remap_addr +
+					 AMD_SPI_FIFO_BASE + i));
+			}
+
+			amd_spi_set_tx_count(master, tx_len);
+			amd_spi_clear_fifo_ptr(master);
+			/* Execute command */
+			amd_spi_execute_opcode(master);
+		}
+		if (m_cmd & AMD_SPI_XFER_RX) {
+			/*
+			 * Store no. of bytes to be received from
+			 * FIFO
+			 */
+			rx_len = xfer->len;
+			buf = (u8 *)xfer->rx_buf;
+			amd_spi_set_rx_count(master, rx_len);
+			amd_spi_clear_fifo_ptr(master);
+			/* Execute command */
+			amd_spi_execute_opcode(master);
+			/* Read data from FIFO to receive buffer  */
+			for (i = 0; i < rx_len; i++)
+				buf[i] = amd_spi_readreg8(master,
+							  AMD_SPI_FIFO_BASE +
+							  tx_len + i);
+		}
+	}
+
+	/* Update statistics */
+	message->actual_length = tx_len + rx_len + 1;
+	/* complete the transaction */
+	message->status = 0;
+	spi_finalize_current_message(master);
+
+	return 0;
+}
+
+static int amd_spi_master_transfer(struct spi_master *master,
+				   struct spi_message *msg)
+{
+	struct amd_spi *amd_spi = spi_master_get_devdata(master);
+	struct spi_device *spi = msg->spi;
+
+	amd_spi->chip_select = spi->chip_select;
+	amd_spi_select_chip(master);
+
+	/*
+	 * Extract spi_transfers from the spi message and
+	 * program the controller.
+	 */
+	amd_spi_fifo_xfer(amd_spi, master, msg);
+
+	return 0;
+}
+
+static int amd_spi_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct spi_master *master;
+	struct amd_spi *amd_spi;
+	struct resource *res;
+	int err = 0;
+
+	/* Allocate storage for spi_master and driver private data */
+	master = spi_alloc_master(dev, sizeof(struct amd_spi));
+	if (!master) {
+		dev_err(dev, "Error allocating SPI master\n");
+		return -ENOMEM;
+	}
+
+	amd_spi = spi_master_get_devdata(master);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	amd_spi->io_remap_addr = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(amd_spi->io_remap_addr)) {
+		err = PTR_ERR(amd_spi->io_remap_addr);
+		dev_err(dev, "error %d ioremap of SPI registers failed\n", err);
+		goto err_free_master;
+	}
+	dev_dbg(dev, "io_remap_address: %p\n", amd_spi->io_remap_addr);
+
+	/* Initialize the spi_master fields */
+	master->bus_num = 0;
+	master->num_chipselect = 4;
+	master->mode_bits = 0;
+	master->flags = SPI_MASTER_HALF_DUPLEX;
+	master->setup = amd_spi_master_setup;
+	master->transfer_one_message = amd_spi_master_transfer;
+
+	/* Register the controller with SPI framework */
+	err = devm_spi_register_master(dev, master);
+	if (err) {
+		dev_err(dev, "error %d registering SPI controller\n", err);
+		goto err_free_master;
+	}
+
+	return 0;
+
+err_free_master:
+	spi_master_put(master);
+
+	return err;
+}
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id spi_acpi_match[] = {
+	{ "AMDI0061", 0 },
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, spi_acpi_match);
+#endif
+
+static struct platform_driver amd_spi_driver = {
+	.driver = {
+		.name = "amd_spi",
+		.acpi_match_table = ACPI_PTR(spi_acpi_match),
+	},
+	.probe = amd_spi_probe,
+};
+
+module_platform_driver(amd_spi_driver);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Sanjay Mehta <sanju.mehta@amd.com>");
+MODULE_DESCRIPTION("AMD SPI Master Controller Driver");
diff --git a/drivers/spi/spi-ar934x.c b/drivers/spi/spi-ar934x.c
new file mode 100644
index 0000000..def32e0
--- /dev/null
+++ b/drivers/spi/spi-ar934x.c
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// SPI controller driver for Qualcomm Atheros AR934x/QCA95xx SoCs
+//
+// Copyright (C) 2020 Chuanhong Guo <gch981213@gmail.com>
+//
+// Based on spi-mt7621.c:
+// Copyright (C) 2011 Sergiy <piratfm@gmail.com>
+// Copyright (C) 2011-2013 Gabor Juhos <juhosg@openwrt.org>
+// Copyright (C) 2014-2015 Felix Fietkau <nbd@nbd.name>
+
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/spi/spi.h>
+
+#define DRIVER_NAME "spi-ar934x"
+
+#define AR934X_SPI_REG_FS		0x00
+#define AR934X_SPI_ENABLE		BIT(0)
+
+#define AR934X_SPI_REG_IOC		0x08
+#define AR934X_SPI_IOC_INITVAL		0x70000
+
+#define AR934X_SPI_REG_CTRL		0x04
+#define AR934X_SPI_CLK_MASK		GENMASK(5, 0)
+
+#define AR934X_SPI_DATAOUT		0x10
+
+#define AR934X_SPI_REG_SHIFT_CTRL	0x14
+#define AR934X_SPI_SHIFT_EN		BIT(31)
+#define AR934X_SPI_SHIFT_CS(n)		BIT(28 + (n))
+#define AR934X_SPI_SHIFT_TERM		26
+#define AR934X_SPI_SHIFT_VAL(cs, term, count)			\
+	(AR934X_SPI_SHIFT_EN | AR934X_SPI_SHIFT_CS(cs) |	\
+	(term) << AR934X_SPI_SHIFT_TERM | (count))
+
+#define AR934X_SPI_DATAIN 0x18
+
+struct ar934x_spi {
+	struct spi_controller *ctlr;
+	void __iomem *base;
+	struct clk *clk;
+	unsigned int clk_freq;
+};
+
+static inline int ar934x_spi_clk_div(struct ar934x_spi *sp, unsigned int freq)
+{
+	int div = DIV_ROUND_UP(sp->clk_freq, freq * 2) - 1;
+
+	if (div < 0)
+		return 0;
+	else if (div > AR934X_SPI_CLK_MASK)
+		return -EINVAL;
+	else
+		return div;
+}
+
+static int ar934x_spi_setup(struct spi_device *spi)
+{
+	struct ar934x_spi *sp = spi_controller_get_devdata(spi->master);
+
+	if ((spi->max_speed_hz == 0) ||
+	    (spi->max_speed_hz > (sp->clk_freq / 2))) {
+		spi->max_speed_hz = sp->clk_freq / 2;
+	} else if (spi->max_speed_hz < (sp->clk_freq / 128)) {
+		dev_err(&spi->dev, "spi clock is too low\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ar934x_spi_transfer_one_message(struct spi_controller *master,
+					   struct spi_message *m)
+{
+	struct ar934x_spi *sp = spi_controller_get_devdata(master);
+	struct spi_transfer *t = NULL;
+	struct spi_device *spi = m->spi;
+	unsigned long trx_done, trx_cur;
+	int stat = 0;
+	u8 term = 0;
+	int div, i;
+	u32 reg;
+	const u8 *tx_buf;
+	u8 *buf;
+
+	m->actual_length = 0;
+	list_for_each_entry(t, &m->transfers, transfer_list) {
+		if (t->speed_hz)
+			div = ar934x_spi_clk_div(sp, t->speed_hz);
+		else
+			div = ar934x_spi_clk_div(sp, spi->max_speed_hz);
+		if (div < 0) {
+			stat = -EIO;
+			goto msg_done;
+		}
+
+		reg = ioread32(sp->base + AR934X_SPI_REG_CTRL);
+		reg &= ~AR934X_SPI_CLK_MASK;
+		reg |= div;
+		iowrite32(reg, sp->base + AR934X_SPI_REG_CTRL);
+		iowrite32(0, sp->base + AR934X_SPI_DATAOUT);
+
+		for (trx_done = 0; trx_done < t->len; trx_done += 4) {
+			trx_cur = t->len - trx_done;
+			if (trx_cur > 4)
+				trx_cur = 4;
+			else if (list_is_last(&t->transfer_list, &m->transfers))
+				term = 1;
+
+			if (t->tx_buf) {
+				tx_buf = t->tx_buf + trx_done;
+				reg = tx_buf[0];
+				for (i = 1; i < trx_cur; i++)
+					reg = reg << 8 | tx_buf[i];
+				iowrite32(reg, sp->base + AR934X_SPI_DATAOUT);
+			}
+
+			reg = AR934X_SPI_SHIFT_VAL(spi->chip_select, term,
+						   trx_cur * 8);
+			iowrite32(reg, sp->base + AR934X_SPI_REG_SHIFT_CTRL);
+			stat = readl_poll_timeout(
+				sp->base + AR934X_SPI_REG_SHIFT_CTRL, reg,
+				!(reg & AR934X_SPI_SHIFT_EN), 0, 5);
+			if (stat < 0)
+				goto msg_done;
+
+			if (t->rx_buf) {
+				reg = ioread32(sp->base + AR934X_SPI_DATAIN);
+				buf = t->rx_buf + trx_done;
+				for (i = 0; i < trx_cur; i++) {
+					buf[trx_cur - i - 1] = reg & 0xff;
+					reg >>= 8;
+				}
+			}
+		}
+		m->actual_length += t->len;
+	}
+
+msg_done:
+	m->status = stat;
+	spi_finalize_current_message(master);
+
+	return 0;
+}
+
+static const struct of_device_id ar934x_spi_match[] = {
+	{ .compatible = "qca,ar934x-spi" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, ar934x_spi_match);
+
+static int ar934x_spi_probe(struct platform_device *pdev)
+{
+	struct spi_controller *ctlr;
+	struct ar934x_spi *sp;
+	void __iomem *base;
+	struct clk *clk;
+	int ret;
+
+	base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(clk)) {
+		dev_err(&pdev->dev, "failed to get clock\n");
+		return PTR_ERR(clk);
+	}
+
+	ret = clk_prepare_enable(clk);
+	if (ret)
+		return ret;
+
+	ctlr = devm_spi_alloc_master(&pdev->dev, sizeof(*sp));
+	if (!ctlr) {
+		dev_info(&pdev->dev, "failed to allocate spi controller\n");
+		ret = -ENOMEM;
+		goto err_clk_disable;
+	}
+
+	/* disable flash mapping and expose spi controller registers */
+	iowrite32(AR934X_SPI_ENABLE, base + AR934X_SPI_REG_FS);
+	/* restore pins to default state: CSn=1 DO=CLK=0 */
+	iowrite32(AR934X_SPI_IOC_INITVAL, base + AR934X_SPI_REG_IOC);
+
+	ctlr->mode_bits = SPI_LSB_FIRST;
+	ctlr->setup = ar934x_spi_setup;
+	ctlr->transfer_one_message = ar934x_spi_transfer_one_message;
+	ctlr->bits_per_word_mask = SPI_BPW_MASK(8);
+	ctlr->dev.of_node = pdev->dev.of_node;
+	ctlr->num_chipselect = 3;
+
+	dev_set_drvdata(&pdev->dev, ctlr);
+
+	sp = spi_controller_get_devdata(ctlr);
+	sp->base = base;
+	sp->clk = clk;
+	sp->clk_freq = clk_get_rate(clk);
+	sp->ctlr = ctlr;
+
+	ret = spi_register_controller(ctlr);
+	if (!ret)
+		return 0;
+
+err_clk_disable:
+	clk_disable_unprepare(clk);
+	return ret;
+}
+
+static int ar934x_spi_remove(struct platform_device *pdev)
+{
+	struct spi_controller *ctlr;
+	struct ar934x_spi *sp;
+
+	ctlr = dev_get_drvdata(&pdev->dev);
+	sp = spi_controller_get_devdata(ctlr);
+
+	spi_unregister_controller(ctlr);
+	clk_disable_unprepare(sp->clk);
+
+	return 0;
+}
+
+static struct platform_driver ar934x_spi_driver = {
+	.driver = {
+		.name = DRIVER_NAME,
+		.of_match_table = ar934x_spi_match,
+	},
+	.probe = ar934x_spi_probe,
+	.remove = ar934x_spi_remove,
+};
+
+module_platform_driver(ar934x_spi_driver);
+
+MODULE_DESCRIPTION("SPI controller driver for Qualcomm Atheros AR934x/QCA95xx");
+MODULE_AUTHOR("Chuanhong Guo <gch981213@gmail.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:" DRIVER_NAME);
diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c
index e450ee1..d8cc4b2 100644
--- a/drivers/spi/spi-armada-3700.c
+++ b/drivers/spi/spi-armada-3700.c
@@ -276,11 +276,11 @@
 	return -ETIMEDOUT;
 }
 
-static int a3700_spi_init(struct a3700_spi *a3700_spi)
+static void a3700_spi_init(struct a3700_spi *a3700_spi)
 {
 	struct spi_master *master = a3700_spi->master;
 	u32 val;
-	int i, ret = 0;
+	int i;
 
 	/* Reset SPI unit */
 	val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
@@ -311,8 +311,6 @@
 	/* Mask the interrupts and clear cause bits */
 	spireg_write(a3700_spi, A3700_SPI_INT_MASK_REG, 0);
 	spireg_write(a3700_spi, A3700_SPI_INT_STAT_REG, ~0U);
-
-	return ret;
 }
 
 static irqreturn_t a3700_spi_interrupt(int irq, void *dev_id)
@@ -850,7 +848,6 @@
 	platform_set_drvdata(pdev, master);
 
 	spi = spi_master_get_devdata(master);
-	memset(spi, 0, sizeof(struct a3700_spi));
 
 	spi->master = master;
 
@@ -886,9 +883,7 @@
 	master->min_speed_hz = DIV_ROUND_UP(clk_get_rate(spi->clk),
 						A3700_SPI_MAX_PRESCALE);
 
-	ret = a3700_spi_init(spi);
-	if (ret)
-		goto error_clk;
+	a3700_spi_init(spi);
 
 	ret = devm_request_irq(dev, spi->irq, a3700_spi_interrupt, 0,
 			       dev_name(dev), master);
@@ -906,7 +901,7 @@
 	return 0;
 
 error_clk:
-	clk_disable_unprepare(spi->clk);
+	clk_unprepare(spi->clk);
 error:
 	spi_master_put(master);
 out:
diff --git a/drivers/spi/spi-at91-usart.c b/drivers/spi/spi-at91-usart.c
index a40bb2e..8c83526 100644
--- a/drivers/spi/spi-at91-usart.c
+++ b/drivers/spi/spi-at91-usart.c
@@ -132,7 +132,7 @@
 	dma_cap_zero(mask);
 	dma_cap_set(DMA_SLAVE, mask);
 
-	ctlr->dma_tx = dma_request_slave_channel_reason(dev, "tx");
+	ctlr->dma_tx = dma_request_chan(dev, "tx");
 	if (IS_ERR_OR_NULL(ctlr->dma_tx)) {
 		if (IS_ERR(ctlr->dma_tx)) {
 			err = PTR_ERR(ctlr->dma_tx);
@@ -145,7 +145,7 @@
 		goto at91_usart_spi_error_clear;
 	}
 
-	ctlr->dma_rx = dma_request_slave_channel_reason(dev, "rx");
+	ctlr->dma_rx = dma_request_chan(dev, "rx");
 	if (IS_ERR_OR_NULL(ctlr->dma_rx)) {
 		if (IS_ERR(ctlr->dma_rx)) {
 			err = PTR_ERR(ctlr->dma_rx);
@@ -681,13 +681,6 @@
 			   at91_usart_spi_runtime_resume, NULL)
 };
 
-static const struct of_device_id at91_usart_spi_dt_ids[] = {
-	{ .compatible = "microchip,at91sam9g45-usart-spi"},
-	{ /* sentinel */}
-};
-
-MODULE_DEVICE_TABLE(of, at91_usart_spi_dt_ids);
-
 static struct platform_driver at91_usart_spi_driver = {
 	.driver = {
 		.name = "at91_usart_spi",
diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c
index d9711ea..1db43cb 100644
--- a/drivers/spi/spi-atmel.c
+++ b/drivers/spi/spi-atmel.c
@@ -16,7 +16,6 @@
 #include <linux/interrupt.h>
 #include <linux/spi/spi.h>
 #include <linux/slab.h>
-#include <linux/platform_data/dma-atmel.h>
 #include <linux/of.h>
 
 #include <linux/io.h>
@@ -222,37 +221,13 @@
 	  | SPI_BF(name, value))
 
 /* Register access macros */
-#ifdef CONFIG_AVR32
-#define spi_readl(port, reg) \
-	__raw_readl((port)->regs + SPI_##reg)
-#define spi_writel(port, reg, value) \
-	__raw_writel((value), (port)->regs + SPI_##reg)
-
-#define spi_readw(port, reg) \
-	__raw_readw((port)->regs + SPI_##reg)
-#define spi_writew(port, reg, value) \
-	__raw_writew((value), (port)->regs + SPI_##reg)
-
-#define spi_readb(port, reg) \
-	__raw_readb((port)->regs + SPI_##reg)
-#define spi_writeb(port, reg, value) \
-	__raw_writeb((value), (port)->regs + SPI_##reg)
-#else
 #define spi_readl(port, reg) \
 	readl_relaxed((port)->regs + SPI_##reg)
 #define spi_writel(port, reg, value) \
 	writel_relaxed((value), (port)->regs + SPI_##reg)
-
-#define spi_readw(port, reg) \
-	readw_relaxed((port)->regs + SPI_##reg)
 #define spi_writew(port, reg, value) \
 	writew_relaxed((value), (port)->regs + SPI_##reg)
 
-#define spi_readb(port, reg) \
-	readb_relaxed((port)->regs + SPI_##reg)
-#define spi_writeb(port, reg, value) \
-	writeb_relaxed((value), (port)->regs + SPI_##reg)
-#endif
 /* use PIO for small transfers, avoiding DMA setup/teardown overhead and
  * cache operations; better heuristics consider wordsize and bitrate.
  */
@@ -299,16 +274,16 @@
 
 	bool			use_dma;
 	bool			use_pdc;
-	bool			use_cs_gpios;
 
 	bool			keep_cs;
 
 	u32			fifo_size;
+	u8			native_cs_free;
+	u8			native_cs_for_gpio;
 };
 
 /* Controller-specific per-slave state */
 struct atmel_spi_device {
-	struct gpio_desc	*npcs_pin;
 	u32			csr;
 };
 
@@ -335,11 +310,9 @@
  * transmitted")  Not so!  Workaround uses nCSx pins as GPIOs; or newer
  * controllers have CSAAT and friends.
  *
- * Since the CSAAT functionality is a bit weird on newer controllers as
- * well, we use GPIO to control nCSx pins on all controllers, updating
- * MR.PCS to avoid confusing the controller.  Using GPIOs also lets us
- * support active-high chipselects despite the controller's belief that
- * only active-low devices/systems exists.
+ * Even controller newer than ar91rm9200, using GPIOs can make sens as
+ * it lets us support active-high chipselects despite the controller's
+ * belief that only active-low devices/systems exists.
  *
  * However, at91rm9200 has a second erratum whereby nCS0 doesn't work
  * right when driven with GPIO.  ("Mode Fault does not allow more than one
@@ -351,30 +324,36 @@
 static void cs_activate(struct atmel_spi *as, struct spi_device *spi)
 {
 	struct atmel_spi_device *asd = spi->controller_state;
+	int chip_select;
 	u32 mr;
 
+	if (spi->cs_gpiod)
+		chip_select = as->native_cs_for_gpio;
+	else
+		chip_select = spi->chip_select;
+
 	if (atmel_spi_is_v2(as)) {
-		spi_writel(as, CSR0 + 4 * spi->chip_select, asd->csr);
+		spi_writel(as, CSR0 + 4 * chip_select, asd->csr);
 		/* For the low SPI version, there is a issue that PDC transfer
 		 * on CS1,2,3 needs SPI_CSR0.BITS config as SPI_CSR1,2,3.BITS
 		 */
 		spi_writel(as, CSR0, asd->csr);
 		if (as->caps.has_wdrbt) {
 			spi_writel(as, MR,
-					SPI_BF(PCS, ~(0x01 << spi->chip_select))
+					SPI_BF(PCS, ~(0x01 << chip_select))
 					| SPI_BIT(WDRBT)
 					| SPI_BIT(MODFDIS)
 					| SPI_BIT(MSTR));
 		} else {
 			spi_writel(as, MR,
-					SPI_BF(PCS, ~(0x01 << spi->chip_select))
+					SPI_BF(PCS, ~(0x01 << chip_select))
 					| SPI_BIT(MODFDIS)
 					| SPI_BIT(MSTR));
 		}
 
 		mr = spi_readl(as, MR);
-		if (as->use_cs_gpios)
-			gpiod_set_value(asd->npcs_pin, 1);
+		if (spi->cs_gpiod)
+			gpiod_set_value(spi->cs_gpiod, 1);
 	} else {
 		u32 cpol = (spi->mode & SPI_CPOL) ? SPI_BIT(CPOL) : 0;
 		int i;
@@ -389,9 +368,9 @@
 		}
 
 		mr = spi_readl(as, MR);
-		mr = SPI_BFINS(PCS, ~(1 << spi->chip_select), mr);
-		if (as->use_cs_gpios && spi->chip_select != 0)
-			gpiod_set_value(asd->npcs_pin, 1);
+		mr = SPI_BFINS(PCS, ~(1 << chip_select), mr);
+		if (spi->cs_gpiod)
+			gpiod_set_value(spi->cs_gpiod, 1);
 		spi_writel(as, MR, mr);
 	}
 
@@ -400,24 +379,29 @@
 
 static void cs_deactivate(struct atmel_spi *as, struct spi_device *spi)
 {
-	struct atmel_spi_device *asd = spi->controller_state;
+	int chip_select;
 	u32 mr;
 
+	if (spi->cs_gpiod)
+		chip_select = as->native_cs_for_gpio;
+	else
+		chip_select = spi->chip_select;
+
 	/* only deactivate *this* device; sometimes transfers to
 	 * another device may be active when this routine is called.
 	 */
 	mr = spi_readl(as, MR);
-	if (~SPI_BFEXT(PCS, mr) & (1 << spi->chip_select)) {
+	if (~SPI_BFEXT(PCS, mr) & (1 << chip_select)) {
 		mr = SPI_BFINS(PCS, 0xf, mr);
 		spi_writel(as, MR, mr);
 	}
 
 	dev_dbg(&spi->dev, "DEactivate NPCS, mr %08x\n", mr);
 
-	if (!as->use_cs_gpios)
+	if (!spi->cs_gpiod)
 		spi_writel(as, CR, SPI_BIT(LASTXFER));
-	else if (atmel_spi_is_v2(as) || spi->chip_select != 0)
-		gpiod_set_value(asd->npcs_pin, 0);
+	else
+		gpiod_set_value(spi->cs_gpiod, 0);
 }
 
 static void atmel_spi_lock(struct atmel_spi *as) __acquires(&as->lock)
@@ -526,29 +510,21 @@
 	dma_cap_zero(mask);
 	dma_cap_set(DMA_SLAVE, mask);
 
-	master->dma_tx = dma_request_slave_channel_reason(dev, "tx");
+	master->dma_tx = dma_request_chan(dev, "tx");
 	if (IS_ERR(master->dma_tx)) {
-		err = PTR_ERR(master->dma_tx);
-		if (err == -EPROBE_DEFER) {
-			dev_warn(dev, "no DMA channel available at the moment\n");
-			goto error_clear;
-		}
-		dev_err(dev,
-			"DMA TX channel not available, SPI unable to use DMA\n");
-		err = -EBUSY;
+		err = dev_err_probe(dev, PTR_ERR(master->dma_tx),
+				    "No TX DMA channel, DMA is disabled\n");
 		goto error_clear;
 	}
 
-	/*
-	 * No reason to check EPROBE_DEFER here since we have already requested
-	 * tx channel. If it fails here, it's for another reason.
-	 */
-	master->dma_rx = dma_request_slave_channel(dev, "rx");
-
-	if (!master->dma_rx) {
-		dev_err(dev,
-			"DMA RX channel not available, SPI unable to use DMA\n");
-		err = -EBUSY;
+	master->dma_rx = dma_request_chan(dev, "rx");
+	if (IS_ERR(master->dma_rx)) {
+		err = PTR_ERR(master->dma_rx);
+		/*
+		 * No reason to check EPROBE_DEFER here since we have already
+		 * requested tx channel.
+		 */
+		dev_err(dev, "No RX DMA channel, DMA is disabled\n");
 		goto error;
 	}
 
@@ -563,7 +539,7 @@
 
 	return 0;
 error:
-	if (master->dma_rx)
+	if (!IS_ERR(master->dma_rx))
 		dma_release_channel(master->dma_rx);
 	if (!IS_ERR(master->dma_tx))
 		dma_release_channel(master->dma_tx);
@@ -728,6 +704,7 @@
 static int atmel_spi_next_xfer_dma_submit(struct spi_master *master,
 				struct spi_transfer *xfer,
 				u32 *plen)
+	__must_hold(&as->lock)
 {
 	struct atmel_spi	*as = spi_master_get_devdata(master);
 	struct dma_chan		*rxchan = master->dma_rx;
@@ -843,6 +820,12 @@
 {
 	u32			scbr, csr;
 	unsigned long		bus_hz;
+	int chip_select;
+
+	if (spi->cs_gpiod)
+		chip_select = as->native_cs_for_gpio;
+	else
+		chip_select = spi->chip_select;
 
 	/* v1 chips start out at half the peripheral bus speed. */
 	bus_hz = as->spi_clk;
@@ -871,9 +854,10 @@
 			xfer->speed_hz, scbr, bus_hz);
 		return -EINVAL;
 	}
-	csr = spi_readl(as, CSR0 + 4 * spi->chip_select);
+	csr = spi_readl(as, CSR0 + 4 * chip_select);
 	csr = SPI_BFINS(SCBR, scbr, csr);
-	spi_writel(as, CSR0 + 4 * spi->chip_select, csr);
+	spi_writel(as, CSR0 + 4 * chip_select, csr);
+	xfer->effective_speed_hz = bus_hz / scbr;
 
 	return 0;
 }
@@ -1172,38 +1156,105 @@
 	return ret;
 }
 
+static int atmel_word_delay_csr(struct spi_device *spi, struct atmel_spi *as)
+{
+	struct spi_delay *delay = &spi->word_delay;
+	u32 value = delay->value;
+
+	switch (delay->unit) {
+	case SPI_DELAY_UNIT_NSECS:
+		value /= 1000;
+		break;
+	case SPI_DELAY_UNIT_USECS:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return (as->spi_clk / 1000000 * value) >> 5;
+}
+
+static void initialize_native_cs_for_gpio(struct atmel_spi *as)
+{
+	int i;
+	struct spi_master *master = platform_get_drvdata(as->pdev);
+
+	if (!as->native_cs_free)
+		return; /* already initialized */
+
+	if (!master->cs_gpiods)
+		return; /* No CS GPIO */
+
+	/*
+	 * On the first version of the controller (AT91RM9200), CS0
+	 * can't be used associated with GPIO
+	 */
+	if (atmel_spi_is_v2(as))
+		i = 0;
+	else
+		i = 1;
+
+	for (; i < 4; i++)
+		if (master->cs_gpiods[i])
+			as->native_cs_free |= BIT(i);
+
+	if (as->native_cs_free)
+		as->native_cs_for_gpio = ffs(as->native_cs_free);
+}
+
 static int atmel_spi_setup(struct spi_device *spi)
 {
 	struct atmel_spi	*as;
 	struct atmel_spi_device	*asd;
 	u32			csr;
 	unsigned int		bits = spi->bits_per_word;
+	int chip_select;
+	int			word_delay_csr;
 
 	as = spi_master_get_devdata(spi->master);
 
 	/* see notes above re chipselect */
-	if (!as->use_cs_gpios && (spi->mode & SPI_CS_HIGH)) {
+	if (!spi->cs_gpiod && (spi->mode & SPI_CS_HIGH)) {
 		dev_warn(&spi->dev, "setup: non GPIO CS can't be active-high\n");
 		return -EINVAL;
 	}
 
+	/* Setup() is called during spi_register_controller(aka
+	 * spi_register_master) but after all membmers of the cs_gpiod
+	 * array have been filled, so we can looked for which native
+	 * CS will be free for using with GPIO
+	 */
+	initialize_native_cs_for_gpio(as);
+
+	if (spi->cs_gpiod && as->native_cs_free) {
+		dev_err(&spi->dev,
+			"No native CS available to support this GPIO CS\n");
+		return -EBUSY;
+	}
+
+	if (spi->cs_gpiod)
+		chip_select = as->native_cs_for_gpio;
+	else
+		chip_select = spi->chip_select;
+
 	csr = SPI_BF(BITS, bits - 8);
 	if (spi->mode & SPI_CPOL)
 		csr |= SPI_BIT(CPOL);
 	if (!(spi->mode & SPI_CPHA))
 		csr |= SPI_BIT(NCPHA);
-	if (!as->use_cs_gpios)
-		csr |= SPI_BIT(CSAAT);
 
-	/* DLYBS is mostly irrelevant since we manage chipselect using GPIOs.
-	 */
+	if (!spi->cs_gpiod)
+		csr |= SPI_BIT(CSAAT);
 	csr |= SPI_BF(DLYBS, 0);
 
+	word_delay_csr = atmel_word_delay_csr(spi, as);
+	if (word_delay_csr < 0)
+		return word_delay_csr;
+
 	/* DLYBCT adds delays between words.  This is useful for slow devices
 	 * that need a bit of time to setup the next transfer.
 	 */
-	csr |= SPI_BF(DLYBCT,
-			(as->spi_clk / 1000000 * spi->word_delay_usecs) >> 5);
+	csr |= SPI_BF(DLYBCT, word_delay_csr);
 
 	asd = spi->controller_state;
 	if (!asd) {
@@ -1211,21 +1262,6 @@
 		if (!asd)
 			return -ENOMEM;
 
-		/*
-		 * If use_cs_gpios is true this means that we have "cs-gpios"
-		 * defined in the device tree node so we should have
-		 * gotten the GPIO lines from the device tree inside the
-		 * SPI core. Warn if this is not the case but continue since
-		 * CS GPIOs are after all optional.
-		 */
-		if (as->use_cs_gpios) {
-			if (!spi->cs_gpiod) {
-				dev_err(&spi->dev,
-					"host claims to use CS GPIOs but no CS found in DT by the SPI core\n");
-			}
-			asd->npcs_pin = spi->cs_gpiod;
-		}
-
 		spi->controller_state = asd;
 	}
 
@@ -1236,7 +1272,7 @@
 		bits, spi->mode, spi->chip_select, csr);
 
 	if (!atmel_spi_is_v2(as))
-		spi_writel(as, CSR0 + 4 * spi->chip_select, csr);
+		spi_writel(as, CSR0 + 4 * chip_select, csr);
 
 	return 0;
 }
@@ -1365,8 +1401,7 @@
 		&& as->use_pdc)
 		atmel_spi_dma_unmap_xfer(master, xfer);
 
-	if (xfer->delay_usecs)
-		udelay(xfer->delay_usecs);
+	spi_transfer_delay_exec(xfer);
 
 	if (xfer->cs_change) {
 		if (list_is_last(&xfer->transfer_list,
@@ -1510,10 +1545,9 @@
 		return PTR_ERR(clk);
 
 	/* setup spi core then atmel-specific driver state */
-	ret = -ENOMEM;
 	master = spi_alloc_master(&pdev->dev, sizeof(*as));
 	if (!master)
-		goto out_free;
+		return -ENOMEM;
 
 	/* the spi->mode bits understood by this driver: */
 	master->use_gpio_descriptors = true;
@@ -1521,7 +1555,7 @@
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(8, 16);
 	master->dev.of_node = pdev->dev.of_node;
 	master->bus_num = pdev->id;
-	master->num_chipselect = master->dev.of_node ? 0 : 4;
+	master->num_chipselect = 4;
 	master->setup = atmel_spi_setup;
 	master->flags = (SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX);
 	master->transfer_one_message = atmel_spi_transfer_one_message;
@@ -1549,19 +1583,6 @@
 
 	atmel_get_caps(as);
 
-	/*
-	 * If there are chip selects in the device tree, those will be
-	 * discovered by the SPI core when registering the SPI master
-	 * and assigned to each SPI device.
-	 */
-	as->use_cs_gpios = true;
-	if (atmel_spi_is_v2(as) &&
-	    pdev->dev.of_node &&
-	    !of_get_property(pdev->dev.of_node, "cs-gpios", NULL)) {
-		as->use_cs_gpios = false;
-		master->num_chipselect = 4;
-	}
-
 	as->use_dma = false;
 	as->use_pdc = false;
 	if (as->caps.has_dma_support) {
@@ -1655,7 +1676,6 @@
 	clk_disable_unprepare(clk);
 out_free_irq:
 out_unmap_regs:
-out_free:
 	spi_master_put(master);
 	return ret;
 }
@@ -1769,20 +1789,18 @@
 #define ATMEL_SPI_PM_OPS	NULL
 #endif
 
-#if defined(CONFIG_OF)
 static const struct of_device_id atmel_spi_dt_ids[] = {
 	{ .compatible = "atmel,at91rm9200-spi" },
 	{ /* sentinel */ }
 };
 
 MODULE_DEVICE_TABLE(of, atmel_spi_dt_ids);
-#endif
 
 static struct platform_driver atmel_spi_driver = {
 	.driver		= {
 		.name	= "atmel_spi",
 		.pm	= ATMEL_SPI_PM_OPS,
-		.of_match_table	= of_match_ptr(atmel_spi_dt_ids),
+		.of_match_table	= atmel_spi_dt_ids,
 	},
 	.probe		= atmel_spi_probe,
 	.remove		= atmel_spi_remove,
diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c
index 74842f6..af86e6d 100644
--- a/drivers/spi/spi-axi-spi-engine.c
+++ b/drivers/spi/spi-axi-spi-engine.c
@@ -163,10 +163,21 @@
 }
 
 static void spi_engine_gen_sleep(struct spi_engine_program *p, bool dry,
-	struct spi_engine *spi_engine, unsigned int clk_div, unsigned int delay)
+	struct spi_engine *spi_engine, unsigned int clk_div,
+	struct spi_transfer *xfer)
 {
 	unsigned int spi_clk = clk_get_rate(spi_engine->ref_clk);
 	unsigned int t;
+	int delay;
+
+	if (xfer->delay_usecs) {
+		delay = xfer->delay_usecs;
+	} else {
+		delay = spi_delay_to_ns(&xfer->delay, xfer);
+		if (delay < 0)
+			return;
+		delay /= 1000;
+	}
 
 	if (delay == 0)
 		return;
@@ -218,8 +229,7 @@
 			spi_engine_gen_cs(p, dry, spi, true);
 
 		spi_engine_gen_xfer(p, dry, xfer);
-		spi_engine_gen_sleep(p, dry, spi_engine, clk_div,
-			xfer->delay_usecs);
+		spi_engine_gen_sleep(p, dry, spi_engine, clk_div, xfer);
 
 		cs_change = xfer->cs_change;
 		if (list_is_last(&xfer->transfer_list, &msg->transfers))
@@ -479,22 +489,6 @@
 
 	spin_lock_init(&spi_engine->lock);
 
-	spi_engine->base = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(spi_engine->base)) {
-		ret = PTR_ERR(spi_engine->base);
-		goto err_put_master;
-	}
-
-	version = readl(spi_engine->base + SPI_ENGINE_REG_VERSION);
-	if (SPI_ENGINE_VERSION_MAJOR(version) != 1) {
-		dev_err(&pdev->dev, "Unsupported peripheral version %u.%u.%c\n",
-			SPI_ENGINE_VERSION_MAJOR(version),
-			SPI_ENGINE_VERSION_MINOR(version),
-			SPI_ENGINE_VERSION_PATCH(version));
-		ret = -ENODEV;
-		goto err_put_master;
-	}
-
 	spi_engine->clk = devm_clk_get(&pdev->dev, "s_axi_aclk");
 	if (IS_ERR(spi_engine->clk)) {
 		ret = PTR_ERR(spi_engine->clk);
@@ -515,6 +509,22 @@
 	if (ret)
 		goto err_clk_disable;
 
+	spi_engine->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(spi_engine->base)) {
+		ret = PTR_ERR(spi_engine->base);
+		goto err_ref_clk_disable;
+	}
+
+	version = readl(spi_engine->base + SPI_ENGINE_REG_VERSION);
+	if (SPI_ENGINE_VERSION_MAJOR(version) != 1) {
+		dev_err(&pdev->dev, "Unsupported peripheral version %u.%u.%c\n",
+			SPI_ENGINE_VERSION_MAJOR(version),
+			SPI_ENGINE_VERSION_MINOR(version),
+			SPI_ENGINE_VERSION_PATCH(version));
+		ret = -ENODEV;
+		goto err_ref_clk_disable;
+	}
+
 	writel_relaxed(0x00, spi_engine->base + SPI_ENGINE_REG_RESET);
 	writel_relaxed(0xff, spi_engine->base + SPI_ENGINE_REG_INT_PENDING);
 	writel_relaxed(0x00, spi_engine->base + SPI_ENGINE_REG_INT_ENABLE);
diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c
index 8a4be34..4a80f04 100644
--- a/drivers/spi/spi-bcm-qspi.c
+++ b/drivers/spi/spi-bcm-qspi.c
@@ -91,6 +91,7 @@
 #define MSPI_MSPI_STATUS			0x020
 #define MSPI_CPTQP				0x024
 #define MSPI_SPCR3				0x028
+#define MSPI_REV				0x02c
 #define MSPI_TXRAM				0x040
 #define MSPI_RXRAM				0x0c0
 #define MSPI_CDRAM				0x140
@@ -106,14 +107,22 @@
 #define MSPI_SPCR2_SPE				BIT(6)
 #define MSPI_SPCR2_CONT_AFTER_CMD		BIT(7)
 
+#define MSPI_SPCR3_FASTBR			BIT(0)
+#define MSPI_SPCR3_FASTDT			BIT(1)
+#define MSPI_SPCR3_SYSCLKSEL_MASK		GENMASK(11, 10)
+#define MSPI_SPCR3_SYSCLKSEL_27			(MSPI_SPCR3_SYSCLKSEL_MASK & \
+						 ~(BIT(10) | BIT(11)))
+#define MSPI_SPCR3_SYSCLKSEL_108		(MSPI_SPCR3_SYSCLKSEL_MASK & \
+						 BIT(11))
+
 #define MSPI_MSPI_STATUS_SPIF			BIT(0)
 
 #define INTR_BASE_BIT_SHIFT			0x02
 #define INTR_COUNT				0x07
 
 #define NUM_CHIPSELECT				4
-#define QSPI_SPBR_MIN				8U
 #define QSPI_SPBR_MAX				255U
+#define MSPI_BASE_FREQ				27000000UL
 
 #define OPCODE_DIOR				0xBB
 #define OPCODE_QIOR				0xEB
@@ -217,6 +226,9 @@
 	struct bcm_qspi_dev_id *dev_ids;
 	struct completion mspi_done;
 	struct completion bspi_done;
+	u8 mspi_maj_rev;
+	u8 mspi_min_rev;
+	bool mspi_spcr3_sysclk;
 };
 
 static inline bool has_bspi(struct bcm_qspi *qspi)
@@ -224,6 +236,36 @@
 	return qspi->bspi_mode;
 }
 
+/* hardware supports spcr3 and fast baud-rate  */
+static inline bool bcm_qspi_has_fastbr(struct bcm_qspi *qspi)
+{
+	if (!has_bspi(qspi) &&
+	    ((qspi->mspi_maj_rev >= 1) &&
+	     (qspi->mspi_min_rev >= 5)))
+		return true;
+
+	return false;
+}
+
+/* hardware supports sys clk 108Mhz  */
+static inline bool bcm_qspi_has_sysclk_108(struct bcm_qspi *qspi)
+{
+	if (!has_bspi(qspi) && (qspi->mspi_spcr3_sysclk ||
+	    ((qspi->mspi_maj_rev >= 1) &&
+	     (qspi->mspi_min_rev >= 6))))
+		return true;
+
+	return false;
+}
+
+static inline int bcm_qspi_spbr_min(struct bcm_qspi *qspi)
+{
+	if (bcm_qspi_has_fastbr(qspi))
+		return 1;
+	else
+		return 8;
+}
+
 /* Read qspi controller register*/
 static inline u32 bcm_qspi_read(struct bcm_qspi *qspi, enum base_type type,
 				unsigned int offset)
@@ -509,7 +551,7 @@
 	u32 rd = 0;
 	u32 wr = 0;
 
-	if (qspi->base[CHIP_SELECT]) {
+	if (cs >= 0 && qspi->base[CHIP_SELECT]) {
 		rd = bcm_qspi_read(qspi, CHIP_SELECT, 0);
 		wr = (rd & ~0xff) | (1 << cs);
 		if (rd == wr)
@@ -531,16 +573,39 @@
 	if (xp->speed_hz)
 		spbr = qspi->base_clk / (2 * xp->speed_hz);
 
-	spcr = clamp_val(spbr, QSPI_SPBR_MIN, QSPI_SPBR_MAX);
+	spcr = clamp_val(spbr, bcm_qspi_spbr_min(qspi), QSPI_SPBR_MAX);
 	bcm_qspi_write(qspi, MSPI, MSPI_SPCR0_LSB, spcr);
 
-	spcr = MSPI_MASTER_BIT;
+	if (!qspi->mspi_maj_rev)
+		/* legacy controller */
+		spcr = MSPI_MASTER_BIT;
+	else
+		spcr = 0;
+
 	/* for 16 bit the data should be zero */
 	if (xp->bits_per_word != 16)
 		spcr |= xp->bits_per_word << 2;
 	spcr |= xp->mode & 3;
+
 	bcm_qspi_write(qspi, MSPI, MSPI_SPCR0_MSB, spcr);
 
+	if (bcm_qspi_has_fastbr(qspi)) {
+		spcr = 0;
+
+		/* enable fastbr */
+		spcr |=	MSPI_SPCR3_FASTBR;
+
+		if (bcm_qspi_has_sysclk_108(qspi)) {
+			/* SYSCLK_108 */
+			spcr |= MSPI_SPCR3_SYSCLKSEL_108;
+			qspi->base_clk = MSPI_BASE_FREQ * 4;
+			/* Change spbr as we changed sysclk */
+			bcm_qspi_write(qspi, MSPI, MSPI_SPCR0_LSB, 4);
+		}
+
+		bcm_qspi_write(qspi, MSPI, MSPI_SPCR3, spcr);
+	}
+
 	qspi->last_parms = *xp;
 }
 
@@ -612,19 +677,15 @@
 		if (qt->trans->cs_change &&
 		    (flags & TRANS_STATUS_BREAK_CS_CHANGE))
 			ret |= TRANS_STATUS_BREAK_CS_CHANGE;
-		if (ret)
-			goto done;
 
-		dev_dbg(&qspi->pdev->dev, "advance msg exit\n");
 		if (bcm_qspi_mspi_transfer_is_last(qspi, qt))
-			ret = TRANS_STATUS_BREAK_EOM;
+			ret |= TRANS_STATUS_BREAK_EOM;
 		else
-			ret = TRANS_STATUS_BREAK_NO_BYTES;
+			ret |= TRANS_STATUS_BREAK_NO_BYTES;
 
 		qt->trans = NULL;
 	}
 
-done:
 	dev_dbg(&qspi->pdev->dev, "trans %p len %d byte %d ret %x\n",
 		qt->trans, qt->trans ? qt->trans->len : 0, qt->byte, ret);
 	return ret;
@@ -771,7 +832,16 @@
 	bcm_qspi_write(qspi, MSPI, MSPI_NEWQP, 0);
 	bcm_qspi_write(qspi, MSPI, MSPI_ENDQP, slot - 1);
 
-	if (tstatus & TRANS_STATUS_BREAK_DESELECT) {
+	/*
+	 *  case 1) EOM =1, cs_change =0: SSb inactive
+	 *  case 2) EOM =1, cs_change =1: SSb stay active
+	 *  case 3) EOM =0, cs_change =0: SSb stay active
+	 *  case 4) EOM =0, cs_change =1: SSb inactive
+	 */
+	if (((tstatus & TRANS_STATUS_BREAK_DESELECT)
+	     == TRANS_STATUS_BREAK_CS_CHANGE) ||
+	    ((tstatus & TRANS_STATUS_BREAK_DESELECT)
+	     == TRANS_STATUS_BREAK_EOM)) {
 		mspi_cdram = read_cdram_slot(qspi, slot - 1) &
 			~MSPI_CDRAM_CONT_BIT;
 		write_cdram_slot(qspi, slot - 1, mspi_cdram);
@@ -803,7 +873,8 @@
 			return -EIO;
 
 	from = op->addr.val;
-	bcm_qspi_chip_select(qspi, spi->chip_select);
+	if (!spi->cs_gpiod)
+		bcm_qspi_chip_select(qspi, spi->chip_select);
 	bcm_qspi_write(qspi, MSPI, MSPI_WRITE_LOCK, 0);
 
 	/*
@@ -882,7 +953,8 @@
 	int slots;
 	unsigned long timeo = msecs_to_jiffies(100);
 
-	bcm_qspi_chip_select(qspi, spi->chip_select);
+	if (!spi->cs_gpiod)
+		bcm_qspi_chip_select(qspi, spi->chip_select);
 	qspi->trans_pos.trans = trans;
 	qspi->trans_pos.byte = 0;
 
@@ -1178,18 +1250,58 @@
 
 static void bcm_qspi_hw_uninit(struct bcm_qspi *qspi)
 {
+	u32 status = bcm_qspi_read(qspi, MSPI, MSPI_MSPI_STATUS);
+
 	bcm_qspi_write(qspi, MSPI, MSPI_SPCR2, 0);
 	if (has_bspi(qspi))
 		bcm_qspi_write(qspi, MSPI, MSPI_WRITE_LOCK, 0);
 
+	/* clear interrupt */
+	bcm_qspi_write(qspi, MSPI, MSPI_MSPI_STATUS, status & ~1);
 }
 
 static const struct spi_controller_mem_ops bcm_qspi_mem_ops = {
 	.exec_op = bcm_qspi_exec_mem_op,
 };
 
+struct bcm_qspi_data {
+	bool	has_mspi_rev;
+	bool	has_spcr3_sysclk;
+};
+
+static const struct bcm_qspi_data bcm_qspi_no_rev_data = {
+	.has_mspi_rev	= false,
+	.has_spcr3_sysclk = false,
+};
+
+static const struct bcm_qspi_data bcm_qspi_rev_data = {
+	.has_mspi_rev	= true,
+	.has_spcr3_sysclk = false,
+};
+
+static const struct bcm_qspi_data bcm_qspi_spcr3_data = {
+	.has_mspi_rev	= true,
+	.has_spcr3_sysclk = true,
+};
+
 static const struct of_device_id bcm_qspi_of_match[] = {
-	{ .compatible = "brcm,spi-bcm-qspi" },
+	{
+		.compatible = "brcm,spi-bcm7445-qspi",
+		.data = &bcm_qspi_rev_data,
+
+	},
+	{
+		.compatible = "brcm,spi-bcm-qspi",
+		.data = &bcm_qspi_no_rev_data,
+	},
+	{
+		.compatible = "brcm,spi-bcm7216-qspi",
+		.data = &bcm_qspi_spcr3_data,
+	},
+	{
+		.compatible = "brcm,spi-bcm7278-qspi",
+		.data = &bcm_qspi_spcr3_data,
+	},
 	{},
 };
 MODULE_DEVICE_TABLE(of, bcm_qspi_of_match);
@@ -1197,12 +1309,15 @@
 int bcm_qspi_probe(struct platform_device *pdev,
 		   struct bcm_qspi_soc_intc *soc_intc)
 {
+	const struct of_device_id *of_id = NULL;
+	const struct bcm_qspi_data *data;
 	struct device *dev = &pdev->dev;
 	struct bcm_qspi *qspi;
 	struct spi_master *master;
 	struct resource *res;
 	int irq, ret = 0, num_ints = 0;
 	u32 val;
+	u32 rev = 0;
 	const char *name = NULL;
 	int num_irqs = ARRAY_SIZE(qspi_irq_tab);
 
@@ -1210,9 +1325,12 @@
 	if (!dev->of_node)
 		return -ENODEV;
 
-	if (!of_match_node(bcm_qspi_of_match, dev->of_node))
+	of_id = of_match_node(bcm_qspi_of_match, dev->of_node);
+	if (!of_id)
 		return -ENODEV;
 
+	data = of_id->data;
+
 	master = devm_spi_alloc_master(dev, sizeof(struct bcm_qspi));
 	if (!master) {
 		dev_err(dev, "error allocating spi_master\n");
@@ -1239,6 +1357,7 @@
 	master->cleanup = bcm_qspi_cleanup;
 	master->dev.of_node = dev->of_node;
 	master->num_chipselect = NUM_CHIPSELECT;
+	master->use_gpio_descriptors = true;
 
 	qspi->big_endian = of_device_is_big_endian(dev->of_node);
 
@@ -1282,12 +1401,53 @@
 	if (!qspi->dev_ids)
 		return -ENOMEM;
 
+	/*
+	 * Some SoCs integrate spi controller (e.g., its interrupt bits)
+	 * in specific ways
+	 */
+	if (soc_intc) {
+		qspi->soc_intc = soc_intc;
+		soc_intc->bcm_qspi_int_set(soc_intc, MSPI_DONE, true);
+	} else {
+		qspi->soc_intc = NULL;
+	}
+
+	if (qspi->clk) {
+		ret = clk_prepare_enable(qspi->clk);
+		if (ret) {
+			dev_err(dev, "failed to prepare clock\n");
+			goto qspi_probe_err;
+		}
+		qspi->base_clk = clk_get_rate(qspi->clk);
+	} else {
+		qspi->base_clk = MSPI_BASE_FREQ;
+	}
+
+	if (data->has_mspi_rev) {
+		rev = bcm_qspi_read(qspi, MSPI, MSPI_REV);
+		/* some older revs do not have a MSPI_REV register */
+		if ((rev & 0xff) == 0xff)
+			rev = 0;
+	}
+
+	qspi->mspi_maj_rev = (rev >> 4) & 0xf;
+	qspi->mspi_min_rev = rev & 0xf;
+	qspi->mspi_spcr3_sysclk = data->has_spcr3_sysclk;
+
+	qspi->max_speed_hz = qspi->base_clk / (bcm_qspi_spbr_min(qspi) * 2);
+
+	/*
+	 * On SW resets it is possible to have the mask still enabled
+	 * Need to disable the mask and clear the status while we init
+	 */
+	bcm_qspi_hw_uninit(qspi);
+
 	for (val = 0; val < num_irqs; val++) {
 		irq = -1;
 		name = qspi_irq_tab[val].irq_name;
 		if (qspi_irq_tab[val].irq_source == SINGLE_L2) {
 			/* get the l2 interrupts */
-			irq = platform_get_irq_byname(pdev, name);
+			irq = platform_get_irq_byname_optional(pdev, name);
 		} else if (!num_ints && soc_intc) {
 			/* all mspi, bspi intrs muxed to one L1 intr */
 			irq = platform_get_irq(pdev, 0);
@@ -1300,7 +1460,7 @@
 					       &qspi->dev_ids[val]);
 			if (ret < 0) {
 				dev_err(&pdev->dev, "IRQ %s not found\n", name);
-				goto qspi_probe_err;
+				goto qspi_unprepare_err;
 			}
 
 			qspi->dev_ids[val].dev = qspi;
@@ -1315,29 +1475,9 @@
 	if (!num_ints) {
 		dev_err(&pdev->dev, "no IRQs registered, cannot init driver\n");
 		ret = -EINVAL;
-		goto qspi_probe_err;
+		goto qspi_unprepare_err;
 	}
 
-	/*
-	 * Some SoCs integrate spi controller (e.g., its interrupt bits)
-	 * in specific ways
-	 */
-	if (soc_intc) {
-		qspi->soc_intc = soc_intc;
-		soc_intc->bcm_qspi_int_set(soc_intc, MSPI_DONE, true);
-	} else {
-		qspi->soc_intc = NULL;
-	}
-
-	ret = clk_prepare_enable(qspi->clk);
-	if (ret) {
-		dev_err(dev, "failed to prepare clock\n");
-		goto qspi_probe_err;
-	}
-
-	qspi->base_clk = clk_get_rate(qspi->clk);
-	qspi->max_speed_hz = qspi->base_clk / (QSPI_SPBR_MIN * 2);
-
 	bcm_qspi_hw_init(qspi);
 	init_completion(&qspi->mspi_done);
 	init_completion(&qspi->bspi_done);
@@ -1359,6 +1499,7 @@
 
 qspi_reg_err:
 	bcm_qspi_hw_uninit(qspi);
+qspi_unprepare_err:
 	clk_disable_unprepare(qspi->clk);
 qspi_probe_err:
 	kfree(qspi->dev_ids);
@@ -1391,7 +1532,7 @@
 			bcm_qspi_read(qspi, BSPI, BSPI_STRAP_OVERRIDE_CTRL);
 
 	spi_master_suspend(qspi->master);
-	clk_disable(qspi->clk);
+	clk_disable_unprepare(qspi->clk);
 	bcm_qspi_hw_uninit(qspi);
 
 	return 0;
@@ -1409,7 +1550,7 @@
 		qspi->soc_intc->bcm_qspi_int_set(qspi->soc_intc, MSPI_DONE,
 						 true);
 
-	ret = clk_enable(qspi->clk);
+	ret = clk_prepare_enable(qspi->clk);
 	if (!ret)
 		spi_master_resume(qspi->master);
 
diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c
index c86c3ac..33c32e9 100644
--- a/drivers/spi/spi-bcm2835.c
+++ b/drivers/spi/spi-bcm2835.c
@@ -75,7 +75,7 @@
 #define DRV_NAME	"spi-bcm2835"
 
 /* define polling limits */
-unsigned int polling_limit_us = 30;
+static unsigned int polling_limit_us = 30;
 module_param(polling_limit_us, uint, 0664);
 MODULE_PARM_DESC(polling_limit_us,
 		 "time in us to run a transfer in polling mode\n");
@@ -84,8 +84,10 @@
  * struct bcm2835_spi - BCM2835 SPI controller
  * @regs: base address of register map
  * @clk: core clock, divided to calculate serial clock
+ * @clk_hz: core clock cached speed
  * @irq: interrupt, signals TX FIFO empty or RX FIFO ¾ full
  * @tfr: SPI transfer currently processed
+ * @ctlr: SPI controller reverse lookup
  * @tx_buf: pointer whence next transmitted byte is read
  * @rx_buf: pointer where next received byte is written
  * @tx_len: remaining bytes to transmit
@@ -123,8 +125,10 @@
 struct bcm2835_spi {
 	void __iomem *regs;
 	struct clk *clk;
+	unsigned long clk_hz;
 	int irq;
 	struct spi_transfer *tfr;
+	struct spi_controller *ctlr;
 	const u8 *tx_buf;
 	u8 *rx_buf;
 	int tx_len;
@@ -191,12 +195,12 @@
 }
 #endif /* CONFIG_DEBUG_FS */
 
-static inline u32 bcm2835_rd(struct bcm2835_spi *bs, unsigned reg)
+static inline u32 bcm2835_rd(struct bcm2835_spi *bs, unsigned int reg)
 {
 	return readl(bs->regs + reg);
 }
 
-static inline void bcm2835_wr(struct bcm2835_spi *bs, unsigned reg, u32 val)
+static inline void bcm2835_wr(struct bcm2835_spi *bs, unsigned int reg, u32 val)
 {
 	writel(val, bs->regs + reg);
 }
@@ -243,13 +247,13 @@
 
 	bs->rx_len -= count;
 
-	while (count > 0) {
+	do {
 		val = bcm2835_rd(bs, BCM2835_SPI_FIFO);
 		len = min(count, 4);
 		memcpy(bs->rx_buf, &val, len);
 		bs->rx_buf += len;
 		count -= 4;
-	}
+	} while (count > 0);
 }
 
 /**
@@ -269,7 +273,7 @@
 
 	bs->tx_len -= count;
 
-	while (count > 0) {
+	do {
 		if (bs->tx_buf) {
 			len = min(count, 4);
 			memcpy(&val, bs->tx_buf, len);
@@ -279,7 +283,7 @@
 		}
 		bcm2835_wr(bs, BCM2835_SPI_FIFO, val);
 		count -= 4;
-	}
+	} while (count > 0);
 }
 
 /**
@@ -308,12 +312,11 @@
 	count = min(count, bs->rx_len);
 	bs->rx_len -= count;
 
-	while (count) {
+	do {
 		val = bcm2835_rd(bs, BCM2835_SPI_FIFO);
 		if (bs->rx_buf)
 			*bs->rx_buf++ = val;
-		count--;
-	}
+	} while (--count);
 }
 
 /**
@@ -328,16 +331,14 @@
 	count = min(count, bs->tx_len);
 	bs->tx_len -= count;
 
-	while (count) {
+	do {
 		val = bs->tx_buf ? *bs->tx_buf++ : 0;
 		bcm2835_wr(bs, BCM2835_SPI_FIFO, val);
-		count--;
-	}
+	} while (--count);
 }
 
-static void bcm2835_spi_reset_hw(struct spi_controller *ctlr)
+static void bcm2835_spi_reset_hw(struct bcm2835_spi *bs)
 {
-	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
 	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
 
 	/* Disable SPI interrupts and transfer */
@@ -363,8 +364,7 @@
 
 static irqreturn_t bcm2835_spi_interrupt(int irq, void *dev_id)
 {
-	struct spi_controller *ctlr = dev_id;
-	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
+	struct bcm2835_spi *bs = dev_id;
 	u32 cs = bcm2835_rd(bs, BCM2835_SPI_CS);
 
 	/*
@@ -386,9 +386,9 @@
 
 	if (!bs->rx_len) {
 		/* Transfer complete - reset SPI HW */
-		bcm2835_spi_reset_hw(ctlr);
+		bcm2835_spi_reset_hw(bs);
 		/* wake up the framework */
-		complete(&ctlr->xfer_completion);
+		complete(&bs->ctlr->xfer_completion);
 	}
 
 	return IRQ_HANDLED;
@@ -607,7 +607,7 @@
 	bcm2835_spi_undo_prologue(bs);
 
 	/* reset fifo and HW */
-	bcm2835_spi_reset_hw(ctlr);
+	bcm2835_spi_reset_hw(bs);
 
 	/* and mark as completed */;
 	complete(&ctlr->xfer_completion);
@@ -641,7 +641,7 @@
 		dmaengine_terminate_async(ctlr->dma_rx);
 
 	bcm2835_spi_undo_prologue(bs);
-	bcm2835_spi_reset_hw(ctlr);
+	bcm2835_spi_reset_hw(bs);
 	complete(&ctlr->xfer_completion);
 }
 
@@ -825,14 +825,14 @@
 	if (!bs->rx_buf && !bs->tx_dma_active &&
 	    cmpxchg(&bs->rx_dma_active, true, false)) {
 		dmaengine_terminate_async(ctlr->dma_rx);
-		bcm2835_spi_reset_hw(ctlr);
+		bcm2835_spi_reset_hw(bs);
 	}
 
 	/* wait for wakeup in framework */
 	return 1;
 
 err_reset_hw:
-	bcm2835_spi_reset_hw(ctlr);
+	bcm2835_spi_reset_hw(bs);
 	bcm2835_spi_undo_prologue(bs);
 	return ret;
 }
@@ -888,8 +888,8 @@
 	}
 }
 
-static void bcm2835_dma_init(struct spi_controller *ctlr, struct device *dev,
-			     struct bcm2835_spi *bs)
+static int bcm2835_dma_init(struct spi_controller *ctlr, struct device *dev,
+			    struct bcm2835_spi *bs)
 {
 	struct dma_slave_config slave_config;
 	const __be32 *addr;
@@ -900,19 +900,24 @@
 	addr = of_get_address(ctlr->dev.of_node, 0, NULL, NULL);
 	if (!addr) {
 		dev_err(dev, "could not get DMA-register address - not using dma mode\n");
-		goto err;
+		/* Fall back to interrupt mode */
+		return 0;
 	}
 	dma_reg_base = be32_to_cpup(addr);
 
 	/* get tx/rx dma */
-	ctlr->dma_tx = dma_request_slave_channel(dev, "tx");
-	if (!ctlr->dma_tx) {
+	ctlr->dma_tx = dma_request_chan(dev, "tx");
+	if (IS_ERR(ctlr->dma_tx)) {
 		dev_err(dev, "no tx-dma configuration found - not using dma mode\n");
+		ret = PTR_ERR(ctlr->dma_tx);
+		ctlr->dma_tx = NULL;
 		goto err;
 	}
-	ctlr->dma_rx = dma_request_slave_channel(dev, "rx");
-	if (!ctlr->dma_rx) {
+	ctlr->dma_rx = dma_request_chan(dev, "rx");
+	if (IS_ERR(ctlr->dma_rx)) {
 		dev_err(dev, "no rx-dma configuration found - not using dma mode\n");
+		ret = PTR_ERR(ctlr->dma_rx);
+		ctlr->dma_rx = NULL;
 		goto err_release;
 	}
 
@@ -935,6 +940,7 @@
 	if (dma_mapping_error(ctlr->dma_tx->device->dev, bs->fill_tx_addr)) {
 		dev_err(dev, "cannot map zero page - not using DMA mode\n");
 		bs->fill_tx_addr = 0;
+		ret = -ENOMEM;
 		goto err_release;
 	}
 
@@ -944,6 +950,7 @@
 						     DMA_MEM_TO_DEV, 0);
 	if (!bs->fill_tx_desc) {
 		dev_err(dev, "cannot prepare fill_tx_desc - not using DMA mode\n");
+		ret = -ENOMEM;
 		goto err_release;
 	}
 
@@ -974,6 +981,7 @@
 	if (dma_mapping_error(ctlr->dma_rx->device->dev, bs->clear_rx_addr)) {
 		dev_err(dev, "cannot map clear_rx_cs - not using DMA mode\n");
 		bs->clear_rx_addr = 0;
+		ret = -ENOMEM;
 		goto err_release;
 	}
 
@@ -984,6 +992,7 @@
 					   DMA_MEM_TO_DEV, 0);
 		if (!bs->clear_rx_desc[i]) {
 			dev_err(dev, "cannot prepare clear_rx_desc - not using DMA mode\n");
+			ret = -ENOMEM;
 			goto err_release;
 		}
 
@@ -997,7 +1006,7 @@
 	/* all went well, so set can_dma */
 	ctlr->can_dma = bcm2835_spi_can_dma;
 
-	return;
+	return 0;
 
 err_config:
 	dev_err(dev, "issue configuring dma: %d - not using DMA mode\n",
@@ -1005,7 +1014,14 @@
 err_release:
 	bcm2835_dma_release(ctlr, bs);
 err:
-	return;
+	/*
+	 * Only report error for deferred probing, otherwise fall back to
+	 * interrupt mode
+	 */
+	if (ret != -EPROBE_DEFER)
+		ret = 0;
+
+	return ret;
 }
 
 static int bcm2835_spi_transfer_one_poll(struct spi_controller *ctlr,
@@ -1058,7 +1074,7 @@
 	}
 
 	/* Transfer complete - reset SPI HW */
-	bcm2835_spi_reset_hw(ctlr);
+	bcm2835_spi_reset_hw(bs);
 	/* and return without waiting for completion */
 	return 0;
 }
@@ -1068,19 +1084,18 @@
 				    struct spi_transfer *tfr)
 {
 	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
-	unsigned long spi_hz, clk_hz, cdiv, spi_used_hz;
+	unsigned long spi_hz, cdiv;
 	unsigned long hz_per_byte, byte_limit;
 	u32 cs = bs->prepare_cs[spi->chip_select];
 
 	/* set clock */
 	spi_hz = tfr->speed_hz;
-	clk_hz = clk_get_rate(bs->clk);
 
-	if (spi_hz >= clk_hz / 2) {
+	if (spi_hz >= bs->clk_hz / 2) {
 		cdiv = 2; /* clk_hz/2 is the fastest we can go */
 	} else if (spi_hz) {
 		/* CDIV must be a multiple of two */
-		cdiv = DIV_ROUND_UP(clk_hz, spi_hz);
+		cdiv = DIV_ROUND_UP(bs->clk_hz, spi_hz);
 		cdiv += (cdiv % 2);
 
 		if (cdiv >= 65536)
@@ -1088,7 +1103,7 @@
 	} else {
 		cdiv = 0; /* 0 is the slowest we can go */
 	}
-	spi_used_hz = cdiv ? (clk_hz / cdiv) : (clk_hz / 65536);
+	tfr->effective_speed_hz = cdiv ? (bs->clk_hz / cdiv) : (bs->clk_hz / 65536);
 	bcm2835_wr(bs, BCM2835_SPI_CLK, cdiv);
 
 	/* handle all the 3-wire mode */
@@ -1108,7 +1123,7 @@
 	 * per 300,000 Hz of bus clock.
 	 */
 	hz_per_byte = polling_limit_us ? (9 * 1000000) / polling_limit_us : 0;
-	byte_limit = hz_per_byte ? spi_used_hz / hz_per_byte : 1;
+	byte_limit = hz_per_byte ? tfr->effective_speed_hz / hz_per_byte : 1;
 
 	/* run in polling mode for short transfers */
 	if (tfr->len < byte_limit)
@@ -1166,7 +1181,7 @@
 	bcm2835_spi_undo_prologue(bs);
 
 	/* and reset */
-	bcm2835_spi_reset_hw(ctlr);
+	bcm2835_spi_reset_hw(bs);
 }
 
 static int chip_match_name(struct gpio_chip *chip, void *data)
@@ -1288,32 +1303,34 @@
 	ctlr->dev.of_node = pdev->dev.of_node;
 
 	bs = spi_controller_get_devdata(ctlr);
+	bs->ctlr = ctlr;
 
 	bs->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(bs->regs))
 		return PTR_ERR(bs->regs);
 
 	bs->clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(bs->clk)) {
-		err = PTR_ERR(bs->clk);
-		dev_err(&pdev->dev, "could not get clk: %d\n", err);
-		return err;
-	}
+	if (IS_ERR(bs->clk))
+		return dev_err_probe(&pdev->dev, PTR_ERR(bs->clk),
+				     "could not get clk\n");
 
 	bs->irq = platform_get_irq(pdev, 0);
 	if (bs->irq <= 0)
 		return bs->irq ? bs->irq : -ENODEV;
 
 	clk_prepare_enable(bs->clk);
+	bs->clk_hz = clk_get_rate(bs->clk);
 
-	bcm2835_dma_init(ctlr, &pdev->dev, bs);
+	err = bcm2835_dma_init(ctlr, &pdev->dev, bs);
+	if (err)
+		goto out_clk_disable;
 
 	/* initialise the hardware with the default polarities */
 	bcm2835_wr(bs, BCM2835_SPI_CS,
 		   BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX);
 
 	err = devm_request_irq(&pdev->dev, bs->irq, bcm2835_spi_interrupt, 0,
-			       dev_name(&pdev->dev), ctlr);
+			       dev_name(&pdev->dev), bs);
 	if (err) {
 		dev_err(&pdev->dev, "could not request IRQ: %d\n", err);
 		goto out_dma_release;
@@ -1332,6 +1349,7 @@
 
 out_dma_release:
 	bcm2835_dma_release(ctlr, bs);
+out_clk_disable:
 	clk_disable_unprepare(bs->clk);
 	return err;
 }
@@ -1345,17 +1363,26 @@
 
 	spi_unregister_controller(ctlr);
 
+	bcm2835_dma_release(ctlr, bs);
+
 	/* Clear FIFOs, and disable the HW block */
 	bcm2835_wr(bs, BCM2835_SPI_CS,
 		   BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX);
 
 	clk_disable_unprepare(bs->clk);
 
-	bcm2835_dma_release(ctlr, bs);
-
 	return 0;
 }
 
+static void bcm2835_spi_shutdown(struct platform_device *pdev)
+{
+	int ret;
+
+	ret = bcm2835_spi_remove(pdev);
+	if (ret)
+		dev_err(&pdev->dev, "failed to shutdown\n");
+}
+
 static const struct of_device_id bcm2835_spi_match[] = {
 	{ .compatible = "brcm,bcm2835-spi", },
 	{}
@@ -1369,6 +1396,7 @@
 	},
 	.probe		= bcm2835_spi_probe,
 	.remove		= bcm2835_spi_remove,
+	.shutdown	= bcm2835_spi_shutdown,
 };
 module_platform_driver(bcm2835_spi_driver);
 
diff --git a/drivers/spi/spi-bcm2835aux.c b/drivers/spi/spi-bcm2835aux.c
index 8211107..1a26865 100644
--- a/drivers/spi/spi-bcm2835aux.c
+++ b/drivers/spi/spi-bcm2835aux.c
@@ -164,10 +164,10 @@
 		switch (count) {
 		case 3:
 			*bs->rx_buf++ = (data >> 16) & 0xff;
-			/* fallthrough */
+			fallthrough;
 		case 2:
 			*bs->rx_buf++ = (data >> 8) & 0xff;
-			/* fallthrough */
+			fallthrough;
 		case 1:
 			*bs->rx_buf++ = (data >> 0) & 0xff;
 			/* fallthrough - no default */
@@ -345,7 +345,7 @@
 				       struct spi_transfer *tfr)
 {
 	struct bcm2835aux_spi *bs = spi_master_get_devdata(master);
-	unsigned long spi_hz, clk_hz, speed, spi_used_hz;
+	unsigned long spi_hz, clk_hz, speed;
 	unsigned long hz_per_byte, byte_limit;
 
 	/* calculate the registers to handle
@@ -374,7 +374,7 @@
 	/* set the new speed */
 	bs->cntl[0] |= speed << BCM2835_AUX_SPI_CNTL0_SPEED_SHIFT;
 
-	spi_used_hz = clk_hz / (2 * (speed + 1));
+	tfr->effective_speed_hz = clk_hz / (2 * (speed + 1));
 
 	/* set transmit buffers and length */
 	bs->tx_buf = tfr->tx_buf;
@@ -391,7 +391,7 @@
 	 * 30 µs per 300,000 Hz of bus clock.
 	 */
 	hz_per_byte = polling_limit_us ? (9 * 1000000) / polling_limit_us : 0;
-	byte_limit = hz_per_byte ? spi_used_hz / hz_per_byte : 1;
+	byte_limit = hz_per_byte ? tfr->effective_speed_hz / hz_per_byte : 1;
 
 	/* run in polling mode for short transfers */
 	if (tfr->len < byte_limit)
diff --git a/drivers/spi/spi-bcm63xx-hsspi.c b/drivers/spi/spi-bcm63xx-hsspi.c
index 509476a..1f08d75 100644
--- a/drivers/spi/spi-bcm63xx-hsspi.c
+++ b/drivers/spi/spi-bcm63xx-hsspi.c
@@ -20,6 +20,7 @@
 #include <linux/spi/spi.h>
 #include <linux/mutex.h>
 #include <linux/of.h>
+#include <linux/reset.h>
 
 #define HSSPI_GLOBAL_CTRL_REG			0x0
 #define GLOBAL_CTRL_CS_POLARITY_SHIFT		0
@@ -291,8 +292,7 @@
 
 		msg->actual_length += t->len;
 
-		if (t->delay_usecs)
-			udelay(t->delay_usecs);
+		spi_transfer_delay_exec(t);
 
 		if (t->cs_change)
 			bcm63xx_hsspi_set_cs(bs, spi->chip_select, false);
@@ -335,6 +335,7 @@
 	struct clk *clk, *pll_clk = NULL;
 	int irq, ret;
 	u32 reg, rate, num_cs = HSSPI_SPI_MAX_CS;
+	struct reset_control *reset;
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
@@ -349,10 +350,20 @@
 	if (IS_ERR(clk))
 		return PTR_ERR(clk);
 
+	reset = devm_reset_control_get_optional_exclusive(dev, NULL);
+	if (IS_ERR(reset))
+		return PTR_ERR(reset);
+
 	ret = clk_prepare_enable(clk);
 	if (ret)
 		return ret;
 
+	ret = reset_control_reset(reset);
+	if (ret) {
+		dev_err(dev, "unable to reset device: %d\n", ret);
+		goto out_disable_clk;
+	}
+
 	rate = clk_get_rate(clk);
 	if (!rate) {
 		pll_clk = devm_clk_get(dev, "pll");
diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c
index fdd7eaa..96d075e 100644
--- a/drivers/spi/spi-bcm63xx.c
+++ b/drivers/spi/spi-bcm63xx.c
@@ -18,6 +18,7 @@
 #include <linux/err.h>
 #include <linux/pm_runtime.h>
 #include <linux/of.h>
+#include <linux/reset.h>
 
 /* BCM 6338/6348 SPI core */
 #define SPI_6348_RSET_SIZE		64
@@ -368,7 +369,7 @@
 		}
 
 		/* CS will be deasserted directly after transfer */
-		if (t->delay_usecs) {
+		if (t->delay_usecs || t->delay.value) {
 			dev_err(&spi->dev, "unable to keep CS asserted after transfer\n");
 			status = -EINVAL;
 			goto exit;
@@ -493,6 +494,7 @@
 	struct bcm63xx_spi *bs;
 	int ret;
 	u32 num_cs = BCM63XX_SPI_MAX_CS;
+	struct reset_control *reset;
 
 	if (dev->of_node) {
 		const struct of_device_id *match;
@@ -529,6 +531,10 @@
 		return PTR_ERR(clk);
 	}
 
+	reset = devm_reset_control_get_optional_exclusive(dev, NULL);
+	if (IS_ERR(reset))
+		return PTR_ERR(reset);
+
 	master = spi_alloc_master(dev, sizeof(*bs));
 	if (!master) {
 		dev_err(dev, "out of memory\n");
@@ -579,6 +585,12 @@
 	if (ret)
 		goto out_err;
 
+	ret = reset_control_reset(reset);
+	if (ret) {
+		dev_err(dev, "unable to reset device: %d\n", ret);
+		goto out_clk_disable;
+	}
+
 	bcm_spi_writeb(bs, SPI_INTR_CLEAR_ALL, SPI_INT_STATUS);
 
 	/* register and we are done */
diff --git a/drivers/spi/spi-bitbang.c b/drivers/spi/spi-bitbang.c
index 7d61a3b..3d8948a 100644
--- a/drivers/spi/spi-bitbang.c
+++ b/drivers/spi/spi-bitbang.c
@@ -174,7 +174,7 @@
 }
 EXPORT_SYMBOL_GPL(spi_bitbang_setup_transfer);
 
-/**
+/*
  * spi_bitbang_setup - default setup for per-word I/O loops
  */
 int spi_bitbang_setup(struct spi_device *spi)
@@ -218,7 +218,7 @@
 }
 EXPORT_SYMBOL_GPL(spi_bitbang_setup);
 
-/**
+/*
  * spi_bitbang_cleanup - default cleanup for per-word I/O loops
  */
 void spi_bitbang_cleanup(struct spi_device *spi)
@@ -339,8 +339,20 @@
 int spi_bitbang_init(struct spi_bitbang *bitbang)
 {
 	struct spi_master *master = bitbang->master;
+	bool custom_cs;
 
-	if (!master || !bitbang->chipselect)
+	if (!master)
+		return -EINVAL;
+	/*
+	 * We only need the chipselect callback if we are actually using it.
+	 * If we just use GPIO descriptors, it is surplus. If the
+	 * SPI_MASTER_GPIO_SS flag is set, we always need to call the
+	 * driver-specific chipselect routine.
+	 */
+	custom_cs = (!master->use_gpio_descriptors ||
+		     (master->flags & SPI_MASTER_GPIO_SS));
+
+	if (custom_cs && !bitbang->chipselect)
 		return -EINVAL;
 
 	mutex_init(&bitbang->lock);
@@ -354,7 +366,12 @@
 	master->prepare_transfer_hardware = spi_bitbang_prepare_hardware;
 	master->unprepare_transfer_hardware = spi_bitbang_unprepare_hardware;
 	master->transfer_one = spi_bitbang_transfer_one;
-	master->set_cs = spi_bitbang_set_cs;
+	/*
+	 * When using GPIO descriptors, the ->set_cs() callback doesn't even
+	 * get called unless SPI_MASTER_GPIO_SS is set.
+	 */
+	if (custom_cs)
+		master->set_cs = spi_bitbang_set_cs;
 
 	if (!bitbang->txrx_bufs) {
 		bitbang->use_dma = 0;
@@ -420,7 +437,7 @@
 }
 EXPORT_SYMBOL_GPL(spi_bitbang_start);
 
-/**
+/*
  * spi_bitbang_stop - stops the task providing spi communication
  */
 void spi_bitbang_stop(struct spi_bitbang *bitbang)
diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
new file mode 100644
index 0000000..2e1255b
--- /dev/null
+++ b/drivers/spi/spi-cadence-quadspi.c
@@ -0,0 +1,1430 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Driver for Cadence QSPI Controller
+//
+// Copyright Altera Corporation (C) 2012-2014. All rights reserved.
+// Copyright Intel Corporation (C) 2019-2020. All rights reserved.
+// Copyright (C) 2020 Texas Instruments Incorporated - http://www.ti.com
+
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+#include <linux/sched.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+#include <linux/timer.h>
+
+#define CQSPI_NAME			"cadence-qspi"
+#define CQSPI_MAX_CHIPSELECT		16
+
+/* Quirks */
+#define CQSPI_NEEDS_WR_DELAY		BIT(0)
+#define CQSPI_DISABLE_DAC_MODE		BIT(1)
+
+/* Capabilities */
+#define CQSPI_SUPPORTS_OCTAL		BIT(0)
+
+struct cqspi_st;
+
+struct cqspi_flash_pdata {
+	struct cqspi_st	*cqspi;
+	u32		clk_rate;
+	u32		read_delay;
+	u32		tshsl_ns;
+	u32		tsd2d_ns;
+	u32		tchsh_ns;
+	u32		tslch_ns;
+	u8		inst_width;
+	u8		addr_width;
+	u8		data_width;
+	u8		cs;
+};
+
+struct cqspi_st {
+	struct platform_device	*pdev;
+
+	struct clk		*clk;
+	unsigned int		sclk;
+
+	void __iomem		*iobase;
+	void __iomem		*ahb_base;
+	resource_size_t		ahb_size;
+	struct completion	transfer_complete;
+
+	struct dma_chan		*rx_chan;
+	struct completion	rx_dma_complete;
+	dma_addr_t		mmap_phys_base;
+
+	int			current_cs;
+	unsigned long		master_ref_clk_hz;
+	bool			is_decoded_cs;
+	u32			fifo_depth;
+	u32			fifo_width;
+	bool			rclk_en;
+	u32			trigger_address;
+	u32			wr_delay;
+	bool			use_direct_mode;
+	struct cqspi_flash_pdata f_pdata[CQSPI_MAX_CHIPSELECT];
+};
+
+struct cqspi_driver_platdata {
+	u32 hwcaps_mask;
+	u8 quirks;
+};
+
+/* Operation timeout value */
+#define CQSPI_TIMEOUT_MS			500
+#define CQSPI_READ_TIMEOUT_MS			10
+
+/* Instruction type */
+#define CQSPI_INST_TYPE_SINGLE			0
+#define CQSPI_INST_TYPE_DUAL			1
+#define CQSPI_INST_TYPE_QUAD			2
+#define CQSPI_INST_TYPE_OCTAL			3
+
+#define CQSPI_DUMMY_CLKS_PER_BYTE		8
+#define CQSPI_DUMMY_BYTES_MAX			4
+#define CQSPI_DUMMY_CLKS_MAX			31
+
+#define CQSPI_STIG_DATA_LEN_MAX			8
+
+/* Register map */
+#define CQSPI_REG_CONFIG			0x00
+#define CQSPI_REG_CONFIG_ENABLE_MASK		BIT(0)
+#define CQSPI_REG_CONFIG_ENB_DIR_ACC_CTRL	BIT(7)
+#define CQSPI_REG_CONFIG_DECODE_MASK		BIT(9)
+#define CQSPI_REG_CONFIG_CHIPSELECT_LSB		10
+#define CQSPI_REG_CONFIG_DMA_MASK		BIT(15)
+#define CQSPI_REG_CONFIG_BAUD_LSB		19
+#define CQSPI_REG_CONFIG_IDLE_LSB		31
+#define CQSPI_REG_CONFIG_CHIPSELECT_MASK	0xF
+#define CQSPI_REG_CONFIG_BAUD_MASK		0xF
+
+#define CQSPI_REG_RD_INSTR			0x04
+#define CQSPI_REG_RD_INSTR_OPCODE_LSB		0
+#define CQSPI_REG_RD_INSTR_TYPE_INSTR_LSB	8
+#define CQSPI_REG_RD_INSTR_TYPE_ADDR_LSB	12
+#define CQSPI_REG_RD_INSTR_TYPE_DATA_LSB	16
+#define CQSPI_REG_RD_INSTR_MODE_EN_LSB		20
+#define CQSPI_REG_RD_INSTR_DUMMY_LSB		24
+#define CQSPI_REG_RD_INSTR_TYPE_INSTR_MASK	0x3
+#define CQSPI_REG_RD_INSTR_TYPE_ADDR_MASK	0x3
+#define CQSPI_REG_RD_INSTR_TYPE_DATA_MASK	0x3
+#define CQSPI_REG_RD_INSTR_DUMMY_MASK		0x1F
+
+#define CQSPI_REG_WR_INSTR			0x08
+#define CQSPI_REG_WR_INSTR_OPCODE_LSB		0
+#define CQSPI_REG_WR_INSTR_TYPE_ADDR_LSB	12
+#define CQSPI_REG_WR_INSTR_TYPE_DATA_LSB	16
+
+#define CQSPI_REG_DELAY				0x0C
+#define CQSPI_REG_DELAY_TSLCH_LSB		0
+#define CQSPI_REG_DELAY_TCHSH_LSB		8
+#define CQSPI_REG_DELAY_TSD2D_LSB		16
+#define CQSPI_REG_DELAY_TSHSL_LSB		24
+#define CQSPI_REG_DELAY_TSLCH_MASK		0xFF
+#define CQSPI_REG_DELAY_TCHSH_MASK		0xFF
+#define CQSPI_REG_DELAY_TSD2D_MASK		0xFF
+#define CQSPI_REG_DELAY_TSHSL_MASK		0xFF
+
+#define CQSPI_REG_READCAPTURE			0x10
+#define CQSPI_REG_READCAPTURE_BYPASS_LSB	0
+#define CQSPI_REG_READCAPTURE_DELAY_LSB		1
+#define CQSPI_REG_READCAPTURE_DELAY_MASK	0xF
+
+#define CQSPI_REG_SIZE				0x14
+#define CQSPI_REG_SIZE_ADDRESS_LSB		0
+#define CQSPI_REG_SIZE_PAGE_LSB			4
+#define CQSPI_REG_SIZE_BLOCK_LSB		16
+#define CQSPI_REG_SIZE_ADDRESS_MASK		0xF
+#define CQSPI_REG_SIZE_PAGE_MASK		0xFFF
+#define CQSPI_REG_SIZE_BLOCK_MASK		0x3F
+
+#define CQSPI_REG_SRAMPARTITION			0x18
+#define CQSPI_REG_INDIRECTTRIGGER		0x1C
+
+#define CQSPI_REG_DMA				0x20
+#define CQSPI_REG_DMA_SINGLE_LSB		0
+#define CQSPI_REG_DMA_BURST_LSB			8
+#define CQSPI_REG_DMA_SINGLE_MASK		0xFF
+#define CQSPI_REG_DMA_BURST_MASK		0xFF
+
+#define CQSPI_REG_REMAP				0x24
+#define CQSPI_REG_MODE_BIT			0x28
+
+#define CQSPI_REG_SDRAMLEVEL			0x2C
+#define CQSPI_REG_SDRAMLEVEL_RD_LSB		0
+#define CQSPI_REG_SDRAMLEVEL_WR_LSB		16
+#define CQSPI_REG_SDRAMLEVEL_RD_MASK		0xFFFF
+#define CQSPI_REG_SDRAMLEVEL_WR_MASK		0xFFFF
+
+#define CQSPI_REG_IRQSTATUS			0x40
+#define CQSPI_REG_IRQMASK			0x44
+
+#define CQSPI_REG_INDIRECTRD			0x60
+#define CQSPI_REG_INDIRECTRD_START_MASK		BIT(0)
+#define CQSPI_REG_INDIRECTRD_CANCEL_MASK	BIT(1)
+#define CQSPI_REG_INDIRECTRD_DONE_MASK		BIT(5)
+
+#define CQSPI_REG_INDIRECTRDWATERMARK		0x64
+#define CQSPI_REG_INDIRECTRDSTARTADDR		0x68
+#define CQSPI_REG_INDIRECTRDBYTES		0x6C
+
+#define CQSPI_REG_CMDCTRL			0x90
+#define CQSPI_REG_CMDCTRL_EXECUTE_MASK		BIT(0)
+#define CQSPI_REG_CMDCTRL_INPROGRESS_MASK	BIT(1)
+#define CQSPI_REG_CMDCTRL_WR_BYTES_LSB		12
+#define CQSPI_REG_CMDCTRL_WR_EN_LSB		15
+#define CQSPI_REG_CMDCTRL_ADD_BYTES_LSB		16
+#define CQSPI_REG_CMDCTRL_ADDR_EN_LSB		19
+#define CQSPI_REG_CMDCTRL_RD_BYTES_LSB		20
+#define CQSPI_REG_CMDCTRL_RD_EN_LSB		23
+#define CQSPI_REG_CMDCTRL_OPCODE_LSB		24
+#define CQSPI_REG_CMDCTRL_WR_BYTES_MASK		0x7
+#define CQSPI_REG_CMDCTRL_ADD_BYTES_MASK	0x3
+#define CQSPI_REG_CMDCTRL_RD_BYTES_MASK		0x7
+
+#define CQSPI_REG_INDIRECTWR			0x70
+#define CQSPI_REG_INDIRECTWR_START_MASK		BIT(0)
+#define CQSPI_REG_INDIRECTWR_CANCEL_MASK	BIT(1)
+#define CQSPI_REG_INDIRECTWR_DONE_MASK		BIT(5)
+
+#define CQSPI_REG_INDIRECTWRWATERMARK		0x74
+#define CQSPI_REG_INDIRECTWRSTARTADDR		0x78
+#define CQSPI_REG_INDIRECTWRBYTES		0x7C
+
+#define CQSPI_REG_CMDADDRESS			0x94
+#define CQSPI_REG_CMDREADDATALOWER		0xA0
+#define CQSPI_REG_CMDREADDATAUPPER		0xA4
+#define CQSPI_REG_CMDWRITEDATALOWER		0xA8
+#define CQSPI_REG_CMDWRITEDATAUPPER		0xAC
+
+/* Interrupt status bits */
+#define CQSPI_REG_IRQ_MODE_ERR			BIT(0)
+#define CQSPI_REG_IRQ_UNDERFLOW			BIT(1)
+#define CQSPI_REG_IRQ_IND_COMP			BIT(2)
+#define CQSPI_REG_IRQ_IND_RD_REJECT		BIT(3)
+#define CQSPI_REG_IRQ_WR_PROTECTED_ERR		BIT(4)
+#define CQSPI_REG_IRQ_ILLEGAL_AHB_ERR		BIT(5)
+#define CQSPI_REG_IRQ_WATERMARK			BIT(6)
+#define CQSPI_REG_IRQ_IND_SRAM_FULL		BIT(12)
+
+#define CQSPI_IRQ_MASK_RD		(CQSPI_REG_IRQ_WATERMARK	| \
+					 CQSPI_REG_IRQ_IND_SRAM_FULL	| \
+					 CQSPI_REG_IRQ_IND_COMP)
+
+#define CQSPI_IRQ_MASK_WR		(CQSPI_REG_IRQ_IND_COMP		| \
+					 CQSPI_REG_IRQ_WATERMARK	| \
+					 CQSPI_REG_IRQ_UNDERFLOW)
+
+#define CQSPI_IRQ_STATUS_MASK		0x1FFFF
+
+static int cqspi_wait_for_bit(void __iomem *reg, const u32 mask, bool clr)
+{
+	u32 val;
+
+	return readl_relaxed_poll_timeout(reg, val,
+					  (((clr ? ~val : val) & mask) == mask),
+					  10, CQSPI_TIMEOUT_MS * 1000);
+}
+
+static bool cqspi_is_idle(struct cqspi_st *cqspi)
+{
+	u32 reg = readl(cqspi->iobase + CQSPI_REG_CONFIG);
+
+	return reg & (1 << CQSPI_REG_CONFIG_IDLE_LSB);
+}
+
+static u32 cqspi_get_rd_sram_level(struct cqspi_st *cqspi)
+{
+	u32 reg = readl(cqspi->iobase + CQSPI_REG_SDRAMLEVEL);
+
+	reg >>= CQSPI_REG_SDRAMLEVEL_RD_LSB;
+	return reg & CQSPI_REG_SDRAMLEVEL_RD_MASK;
+}
+
+static irqreturn_t cqspi_irq_handler(int this_irq, void *dev)
+{
+	struct cqspi_st *cqspi = dev;
+	unsigned int irq_status;
+
+	/* Read interrupt status */
+	irq_status = readl(cqspi->iobase + CQSPI_REG_IRQSTATUS);
+
+	/* Clear interrupt */
+	writel(irq_status, cqspi->iobase + CQSPI_REG_IRQSTATUS);
+
+	irq_status &= CQSPI_IRQ_MASK_RD | CQSPI_IRQ_MASK_WR;
+
+	if (irq_status)
+		complete(&cqspi->transfer_complete);
+
+	return IRQ_HANDLED;
+}
+
+static unsigned int cqspi_calc_rdreg(struct cqspi_flash_pdata *f_pdata)
+{
+	u32 rdreg = 0;
+
+	rdreg |= f_pdata->inst_width << CQSPI_REG_RD_INSTR_TYPE_INSTR_LSB;
+	rdreg |= f_pdata->addr_width << CQSPI_REG_RD_INSTR_TYPE_ADDR_LSB;
+	rdreg |= f_pdata->data_width << CQSPI_REG_RD_INSTR_TYPE_DATA_LSB;
+
+	return rdreg;
+}
+
+static int cqspi_wait_idle(struct cqspi_st *cqspi)
+{
+	const unsigned int poll_idle_retry = 3;
+	unsigned int count = 0;
+	unsigned long timeout;
+
+	timeout = jiffies + msecs_to_jiffies(CQSPI_TIMEOUT_MS);
+	while (1) {
+		/*
+		 * Read few times in succession to ensure the controller
+		 * is indeed idle, that is, the bit does not transition
+		 * low again.
+		 */
+		if (cqspi_is_idle(cqspi))
+			count++;
+		else
+			count = 0;
+
+		if (count >= poll_idle_retry)
+			return 0;
+
+		if (time_after(jiffies, timeout)) {
+			/* Timeout, in busy mode. */
+			dev_err(&cqspi->pdev->dev,
+				"QSPI is still busy after %dms timeout.\n",
+				CQSPI_TIMEOUT_MS);
+			return -ETIMEDOUT;
+		}
+
+		cpu_relax();
+	}
+}
+
+static int cqspi_exec_flash_cmd(struct cqspi_st *cqspi, unsigned int reg)
+{
+	void __iomem *reg_base = cqspi->iobase;
+	int ret;
+
+	/* Write the CMDCTRL without start execution. */
+	writel(reg, reg_base + CQSPI_REG_CMDCTRL);
+	/* Start execute */
+	reg |= CQSPI_REG_CMDCTRL_EXECUTE_MASK;
+	writel(reg, reg_base + CQSPI_REG_CMDCTRL);
+
+	/* Polling for completion. */
+	ret = cqspi_wait_for_bit(reg_base + CQSPI_REG_CMDCTRL,
+				 CQSPI_REG_CMDCTRL_INPROGRESS_MASK, 1);
+	if (ret) {
+		dev_err(&cqspi->pdev->dev,
+			"Flash command execution timed out.\n");
+		return ret;
+	}
+
+	/* Polling QSPI idle status. */
+	return cqspi_wait_idle(cqspi);
+}
+
+static int cqspi_command_read(struct cqspi_flash_pdata *f_pdata,
+			      const struct spi_mem_op *op)
+{
+	struct cqspi_st *cqspi = f_pdata->cqspi;
+	void __iomem *reg_base = cqspi->iobase;
+	u8 *rxbuf = op->data.buf.in;
+	u8 opcode = op->cmd.opcode;
+	size_t n_rx = op->data.nbytes;
+	unsigned int rdreg;
+	unsigned int reg;
+	size_t read_len;
+	int status;
+
+	if (!n_rx || n_rx > CQSPI_STIG_DATA_LEN_MAX || !rxbuf) {
+		dev_err(&cqspi->pdev->dev,
+			"Invalid input argument, len %zu rxbuf 0x%p\n",
+			n_rx, rxbuf);
+		return -EINVAL;
+	}
+
+	reg = opcode << CQSPI_REG_CMDCTRL_OPCODE_LSB;
+
+	rdreg = cqspi_calc_rdreg(f_pdata);
+	writel(rdreg, reg_base + CQSPI_REG_RD_INSTR);
+
+	reg |= (0x1 << CQSPI_REG_CMDCTRL_RD_EN_LSB);
+
+	/* 0 means 1 byte. */
+	reg |= (((n_rx - 1) & CQSPI_REG_CMDCTRL_RD_BYTES_MASK)
+		<< CQSPI_REG_CMDCTRL_RD_BYTES_LSB);
+	status = cqspi_exec_flash_cmd(cqspi, reg);
+	if (status)
+		return status;
+
+	reg = readl(reg_base + CQSPI_REG_CMDREADDATALOWER);
+
+	/* Put the read value into rx_buf */
+	read_len = (n_rx > 4) ? 4 : n_rx;
+	memcpy(rxbuf, &reg, read_len);
+	rxbuf += read_len;
+
+	if (n_rx > 4) {
+		reg = readl(reg_base + CQSPI_REG_CMDREADDATAUPPER);
+
+		read_len = n_rx - read_len;
+		memcpy(rxbuf, &reg, read_len);
+	}
+
+	return 0;
+}
+
+static int cqspi_command_write(struct cqspi_flash_pdata *f_pdata,
+			       const struct spi_mem_op *op)
+{
+	struct cqspi_st *cqspi = f_pdata->cqspi;
+	void __iomem *reg_base = cqspi->iobase;
+	const u8 opcode = op->cmd.opcode;
+	const u8 *txbuf = op->data.buf.out;
+	size_t n_tx = op->data.nbytes;
+	unsigned int reg;
+	unsigned int data;
+	size_t write_len;
+
+	if (n_tx > CQSPI_STIG_DATA_LEN_MAX || (n_tx && !txbuf)) {
+		dev_err(&cqspi->pdev->dev,
+			"Invalid input argument, cmdlen %zu txbuf 0x%p\n",
+			n_tx, txbuf);
+		return -EINVAL;
+	}
+
+	reg = opcode << CQSPI_REG_CMDCTRL_OPCODE_LSB;
+
+	if (op->addr.nbytes) {
+		reg |= (0x1 << CQSPI_REG_CMDCTRL_ADDR_EN_LSB);
+		reg |= ((op->addr.nbytes - 1) &
+			CQSPI_REG_CMDCTRL_ADD_BYTES_MASK)
+			<< CQSPI_REG_CMDCTRL_ADD_BYTES_LSB;
+
+		writel(op->addr.val, reg_base + CQSPI_REG_CMDADDRESS);
+	}
+
+	if (n_tx) {
+		reg |= (0x1 << CQSPI_REG_CMDCTRL_WR_EN_LSB);
+		reg |= ((n_tx - 1) & CQSPI_REG_CMDCTRL_WR_BYTES_MASK)
+			<< CQSPI_REG_CMDCTRL_WR_BYTES_LSB;
+		data = 0;
+		write_len = (n_tx > 4) ? 4 : n_tx;
+		memcpy(&data, txbuf, write_len);
+		txbuf += write_len;
+		writel(data, reg_base + CQSPI_REG_CMDWRITEDATALOWER);
+
+		if (n_tx > 4) {
+			data = 0;
+			write_len = n_tx - 4;
+			memcpy(&data, txbuf, write_len);
+			writel(data, reg_base + CQSPI_REG_CMDWRITEDATAUPPER);
+		}
+	}
+
+	return cqspi_exec_flash_cmd(cqspi, reg);
+}
+
+static int cqspi_read_setup(struct cqspi_flash_pdata *f_pdata,
+			    const struct spi_mem_op *op)
+{
+	struct cqspi_st *cqspi = f_pdata->cqspi;
+	void __iomem *reg_base = cqspi->iobase;
+	unsigned int dummy_clk = 0;
+	unsigned int reg;
+
+	reg = op->cmd.opcode << CQSPI_REG_RD_INSTR_OPCODE_LSB;
+	reg |= cqspi_calc_rdreg(f_pdata);
+
+	/* Setup dummy clock cycles */
+	dummy_clk = op->dummy.nbytes * 8;
+	if (dummy_clk > CQSPI_DUMMY_CLKS_MAX)
+		return -EOPNOTSUPP;
+
+	if (dummy_clk)
+		reg |= (dummy_clk & CQSPI_REG_RD_INSTR_DUMMY_MASK)
+		       << CQSPI_REG_RD_INSTR_DUMMY_LSB;
+
+	writel(reg, reg_base + CQSPI_REG_RD_INSTR);
+
+	/* Set address width */
+	reg = readl(reg_base + CQSPI_REG_SIZE);
+	reg &= ~CQSPI_REG_SIZE_ADDRESS_MASK;
+	reg |= (op->addr.nbytes - 1);
+	writel(reg, reg_base + CQSPI_REG_SIZE);
+	return 0;
+}
+
+static int cqspi_indirect_read_execute(struct cqspi_flash_pdata *f_pdata,
+				       u8 *rxbuf, loff_t from_addr,
+				       const size_t n_rx)
+{
+	struct cqspi_st *cqspi = f_pdata->cqspi;
+	struct device *dev = &cqspi->pdev->dev;
+	void __iomem *reg_base = cqspi->iobase;
+	void __iomem *ahb_base = cqspi->ahb_base;
+	unsigned int remaining = n_rx;
+	unsigned int mod_bytes = n_rx % 4;
+	unsigned int bytes_to_read = 0;
+	u8 *rxbuf_end = rxbuf + n_rx;
+	int ret = 0;
+
+	writel(from_addr, reg_base + CQSPI_REG_INDIRECTRDSTARTADDR);
+	writel(remaining, reg_base + CQSPI_REG_INDIRECTRDBYTES);
+
+	/* Clear all interrupts. */
+	writel(CQSPI_IRQ_STATUS_MASK, reg_base + CQSPI_REG_IRQSTATUS);
+
+	writel(CQSPI_IRQ_MASK_RD, reg_base + CQSPI_REG_IRQMASK);
+
+	reinit_completion(&cqspi->transfer_complete);
+	writel(CQSPI_REG_INDIRECTRD_START_MASK,
+	       reg_base + CQSPI_REG_INDIRECTRD);
+
+	while (remaining > 0) {
+		if (!wait_for_completion_timeout(&cqspi->transfer_complete,
+						 msecs_to_jiffies(CQSPI_READ_TIMEOUT_MS)))
+			ret = -ETIMEDOUT;
+
+		bytes_to_read = cqspi_get_rd_sram_level(cqspi);
+
+		if (ret && bytes_to_read == 0) {
+			dev_err(dev, "Indirect read timeout, no bytes\n");
+			goto failrd;
+		}
+
+		while (bytes_to_read != 0) {
+			unsigned int word_remain = round_down(remaining, 4);
+
+			bytes_to_read *= cqspi->fifo_width;
+			bytes_to_read = bytes_to_read > remaining ?
+					remaining : bytes_to_read;
+			bytes_to_read = round_down(bytes_to_read, 4);
+			/* Read 4 byte word chunks then single bytes */
+			if (bytes_to_read) {
+				ioread32_rep(ahb_base, rxbuf,
+					     (bytes_to_read / 4));
+			} else if (!word_remain && mod_bytes) {
+				unsigned int temp = ioread32(ahb_base);
+
+				bytes_to_read = mod_bytes;
+				memcpy(rxbuf, &temp, min((unsigned int)
+							 (rxbuf_end - rxbuf),
+							 bytes_to_read));
+			}
+			rxbuf += bytes_to_read;
+			remaining -= bytes_to_read;
+			bytes_to_read = cqspi_get_rd_sram_level(cqspi);
+		}
+
+		if (remaining > 0)
+			reinit_completion(&cqspi->transfer_complete);
+	}
+
+	/* Check indirect done status */
+	ret = cqspi_wait_for_bit(reg_base + CQSPI_REG_INDIRECTRD,
+				 CQSPI_REG_INDIRECTRD_DONE_MASK, 0);
+	if (ret) {
+		dev_err(dev, "Indirect read completion error (%i)\n", ret);
+		goto failrd;
+	}
+
+	/* Disable interrupt */
+	writel(0, reg_base + CQSPI_REG_IRQMASK);
+
+	/* Clear indirect completion status */
+	writel(CQSPI_REG_INDIRECTRD_DONE_MASK, reg_base + CQSPI_REG_INDIRECTRD);
+
+	return 0;
+
+failrd:
+	/* Disable interrupt */
+	writel(0, reg_base + CQSPI_REG_IRQMASK);
+
+	/* Cancel the indirect read */
+	writel(CQSPI_REG_INDIRECTWR_CANCEL_MASK,
+	       reg_base + CQSPI_REG_INDIRECTRD);
+	return ret;
+}
+
+static int cqspi_write_setup(struct cqspi_flash_pdata *f_pdata,
+			     const struct spi_mem_op *op)
+{
+	unsigned int reg;
+	struct cqspi_st *cqspi = f_pdata->cqspi;
+	void __iomem *reg_base = cqspi->iobase;
+
+	/* Set opcode. */
+	reg = op->cmd.opcode << CQSPI_REG_WR_INSTR_OPCODE_LSB;
+	writel(reg, reg_base + CQSPI_REG_WR_INSTR);
+	reg = cqspi_calc_rdreg(f_pdata);
+	writel(reg, reg_base + CQSPI_REG_RD_INSTR);
+
+	reg = readl(reg_base + CQSPI_REG_SIZE);
+	reg &= ~CQSPI_REG_SIZE_ADDRESS_MASK;
+	reg |= (op->addr.nbytes - 1);
+	writel(reg, reg_base + CQSPI_REG_SIZE);
+	return 0;
+}
+
+static int cqspi_indirect_write_execute(struct cqspi_flash_pdata *f_pdata,
+					loff_t to_addr, const u8 *txbuf,
+					const size_t n_tx)
+{
+	struct cqspi_st *cqspi = f_pdata->cqspi;
+	struct device *dev = &cqspi->pdev->dev;
+	void __iomem *reg_base = cqspi->iobase;
+	unsigned int remaining = n_tx;
+	unsigned int write_bytes;
+	int ret;
+
+	writel(to_addr, reg_base + CQSPI_REG_INDIRECTWRSTARTADDR);
+	writel(remaining, reg_base + CQSPI_REG_INDIRECTWRBYTES);
+
+	/* Clear all interrupts. */
+	writel(CQSPI_IRQ_STATUS_MASK, reg_base + CQSPI_REG_IRQSTATUS);
+
+	writel(CQSPI_IRQ_MASK_WR, reg_base + CQSPI_REG_IRQMASK);
+
+	reinit_completion(&cqspi->transfer_complete);
+	writel(CQSPI_REG_INDIRECTWR_START_MASK,
+	       reg_base + CQSPI_REG_INDIRECTWR);
+	/*
+	 * As per 66AK2G02 TRM SPRUHY8F section 11.15.5.3 Indirect Access
+	 * Controller programming sequence, couple of cycles of
+	 * QSPI_REF_CLK delay is required for the above bit to
+	 * be internally synchronized by the QSPI module. Provide 5
+	 * cycles of delay.
+	 */
+	if (cqspi->wr_delay)
+		ndelay(cqspi->wr_delay);
+
+	while (remaining > 0) {
+		size_t write_words, mod_bytes;
+
+		write_bytes = remaining;
+		write_words = write_bytes / 4;
+		mod_bytes = write_bytes % 4;
+		/* Write 4 bytes at a time then single bytes. */
+		if (write_words) {
+			iowrite32_rep(cqspi->ahb_base, txbuf, write_words);
+			txbuf += (write_words * 4);
+		}
+		if (mod_bytes) {
+			unsigned int temp = 0xFFFFFFFF;
+
+			memcpy(&temp, txbuf, mod_bytes);
+			iowrite32(temp, cqspi->ahb_base);
+			txbuf += mod_bytes;
+		}
+
+		if (!wait_for_completion_timeout(&cqspi->transfer_complete,
+						 msecs_to_jiffies(CQSPI_TIMEOUT_MS))) {
+			dev_err(dev, "Indirect write timeout\n");
+			ret = -ETIMEDOUT;
+			goto failwr;
+		}
+
+		remaining -= write_bytes;
+
+		if (remaining > 0)
+			reinit_completion(&cqspi->transfer_complete);
+	}
+
+	/* Check indirect done status */
+	ret = cqspi_wait_for_bit(reg_base + CQSPI_REG_INDIRECTWR,
+				 CQSPI_REG_INDIRECTWR_DONE_MASK, 0);
+	if (ret) {
+		dev_err(dev, "Indirect write completion error (%i)\n", ret);
+		goto failwr;
+	}
+
+	/* Disable interrupt. */
+	writel(0, reg_base + CQSPI_REG_IRQMASK);
+
+	/* Clear indirect completion status */
+	writel(CQSPI_REG_INDIRECTWR_DONE_MASK, reg_base + CQSPI_REG_INDIRECTWR);
+
+	cqspi_wait_idle(cqspi);
+
+	return 0;
+
+failwr:
+	/* Disable interrupt. */
+	writel(0, reg_base + CQSPI_REG_IRQMASK);
+
+	/* Cancel the indirect write */
+	writel(CQSPI_REG_INDIRECTWR_CANCEL_MASK,
+	       reg_base + CQSPI_REG_INDIRECTWR);
+	return ret;
+}
+
+static void cqspi_chipselect(struct cqspi_flash_pdata *f_pdata)
+{
+	struct cqspi_st *cqspi = f_pdata->cqspi;
+	void __iomem *reg_base = cqspi->iobase;
+	unsigned int chip_select = f_pdata->cs;
+	unsigned int reg;
+
+	reg = readl(reg_base + CQSPI_REG_CONFIG);
+	if (cqspi->is_decoded_cs) {
+		reg |= CQSPI_REG_CONFIG_DECODE_MASK;
+	} else {
+		reg &= ~CQSPI_REG_CONFIG_DECODE_MASK;
+
+		/* Convert CS if without decoder.
+		 * CS0 to 4b'1110
+		 * CS1 to 4b'1101
+		 * CS2 to 4b'1011
+		 * CS3 to 4b'0111
+		 */
+		chip_select = 0xF & ~(1 << chip_select);
+	}
+
+	reg &= ~(CQSPI_REG_CONFIG_CHIPSELECT_MASK
+		 << CQSPI_REG_CONFIG_CHIPSELECT_LSB);
+	reg |= (chip_select & CQSPI_REG_CONFIG_CHIPSELECT_MASK)
+	    << CQSPI_REG_CONFIG_CHIPSELECT_LSB;
+	writel(reg, reg_base + CQSPI_REG_CONFIG);
+}
+
+static unsigned int calculate_ticks_for_ns(const unsigned int ref_clk_hz,
+					   const unsigned int ns_val)
+{
+	unsigned int ticks;
+
+	ticks = ref_clk_hz / 1000;	/* kHz */
+	ticks = DIV_ROUND_UP(ticks * ns_val, 1000000);
+
+	return ticks;
+}
+
+static void cqspi_delay(struct cqspi_flash_pdata *f_pdata)
+{
+	struct cqspi_st *cqspi = f_pdata->cqspi;
+	void __iomem *iobase = cqspi->iobase;
+	const unsigned int ref_clk_hz = cqspi->master_ref_clk_hz;
+	unsigned int tshsl, tchsh, tslch, tsd2d;
+	unsigned int reg;
+	unsigned int tsclk;
+
+	/* calculate the number of ref ticks for one sclk tick */
+	tsclk = DIV_ROUND_UP(ref_clk_hz, cqspi->sclk);
+
+	tshsl = calculate_ticks_for_ns(ref_clk_hz, f_pdata->tshsl_ns);
+	/* this particular value must be at least one sclk */
+	if (tshsl < tsclk)
+		tshsl = tsclk;
+
+	tchsh = calculate_ticks_for_ns(ref_clk_hz, f_pdata->tchsh_ns);
+	tslch = calculate_ticks_for_ns(ref_clk_hz, f_pdata->tslch_ns);
+	tsd2d = calculate_ticks_for_ns(ref_clk_hz, f_pdata->tsd2d_ns);
+
+	reg = (tshsl & CQSPI_REG_DELAY_TSHSL_MASK)
+	       << CQSPI_REG_DELAY_TSHSL_LSB;
+	reg |= (tchsh & CQSPI_REG_DELAY_TCHSH_MASK)
+		<< CQSPI_REG_DELAY_TCHSH_LSB;
+	reg |= (tslch & CQSPI_REG_DELAY_TSLCH_MASK)
+		<< CQSPI_REG_DELAY_TSLCH_LSB;
+	reg |= (tsd2d & CQSPI_REG_DELAY_TSD2D_MASK)
+		<< CQSPI_REG_DELAY_TSD2D_LSB;
+	writel(reg, iobase + CQSPI_REG_DELAY);
+}
+
+static void cqspi_config_baudrate_div(struct cqspi_st *cqspi)
+{
+	const unsigned int ref_clk_hz = cqspi->master_ref_clk_hz;
+	void __iomem *reg_base = cqspi->iobase;
+	u32 reg, div;
+
+	/* Recalculate the baudrate divisor based on QSPI specification. */
+	div = DIV_ROUND_UP(ref_clk_hz, 2 * cqspi->sclk) - 1;
+
+	reg = readl(reg_base + CQSPI_REG_CONFIG);
+	reg &= ~(CQSPI_REG_CONFIG_BAUD_MASK << CQSPI_REG_CONFIG_BAUD_LSB);
+	reg |= (div & CQSPI_REG_CONFIG_BAUD_MASK) << CQSPI_REG_CONFIG_BAUD_LSB;
+	writel(reg, reg_base + CQSPI_REG_CONFIG);
+}
+
+static void cqspi_readdata_capture(struct cqspi_st *cqspi,
+				   const bool bypass,
+				   const unsigned int delay)
+{
+	void __iomem *reg_base = cqspi->iobase;
+	unsigned int reg;
+
+	reg = readl(reg_base + CQSPI_REG_READCAPTURE);
+
+	if (bypass)
+		reg |= (1 << CQSPI_REG_READCAPTURE_BYPASS_LSB);
+	else
+		reg &= ~(1 << CQSPI_REG_READCAPTURE_BYPASS_LSB);
+
+	reg &= ~(CQSPI_REG_READCAPTURE_DELAY_MASK
+		 << CQSPI_REG_READCAPTURE_DELAY_LSB);
+
+	reg |= (delay & CQSPI_REG_READCAPTURE_DELAY_MASK)
+		<< CQSPI_REG_READCAPTURE_DELAY_LSB;
+
+	writel(reg, reg_base + CQSPI_REG_READCAPTURE);
+}
+
+static void cqspi_controller_enable(struct cqspi_st *cqspi, bool enable)
+{
+	void __iomem *reg_base = cqspi->iobase;
+	unsigned int reg;
+
+	reg = readl(reg_base + CQSPI_REG_CONFIG);
+
+	if (enable)
+		reg |= CQSPI_REG_CONFIG_ENABLE_MASK;
+	else
+		reg &= ~CQSPI_REG_CONFIG_ENABLE_MASK;
+
+	writel(reg, reg_base + CQSPI_REG_CONFIG);
+}
+
+static void cqspi_configure(struct cqspi_flash_pdata *f_pdata,
+			    unsigned long sclk)
+{
+	struct cqspi_st *cqspi = f_pdata->cqspi;
+	int switch_cs = (cqspi->current_cs != f_pdata->cs);
+	int switch_ck = (cqspi->sclk != sclk);
+
+	if (switch_cs || switch_ck)
+		cqspi_controller_enable(cqspi, 0);
+
+	/* Switch chip select. */
+	if (switch_cs) {
+		cqspi->current_cs = f_pdata->cs;
+		cqspi_chipselect(f_pdata);
+	}
+
+	/* Setup baudrate divisor and delays */
+	if (switch_ck) {
+		cqspi->sclk = sclk;
+		cqspi_config_baudrate_div(cqspi);
+		cqspi_delay(f_pdata);
+		cqspi_readdata_capture(cqspi, !cqspi->rclk_en,
+				       f_pdata->read_delay);
+	}
+
+	if (switch_cs || switch_ck)
+		cqspi_controller_enable(cqspi, 1);
+}
+
+static int cqspi_set_protocol(struct cqspi_flash_pdata *f_pdata,
+			      const struct spi_mem_op *op)
+{
+	f_pdata->inst_width = CQSPI_INST_TYPE_SINGLE;
+	f_pdata->addr_width = CQSPI_INST_TYPE_SINGLE;
+	f_pdata->data_width = CQSPI_INST_TYPE_SINGLE;
+
+	if (op->data.dir == SPI_MEM_DATA_IN) {
+		switch (op->data.buswidth) {
+		case 1:
+			f_pdata->data_width = CQSPI_INST_TYPE_SINGLE;
+			break;
+		case 2:
+			f_pdata->data_width = CQSPI_INST_TYPE_DUAL;
+			break;
+		case 4:
+			f_pdata->data_width = CQSPI_INST_TYPE_QUAD;
+			break;
+		case 8:
+			f_pdata->data_width = CQSPI_INST_TYPE_OCTAL;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static ssize_t cqspi_write(struct cqspi_flash_pdata *f_pdata,
+			   const struct spi_mem_op *op)
+{
+	struct cqspi_st *cqspi = f_pdata->cqspi;
+	loff_t to = op->addr.val;
+	size_t len = op->data.nbytes;
+	const u_char *buf = op->data.buf.out;
+	int ret;
+
+	ret = cqspi_set_protocol(f_pdata, op);
+	if (ret)
+		return ret;
+
+	ret = cqspi_write_setup(f_pdata, op);
+	if (ret)
+		return ret;
+
+	if (cqspi->use_direct_mode && ((to + len) <= cqspi->ahb_size)) {
+		memcpy_toio(cqspi->ahb_base + to, buf, len);
+		return cqspi_wait_idle(cqspi);
+	}
+
+	return cqspi_indirect_write_execute(f_pdata, to, buf, len);
+}
+
+static void cqspi_rx_dma_callback(void *param)
+{
+	struct cqspi_st *cqspi = param;
+
+	complete(&cqspi->rx_dma_complete);
+}
+
+static int cqspi_direct_read_execute(struct cqspi_flash_pdata *f_pdata,
+				     u_char *buf, loff_t from, size_t len)
+{
+	struct cqspi_st *cqspi = f_pdata->cqspi;
+	struct device *dev = &cqspi->pdev->dev;
+	enum dma_ctrl_flags flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
+	dma_addr_t dma_src = (dma_addr_t)cqspi->mmap_phys_base + from;
+	int ret = 0;
+	struct dma_async_tx_descriptor *tx;
+	dma_cookie_t cookie;
+	dma_addr_t dma_dst;
+	struct device *ddev;
+
+	if (!cqspi->rx_chan || !virt_addr_valid(buf)) {
+		memcpy_fromio(buf, cqspi->ahb_base + from, len);
+		return 0;
+	}
+
+	ddev = cqspi->rx_chan->device->dev;
+	dma_dst = dma_map_single(ddev, buf, len, DMA_FROM_DEVICE);
+	if (dma_mapping_error(ddev, dma_dst)) {
+		dev_err(dev, "dma mapping failed\n");
+		return -ENOMEM;
+	}
+	tx = dmaengine_prep_dma_memcpy(cqspi->rx_chan, dma_dst, dma_src,
+				       len, flags);
+	if (!tx) {
+		dev_err(dev, "device_prep_dma_memcpy error\n");
+		ret = -EIO;
+		goto err_unmap;
+	}
+
+	tx->callback = cqspi_rx_dma_callback;
+	tx->callback_param = cqspi;
+	cookie = tx->tx_submit(tx);
+	reinit_completion(&cqspi->rx_dma_complete);
+
+	ret = dma_submit_error(cookie);
+	if (ret) {
+		dev_err(dev, "dma_submit_error %d\n", cookie);
+		ret = -EIO;
+		goto err_unmap;
+	}
+
+	dma_async_issue_pending(cqspi->rx_chan);
+	if (!wait_for_completion_timeout(&cqspi->rx_dma_complete,
+					 msecs_to_jiffies(len))) {
+		dmaengine_terminate_sync(cqspi->rx_chan);
+		dev_err(dev, "DMA wait_for_completion_timeout\n");
+		ret = -ETIMEDOUT;
+		goto err_unmap;
+	}
+
+err_unmap:
+	dma_unmap_single(ddev, dma_dst, len, DMA_FROM_DEVICE);
+
+	return ret;
+}
+
+static ssize_t cqspi_read(struct cqspi_flash_pdata *f_pdata,
+			  const struct spi_mem_op *op)
+{
+	struct cqspi_st *cqspi = f_pdata->cqspi;
+	loff_t from = op->addr.val;
+	size_t len = op->data.nbytes;
+	u_char *buf = op->data.buf.in;
+	int ret;
+
+	ret = cqspi_set_protocol(f_pdata, op);
+	if (ret)
+		return ret;
+
+	ret = cqspi_read_setup(f_pdata, op);
+	if (ret)
+		return ret;
+
+	if (cqspi->use_direct_mode && ((from + len) <= cqspi->ahb_size))
+		return cqspi_direct_read_execute(f_pdata, buf, from, len);
+
+	return cqspi_indirect_read_execute(f_pdata, buf, from, len);
+}
+
+static int cqspi_mem_process(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+	struct cqspi_st *cqspi = spi_master_get_devdata(mem->spi->master);
+	struct cqspi_flash_pdata *f_pdata;
+
+	f_pdata = &cqspi->f_pdata[mem->spi->chip_select];
+	cqspi_configure(f_pdata, mem->spi->max_speed_hz);
+
+	if (op->data.dir == SPI_MEM_DATA_IN && op->data.buf.in) {
+		if (!op->addr.nbytes)
+			return cqspi_command_read(f_pdata, op);
+
+		return cqspi_read(f_pdata, op);
+	}
+
+	if (!op->addr.nbytes || !op->data.buf.out)
+		return cqspi_command_write(f_pdata, op);
+
+	return cqspi_write(f_pdata, op);
+}
+
+static int cqspi_exec_mem_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+	int ret;
+
+	ret = cqspi_mem_process(mem, op);
+	if (ret)
+		dev_err(&mem->spi->dev, "operation failed with %d\n", ret);
+
+	return ret;
+}
+
+static int cqspi_of_get_flash_pdata(struct platform_device *pdev,
+				    struct cqspi_flash_pdata *f_pdata,
+				    struct device_node *np)
+{
+	if (of_property_read_u32(np, "cdns,read-delay", &f_pdata->read_delay)) {
+		dev_err(&pdev->dev, "couldn't determine read-delay\n");
+		return -ENXIO;
+	}
+
+	if (of_property_read_u32(np, "cdns,tshsl-ns", &f_pdata->tshsl_ns)) {
+		dev_err(&pdev->dev, "couldn't determine tshsl-ns\n");
+		return -ENXIO;
+	}
+
+	if (of_property_read_u32(np, "cdns,tsd2d-ns", &f_pdata->tsd2d_ns)) {
+		dev_err(&pdev->dev, "couldn't determine tsd2d-ns\n");
+		return -ENXIO;
+	}
+
+	if (of_property_read_u32(np, "cdns,tchsh-ns", &f_pdata->tchsh_ns)) {
+		dev_err(&pdev->dev, "couldn't determine tchsh-ns\n");
+		return -ENXIO;
+	}
+
+	if (of_property_read_u32(np, "cdns,tslch-ns", &f_pdata->tslch_ns)) {
+		dev_err(&pdev->dev, "couldn't determine tslch-ns\n");
+		return -ENXIO;
+	}
+
+	if (of_property_read_u32(np, "spi-max-frequency", &f_pdata->clk_rate)) {
+		dev_err(&pdev->dev, "couldn't determine spi-max-frequency\n");
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+static int cqspi_of_get_pdata(struct cqspi_st *cqspi)
+{
+	struct device *dev = &cqspi->pdev->dev;
+	struct device_node *np = dev->of_node;
+
+	cqspi->is_decoded_cs = of_property_read_bool(np, "cdns,is-decoded-cs");
+
+	if (of_property_read_u32(np, "cdns,fifo-depth", &cqspi->fifo_depth)) {
+		dev_err(dev, "couldn't determine fifo-depth\n");
+		return -ENXIO;
+	}
+
+	if (of_property_read_u32(np, "cdns,fifo-width", &cqspi->fifo_width)) {
+		dev_err(dev, "couldn't determine fifo-width\n");
+		return -ENXIO;
+	}
+
+	if (of_property_read_u32(np, "cdns,trigger-address",
+				 &cqspi->trigger_address)) {
+		dev_err(dev, "couldn't determine trigger-address\n");
+		return -ENXIO;
+	}
+
+	cqspi->rclk_en = of_property_read_bool(np, "cdns,rclk-en");
+
+	return 0;
+}
+
+static void cqspi_controller_init(struct cqspi_st *cqspi)
+{
+	u32 reg;
+
+	cqspi_controller_enable(cqspi, 0);
+
+	/* Configure the remap address register, no remap */
+	writel(0, cqspi->iobase + CQSPI_REG_REMAP);
+
+	/* Disable all interrupts. */
+	writel(0, cqspi->iobase + CQSPI_REG_IRQMASK);
+
+	/* Configure the SRAM split to 1:1 . */
+	writel(cqspi->fifo_depth / 2, cqspi->iobase + CQSPI_REG_SRAMPARTITION);
+
+	/* Load indirect trigger address. */
+	writel(cqspi->trigger_address,
+	       cqspi->iobase + CQSPI_REG_INDIRECTTRIGGER);
+
+	/* Program read watermark -- 1/2 of the FIFO. */
+	writel(cqspi->fifo_depth * cqspi->fifo_width / 2,
+	       cqspi->iobase + CQSPI_REG_INDIRECTRDWATERMARK);
+	/* Program write watermark -- 1/8 of the FIFO. */
+	writel(cqspi->fifo_depth * cqspi->fifo_width / 8,
+	       cqspi->iobase + CQSPI_REG_INDIRECTWRWATERMARK);
+
+	/* Enable Direct Access Controller */
+	reg = readl(cqspi->iobase + CQSPI_REG_CONFIG);
+	reg |= CQSPI_REG_CONFIG_ENB_DIR_ACC_CTRL;
+	writel(reg, cqspi->iobase + CQSPI_REG_CONFIG);
+
+	cqspi_controller_enable(cqspi, 1);
+}
+
+static int cqspi_request_mmap_dma(struct cqspi_st *cqspi)
+{
+	dma_cap_mask_t mask;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_MEMCPY, mask);
+
+	cqspi->rx_chan = dma_request_chan_by_mask(&mask);
+	if (IS_ERR(cqspi->rx_chan)) {
+		int ret = PTR_ERR(cqspi->rx_chan);
+		cqspi->rx_chan = NULL;
+		return dev_err_probe(&cqspi->pdev->dev, ret, "No Rx DMA available\n");
+	}
+	init_completion(&cqspi->rx_dma_complete);
+
+	return 0;
+}
+
+static const char *cqspi_get_name(struct spi_mem *mem)
+{
+	struct cqspi_st *cqspi = spi_master_get_devdata(mem->spi->master);
+	struct device *dev = &cqspi->pdev->dev;
+
+	return devm_kasprintf(dev, GFP_KERNEL, "%s.%d", dev_name(dev), mem->spi->chip_select);
+}
+
+static const struct spi_controller_mem_ops cqspi_mem_ops = {
+	.exec_op = cqspi_exec_mem_op,
+	.get_name = cqspi_get_name,
+};
+
+static int cqspi_setup_flash(struct cqspi_st *cqspi)
+{
+	struct platform_device *pdev = cqspi->pdev;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct cqspi_flash_pdata *f_pdata;
+	unsigned int cs;
+	int ret;
+
+	/* Get flash device data */
+	for_each_available_child_of_node(dev->of_node, np) {
+		ret = of_property_read_u32(np, "reg", &cs);
+		if (ret) {
+			dev_err(dev, "Couldn't determine chip select.\n");
+			return ret;
+		}
+
+		if (cs >= CQSPI_MAX_CHIPSELECT) {
+			dev_err(dev, "Chip select %d out of range.\n", cs);
+			return -EINVAL;
+		}
+
+		f_pdata = &cqspi->f_pdata[cs];
+		f_pdata->cqspi = cqspi;
+		f_pdata->cs = cs;
+
+		ret = cqspi_of_get_flash_pdata(pdev, f_pdata, np);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int cqspi_probe(struct platform_device *pdev)
+{
+	const struct cqspi_driver_platdata *ddata;
+	struct reset_control *rstc, *rstc_ocp;
+	struct device *dev = &pdev->dev;
+	struct spi_master *master;
+	struct resource *res_ahb;
+	struct cqspi_st *cqspi;
+	struct resource *res;
+	int ret;
+	int irq;
+
+	master = spi_alloc_master(&pdev->dev, sizeof(*cqspi));
+	if (!master) {
+		dev_err(&pdev->dev, "spi_alloc_master failed\n");
+		return -ENOMEM;
+	}
+	master->mode_bits = SPI_RX_QUAD | SPI_RX_DUAL;
+	master->mem_ops = &cqspi_mem_ops;
+	master->dev.of_node = pdev->dev.of_node;
+
+	cqspi = spi_master_get_devdata(master);
+
+	cqspi->pdev = pdev;
+	platform_set_drvdata(pdev, cqspi);
+
+	/* Obtain configuration from OF. */
+	ret = cqspi_of_get_pdata(cqspi);
+	if (ret) {
+		dev_err(dev, "Cannot get mandatory OF data.\n");
+		ret = -ENODEV;
+		goto probe_master_put;
+	}
+
+	/* Obtain QSPI clock. */
+	cqspi->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(cqspi->clk)) {
+		dev_err(dev, "Cannot claim QSPI clock.\n");
+		ret = PTR_ERR(cqspi->clk);
+		goto probe_master_put;
+	}
+
+	/* Obtain and remap controller address. */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	cqspi->iobase = devm_ioremap_resource(dev, res);
+	if (IS_ERR(cqspi->iobase)) {
+		dev_err(dev, "Cannot remap controller address.\n");
+		ret = PTR_ERR(cqspi->iobase);
+		goto probe_master_put;
+	}
+
+	/* Obtain and remap AHB address. */
+	res_ahb = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	cqspi->ahb_base = devm_ioremap_resource(dev, res_ahb);
+	if (IS_ERR(cqspi->ahb_base)) {
+		dev_err(dev, "Cannot remap AHB address.\n");
+		ret = PTR_ERR(cqspi->ahb_base);
+		goto probe_master_put;
+	}
+	cqspi->mmap_phys_base = (dma_addr_t)res_ahb->start;
+	cqspi->ahb_size = resource_size(res_ahb);
+
+	init_completion(&cqspi->transfer_complete);
+
+	/* Obtain IRQ line. */
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		ret = -ENXIO;
+		goto probe_master_put;
+	}
+
+	pm_runtime_enable(dev);
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		pm_runtime_put_noidle(dev);
+		goto probe_master_put;
+	}
+
+	ret = clk_prepare_enable(cqspi->clk);
+	if (ret) {
+		dev_err(dev, "Cannot enable QSPI clock.\n");
+		goto probe_clk_failed;
+	}
+
+	/* Obtain QSPI reset control */
+	rstc = devm_reset_control_get_optional_exclusive(dev, "qspi");
+	if (IS_ERR(rstc)) {
+		ret = PTR_ERR(rstc);
+		dev_err(dev, "Cannot get QSPI reset.\n");
+		goto probe_reset_failed;
+	}
+
+	rstc_ocp = devm_reset_control_get_optional_exclusive(dev, "qspi-ocp");
+	if (IS_ERR(rstc_ocp)) {
+		ret = PTR_ERR(rstc_ocp);
+		dev_err(dev, "Cannot get QSPI OCP reset.\n");
+		goto probe_reset_failed;
+	}
+
+	reset_control_assert(rstc);
+	reset_control_deassert(rstc);
+
+	reset_control_assert(rstc_ocp);
+	reset_control_deassert(rstc_ocp);
+
+	cqspi->master_ref_clk_hz = clk_get_rate(cqspi->clk);
+	ddata  = of_device_get_match_data(dev);
+	if (ddata) {
+		if (ddata->quirks & CQSPI_NEEDS_WR_DELAY)
+			cqspi->wr_delay = 5 * DIV_ROUND_UP(NSEC_PER_SEC,
+						cqspi->master_ref_clk_hz);
+		if (ddata->hwcaps_mask & CQSPI_SUPPORTS_OCTAL)
+			master->mode_bits |= SPI_RX_OCTAL;
+		if (!(ddata->quirks & CQSPI_DISABLE_DAC_MODE))
+			cqspi->use_direct_mode = true;
+	}
+
+	ret = devm_request_irq(dev, irq, cqspi_irq_handler, 0,
+			       pdev->name, cqspi);
+	if (ret) {
+		dev_err(dev, "Cannot request IRQ.\n");
+		goto probe_reset_failed;
+	}
+
+	cqspi_wait_idle(cqspi);
+	cqspi_controller_init(cqspi);
+	cqspi->current_cs = -1;
+	cqspi->sclk = 0;
+
+	ret = cqspi_setup_flash(cqspi);
+	if (ret) {
+		dev_err(dev, "failed to setup flash parameters %d\n", ret);
+		goto probe_setup_failed;
+	}
+
+	if (cqspi->use_direct_mode) {
+		ret = cqspi_request_mmap_dma(cqspi);
+		if (ret == -EPROBE_DEFER)
+			goto probe_setup_failed;
+	}
+
+	ret = devm_spi_register_master(dev, master);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to register SPI ctlr %d\n", ret);
+		goto probe_setup_failed;
+	}
+
+	return 0;
+probe_setup_failed:
+	cqspi_controller_enable(cqspi, 0);
+probe_reset_failed:
+	clk_disable_unprepare(cqspi->clk);
+probe_clk_failed:
+	pm_runtime_put_sync(dev);
+	pm_runtime_disable(dev);
+probe_master_put:
+	spi_master_put(master);
+	return ret;
+}
+
+static int cqspi_remove(struct platform_device *pdev)
+{
+	struct cqspi_st *cqspi = platform_get_drvdata(pdev);
+
+	cqspi_controller_enable(cqspi, 0);
+
+	if (cqspi->rx_chan)
+		dma_release_channel(cqspi->rx_chan);
+
+	clk_disable_unprepare(cqspi->clk);
+
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int cqspi_suspend(struct device *dev)
+{
+	struct cqspi_st *cqspi = dev_get_drvdata(dev);
+
+	cqspi_controller_enable(cqspi, 0);
+	return 0;
+}
+
+static int cqspi_resume(struct device *dev)
+{
+	struct cqspi_st *cqspi = dev_get_drvdata(dev);
+
+	cqspi_controller_enable(cqspi, 1);
+	return 0;
+}
+
+static const struct dev_pm_ops cqspi__dev_pm_ops = {
+	.suspend = cqspi_suspend,
+	.resume = cqspi_resume,
+};
+
+#define CQSPI_DEV_PM_OPS	(&cqspi__dev_pm_ops)
+#else
+#define CQSPI_DEV_PM_OPS	NULL
+#endif
+
+static const struct cqspi_driver_platdata cdns_qspi = {
+	.quirks = CQSPI_DISABLE_DAC_MODE,
+};
+
+static const struct cqspi_driver_platdata k2g_qspi = {
+	.quirks = CQSPI_NEEDS_WR_DELAY,
+};
+
+static const struct cqspi_driver_platdata am654_ospi = {
+	.hwcaps_mask = CQSPI_SUPPORTS_OCTAL,
+	.quirks = CQSPI_NEEDS_WR_DELAY,
+};
+
+static const struct of_device_id cqspi_dt_ids[] = {
+	{
+		.compatible = "cdns,qspi-nor",
+		.data = &cdns_qspi,
+	},
+	{
+		.compatible = "ti,k2g-qspi",
+		.data = &k2g_qspi,
+	},
+	{
+		.compatible = "ti,am654-ospi",
+		.data = &am654_ospi,
+	},
+	{ /* end of table */ }
+};
+
+MODULE_DEVICE_TABLE(of, cqspi_dt_ids);
+
+static struct platform_driver cqspi_platform_driver = {
+	.probe = cqspi_probe,
+	.remove = cqspi_remove,
+	.driver = {
+		.name = CQSPI_NAME,
+		.pm = CQSPI_DEV_PM_OPS,
+		.of_match_table = cqspi_dt_ids,
+	},
+};
+
+module_platform_driver(cqspi_platform_driver);
+
+MODULE_DESCRIPTION("Cadence QSPI Controller Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:" CQSPI_NAME);
+MODULE_AUTHOR("Ley Foon Tan <lftan@altera.com>");
+MODULE_AUTHOR("Graham Moore <grmoore@opensource.altera.com>");
+MODULE_AUTHOR("Vadivel Murugan R <vadivel.muruganx.ramuthevar@intel.com>");
+MODULE_AUTHOR("Vignesh Raghavendra <vigneshr@ti.com>");
diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c
index 5ac60d0..ceb16e7 100644
--- a/drivers/spi/spi-cadence.c
+++ b/drivers/spi/spi-cadence.c
@@ -419,8 +419,8 @@
 	xspi->rx_bytes = transfer->len;
 
 	cdns_spi_setup_transfer(spi, transfer);
-
 	cdns_spi_fill_tx_fifo(xspi);
+	spi_transfer_delay_exec(transfer);
 
 	cdns_spi_write(xspi, CDNS_SPI_IER, CDNS_SPI_IXR_DEFAULT);
 	return transfer->len;
@@ -558,7 +558,7 @@
 	master->unprepare_transfer_hardware = cdns_unprepare_transfer_hardware;
 	master->set_cs = cdns_spi_chipselect;
 	master->auto_runtime_pm = true;
-	master->mode_bits = SPI_CPOL | SPI_CPHA;
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
 
 	xspi->clk_rate = clk_get_rate(xspi->ref_clk);
 	/* Set to default valid value */
diff --git a/drivers/spi/spi-cavium-thunderx.c b/drivers/spi/spi-cavium-thunderx.c
index fd6b9ca..60c0d69 100644
--- a/drivers/spi/spi-cavium-thunderx.c
+++ b/drivers/spi/spi-cavium-thunderx.c
@@ -64,6 +64,7 @@
 		p->sys_freq = SYS_FREQ_DEFAULT;
 	dev_info(dev, "Set system clock to %u\n", p->sys_freq);
 
+	master->flags = SPI_MASTER_HALF_DUPLEX;
 	master->num_chipselect = 4;
 	master->mode_bits = SPI_CPHA | SPI_CPOL | SPI_CS_HIGH |
 			    SPI_LSB_FIRST | SPI_3WIRE;
diff --git a/drivers/spi/spi-cavium.c b/drivers/spi/spi-cavium.c
index 5aaf215..6854c3c 100644
--- a/drivers/spi/spi-cavium.c
+++ b/drivers/spi/spi-cavium.c
@@ -119,8 +119,7 @@
 			*rx_buf++ = (u8)v;
 		}
 
-	if (xfer->delay_usecs)
-		udelay(xfer->delay_usecs);
+	spi_transfer_delay_exec(xfer);
 
 	return xfer->len;
 }
diff --git a/drivers/spi/spi-coldfire-qspi.c b/drivers/spi/spi-coldfire-qspi.c
index f80e06c..263ce90 100644
--- a/drivers/spi/spi-coldfire-qspi.c
+++ b/drivers/spi/spi-coldfire-qspi.c
@@ -387,7 +387,7 @@
 		status = PTR_ERR(mcfqspi->clk);
 		goto fail0;
 	}
-	clk_enable(mcfqspi->clk);
+	clk_prepare_enable(mcfqspi->clk);
 
 	master->bus_num = pdata->bus_num;
 	master->num_chipselect = pdata->num_chipselect;
@@ -425,7 +425,7 @@
 	pm_runtime_disable(&pdev->dev);
 	mcfqspi_cs_teardown(mcfqspi);
 fail1:
-	clk_disable(mcfqspi->clk);
+	clk_disable_unprepare(mcfqspi->clk);
 fail0:
 	spi_master_put(master);
 
@@ -444,7 +444,7 @@
 	mcfqspi_wr_qmr(mcfqspi, MCFQSPI_QMR_MSTR);
 
 	mcfqspi_cs_teardown(mcfqspi);
-	clk_disable(mcfqspi->clk);
+	clk_disable_unprepare(mcfqspi->clk);
 
 	return 0;
 }
diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index 5260183..fda7322 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -213,12 +213,6 @@
 	 * line for the controller
 	 */
 	if (spi->cs_gpiod) {
-		/*
-		 * FIXME: is this code ever executed? This host does not
-		 * set SPI_MASTER_GPIO_SS so this chipselect callback should
-		 * not get called from the SPI core when we are using
-		 * GPIOs for chip select.
-		 */
 		if (value == BITBANG_CS_ACTIVE)
 			gpiod_set_value(spi->cs_gpiod, 1);
 		else
@@ -236,7 +230,8 @@
 
 /**
  * davinci_spi_get_prescale - Calculates the correct prescale value
- * @maxspeed_hz: the maximum rate the SPI clock can run at
+ * @dspi: the controller data
+ * @max_speed_hz: the maximum rate the SPI clock can run at
  *
  * This function calculates the prescale value that generates a clock rate
  * less than or equal to the specified maximum.
@@ -576,7 +571,6 @@
 	u32 errors = 0;
 	struct davinci_spi_config *spicfg;
 	struct davinci_spi_platform_data *pdata;
-	unsigned uninitialized_var(rx_buf_count);
 
 	dspi = spi_master_get_devdata(spi->master);
 	pdata = &dspi->pdata;
@@ -711,7 +705,7 @@
 /**
  * dummy_thread_fn - dummy thread function
  * @irq: IRQ number for this SPI Master
- * @context_data: structure for SPI Master controller davinci_spi
+ * @data: structure for SPI Master controller davinci_spi
  *
  * This is to satisfy the request_threaded_irq() API so that the irq
  * handler is called in interrupt context.
@@ -724,7 +718,7 @@
 /**
  * davinci_spi_irq - Interrupt handler for SPI Master Controller
  * @irq: IRQ number for this SPI Master
- * @context_data: structure for SPI Master controller davinci_spi
+ * @data: structure for SPI Master controller davinci_spi
  *
  * ISR will determine that interrupt arrives either for READ or WRITE command.
  * According to command it will do the appropriate action. It will check
@@ -950,7 +944,7 @@
 	master->bus_num = pdev->id;
 	master->num_chipselect = pdata->num_chipselect;
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(2, 16);
-	master->flags = SPI_MASTER_MUST_RX;
+	master->flags = SPI_MASTER_MUST_RX | SPI_MASTER_GPIO_SS;
 	master->setup = davinci_spi_setup;
 	master->cleanup = davinci_spi_cleanup;
 	master->can_dma = davinci_spi_can_dma;
diff --git a/drivers/spi/spi-dw-bt1.c b/drivers/spi/spi-dw-bt1.c
new file mode 100644
index 0000000..bc9d5ea
--- /dev/null
+++ b/drivers/spi/spi-dw-bt1.c
@@ -0,0 +1,341 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright (C) 2020 BAIKAL ELECTRONICS, JSC
+//
+// Authors:
+//   Ramil Zaripov <Ramil.Zaripov@baikalelectronics.ru>
+//   Serge Semin <Sergey.Semin@baikalelectronics.ru>
+//
+// Baikal-T1 DW APB SPI and System Boot SPI driver
+//
+
+#include <linux/clk.h>
+#include <linux/cpumask.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/mux/consumer.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/property.h>
+#include <linux/slab.h>
+#include <linux/spi/spi-mem.h>
+#include <linux/spi/spi.h>
+
+#include "spi-dw.h"
+
+#define BT1_BOOT_DIRMAP		0
+#define BT1_BOOT_REGS		1
+
+struct dw_spi_bt1 {
+	struct dw_spi		dws;
+	struct clk		*clk;
+	struct mux_control	*mux;
+
+#ifdef CONFIG_SPI_DW_BT1_DIRMAP
+	void __iomem		*map;
+	resource_size_t		map_len;
+#endif
+};
+#define to_dw_spi_bt1(_ctlr) \
+	container_of(spi_controller_get_devdata(_ctlr), struct dw_spi_bt1, dws)
+
+typedef int (*dw_spi_bt1_init_cb)(struct platform_device *pdev,
+				    struct dw_spi_bt1 *dwsbt1);
+
+#ifdef CONFIG_SPI_DW_BT1_DIRMAP
+
+static int dw_spi_bt1_dirmap_create(struct spi_mem_dirmap_desc *desc)
+{
+	struct dw_spi_bt1 *dwsbt1 = to_dw_spi_bt1(desc->mem->spi->controller);
+
+	if (!dwsbt1->map ||
+	    !dwsbt1->dws.mem_ops.supports_op(desc->mem, &desc->info.op_tmpl))
+		return -EOPNOTSUPP;
+
+	/*
+	 * Make sure the requested region doesn't go out of the physically
+	 * mapped flash memory bounds and the operation is read-only.
+	 */
+	if (desc->info.offset + desc->info.length > dwsbt1->map_len ||
+	    desc->info.op_tmpl.data.dir != SPI_MEM_DATA_IN)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+/*
+ * Directly mapped SPI memory region is only accessible in the dword chunks.
+ * That's why we have to create a dedicated read-method to copy data from there
+ * to the passed buffer.
+ */
+static void dw_spi_bt1_dirmap_copy_from_map(void *to, void __iomem *from, size_t len)
+{
+	size_t shift, chunk;
+	u32 data;
+
+	/*
+	 * We split the copying up into the next three stages: unaligned head,
+	 * aligned body, unaligned tail.
+	 */
+	shift = (size_t)from & 0x3;
+	if (shift) {
+		chunk = min_t(size_t, 4 - shift, len);
+		data = readl_relaxed(from - shift);
+		memcpy(to, (char *)&data + shift, chunk);
+		from += chunk;
+		to += chunk;
+		len -= chunk;
+	}
+
+	while (len >= 4) {
+		data = readl_relaxed(from);
+		memcpy(to, &data, 4);
+		from += 4;
+		to += 4;
+		len -= 4;
+	}
+
+	if (len) {
+		data = readl_relaxed(from);
+		memcpy(to, &data, len);
+	}
+}
+
+static ssize_t dw_spi_bt1_dirmap_read(struct spi_mem_dirmap_desc *desc,
+				      u64 offs, size_t len, void *buf)
+{
+	struct dw_spi_bt1 *dwsbt1 = to_dw_spi_bt1(desc->mem->spi->controller);
+	struct dw_spi *dws = &dwsbt1->dws;
+	struct spi_mem *mem = desc->mem;
+	struct dw_spi_cfg cfg;
+	int ret;
+
+	/*
+	 * Make sure the requested operation length is valid. Truncate the
+	 * length if it's greater than the length of the MMIO region.
+	 */
+	if (offs >= dwsbt1->map_len || !len)
+		return 0;
+
+	len = min_t(size_t, len, dwsbt1->map_len - offs);
+
+	/* Collect the controller configuration required by the operation */
+	cfg.tmode = SPI_TMOD_EPROMREAD;
+	cfg.dfs = 8;
+	cfg.ndf = 4;
+	cfg.freq = mem->spi->max_speed_hz;
+
+	/* Make sure the corresponding CS is de-asserted on transmission */
+	dw_spi_set_cs(mem->spi, false);
+
+	spi_enable_chip(dws, 0);
+
+	dw_spi_update_config(dws, mem->spi, &cfg);
+
+	spi_umask_intr(dws, SPI_INT_RXFI);
+
+	spi_enable_chip(dws, 1);
+
+	/*
+	 * Enable the transparent mode of the System Boot Controller.
+	 * The SPI core IO should have been locked before calling this method
+	 * so noone would be touching the controller' registers during the
+	 * dirmap operation.
+	 */
+	ret = mux_control_select(dwsbt1->mux, BT1_BOOT_DIRMAP);
+	if (ret)
+		return ret;
+
+	dw_spi_bt1_dirmap_copy_from_map(buf, dwsbt1->map + offs, len);
+
+	mux_control_deselect(dwsbt1->mux);
+
+	dw_spi_set_cs(mem->spi, true);
+
+	ret = dw_spi_check_status(dws, true);
+
+	return ret ?: len;
+}
+
+#endif /* CONFIG_SPI_DW_BT1_DIRMAP */
+
+static int dw_spi_bt1_std_init(struct platform_device *pdev,
+			       struct dw_spi_bt1 *dwsbt1)
+{
+	struct dw_spi *dws = &dwsbt1->dws;
+
+	dws->irq = platform_get_irq(pdev, 0);
+	if (dws->irq < 0)
+		return dws->irq;
+
+	dws->num_cs = 4;
+
+	/*
+	 * Baikal-T1 Normal SPI Controllers don't always keep up with full SPI
+	 * bus speed especially when it comes to the concurrent access to the
+	 * APB bus resources. Thus we have no choice but to set a constraint on
+	 * the SPI bus frequency for the memory operations which require to
+	 * read/write data as fast as possible.
+	 */
+	dws->max_mem_freq = 20000000U;
+
+	dw_spi_dma_setup_generic(dws);
+
+	return 0;
+}
+
+static int dw_spi_bt1_sys_init(struct platform_device *pdev,
+			       struct dw_spi_bt1 *dwsbt1)
+{
+	struct resource *mem __maybe_unused;
+	struct dw_spi *dws = &dwsbt1->dws;
+
+	/*
+	 * Baikal-T1 System Boot Controller is equipped with a mux, which
+	 * switches between the directly mapped SPI flash access mode and
+	 * IO access to the DW APB SSI registers. Note the mux controller
+	 * must be setup to preserve the registers being accessible by default
+	 * (on idle-state).
+	 */
+	dwsbt1->mux = devm_mux_control_get(&pdev->dev, NULL);
+	if (IS_ERR(dwsbt1->mux))
+		return PTR_ERR(dwsbt1->mux);
+
+	/*
+	 * Directly mapped SPI flash memory is a 16MB MMIO region, which can be
+	 * used to access a peripheral memory device just by reading/writing
+	 * data from/to it. Note the system APB bus will stall during each IO
+	 * from/to the dirmap region until the operation is finished. So don't
+	 * use it concurrently with time-critical tasks (like the SPI memory
+	 * operations implemented in the DW APB SSI driver).
+	 */
+#ifdef CONFIG_SPI_DW_BT1_DIRMAP
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (mem) {
+		dwsbt1->map = devm_ioremap_resource(&pdev->dev, mem);
+		if (!IS_ERR(dwsbt1->map)) {
+			dwsbt1->map_len = (mem->end - mem->start + 1);
+			dws->mem_ops.dirmap_create = dw_spi_bt1_dirmap_create;
+			dws->mem_ops.dirmap_read = dw_spi_bt1_dirmap_read;
+		} else {
+			dwsbt1->map = NULL;
+		}
+	}
+#endif /* CONFIG_SPI_DW_BT1_DIRMAP */
+
+	/*
+	 * There is no IRQ, no DMA and just one CS available on the System Boot
+	 * SPI controller.
+	 */
+	dws->irq = IRQ_NOTCONNECTED;
+	dws->num_cs = 1;
+
+	/*
+	 * Baikal-T1 System Boot SPI Controller doesn't keep up with the full
+	 * SPI bus speed due to relatively slow APB bus and races for it'
+	 * resources from different CPUs. The situation is worsen by a small
+	 * FIFOs depth (just 8 words). It works better in a single CPU mode
+	 * though, but still tends to be not fast enough at low CPU
+	 * frequencies.
+	 */
+	if (num_possible_cpus() > 1)
+		dws->max_mem_freq = 10000000U;
+	else
+		dws->max_mem_freq = 20000000U;
+
+	return 0;
+}
+
+static int dw_spi_bt1_probe(struct platform_device *pdev)
+{
+	dw_spi_bt1_init_cb init_func;
+	struct dw_spi_bt1 *dwsbt1;
+	struct resource *mem;
+	struct dw_spi *dws;
+	int ret;
+
+	dwsbt1 = devm_kzalloc(&pdev->dev, sizeof(struct dw_spi_bt1), GFP_KERNEL);
+	if (!dwsbt1)
+		return -ENOMEM;
+
+	dws = &dwsbt1->dws;
+
+	dws->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &mem);
+	if (IS_ERR(dws->regs))
+		return PTR_ERR(dws->regs);
+
+	dws->paddr = mem->start;
+
+	dwsbt1->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(dwsbt1->clk))
+		return PTR_ERR(dwsbt1->clk);
+
+	ret = clk_prepare_enable(dwsbt1->clk);
+	if (ret)
+		return ret;
+
+	dws->bus_num = pdev->id;
+	dws->reg_io_width = 4;
+	dws->max_freq = clk_get_rate(dwsbt1->clk);
+	if (!dws->max_freq) {
+		ret = -EINVAL;
+		goto err_disable_clk;
+	}
+
+	init_func = device_get_match_data(&pdev->dev);
+	ret = init_func(pdev, dwsbt1);
+	if (ret)
+		goto err_disable_clk;
+
+	pm_runtime_enable(&pdev->dev);
+
+	ret = dw_spi_add_host(&pdev->dev, dws);
+	if (ret)
+		goto err_disable_clk;
+
+	platform_set_drvdata(pdev, dwsbt1);
+
+	return 0;
+
+err_disable_clk:
+	clk_disable_unprepare(dwsbt1->clk);
+
+	return ret;
+}
+
+static int dw_spi_bt1_remove(struct platform_device *pdev)
+{
+	struct dw_spi_bt1 *dwsbt1 = platform_get_drvdata(pdev);
+
+	dw_spi_remove_host(&dwsbt1->dws);
+
+	pm_runtime_disable(&pdev->dev);
+
+	clk_disable_unprepare(dwsbt1->clk);
+
+	return 0;
+}
+
+static const struct of_device_id dw_spi_bt1_of_match[] = {
+	{ .compatible = "baikal,bt1-ssi", .data = dw_spi_bt1_std_init},
+	{ .compatible = "baikal,bt1-sys-ssi", .data = dw_spi_bt1_sys_init},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, dw_spi_bt1_of_match);
+
+static struct platform_driver dw_spi_bt1_driver = {
+	.probe	= dw_spi_bt1_probe,
+	.remove	= dw_spi_bt1_remove,
+	.driver	= {
+		.name		= "bt1-sys-ssi",
+		.of_match_table	= dw_spi_bt1_of_match,
+	},
+};
+module_platform_driver(dw_spi_bt1_driver);
+
+MODULE_AUTHOR("Serge Semin <Sergey.Semin@baikalelectronics.ru>");
+MODULE_DESCRIPTION("Baikal-T1 System Boot SPI Controller driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-dw-core.c b/drivers/spi/spi-dw-core.c
new file mode 100644
index 0000000..c33866f
--- /dev/null
+++ b/drivers/spi/spi-dw-core.c
@@ -0,0 +1,957 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Designware SPI core controller driver (refer pxa2xx_spi.c)
+ *
+ * Copyright (c) 2009, Intel Corporation.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/preempt.h>
+#include <linux/highmem.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+#include <linux/string.h>
+#include <linux/of.h>
+
+#include "spi-dw.h"
+
+#ifdef CONFIG_DEBUG_FS
+#include <linux/debugfs.h>
+#endif
+
+/* Slave spi_device related */
+struct chip_data {
+	u32 cr0;
+	u32 rx_sample_dly;	/* RX sample delay */
+};
+
+#ifdef CONFIG_DEBUG_FS
+
+#define DW_SPI_DBGFS_REG(_name, _off)	\
+{					\
+	.name = _name,			\
+	.offset = _off,			\
+}
+
+static const struct debugfs_reg32 dw_spi_dbgfs_regs[] = {
+	DW_SPI_DBGFS_REG("CTRLR0", DW_SPI_CTRLR0),
+	DW_SPI_DBGFS_REG("CTRLR1", DW_SPI_CTRLR1),
+	DW_SPI_DBGFS_REG("SSIENR", DW_SPI_SSIENR),
+	DW_SPI_DBGFS_REG("SER", DW_SPI_SER),
+	DW_SPI_DBGFS_REG("BAUDR", DW_SPI_BAUDR),
+	DW_SPI_DBGFS_REG("TXFTLR", DW_SPI_TXFTLR),
+	DW_SPI_DBGFS_REG("RXFTLR", DW_SPI_RXFTLR),
+	DW_SPI_DBGFS_REG("TXFLR", DW_SPI_TXFLR),
+	DW_SPI_DBGFS_REG("RXFLR", DW_SPI_RXFLR),
+	DW_SPI_DBGFS_REG("SR", DW_SPI_SR),
+	DW_SPI_DBGFS_REG("IMR", DW_SPI_IMR),
+	DW_SPI_DBGFS_REG("ISR", DW_SPI_ISR),
+	DW_SPI_DBGFS_REG("DMACR", DW_SPI_DMACR),
+	DW_SPI_DBGFS_REG("DMATDLR", DW_SPI_DMATDLR),
+	DW_SPI_DBGFS_REG("DMARDLR", DW_SPI_DMARDLR),
+	DW_SPI_DBGFS_REG("RX_SAMPLE_DLY", DW_SPI_RX_SAMPLE_DLY),
+};
+
+static int dw_spi_debugfs_init(struct dw_spi *dws)
+{
+	char name[32];
+
+	snprintf(name, 32, "dw_spi%d", dws->master->bus_num);
+	dws->debugfs = debugfs_create_dir(name, NULL);
+	if (!dws->debugfs)
+		return -ENOMEM;
+
+	dws->regset.regs = dw_spi_dbgfs_regs;
+	dws->regset.nregs = ARRAY_SIZE(dw_spi_dbgfs_regs);
+	dws->regset.base = dws->regs;
+	debugfs_create_regset32("registers", 0400, dws->debugfs, &dws->regset);
+
+	return 0;
+}
+
+static void dw_spi_debugfs_remove(struct dw_spi *dws)
+{
+	debugfs_remove_recursive(dws->debugfs);
+}
+
+#else
+static inline int dw_spi_debugfs_init(struct dw_spi *dws)
+{
+	return 0;
+}
+
+static inline void dw_spi_debugfs_remove(struct dw_spi *dws)
+{
+}
+#endif /* CONFIG_DEBUG_FS */
+
+void dw_spi_set_cs(struct spi_device *spi, bool enable)
+{
+	struct dw_spi *dws = spi_controller_get_devdata(spi->controller);
+	bool cs_high = !!(spi->mode & SPI_CS_HIGH);
+
+	/*
+	 * DW SPI controller demands any native CS being set in order to
+	 * proceed with data transfer. So in order to activate the SPI
+	 * communications we must set a corresponding bit in the Slave
+	 * Enable register no matter whether the SPI core is configured to
+	 * support active-high or active-low CS level.
+	 */
+	if (cs_high == enable)
+		dw_writel(dws, DW_SPI_SER, BIT(spi->chip_select));
+	else
+		dw_writel(dws, DW_SPI_SER, 0);
+}
+EXPORT_SYMBOL_GPL(dw_spi_set_cs);
+
+/* Return the max entries we can fill into tx fifo */
+static inline u32 tx_max(struct dw_spi *dws)
+{
+	u32 tx_room, rxtx_gap;
+
+	tx_room = dws->fifo_len - dw_readl(dws, DW_SPI_TXFLR);
+
+	/*
+	 * Another concern is about the tx/rx mismatch, we
+	 * though to use (dws->fifo_len - rxflr - txflr) as
+	 * one maximum value for tx, but it doesn't cover the
+	 * data which is out of tx/rx fifo and inside the
+	 * shift registers. So a control from sw point of
+	 * view is taken.
+	 */
+	rxtx_gap = dws->fifo_len - (dws->rx_len - dws->tx_len);
+
+	return min3((u32)dws->tx_len, tx_room, rxtx_gap);
+}
+
+/* Return the max entries we should read out of rx fifo */
+static inline u32 rx_max(struct dw_spi *dws)
+{
+	return min_t(u32, dws->rx_len, dw_readl(dws, DW_SPI_RXFLR));
+}
+
+static void dw_writer(struct dw_spi *dws)
+{
+	u32 max = tx_max(dws);
+	u16 txw = 0;
+
+	while (max--) {
+		if (dws->tx) {
+			if (dws->n_bytes == 1)
+				txw = *(u8 *)(dws->tx);
+			else
+				txw = *(u16 *)(dws->tx);
+
+			dws->tx += dws->n_bytes;
+		}
+		dw_write_io_reg(dws, DW_SPI_DR, txw);
+		--dws->tx_len;
+	}
+}
+
+static void dw_reader(struct dw_spi *dws)
+{
+	u32 max = rx_max(dws);
+	u16 rxw;
+
+	while (max--) {
+		rxw = dw_read_io_reg(dws, DW_SPI_DR);
+		if (dws->rx) {
+			if (dws->n_bytes == 1)
+				*(u8 *)(dws->rx) = rxw;
+			else
+				*(u16 *)(dws->rx) = rxw;
+
+			dws->rx += dws->n_bytes;
+		}
+		--dws->rx_len;
+	}
+}
+
+int dw_spi_check_status(struct dw_spi *dws, bool raw)
+{
+	u32 irq_status;
+	int ret = 0;
+
+	if (raw)
+		irq_status = dw_readl(dws, DW_SPI_RISR);
+	else
+		irq_status = dw_readl(dws, DW_SPI_ISR);
+
+	if (irq_status & SPI_INT_RXOI) {
+		dev_err(&dws->master->dev, "RX FIFO overflow detected\n");
+		ret = -EIO;
+	}
+
+	if (irq_status & SPI_INT_RXUI) {
+		dev_err(&dws->master->dev, "RX FIFO underflow detected\n");
+		ret = -EIO;
+	}
+
+	if (irq_status & SPI_INT_TXOI) {
+		dev_err(&dws->master->dev, "TX FIFO overflow detected\n");
+		ret = -EIO;
+	}
+
+	/* Generically handle the erroneous situation */
+	if (ret) {
+		spi_reset_chip(dws);
+		if (dws->master->cur_msg)
+			dws->master->cur_msg->status = ret;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dw_spi_check_status);
+
+static irqreturn_t dw_spi_transfer_handler(struct dw_spi *dws)
+{
+	u16 irq_status = dw_readl(dws, DW_SPI_ISR);
+
+	if (dw_spi_check_status(dws, false)) {
+		spi_finalize_current_transfer(dws->master);
+		return IRQ_HANDLED;
+	}
+
+	/*
+	 * Read data from the Rx FIFO every time we've got a chance executing
+	 * this method. If there is nothing left to receive, terminate the
+	 * procedure. Otherwise adjust the Rx FIFO Threshold level if it's a
+	 * final stage of the transfer. By doing so we'll get the next IRQ
+	 * right when the leftover incoming data is received.
+	 */
+	dw_reader(dws);
+	if (!dws->rx_len) {
+		spi_mask_intr(dws, 0xff);
+		spi_finalize_current_transfer(dws->master);
+	} else if (dws->rx_len <= dw_readl(dws, DW_SPI_RXFTLR)) {
+		dw_writel(dws, DW_SPI_RXFTLR, dws->rx_len - 1);
+	}
+
+	/*
+	 * Send data out if Tx FIFO Empty IRQ is received. The IRQ will be
+	 * disabled after the data transmission is finished so not to
+	 * have the TXE IRQ flood at the final stage of the transfer.
+	 */
+	if (irq_status & SPI_INT_TXEI) {
+		dw_writer(dws);
+		if (!dws->tx_len)
+			spi_mask_intr(dws, SPI_INT_TXEI);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t dw_spi_irq(int irq, void *dev_id)
+{
+	struct spi_controller *master = dev_id;
+	struct dw_spi *dws = spi_controller_get_devdata(master);
+	u16 irq_status = dw_readl(dws, DW_SPI_ISR) & 0x3f;
+
+	if (!irq_status)
+		return IRQ_NONE;
+
+	if (!master->cur_msg) {
+		spi_mask_intr(dws, 0xff);
+		return IRQ_HANDLED;
+	}
+
+	return dws->transfer_handler(dws);
+}
+
+static u32 dw_spi_prepare_cr0(struct dw_spi *dws, struct spi_device *spi)
+{
+	u32 cr0 = 0;
+
+	if (!(dws->caps & DW_SPI_CAP_DWC_SSI)) {
+		/* CTRLR0[ 5: 4] Frame Format */
+		cr0 |= SSI_MOTO_SPI << SPI_FRF_OFFSET;
+
+		/*
+		 * SPI mode (SCPOL|SCPH)
+		 * CTRLR0[ 6] Serial Clock Phase
+		 * CTRLR0[ 7] Serial Clock Polarity
+		 */
+		cr0 |= ((spi->mode & SPI_CPOL) ? 1 : 0) << SPI_SCOL_OFFSET;
+		cr0 |= ((spi->mode & SPI_CPHA) ? 1 : 0) << SPI_SCPH_OFFSET;
+
+		/* CTRLR0[11] Shift Register Loop */
+		cr0 |= ((spi->mode & SPI_LOOP) ? 1 : 0) << SPI_SRL_OFFSET;
+	} else {
+		/* CTRLR0[ 7: 6] Frame Format */
+		cr0 |= SSI_MOTO_SPI << DWC_SSI_CTRLR0_FRF_OFFSET;
+
+		/*
+		 * SPI mode (SCPOL|SCPH)
+		 * CTRLR0[ 8] Serial Clock Phase
+		 * CTRLR0[ 9] Serial Clock Polarity
+		 */
+		cr0 |= ((spi->mode & SPI_CPOL) ? 1 : 0) << DWC_SSI_CTRLR0_SCPOL_OFFSET;
+		cr0 |= ((spi->mode & SPI_CPHA) ? 1 : 0) << DWC_SSI_CTRLR0_SCPH_OFFSET;
+
+		/* CTRLR0[13] Shift Register Loop */
+		cr0 |= ((spi->mode & SPI_LOOP) ? 1 : 0) << DWC_SSI_CTRLR0_SRL_OFFSET;
+
+		if (dws->caps & DW_SPI_CAP_KEEMBAY_MST)
+			cr0 |= DWC_SSI_CTRLR0_KEEMBAY_MST;
+	}
+
+	return cr0;
+}
+
+void dw_spi_update_config(struct dw_spi *dws, struct spi_device *spi,
+			  struct dw_spi_cfg *cfg)
+{
+	struct chip_data *chip = spi_get_ctldata(spi);
+	u32 cr0 = chip->cr0;
+	u32 speed_hz;
+	u16 clk_div;
+
+	/* CTRLR0[ 4/3: 0] Data Frame Size */
+	cr0 |= (cfg->dfs - 1);
+
+	if (!(dws->caps & DW_SPI_CAP_DWC_SSI))
+		/* CTRLR0[ 9:8] Transfer Mode */
+		cr0 |= cfg->tmode << SPI_TMOD_OFFSET;
+	else
+		/* CTRLR0[11:10] Transfer Mode */
+		cr0 |= cfg->tmode << DWC_SSI_CTRLR0_TMOD_OFFSET;
+
+	dw_writel(dws, DW_SPI_CTRLR0, cr0);
+
+	if (cfg->tmode == SPI_TMOD_EPROMREAD || cfg->tmode == SPI_TMOD_RO)
+		dw_writel(dws, DW_SPI_CTRLR1, cfg->ndf ? cfg->ndf - 1 : 0);
+
+	/* Note DW APB SSI clock divider doesn't support odd numbers */
+	clk_div = (DIV_ROUND_UP(dws->max_freq, cfg->freq) + 1) & 0xfffe;
+	speed_hz = dws->max_freq / clk_div;
+
+	if (dws->current_freq != speed_hz) {
+		spi_set_clk(dws, clk_div);
+		dws->current_freq = speed_hz;
+	}
+
+	/* Update RX sample delay if required */
+	if (dws->cur_rx_sample_dly != chip->rx_sample_dly) {
+		dw_writel(dws, DW_SPI_RX_SAMPLE_DLY, chip->rx_sample_dly);
+		dws->cur_rx_sample_dly = chip->rx_sample_dly;
+	}
+}
+EXPORT_SYMBOL_GPL(dw_spi_update_config);
+
+static void dw_spi_irq_setup(struct dw_spi *dws)
+{
+	u16 level;
+	u8 imask;
+
+	/*
+	 * Originally Tx and Rx data lengths match. Rx FIFO Threshold level
+	 * will be adjusted at the final stage of the IRQ-based SPI transfer
+	 * execution so not to lose the leftover of the incoming data.
+	 */
+	level = min_t(u16, dws->fifo_len / 2, dws->tx_len);
+	dw_writel(dws, DW_SPI_TXFTLR, level);
+	dw_writel(dws, DW_SPI_RXFTLR, level - 1);
+
+	dws->transfer_handler = dw_spi_transfer_handler;
+
+	imask = SPI_INT_TXEI | SPI_INT_TXOI | SPI_INT_RXUI | SPI_INT_RXOI |
+		SPI_INT_RXFI;
+	spi_umask_intr(dws, imask);
+}
+
+/*
+ * The iterative procedure of the poll-based transfer is simple: write as much
+ * as possible to the Tx FIFO, wait until the pending to receive data is ready
+ * to be read, read it from the Rx FIFO and check whether the performed
+ * procedure has been successful.
+ *
+ * Note this method the same way as the IRQ-based transfer won't work well for
+ * the SPI devices connected to the controller with native CS due to the
+ * automatic CS assertion/de-assertion.
+ */
+static int dw_spi_poll_transfer(struct dw_spi *dws,
+				struct spi_transfer *transfer)
+{
+	struct spi_delay delay;
+	u16 nbits;
+	int ret;
+
+	delay.unit = SPI_DELAY_UNIT_SCK;
+	nbits = dws->n_bytes * BITS_PER_BYTE;
+
+	do {
+		dw_writer(dws);
+
+		delay.value = nbits * (dws->rx_len - dws->tx_len);
+		spi_delay_exec(&delay, transfer);
+
+		dw_reader(dws);
+
+		ret = dw_spi_check_status(dws, true);
+		if (ret)
+			return ret;
+	} while (dws->rx_len);
+
+	return 0;
+}
+
+static int dw_spi_transfer_one(struct spi_controller *master,
+		struct spi_device *spi, struct spi_transfer *transfer)
+{
+	struct dw_spi *dws = spi_controller_get_devdata(master);
+	struct dw_spi_cfg cfg = {
+		.tmode = SPI_TMOD_TR,
+		.dfs = transfer->bits_per_word,
+		.freq = transfer->speed_hz,
+	};
+	int ret;
+
+	dws->dma_mapped = 0;
+	dws->n_bytes = DIV_ROUND_UP(transfer->bits_per_word, BITS_PER_BYTE);
+	dws->tx = (void *)transfer->tx_buf;
+	dws->tx_len = transfer->len / dws->n_bytes;
+	dws->rx = transfer->rx_buf;
+	dws->rx_len = dws->tx_len;
+
+	/* Ensure the data above is visible for all CPUs */
+	smp_mb();
+
+	spi_enable_chip(dws, 0);
+
+	dw_spi_update_config(dws, spi, &cfg);
+
+	transfer->effective_speed_hz = dws->current_freq;
+
+	/* Check if current transfer is a DMA transaction */
+	if (master->can_dma && master->can_dma(master, spi, transfer))
+		dws->dma_mapped = master->cur_msg_mapped;
+
+	/* For poll mode just disable all interrupts */
+	spi_mask_intr(dws, 0xff);
+
+	if (dws->dma_mapped) {
+		ret = dws->dma_ops->dma_setup(dws, transfer);
+		if (ret)
+			return ret;
+	}
+
+	spi_enable_chip(dws, 1);
+
+	if (dws->dma_mapped)
+		return dws->dma_ops->dma_transfer(dws, transfer);
+	else if (dws->irq == IRQ_NOTCONNECTED)
+		return dw_spi_poll_transfer(dws, transfer);
+
+	dw_spi_irq_setup(dws);
+
+	return 1;
+}
+
+static void dw_spi_handle_err(struct spi_controller *master,
+		struct spi_message *msg)
+{
+	struct dw_spi *dws = spi_controller_get_devdata(master);
+
+	if (dws->dma_mapped)
+		dws->dma_ops->dma_stop(dws);
+
+	spi_reset_chip(dws);
+}
+
+static int dw_spi_adjust_mem_op_size(struct spi_mem *mem, struct spi_mem_op *op)
+{
+	if (op->data.dir == SPI_MEM_DATA_IN)
+		op->data.nbytes = clamp_val(op->data.nbytes, 0, SPI_NDF_MASK + 1);
+
+	return 0;
+}
+
+static bool dw_spi_supports_mem_op(struct spi_mem *mem,
+				   const struct spi_mem_op *op)
+{
+	if (op->data.buswidth > 1 || op->addr.buswidth > 1 ||
+	    op->dummy.buswidth > 1 || op->cmd.buswidth > 1)
+		return false;
+
+	return spi_mem_default_supports_op(mem, op);
+}
+
+static int dw_spi_init_mem_buf(struct dw_spi *dws, const struct spi_mem_op *op)
+{
+	unsigned int i, j, len;
+	u8 *out;
+
+	/*
+	 * Calculate the total length of the EEPROM command transfer and
+	 * either use the pre-allocated buffer or create a temporary one.
+	 */
+	len = op->cmd.nbytes + op->addr.nbytes + op->dummy.nbytes;
+	if (op->data.dir == SPI_MEM_DATA_OUT)
+		len += op->data.nbytes;
+
+	if (len <= SPI_BUF_SIZE) {
+		out = dws->buf;
+	} else {
+		out = kzalloc(len, GFP_KERNEL);
+		if (!out)
+			return -ENOMEM;
+	}
+
+	/*
+	 * Collect the operation code, address and dummy bytes into the single
+	 * buffer. If it's a transfer with data to be sent, also copy it into the
+	 * single buffer in order to speed the data transmission up.
+	 */
+	for (i = 0; i < op->cmd.nbytes; ++i)
+		out[i] = SPI_GET_BYTE(op->cmd.opcode, op->cmd.nbytes - i - 1);
+	for (j = 0; j < op->addr.nbytes; ++i, ++j)
+		out[i] = SPI_GET_BYTE(op->addr.val, op->addr.nbytes - j - 1);
+	for (j = 0; j < op->dummy.nbytes; ++i, ++j)
+		out[i] = 0x0;
+
+	if (op->data.dir == SPI_MEM_DATA_OUT)
+		memcpy(&out[i], op->data.buf.out, op->data.nbytes);
+
+	dws->n_bytes = 1;
+	dws->tx = out;
+	dws->tx_len = len;
+	if (op->data.dir == SPI_MEM_DATA_IN) {
+		dws->rx = op->data.buf.in;
+		dws->rx_len = op->data.nbytes;
+	} else {
+		dws->rx = NULL;
+		dws->rx_len = 0;
+	}
+
+	return 0;
+}
+
+static void dw_spi_free_mem_buf(struct dw_spi *dws)
+{
+	if (dws->tx != dws->buf)
+		kfree(dws->tx);
+}
+
+static int dw_spi_write_then_read(struct dw_spi *dws, struct spi_device *spi)
+{
+	u32 room, entries, sts;
+	unsigned int len;
+	u8 *buf;
+
+	/*
+	 * At initial stage we just pre-fill the Tx FIFO in with no rush,
+	 * since native CS hasn't been enabled yet and the automatic data
+	 * transmission won't start til we do that.
+	 */
+	len = min(dws->fifo_len, dws->tx_len);
+	buf = dws->tx;
+	while (len--)
+		dw_write_io_reg(dws, DW_SPI_DR, *buf++);
+
+	/*
+	 * After setting any bit in the SER register the transmission will
+	 * start automatically. We have to keep up with that procedure
+	 * otherwise the CS de-assertion will happen whereupon the memory
+	 * operation will be pre-terminated.
+	 */
+	len = dws->tx_len - ((void *)buf - dws->tx);
+	dw_spi_set_cs(spi, false);
+	while (len) {
+		entries = readl_relaxed(dws->regs + DW_SPI_TXFLR);
+		if (!entries) {
+			dev_err(&dws->master->dev, "CS de-assertion on Tx\n");
+			return -EIO;
+		}
+		room = min(dws->fifo_len - entries, len);
+		for (; room; --room, --len)
+			dw_write_io_reg(dws, DW_SPI_DR, *buf++);
+	}
+
+	/*
+	 * Data fetching will start automatically if the EEPROM-read mode is
+	 * activated. We have to keep up with the incoming data pace to
+	 * prevent the Rx FIFO overflow causing the inbound data loss.
+	 */
+	len = dws->rx_len;
+	buf = dws->rx;
+	while (len) {
+		entries = readl_relaxed(dws->regs + DW_SPI_RXFLR);
+		if (!entries) {
+			sts = readl_relaxed(dws->regs + DW_SPI_RISR);
+			if (sts & SPI_INT_RXOI) {
+				dev_err(&dws->master->dev, "FIFO overflow on Rx\n");
+				return -EIO;
+			}
+			continue;
+		}
+		entries = min(entries, len);
+		for (; entries; --entries, --len)
+			*buf++ = dw_read_io_reg(dws, DW_SPI_DR);
+	}
+
+	return 0;
+}
+
+static inline bool dw_spi_ctlr_busy(struct dw_spi *dws)
+{
+	return dw_readl(dws, DW_SPI_SR) & SR_BUSY;
+}
+
+static int dw_spi_wait_mem_op_done(struct dw_spi *dws)
+{
+	int retry = SPI_WAIT_RETRIES;
+	struct spi_delay delay;
+	unsigned long ns, us;
+	u32 nents;
+
+	nents = dw_readl(dws, DW_SPI_TXFLR);
+	ns = NSEC_PER_SEC / dws->current_freq * nents;
+	ns *= dws->n_bytes * BITS_PER_BYTE;
+	if (ns <= NSEC_PER_USEC) {
+		delay.unit = SPI_DELAY_UNIT_NSECS;
+		delay.value = ns;
+	} else {
+		us = DIV_ROUND_UP(ns, NSEC_PER_USEC);
+		delay.unit = SPI_DELAY_UNIT_USECS;
+		delay.value = clamp_val(us, 0, USHRT_MAX);
+	}
+
+	while (dw_spi_ctlr_busy(dws) && retry--)
+		spi_delay_exec(&delay, NULL);
+
+	if (retry < 0) {
+		dev_err(&dws->master->dev, "Mem op hanged up\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static void dw_spi_stop_mem_op(struct dw_spi *dws, struct spi_device *spi)
+{
+	spi_enable_chip(dws, 0);
+	dw_spi_set_cs(spi, true);
+	spi_enable_chip(dws, 1);
+}
+
+/*
+ * The SPI memory operation implementation below is the best choice for the
+ * devices, which are selected by the native chip-select lane. It's
+ * specifically developed to workaround the problem with automatic chip-select
+ * lane toggle when there is no data in the Tx FIFO buffer. Luckily the current
+ * SPI-mem core calls exec_op() callback only if the GPIO-based CS is
+ * unavailable.
+ */
+static int dw_spi_exec_mem_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+	struct dw_spi *dws = spi_controller_get_devdata(mem->spi->controller);
+	struct dw_spi_cfg cfg;
+	unsigned long flags;
+	int ret;
+
+	/*
+	 * Collect the outbound data into a single buffer to speed the
+	 * transmission up at least on the initial stage.
+	 */
+	ret = dw_spi_init_mem_buf(dws, op);
+	if (ret)
+		return ret;
+
+	/*
+	 * DW SPI EEPROM-read mode is required only for the SPI memory Data-IN
+	 * operation. Transmit-only mode is suitable for the rest of them.
+	 */
+	cfg.dfs = 8;
+	cfg.freq = clamp(mem->spi->max_speed_hz, 0U, dws->max_mem_freq);
+	if (op->data.dir == SPI_MEM_DATA_IN) {
+		cfg.tmode = SPI_TMOD_EPROMREAD;
+		cfg.ndf = op->data.nbytes;
+	} else {
+		cfg.tmode = SPI_TMOD_TO;
+	}
+
+	spi_enable_chip(dws, 0);
+
+	dw_spi_update_config(dws, mem->spi, &cfg);
+
+	spi_mask_intr(dws, 0xff);
+
+	spi_enable_chip(dws, 1);
+
+	/*
+	 * DW APB SSI controller has very nasty peculiarities. First originally
+	 * (without any vendor-specific modifications) it doesn't provide a
+	 * direct way to set and clear the native chip-select signal. Instead
+	 * the controller asserts the CS lane if Tx FIFO isn't empty and a
+	 * transmission is going on, and automatically de-asserts it back to
+	 * the high level if the Tx FIFO doesn't have anything to be pushed
+	 * out. Due to that a multi-tasking or heavy IRQs activity might be
+	 * fatal, since the transfer procedure preemption may cause the Tx FIFO
+	 * getting empty and sudden CS de-assertion, which in the middle of the
+	 * transfer will most likely cause the data loss. Secondly the
+	 * EEPROM-read or Read-only DW SPI transfer modes imply the incoming
+	 * data being automatically pulled in into the Rx FIFO. So if the
+	 * driver software is late in fetching the data from the FIFO before
+	 * it's overflown, new incoming data will be lost. In order to make
+	 * sure the executed memory operations are CS-atomic and to prevent the
+	 * Rx FIFO overflow we have to disable the local interrupts so to block
+	 * any preemption during the subsequent IO operations.
+	 *
+	 * Note. At some circumstances disabling IRQs may not help to prevent
+	 * the problems described above. The CS de-assertion and Rx FIFO
+	 * overflow may still happen due to the relatively slow system bus or
+	 * CPU not working fast enough, so the write-then-read algo implemented
+	 * here just won't keep up with the SPI bus data transfer. Such
+	 * situation is highly platform specific and is supposed to be fixed by
+	 * manually restricting the SPI bus frequency using the
+	 * dws->max_mem_freq parameter.
+	 */
+	local_irq_save(flags);
+	preempt_disable();
+
+	ret = dw_spi_write_then_read(dws, mem->spi);
+
+	local_irq_restore(flags);
+	preempt_enable();
+
+	/*
+	 * Wait for the operation being finished and check the controller
+	 * status only if there hasn't been any run-time error detected. In the
+	 * former case it's just pointless. In the later one to prevent an
+	 * additional error message printing since any hw error flag being set
+	 * would be due to an error detected on the data transfer.
+	 */
+	if (!ret) {
+		ret = dw_spi_wait_mem_op_done(dws);
+		if (!ret)
+			ret = dw_spi_check_status(dws, true);
+	}
+
+	dw_spi_stop_mem_op(dws, mem->spi);
+
+	dw_spi_free_mem_buf(dws);
+
+	return ret;
+}
+
+/*
+ * Initialize the default memory operations if a glue layer hasn't specified
+ * custom ones. Direct mapping operations will be preserved anyway since DW SPI
+ * controller doesn't have an embedded dirmap interface. Note the memory
+ * operations implemented in this driver is the best choice only for the DW APB
+ * SSI controller with standard native CS functionality. If a hardware vendor
+ * has fixed the automatic CS assertion/de-assertion peculiarity, then it will
+ * be safer to use the normal SPI-messages-based transfers implementation.
+ */
+static void dw_spi_init_mem_ops(struct dw_spi *dws)
+{
+	if (!dws->mem_ops.exec_op && !(dws->caps & DW_SPI_CAP_CS_OVERRIDE) &&
+	    !dws->set_cs) {
+		dws->mem_ops.adjust_op_size = dw_spi_adjust_mem_op_size;
+		dws->mem_ops.supports_op = dw_spi_supports_mem_op;
+		dws->mem_ops.exec_op = dw_spi_exec_mem_op;
+		if (!dws->max_mem_freq)
+			dws->max_mem_freq = dws->max_freq;
+	}
+}
+
+/* This may be called twice for each spi dev */
+static int dw_spi_setup(struct spi_device *spi)
+{
+	struct dw_spi *dws = spi_controller_get_devdata(spi->controller);
+	struct chip_data *chip;
+
+	/* Only alloc on first setup */
+	chip = spi_get_ctldata(spi);
+	if (!chip) {
+		struct dw_spi *dws = spi_controller_get_devdata(spi->controller);
+		u32 rx_sample_dly_ns;
+
+		chip = kzalloc(sizeof(struct chip_data), GFP_KERNEL);
+		if (!chip)
+			return -ENOMEM;
+		spi_set_ctldata(spi, chip);
+		/* Get specific / default rx-sample-delay */
+		if (device_property_read_u32(&spi->dev,
+					     "rx-sample-delay-ns",
+					     &rx_sample_dly_ns) != 0)
+			/* Use default controller value */
+			rx_sample_dly_ns = dws->def_rx_sample_dly_ns;
+		chip->rx_sample_dly = DIV_ROUND_CLOSEST(rx_sample_dly_ns,
+							NSEC_PER_SEC /
+							dws->max_freq);
+	}
+
+	/*
+	 * Update CR0 data each time the setup callback is invoked since
+	 * the device parameters could have been changed, for instance, by
+	 * the MMC SPI driver or something else.
+	 */
+	chip->cr0 = dw_spi_prepare_cr0(dws, spi);
+
+	return 0;
+}
+
+static void dw_spi_cleanup(struct spi_device *spi)
+{
+	struct chip_data *chip = spi_get_ctldata(spi);
+
+	kfree(chip);
+	spi_set_ctldata(spi, NULL);
+}
+
+/* Restart the controller, disable all interrupts, clean rx fifo */
+static void spi_hw_init(struct device *dev, struct dw_spi *dws)
+{
+	spi_reset_chip(dws);
+
+	/*
+	 * Try to detect the FIFO depth if not set by interface driver,
+	 * the depth could be from 2 to 256 from HW spec
+	 */
+	if (!dws->fifo_len) {
+		u32 fifo;
+
+		for (fifo = 1; fifo < 256; fifo++) {
+			dw_writel(dws, DW_SPI_TXFTLR, fifo);
+			if (fifo != dw_readl(dws, DW_SPI_TXFTLR))
+				break;
+		}
+		dw_writel(dws, DW_SPI_TXFTLR, 0);
+
+		dws->fifo_len = (fifo == 1) ? 0 : fifo;
+		dev_dbg(dev, "Detected FIFO size: %u bytes\n", dws->fifo_len);
+	}
+
+	/* enable HW fixup for explicit CS deselect for Amazon's alpine chip */
+	if (dws->caps & DW_SPI_CAP_CS_OVERRIDE)
+		dw_writel(dws, DW_SPI_CS_OVERRIDE, 0xF);
+}
+
+int dw_spi_add_host(struct device *dev, struct dw_spi *dws)
+{
+	struct spi_controller *master;
+	int ret;
+
+	if (!dws)
+		return -EINVAL;
+
+	master = spi_alloc_master(dev, 0);
+	if (!master)
+		return -ENOMEM;
+
+	dws->master = master;
+	dws->dma_addr = (dma_addr_t)(dws->paddr + DW_SPI_DR);
+
+	spi_controller_set_devdata(master, dws);
+
+	/* Basic HW init */
+	spi_hw_init(dev, dws);
+
+	ret = request_irq(dws->irq, dw_spi_irq, IRQF_SHARED, dev_name(dev),
+			  master);
+	if (ret < 0 && ret != -ENOTCONN) {
+		dev_err(dev, "can not get IRQ\n");
+		goto err_free_master;
+	}
+
+	dw_spi_init_mem_ops(dws);
+
+	master->use_gpio_descriptors = true;
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LOOP;
+	master->bits_per_word_mask =  SPI_BPW_RANGE_MASK(4, 16);
+	master->bus_num = dws->bus_num;
+	master->num_chipselect = dws->num_cs;
+	master->setup = dw_spi_setup;
+	master->cleanup = dw_spi_cleanup;
+	if (dws->set_cs)
+		master->set_cs = dws->set_cs;
+	else
+		master->set_cs = dw_spi_set_cs;
+	master->transfer_one = dw_spi_transfer_one;
+	master->handle_err = dw_spi_handle_err;
+	if (dws->mem_ops.exec_op)
+		master->mem_ops = &dws->mem_ops;
+	master->max_speed_hz = dws->max_freq;
+	master->dev.of_node = dev->of_node;
+	master->dev.fwnode = dev->fwnode;
+	master->flags = SPI_MASTER_GPIO_SS;
+	master->auto_runtime_pm = true;
+
+	/* Get default rx sample delay */
+	device_property_read_u32(dev, "rx-sample-delay-ns",
+				 &dws->def_rx_sample_dly_ns);
+
+	if (dws->dma_ops && dws->dma_ops->dma_init) {
+		ret = dws->dma_ops->dma_init(dev, dws);
+		if (ret) {
+			dev_warn(dev, "DMA init failed\n");
+		} else {
+			master->can_dma = dws->dma_ops->can_dma;
+			master->flags |= SPI_CONTROLLER_MUST_TX;
+		}
+	}
+
+	ret = spi_register_controller(master);
+	if (ret) {
+		dev_err(&master->dev, "problem registering spi master\n");
+		goto err_dma_exit;
+	}
+
+	dw_spi_debugfs_init(dws);
+	return 0;
+
+err_dma_exit:
+	if (dws->dma_ops && dws->dma_ops->dma_exit)
+		dws->dma_ops->dma_exit(dws);
+	spi_enable_chip(dws, 0);
+	free_irq(dws->irq, master);
+err_free_master:
+	spi_controller_put(master);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dw_spi_add_host);
+
+void dw_spi_remove_host(struct dw_spi *dws)
+{
+	dw_spi_debugfs_remove(dws);
+
+	spi_unregister_controller(dws->master);
+
+	if (dws->dma_ops && dws->dma_ops->dma_exit)
+		dws->dma_ops->dma_exit(dws);
+
+	spi_shutdown_chip(dws);
+
+	free_irq(dws->irq, dws->master);
+}
+EXPORT_SYMBOL_GPL(dw_spi_remove_host);
+
+int dw_spi_suspend_host(struct dw_spi *dws)
+{
+	int ret;
+
+	ret = spi_controller_suspend(dws->master);
+	if (ret)
+		return ret;
+
+	spi_shutdown_chip(dws);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dw_spi_suspend_host);
+
+int dw_spi_resume_host(struct dw_spi *dws)
+{
+	spi_hw_init(&dws->master->dev, dws);
+	return spi_controller_resume(dws->master);
+}
+EXPORT_SYMBOL_GPL(dw_spi_resume_host);
+
+MODULE_AUTHOR("Feng Tang <feng.tang@intel.com>");
+MODULE_DESCRIPTION("Driver for DesignWare SPI controller core");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-dw-dma.c b/drivers/spi/spi-dw-dma.c
new file mode 100644
index 0000000..a09831c
--- /dev/null
+++ b/drivers/spi/spi-dw-dma.c
@@ -0,0 +1,656 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Special handling for DW DMA core
+ *
+ * Copyright (c) 2009, 2014 Intel Corporation.
+ */
+
+#include <linux/completion.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/irqreturn.h>
+#include <linux/jiffies.h>
+#include <linux/pci.h>
+#include <linux/platform_data/dma-dw.h>
+#include <linux/spi/spi.h>
+#include <linux/types.h>
+
+#include "spi-dw.h"
+
+#define RX_BUSY		0
+#define RX_BURST_LEVEL	16
+#define TX_BUSY		1
+#define TX_BURST_LEVEL	16
+
+static bool dw_spi_dma_chan_filter(struct dma_chan *chan, void *param)
+{
+	struct dw_dma_slave *s = param;
+
+	if (s->dma_dev != chan->device->dev)
+		return false;
+
+	chan->private = s;
+	return true;
+}
+
+static void dw_spi_dma_maxburst_init(struct dw_spi *dws)
+{
+	struct dma_slave_caps caps;
+	u32 max_burst, def_burst;
+	int ret;
+
+	def_burst = dws->fifo_len / 2;
+
+	ret = dma_get_slave_caps(dws->rxchan, &caps);
+	if (!ret && caps.max_burst)
+		max_burst = caps.max_burst;
+	else
+		max_burst = RX_BURST_LEVEL;
+
+	dws->rxburst = min(max_burst, def_burst);
+	dw_writel(dws, DW_SPI_DMARDLR, dws->rxburst - 1);
+
+	ret = dma_get_slave_caps(dws->txchan, &caps);
+	if (!ret && caps.max_burst)
+		max_burst = caps.max_burst;
+	else
+		max_burst = TX_BURST_LEVEL;
+
+	/*
+	 * Having a Rx DMA channel serviced with higher priority than a Tx DMA
+	 * channel might not be enough to provide a well balanced DMA-based
+	 * SPI transfer interface. There might still be moments when the Tx DMA
+	 * channel is occasionally handled faster than the Rx DMA channel.
+	 * That in its turn will eventually cause the SPI Rx FIFO overflow if
+	 * SPI bus speed is high enough to fill the SPI Rx FIFO in before it's
+	 * cleared by the Rx DMA channel. In order to fix the problem the Tx
+	 * DMA activity is intentionally slowed down by limiting the SPI Tx
+	 * FIFO depth with a value twice bigger than the Tx burst length.
+	 */
+	dws->txburst = min(max_burst, def_burst);
+	dw_writel(dws, DW_SPI_DMATDLR, dws->txburst);
+}
+
+static void dw_spi_dma_sg_burst_init(struct dw_spi *dws)
+{
+	struct dma_slave_caps tx = {0}, rx = {0};
+
+	dma_get_slave_caps(dws->txchan, &tx);
+	dma_get_slave_caps(dws->rxchan, &rx);
+
+	if (tx.max_sg_burst > 0 && rx.max_sg_burst > 0)
+		dws->dma_sg_burst = min(tx.max_sg_burst, rx.max_sg_burst);
+	else if (tx.max_sg_burst > 0)
+		dws->dma_sg_burst = tx.max_sg_burst;
+	else if (rx.max_sg_burst > 0)
+		dws->dma_sg_burst = rx.max_sg_burst;
+	else
+		dws->dma_sg_burst = 0;
+}
+
+static int dw_spi_dma_init_mfld(struct device *dev, struct dw_spi *dws)
+{
+	struct dw_dma_slave dma_tx = { .dst_id = 1 }, *tx = &dma_tx;
+	struct dw_dma_slave dma_rx = { .src_id = 0 }, *rx = &dma_rx;
+	struct pci_dev *dma_dev;
+	dma_cap_mask_t mask;
+
+	/*
+	 * Get pci device for DMA controller, currently it could only
+	 * be the DMA controller of Medfield
+	 */
+	dma_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x0827, NULL);
+	if (!dma_dev)
+		return -ENODEV;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	/* 1. Init rx channel */
+	rx->dma_dev = &dma_dev->dev;
+	dws->rxchan = dma_request_channel(mask, dw_spi_dma_chan_filter, rx);
+	if (!dws->rxchan)
+		goto err_exit;
+
+	/* 2. Init tx channel */
+	tx->dma_dev = &dma_dev->dev;
+	dws->txchan = dma_request_channel(mask, dw_spi_dma_chan_filter, tx);
+	if (!dws->txchan)
+		goto free_rxchan;
+
+	dws->master->dma_rx = dws->rxchan;
+	dws->master->dma_tx = dws->txchan;
+
+	init_completion(&dws->dma_completion);
+
+	dw_spi_dma_maxburst_init(dws);
+
+	dw_spi_dma_sg_burst_init(dws);
+
+	return 0;
+
+free_rxchan:
+	dma_release_channel(dws->rxchan);
+	dws->rxchan = NULL;
+err_exit:
+	return -EBUSY;
+}
+
+static int dw_spi_dma_init_generic(struct device *dev, struct dw_spi *dws)
+{
+	dws->rxchan = dma_request_slave_channel(dev, "rx");
+	if (!dws->rxchan)
+		return -ENODEV;
+
+	dws->txchan = dma_request_slave_channel(dev, "tx");
+	if (!dws->txchan) {
+		dma_release_channel(dws->rxchan);
+		dws->rxchan = NULL;
+		return -ENODEV;
+	}
+
+	dws->master->dma_rx = dws->rxchan;
+	dws->master->dma_tx = dws->txchan;
+
+	init_completion(&dws->dma_completion);
+
+	dw_spi_dma_maxburst_init(dws);
+
+	dw_spi_dma_sg_burst_init(dws);
+
+	return 0;
+}
+
+static void dw_spi_dma_exit(struct dw_spi *dws)
+{
+	if (dws->txchan) {
+		dmaengine_terminate_sync(dws->txchan);
+		dma_release_channel(dws->txchan);
+	}
+
+	if (dws->rxchan) {
+		dmaengine_terminate_sync(dws->rxchan);
+		dma_release_channel(dws->rxchan);
+	}
+}
+
+static irqreturn_t dw_spi_dma_transfer_handler(struct dw_spi *dws)
+{
+	dw_spi_check_status(dws, false);
+
+	complete(&dws->dma_completion);
+
+	return IRQ_HANDLED;
+}
+
+static bool dw_spi_can_dma(struct spi_controller *master,
+			   struct spi_device *spi, struct spi_transfer *xfer)
+{
+	struct dw_spi *dws = spi_controller_get_devdata(master);
+
+	return xfer->len > dws->fifo_len;
+}
+
+static enum dma_slave_buswidth dw_spi_dma_convert_width(u8 n_bytes)
+{
+	if (n_bytes == 1)
+		return DMA_SLAVE_BUSWIDTH_1_BYTE;
+	else if (n_bytes == 2)
+		return DMA_SLAVE_BUSWIDTH_2_BYTES;
+
+	return DMA_SLAVE_BUSWIDTH_UNDEFINED;
+}
+
+static int dw_spi_dma_wait(struct dw_spi *dws, unsigned int len, u32 speed)
+{
+	unsigned long long ms;
+
+	ms = len * MSEC_PER_SEC * BITS_PER_BYTE;
+	do_div(ms, speed);
+	ms += ms + 200;
+
+	if (ms > UINT_MAX)
+		ms = UINT_MAX;
+
+	ms = wait_for_completion_timeout(&dws->dma_completion,
+					 msecs_to_jiffies(ms));
+
+	if (ms == 0) {
+		dev_err(&dws->master->cur_msg->spi->dev,
+			"DMA transaction timed out\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static inline bool dw_spi_dma_tx_busy(struct dw_spi *dws)
+{
+	return !(dw_readl(dws, DW_SPI_SR) & SR_TF_EMPT);
+}
+
+static int dw_spi_dma_wait_tx_done(struct dw_spi *dws,
+				   struct spi_transfer *xfer)
+{
+	int retry = SPI_WAIT_RETRIES;
+	struct spi_delay delay;
+	u32 nents;
+
+	nents = dw_readl(dws, DW_SPI_TXFLR);
+	delay.unit = SPI_DELAY_UNIT_SCK;
+	delay.value = nents * dws->n_bytes * BITS_PER_BYTE;
+
+	while (dw_spi_dma_tx_busy(dws) && retry--)
+		spi_delay_exec(&delay, xfer);
+
+	if (retry < 0) {
+		dev_err(&dws->master->dev, "Tx hanged up\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * dws->dma_chan_busy is set before the dma transfer starts, callback for tx
+ * channel will clear a corresponding bit.
+ */
+static void dw_spi_dma_tx_done(void *arg)
+{
+	struct dw_spi *dws = arg;
+
+	clear_bit(TX_BUSY, &dws->dma_chan_busy);
+	if (test_bit(RX_BUSY, &dws->dma_chan_busy))
+		return;
+
+	complete(&dws->dma_completion);
+}
+
+static int dw_spi_dma_config_tx(struct dw_spi *dws)
+{
+	struct dma_slave_config txconf;
+
+	memset(&txconf, 0, sizeof(txconf));
+	txconf.direction = DMA_MEM_TO_DEV;
+	txconf.dst_addr = dws->dma_addr;
+	txconf.dst_maxburst = dws->txburst;
+	txconf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	txconf.dst_addr_width = dw_spi_dma_convert_width(dws->n_bytes);
+	txconf.device_fc = false;
+
+	return dmaengine_slave_config(dws->txchan, &txconf);
+}
+
+static int dw_spi_dma_submit_tx(struct dw_spi *dws, struct scatterlist *sgl,
+				unsigned int nents)
+{
+	struct dma_async_tx_descriptor *txdesc;
+	dma_cookie_t cookie;
+	int ret;
+
+	txdesc = dmaengine_prep_slave_sg(dws->txchan, sgl, nents,
+					 DMA_MEM_TO_DEV,
+					 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!txdesc)
+		return -ENOMEM;
+
+	txdesc->callback = dw_spi_dma_tx_done;
+	txdesc->callback_param = dws;
+
+	cookie = dmaengine_submit(txdesc);
+	ret = dma_submit_error(cookie);
+	if (ret) {
+		dmaengine_terminate_sync(dws->txchan);
+		return ret;
+	}
+
+	set_bit(TX_BUSY, &dws->dma_chan_busy);
+
+	return 0;
+}
+
+static inline bool dw_spi_dma_rx_busy(struct dw_spi *dws)
+{
+	return !!(dw_readl(dws, DW_SPI_SR) & SR_RF_NOT_EMPT);
+}
+
+static int dw_spi_dma_wait_rx_done(struct dw_spi *dws)
+{
+	int retry = SPI_WAIT_RETRIES;
+	struct spi_delay delay;
+	unsigned long ns, us;
+	u32 nents;
+
+	/*
+	 * It's unlikely that DMA engine is still doing the data fetching, but
+	 * if it's let's give it some reasonable time. The timeout calculation
+	 * is based on the synchronous APB/SSI reference clock rate, on a
+	 * number of data entries left in the Rx FIFO, times a number of clock
+	 * periods normally needed for a single APB read/write transaction
+	 * without PREADY signal utilized (which is true for the DW APB SSI
+	 * controller).
+	 */
+	nents = dw_readl(dws, DW_SPI_RXFLR);
+	ns = 4U * NSEC_PER_SEC / dws->max_freq * nents;
+	if (ns <= NSEC_PER_USEC) {
+		delay.unit = SPI_DELAY_UNIT_NSECS;
+		delay.value = ns;
+	} else {
+		us = DIV_ROUND_UP(ns, NSEC_PER_USEC);
+		delay.unit = SPI_DELAY_UNIT_USECS;
+		delay.value = clamp_val(us, 0, USHRT_MAX);
+	}
+
+	while (dw_spi_dma_rx_busy(dws) && retry--)
+		spi_delay_exec(&delay, NULL);
+
+	if (retry < 0) {
+		dev_err(&dws->master->dev, "Rx hanged up\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/*
+ * dws->dma_chan_busy is set before the dma transfer starts, callback for rx
+ * channel will clear a corresponding bit.
+ */
+static void dw_spi_dma_rx_done(void *arg)
+{
+	struct dw_spi *dws = arg;
+
+	clear_bit(RX_BUSY, &dws->dma_chan_busy);
+	if (test_bit(TX_BUSY, &dws->dma_chan_busy))
+		return;
+
+	complete(&dws->dma_completion);
+}
+
+static int dw_spi_dma_config_rx(struct dw_spi *dws)
+{
+	struct dma_slave_config rxconf;
+
+	memset(&rxconf, 0, sizeof(rxconf));
+	rxconf.direction = DMA_DEV_TO_MEM;
+	rxconf.src_addr = dws->dma_addr;
+	rxconf.src_maxburst = dws->rxburst;
+	rxconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	rxconf.src_addr_width = dw_spi_dma_convert_width(dws->n_bytes);
+	rxconf.device_fc = false;
+
+	return dmaengine_slave_config(dws->rxchan, &rxconf);
+}
+
+static int dw_spi_dma_submit_rx(struct dw_spi *dws, struct scatterlist *sgl,
+				unsigned int nents)
+{
+	struct dma_async_tx_descriptor *rxdesc;
+	dma_cookie_t cookie;
+	int ret;
+
+	rxdesc = dmaengine_prep_slave_sg(dws->rxchan, sgl, nents,
+					 DMA_DEV_TO_MEM,
+					 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!rxdesc)
+		return -ENOMEM;
+
+	rxdesc->callback = dw_spi_dma_rx_done;
+	rxdesc->callback_param = dws;
+
+	cookie = dmaengine_submit(rxdesc);
+	ret = dma_submit_error(cookie);
+	if (ret) {
+		dmaengine_terminate_sync(dws->rxchan);
+		return ret;
+	}
+
+	set_bit(RX_BUSY, &dws->dma_chan_busy);
+
+	return 0;
+}
+
+static int dw_spi_dma_setup(struct dw_spi *dws, struct spi_transfer *xfer)
+{
+	u16 imr, dma_ctrl;
+	int ret;
+
+	if (!xfer->tx_buf)
+		return -EINVAL;
+
+	/* Setup DMA channels */
+	ret = dw_spi_dma_config_tx(dws);
+	if (ret)
+		return ret;
+
+	if (xfer->rx_buf) {
+		ret = dw_spi_dma_config_rx(dws);
+		if (ret)
+			return ret;
+	}
+
+	/* Set the DMA handshaking interface */
+	dma_ctrl = SPI_DMA_TDMAE;
+	if (xfer->rx_buf)
+		dma_ctrl |= SPI_DMA_RDMAE;
+	dw_writel(dws, DW_SPI_DMACR, dma_ctrl);
+
+	/* Set the interrupt mask */
+	imr = SPI_INT_TXOI;
+	if (xfer->rx_buf)
+		imr |= SPI_INT_RXUI | SPI_INT_RXOI;
+	spi_umask_intr(dws, imr);
+
+	reinit_completion(&dws->dma_completion);
+
+	dws->transfer_handler = dw_spi_dma_transfer_handler;
+
+	return 0;
+}
+
+static int dw_spi_dma_transfer_all(struct dw_spi *dws,
+				   struct spi_transfer *xfer)
+{
+	int ret;
+
+	/* Submit the DMA Tx transfer */
+	ret = dw_spi_dma_submit_tx(dws, xfer->tx_sg.sgl, xfer->tx_sg.nents);
+	if (ret)
+		goto err_clear_dmac;
+
+	/* Submit the DMA Rx transfer if required */
+	if (xfer->rx_buf) {
+		ret = dw_spi_dma_submit_rx(dws, xfer->rx_sg.sgl,
+					   xfer->rx_sg.nents);
+		if (ret)
+			goto err_clear_dmac;
+
+		/* rx must be started before tx due to spi instinct */
+		dma_async_issue_pending(dws->rxchan);
+	}
+
+	dma_async_issue_pending(dws->txchan);
+
+	ret = dw_spi_dma_wait(dws, xfer->len, xfer->effective_speed_hz);
+
+err_clear_dmac:
+	dw_writel(dws, DW_SPI_DMACR, 0);
+
+	return ret;
+}
+
+/*
+ * In case if at least one of the requested DMA channels doesn't support the
+ * hardware accelerated SG list entries traverse, the DMA driver will most
+ * likely work that around by performing the IRQ-based SG list entries
+ * resubmission. That might and will cause a problem if the DMA Tx channel is
+ * recharged and re-executed before the Rx DMA channel. Due to
+ * non-deterministic IRQ-handler execution latency the DMA Tx channel will
+ * start pushing data to the SPI bus before the Rx DMA channel is even
+ * reinitialized with the next inbound SG list entry. By doing so the DMA Tx
+ * channel will implicitly start filling the DW APB SSI Rx FIFO up, which while
+ * the DMA Rx channel being recharged and re-executed will eventually be
+ * overflown.
+ *
+ * In order to solve the problem we have to feed the DMA engine with SG list
+ * entries one-by-one. It shall keep the DW APB SSI Tx and Rx FIFOs
+ * synchronized and prevent the Rx FIFO overflow. Since in general the tx_sg
+ * and rx_sg lists may have different number of entries of different lengths
+ * (though total length should match) let's virtually split the SG-lists to the
+ * set of DMA transfers, which length is a minimum of the ordered SG-entries
+ * lengths. An ASCII-sketch of the implemented algo is following:
+ *                  xfer->len
+ *                |___________|
+ * tx_sg list:    |___|____|__|
+ * rx_sg list:    |_|____|____|
+ * DMA transfers: |_|_|__|_|__|
+ *
+ * Note in order to have this workaround solving the denoted problem the DMA
+ * engine driver should properly initialize the max_sg_burst capability and set
+ * the DMA device max segment size parameter with maximum data block size the
+ * DMA engine supports.
+ */
+
+static int dw_spi_dma_transfer_one(struct dw_spi *dws,
+				   struct spi_transfer *xfer)
+{
+	struct scatterlist *tx_sg = NULL, *rx_sg = NULL, tx_tmp, rx_tmp;
+	unsigned int tx_len = 0, rx_len = 0;
+	unsigned int base, len;
+	int ret;
+
+	sg_init_table(&tx_tmp, 1);
+	sg_init_table(&rx_tmp, 1);
+
+	for (base = 0, len = 0; base < xfer->len; base += len) {
+		/* Fetch next Tx DMA data chunk */
+		if (!tx_len) {
+			tx_sg = !tx_sg ? &xfer->tx_sg.sgl[0] : sg_next(tx_sg);
+			sg_dma_address(&tx_tmp) = sg_dma_address(tx_sg);
+			tx_len = sg_dma_len(tx_sg);
+		}
+
+		/* Fetch next Rx DMA data chunk */
+		if (!rx_len) {
+			rx_sg = !rx_sg ? &xfer->rx_sg.sgl[0] : sg_next(rx_sg);
+			sg_dma_address(&rx_tmp) = sg_dma_address(rx_sg);
+			rx_len = sg_dma_len(rx_sg);
+		}
+
+		len = min(tx_len, rx_len);
+
+		sg_dma_len(&tx_tmp) = len;
+		sg_dma_len(&rx_tmp) = len;
+
+		/* Submit DMA Tx transfer */
+		ret = dw_spi_dma_submit_tx(dws, &tx_tmp, 1);
+		if (ret)
+			break;
+
+		/* Submit DMA Rx transfer */
+		ret = dw_spi_dma_submit_rx(dws, &rx_tmp, 1);
+		if (ret)
+			break;
+
+		/* Rx must be started before Tx due to SPI instinct */
+		dma_async_issue_pending(dws->rxchan);
+
+		dma_async_issue_pending(dws->txchan);
+
+		/*
+		 * Here we only need to wait for the DMA transfer to be
+		 * finished since SPI controller is kept enabled during the
+		 * procedure this loop implements and there is no risk to lose
+		 * data left in the Tx/Rx FIFOs.
+		 */
+		ret = dw_spi_dma_wait(dws, len, xfer->effective_speed_hz);
+		if (ret)
+			break;
+
+		reinit_completion(&dws->dma_completion);
+
+		sg_dma_address(&tx_tmp) += len;
+		sg_dma_address(&rx_tmp) += len;
+		tx_len -= len;
+		rx_len -= len;
+	}
+
+	dw_writel(dws, DW_SPI_DMACR, 0);
+
+	return ret;
+}
+
+static int dw_spi_dma_transfer(struct dw_spi *dws, struct spi_transfer *xfer)
+{
+	unsigned int nents;
+	int ret;
+
+	nents = max(xfer->tx_sg.nents, xfer->rx_sg.nents);
+
+	/*
+	 * Execute normal DMA-based transfer (which submits the Rx and Tx SG
+	 * lists directly to the DMA engine at once) if either full hardware
+	 * accelerated SG list traverse is supported by both channels, or the
+	 * Tx-only SPI transfer is requested, or the DMA engine is capable to
+	 * handle both SG lists on hardware accelerated basis.
+	 */
+	if (!dws->dma_sg_burst || !xfer->rx_buf || nents <= dws->dma_sg_burst)
+		ret = dw_spi_dma_transfer_all(dws, xfer);
+	else
+		ret = dw_spi_dma_transfer_one(dws, xfer);
+	if (ret)
+		return ret;
+
+	if (dws->master->cur_msg->status == -EINPROGRESS) {
+		ret = dw_spi_dma_wait_tx_done(dws, xfer);
+		if (ret)
+			return ret;
+	}
+
+	if (xfer->rx_buf && dws->master->cur_msg->status == -EINPROGRESS)
+		ret = dw_spi_dma_wait_rx_done(dws);
+
+	return ret;
+}
+
+static void dw_spi_dma_stop(struct dw_spi *dws)
+{
+	if (test_bit(TX_BUSY, &dws->dma_chan_busy)) {
+		dmaengine_terminate_sync(dws->txchan);
+		clear_bit(TX_BUSY, &dws->dma_chan_busy);
+	}
+	if (test_bit(RX_BUSY, &dws->dma_chan_busy)) {
+		dmaengine_terminate_sync(dws->rxchan);
+		clear_bit(RX_BUSY, &dws->dma_chan_busy);
+	}
+}
+
+static const struct dw_spi_dma_ops dw_spi_dma_mfld_ops = {
+	.dma_init	= dw_spi_dma_init_mfld,
+	.dma_exit	= dw_spi_dma_exit,
+	.dma_setup	= dw_spi_dma_setup,
+	.can_dma	= dw_spi_can_dma,
+	.dma_transfer	= dw_spi_dma_transfer,
+	.dma_stop	= dw_spi_dma_stop,
+};
+
+void dw_spi_dma_setup_mfld(struct dw_spi *dws)
+{
+	dws->dma_ops = &dw_spi_dma_mfld_ops;
+}
+EXPORT_SYMBOL_GPL(dw_spi_dma_setup_mfld);
+
+static const struct dw_spi_dma_ops dw_spi_dma_generic_ops = {
+	.dma_init	= dw_spi_dma_init_generic,
+	.dma_exit	= dw_spi_dma_exit,
+	.dma_setup	= dw_spi_dma_setup,
+	.can_dma	= dw_spi_can_dma,
+	.dma_transfer	= dw_spi_dma_transfer,
+	.dma_stop	= dw_spi_dma_stop,
+};
+
+void dw_spi_dma_setup_generic(struct dw_spi *dws)
+{
+	dws->dma_ops = &dw_spi_dma_generic_ops;
+}
+EXPORT_SYMBOL_GPL(dw_spi_dma_setup_generic);
diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c
deleted file mode 100644
index b07710c..0000000
--- a/drivers/spi/spi-dw-mid.c
+++ /dev/null
@@ -1,328 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Special handling for DW core on Intel MID platform
- *
- * Copyright (c) 2009, 2014 Intel Corporation.
- */
-
-#include <linux/dma-mapping.h>
-#include <linux/dmaengine.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/spi/spi.h>
-#include <linux/types.h>
-
-#include "spi-dw.h"
-
-#ifdef CONFIG_SPI_DW_MID_DMA
-#include <linux/pci.h>
-#include <linux/platform_data/dma-dw.h>
-
-#define RX_BUSY		0
-#define TX_BUSY		1
-
-static struct dw_dma_slave mid_dma_tx = { .dst_id = 1 };
-static struct dw_dma_slave mid_dma_rx = { .src_id = 0 };
-
-static bool mid_spi_dma_chan_filter(struct dma_chan *chan, void *param)
-{
-	struct dw_dma_slave *s = param;
-
-	if (s->dma_dev != chan->device->dev)
-		return false;
-
-	chan->private = s;
-	return true;
-}
-
-static int mid_spi_dma_init(struct dw_spi *dws)
-{
-	struct pci_dev *dma_dev;
-	struct dw_dma_slave *tx = dws->dma_tx;
-	struct dw_dma_slave *rx = dws->dma_rx;
-	dma_cap_mask_t mask;
-
-	/*
-	 * Get pci device for DMA controller, currently it could only
-	 * be the DMA controller of Medfield
-	 */
-	dma_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x0827, NULL);
-	if (!dma_dev)
-		return -ENODEV;
-
-	dma_cap_zero(mask);
-	dma_cap_set(DMA_SLAVE, mask);
-
-	/* 1. Init rx channel */
-	rx->dma_dev = &dma_dev->dev;
-	dws->rxchan = dma_request_channel(mask, mid_spi_dma_chan_filter, rx);
-	if (!dws->rxchan)
-		goto err_exit;
-	dws->master->dma_rx = dws->rxchan;
-
-	/* 2. Init tx channel */
-	tx->dma_dev = &dma_dev->dev;
-	dws->txchan = dma_request_channel(mask, mid_spi_dma_chan_filter, tx);
-	if (!dws->txchan)
-		goto free_rxchan;
-	dws->master->dma_tx = dws->txchan;
-
-	dws->dma_inited = 1;
-	return 0;
-
-free_rxchan:
-	dma_release_channel(dws->rxchan);
-err_exit:
-	return -EBUSY;
-}
-
-static void mid_spi_dma_exit(struct dw_spi *dws)
-{
-	if (!dws->dma_inited)
-		return;
-
-	dmaengine_terminate_sync(dws->txchan);
-	dma_release_channel(dws->txchan);
-
-	dmaengine_terminate_sync(dws->rxchan);
-	dma_release_channel(dws->rxchan);
-}
-
-static irqreturn_t dma_transfer(struct dw_spi *dws)
-{
-	u16 irq_status = dw_readl(dws, DW_SPI_ISR);
-
-	if (!irq_status)
-		return IRQ_NONE;
-
-	dw_readl(dws, DW_SPI_ICR);
-	spi_reset_chip(dws);
-
-	dev_err(&dws->master->dev, "%s: FIFO overrun/underrun\n", __func__);
-	dws->master->cur_msg->status = -EIO;
-	spi_finalize_current_transfer(dws->master);
-	return IRQ_HANDLED;
-}
-
-static bool mid_spi_can_dma(struct spi_controller *master,
-		struct spi_device *spi, struct spi_transfer *xfer)
-{
-	struct dw_spi *dws = spi_controller_get_devdata(master);
-
-	if (!dws->dma_inited)
-		return false;
-
-	return xfer->len > dws->fifo_len;
-}
-
-static enum dma_slave_buswidth convert_dma_width(u32 dma_width) {
-	if (dma_width == 1)
-		return DMA_SLAVE_BUSWIDTH_1_BYTE;
-	else if (dma_width == 2)
-		return DMA_SLAVE_BUSWIDTH_2_BYTES;
-
-	return DMA_SLAVE_BUSWIDTH_UNDEFINED;
-}
-
-/*
- * dws->dma_chan_busy is set before the dma transfer starts, callback for tx
- * channel will clear a corresponding bit.
- */
-static void dw_spi_dma_tx_done(void *arg)
-{
-	struct dw_spi *dws = arg;
-
-	clear_bit(TX_BUSY, &dws->dma_chan_busy);
-	if (test_bit(RX_BUSY, &dws->dma_chan_busy))
-		return;
-	spi_finalize_current_transfer(dws->master);
-}
-
-static struct dma_async_tx_descriptor *dw_spi_dma_prepare_tx(struct dw_spi *dws,
-		struct spi_transfer *xfer)
-{
-	struct dma_slave_config txconf;
-	struct dma_async_tx_descriptor *txdesc;
-
-	if (!xfer->tx_buf)
-		return NULL;
-
-	memset(&txconf, 0, sizeof(txconf));
-	txconf.direction = DMA_MEM_TO_DEV;
-	txconf.dst_addr = dws->dma_addr;
-	txconf.dst_maxburst = 16;
-	txconf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-	txconf.dst_addr_width = convert_dma_width(dws->dma_width);
-	txconf.device_fc = false;
-
-	dmaengine_slave_config(dws->txchan, &txconf);
-
-	txdesc = dmaengine_prep_slave_sg(dws->txchan,
-				xfer->tx_sg.sgl,
-				xfer->tx_sg.nents,
-				DMA_MEM_TO_DEV,
-				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
-	if (!txdesc)
-		return NULL;
-
-	txdesc->callback = dw_spi_dma_tx_done;
-	txdesc->callback_param = dws;
-
-	return txdesc;
-}
-
-/*
- * dws->dma_chan_busy is set before the dma transfer starts, callback for rx
- * channel will clear a corresponding bit.
- */
-static void dw_spi_dma_rx_done(void *arg)
-{
-	struct dw_spi *dws = arg;
-
-	clear_bit(RX_BUSY, &dws->dma_chan_busy);
-	if (test_bit(TX_BUSY, &dws->dma_chan_busy))
-		return;
-	spi_finalize_current_transfer(dws->master);
-}
-
-static struct dma_async_tx_descriptor *dw_spi_dma_prepare_rx(struct dw_spi *dws,
-		struct spi_transfer *xfer)
-{
-	struct dma_slave_config rxconf;
-	struct dma_async_tx_descriptor *rxdesc;
-
-	if (!xfer->rx_buf)
-		return NULL;
-
-	memset(&rxconf, 0, sizeof(rxconf));
-	rxconf.direction = DMA_DEV_TO_MEM;
-	rxconf.src_addr = dws->dma_addr;
-	rxconf.src_maxburst = 16;
-	rxconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-	rxconf.src_addr_width = convert_dma_width(dws->dma_width);
-	rxconf.device_fc = false;
-
-	dmaengine_slave_config(dws->rxchan, &rxconf);
-
-	rxdesc = dmaengine_prep_slave_sg(dws->rxchan,
-				xfer->rx_sg.sgl,
-				xfer->rx_sg.nents,
-				DMA_DEV_TO_MEM,
-				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
-	if (!rxdesc)
-		return NULL;
-
-	rxdesc->callback = dw_spi_dma_rx_done;
-	rxdesc->callback_param = dws;
-
-	return rxdesc;
-}
-
-static int mid_spi_dma_setup(struct dw_spi *dws, struct spi_transfer *xfer)
-{
-	u16 imr = 0, dma_ctrl = 0;
-
-	dw_writel(dws, DW_SPI_DMARDLR, 0xf);
-	dw_writel(dws, DW_SPI_DMATDLR, 0x10);
-
-	if (xfer->tx_buf) {
-		dma_ctrl |= SPI_DMA_TDMAE;
-		imr |= SPI_INT_TXOI;
-	}
-	if (xfer->rx_buf) {
-		dma_ctrl |= SPI_DMA_RDMAE;
-		imr |= SPI_INT_RXUI | SPI_INT_RXOI;
-	}
-	dw_writel(dws, DW_SPI_DMACR, dma_ctrl);
-
-	/* Set the interrupt mask */
-	spi_umask_intr(dws, imr);
-
-	dws->transfer_handler = dma_transfer;
-
-	return 0;
-}
-
-static int mid_spi_dma_transfer(struct dw_spi *dws, struct spi_transfer *xfer)
-{
-	struct dma_async_tx_descriptor *txdesc, *rxdesc;
-
-	/* Prepare the TX dma transfer */
-	txdesc = dw_spi_dma_prepare_tx(dws, xfer);
-
-	/* Prepare the RX dma transfer */
-	rxdesc = dw_spi_dma_prepare_rx(dws, xfer);
-
-	/* rx must be started before tx due to spi instinct */
-	if (rxdesc) {
-		set_bit(RX_BUSY, &dws->dma_chan_busy);
-		dmaengine_submit(rxdesc);
-		dma_async_issue_pending(dws->rxchan);
-	}
-
-	if (txdesc) {
-		set_bit(TX_BUSY, &dws->dma_chan_busy);
-		dmaengine_submit(txdesc);
-		dma_async_issue_pending(dws->txchan);
-	}
-
-	return 1;
-}
-
-static void mid_spi_dma_stop(struct dw_spi *dws)
-{
-	if (test_bit(TX_BUSY, &dws->dma_chan_busy)) {
-		dmaengine_terminate_sync(dws->txchan);
-		clear_bit(TX_BUSY, &dws->dma_chan_busy);
-	}
-	if (test_bit(RX_BUSY, &dws->dma_chan_busy)) {
-		dmaengine_terminate_sync(dws->rxchan);
-		clear_bit(RX_BUSY, &dws->dma_chan_busy);
-	}
-}
-
-static const struct dw_spi_dma_ops mid_dma_ops = {
-	.dma_init	= mid_spi_dma_init,
-	.dma_exit	= mid_spi_dma_exit,
-	.dma_setup	= mid_spi_dma_setup,
-	.can_dma	= mid_spi_can_dma,
-	.dma_transfer	= mid_spi_dma_transfer,
-	.dma_stop	= mid_spi_dma_stop,
-};
-#endif
-
-/* Some specific info for SPI0 controller on Intel MID */
-
-/* HW info for MRST Clk Control Unit, 32b reg per controller */
-#define MRST_SPI_CLK_BASE	100000000	/* 100m */
-#define MRST_CLK_SPI_REG	0xff11d86c
-#define CLK_SPI_BDIV_OFFSET	0
-#define CLK_SPI_BDIV_MASK	0x00000007
-#define CLK_SPI_CDIV_OFFSET	9
-#define CLK_SPI_CDIV_MASK	0x00000e00
-#define CLK_SPI_DISABLE_OFFSET	8
-
-int dw_spi_mid_init(struct dw_spi *dws)
-{
-	void __iomem *clk_reg;
-	u32 clk_cdiv;
-
-	clk_reg = ioremap_nocache(MRST_CLK_SPI_REG, 16);
-	if (!clk_reg)
-		return -ENOMEM;
-
-	/* Get SPI controller operating freq info */
-	clk_cdiv = readl(clk_reg + dws->bus_num * sizeof(u32));
-	clk_cdiv &= CLK_SPI_CDIV_MASK;
-	clk_cdiv >>= CLK_SPI_CDIV_OFFSET;
-	dws->max_freq = MRST_SPI_CLK_BASE / (clk_cdiv + 1);
-
-	iounmap(clk_reg);
-
-#ifdef CONFIG_SPI_DW_MID_DMA
-	dws->dma_tx = &mid_dma_tx;
-	dws->dma_rx = &mid_dma_rx;
-	dws->dma_ops = &mid_dma_ops;
-#endif
-	return 0;
-}
diff --git a/drivers/spi/spi-dw-mmio.c b/drivers/spi/spi-dw-mmio.c
index bd46fca..d0cc5bf 100644
--- a/drivers/spi/spi-dw-mmio.c
+++ b/drivers/spi/spi-dw-mmio.c
@@ -7,8 +7,8 @@
 
 #include <linux/clk.h>
 #include <linux/err.h>
-#include <linux/interrupt.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/spi/spi.h>
 #include <linux/scatterlist.h>
@@ -19,6 +19,7 @@
 #include <linux/acpi.h>
 #include <linux/property.h>
 #include <linux/regmap.h>
+#include <linux/reset.h>
 
 #include "spi-dw.h"
 
@@ -29,6 +30,7 @@
 	struct clk     *clk;
 	struct clk     *pclk;
 	void           *priv;
+	struct reset_control *rstc;
 };
 
 #define MSCC_CPU_SYSTEM_CTRL_GENERAL_CTRL	0x24
@@ -43,9 +45,12 @@
 #define MSCC_SPI_MST_SW_MODE_SW_PIN_CTRL_MODE	BIT(13)
 #define MSCC_SPI_MST_SW_MODE_SW_SPI_CS(x)	(x << 5)
 
+#define SPARX5_FORCE_ENA			0xa4
+#define SPARX5_FORCE_VAL			0xa8
+
 struct dw_spi_mscc {
 	struct regmap       *syscon;
-	void __iomem        *spi_mst;
+	void __iomem        *spi_mst; /* Not sparx5 */
 };
 
 /*
@@ -122,10 +127,97 @@
 				JAGUAR2_IF_SI_OWNER_OFFSET);
 }
 
+/*
+ * The Designware SPI controller (referred to as master in the
+ * documentation) automatically deasserts chip select when the tx fifo
+ * is empty. The chip selects then needs to be driven by a CS override
+ * register. enable is an active low signal.
+ */
+static void dw_spi_sparx5_set_cs(struct spi_device *spi, bool enable)
+{
+	struct dw_spi *dws = spi_master_get_devdata(spi->master);
+	struct dw_spi_mmio *dwsmmio = container_of(dws, struct dw_spi_mmio, dws);
+	struct dw_spi_mscc *dwsmscc = dwsmmio->priv;
+	u8 cs = spi->chip_select;
+
+	if (!enable) {
+		/* CS override drive enable */
+		regmap_write(dwsmscc->syscon, SPARX5_FORCE_ENA, 1);
+		/* Now set CSx enabled */
+		regmap_write(dwsmscc->syscon, SPARX5_FORCE_VAL, ~BIT(cs));
+		/* Allow settle */
+		usleep_range(1, 5);
+	} else {
+		/* CS value */
+		regmap_write(dwsmscc->syscon, SPARX5_FORCE_VAL, ~0);
+		/* Allow settle */
+		usleep_range(1, 5);
+		/* CS override drive disable */
+		regmap_write(dwsmscc->syscon, SPARX5_FORCE_ENA, 0);
+	}
+
+	dw_spi_set_cs(spi, enable);
+}
+
+static int dw_spi_mscc_sparx5_init(struct platform_device *pdev,
+				   struct dw_spi_mmio *dwsmmio)
+{
+	const char *syscon_name = "microchip,sparx5-cpu-syscon";
+	struct device *dev = &pdev->dev;
+	struct dw_spi_mscc *dwsmscc;
+
+	if (!IS_ENABLED(CONFIG_SPI_MUX)) {
+		dev_err(dev, "This driver needs CONFIG_SPI_MUX\n");
+		return -EOPNOTSUPP;
+	}
+
+	dwsmscc = devm_kzalloc(dev, sizeof(*dwsmscc), GFP_KERNEL);
+	if (!dwsmscc)
+		return -ENOMEM;
+
+	dwsmscc->syscon =
+		syscon_regmap_lookup_by_compatible(syscon_name);
+	if (IS_ERR(dwsmscc->syscon)) {
+		dev_err(dev, "No syscon map %s\n", syscon_name);
+		return PTR_ERR(dwsmscc->syscon);
+	}
+
+	dwsmmio->dws.set_cs = dw_spi_sparx5_set_cs;
+	dwsmmio->priv = dwsmscc;
+
+	return 0;
+}
+
 static int dw_spi_alpine_init(struct platform_device *pdev,
 			      struct dw_spi_mmio *dwsmmio)
 {
-	dwsmmio->dws.cs_override = 1;
+	dwsmmio->dws.caps = DW_SPI_CAP_CS_OVERRIDE;
+
+	return 0;
+}
+
+static int dw_spi_dw_apb_init(struct platform_device *pdev,
+			      struct dw_spi_mmio *dwsmmio)
+{
+	dw_spi_dma_setup_generic(&dwsmmio->dws);
+
+	return 0;
+}
+
+static int dw_spi_dwc_ssi_init(struct platform_device *pdev,
+			       struct dw_spi_mmio *dwsmmio)
+{
+	dwsmmio->dws.caps = DW_SPI_CAP_DWC_SSI;
+
+	dw_spi_dma_setup_generic(&dwsmmio->dws);
+
+	return 0;
+}
+
+static int dw_spi_keembay_init(struct platform_device *pdev,
+			       struct dw_spi_mmio *dwsmmio)
+{
+	dwsmmio->dws.caps = DW_SPI_CAP_KEEMBAY_MST | DW_SPI_CAP_DWC_SSI;
 
 	return 0;
 }
@@ -135,6 +227,7 @@
 	int (*init_func)(struct platform_device *pdev,
 			 struct dw_spi_mmio *dwsmmio);
 	struct dw_spi_mmio *dwsmmio;
+	struct resource *mem;
 	struct dw_spi *dws;
 	int ret;
 	int num_cs;
@@ -147,11 +240,11 @@
 	dws = &dwsmmio->dws;
 
 	/* Get basic io resource and map it */
-	dws->regs = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(dws->regs)) {
-		dev_err(&pdev->dev, "SPI region map failed\n");
+	dws->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &mem);
+	if (IS_ERR(dws->regs))
 		return PTR_ERR(dws->regs);
-	}
+
+	dws->paddr = mem->start;
 
 	dws->irq = platform_get_irq(pdev, 0);
 	if (dws->irq < 0)
@@ -174,6 +267,14 @@
 	if (ret)
 		goto out_clk;
 
+	/* find an optional reset controller */
+	dwsmmio->rstc = devm_reset_control_get_optional_exclusive(&pdev->dev, "spi");
+	if (IS_ERR(dwsmmio->rstc)) {
+		ret = PTR_ERR(dwsmmio->rstc);
+		goto out_clk;
+	}
+	reset_control_deassert(dwsmmio->rstc);
+
 	dws->bus_num = pdev->id;
 
 	dws->max_freq = clk_get_rate(dwsmmio->clk);
@@ -193,6 +294,8 @@
 			goto out;
 	}
 
+	pm_runtime_enable(&pdev->dev);
+
 	ret = dw_spi_add_host(&pdev->dev, dws);
 	if (ret)
 		goto out;
@@ -201,9 +304,12 @@
 	return 0;
 
 out:
+	pm_runtime_disable(&pdev->dev);
 	clk_disable_unprepare(dwsmmio->pclk);
 out_clk:
 	clk_disable_unprepare(dwsmmio->clk);
+	reset_control_assert(dwsmmio->rstc);
+
 	return ret;
 }
 
@@ -212,26 +318,34 @@
 	struct dw_spi_mmio *dwsmmio = platform_get_drvdata(pdev);
 
 	dw_spi_remove_host(&dwsmmio->dws);
+	pm_runtime_disable(&pdev->dev);
 	clk_disable_unprepare(dwsmmio->pclk);
 	clk_disable_unprepare(dwsmmio->clk);
+	reset_control_assert(dwsmmio->rstc);
 
 	return 0;
 }
 
 static const struct of_device_id dw_spi_mmio_of_match[] = {
-	{ .compatible = "snps,dw-apb-ssi", },
+	{ .compatible = "snps,dw-apb-ssi", .data = dw_spi_dw_apb_init},
 	{ .compatible = "mscc,ocelot-spi", .data = dw_spi_mscc_ocelot_init},
 	{ .compatible = "mscc,jaguar2-spi", .data = dw_spi_mscc_jaguar2_init},
 	{ .compatible = "amazon,alpine-dw-apb-ssi", .data = dw_spi_alpine_init},
+	{ .compatible = "renesas,rzn1-spi", .data = dw_spi_dw_apb_init},
+	{ .compatible = "snps,dwc-ssi-1.01a", .data = dw_spi_dwc_ssi_init},
+	{ .compatible = "intel,keembay-ssi", .data = dw_spi_keembay_init},
+	{ .compatible = "microchip,sparx5-spi", dw_spi_mscc_sparx5_init},
 	{ /* end of table */}
 };
 MODULE_DEVICE_TABLE(of, dw_spi_mmio_of_match);
 
+#ifdef CONFIG_ACPI
 static const struct acpi_device_id dw_spi_mmio_acpi_match[] = {
-	{"HISI0173", 0},
+	{"HISI0173", (kernel_ulong_t)dw_spi_dw_apb_init},
 	{},
 };
 MODULE_DEVICE_TABLE(acpi, dw_spi_mmio_acpi_match);
+#endif
 
 static struct platform_driver dw_spi_mmio_driver = {
 	.probe		= dw_spi_mmio_probe,
diff --git a/drivers/spi/spi-dw-pci.c b/drivers/spi/spi-dw-pci.c
index 1406449..8a91cd5 100644
--- a/drivers/spi/spi-dw-pci.c
+++ b/drivers/spi/spi-dw-pci.c
@@ -5,8 +5,8 @@
  * Copyright (c) 2009, 2014 Intel Corporation.
  */
 
-#include <linux/interrupt.h>
 #include <linux/pci.h>
+#include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/spi/spi.h>
 #include <linux/module.h>
@@ -15,6 +15,15 @@
 
 #define DRIVER_NAME "dw_spi_pci"
 
+/* HW info for MRST Clk Control Unit, 32b reg per controller */
+#define MRST_SPI_CLK_BASE	100000000	/* 100m */
+#define MRST_CLK_SPI_REG	0xff11d86c
+#define CLK_SPI_BDIV_OFFSET	0
+#define CLK_SPI_BDIV_MASK	0x00000007
+#define CLK_SPI_CDIV_OFFSET	9
+#define CLK_SPI_CDIV_MASK	0x00000e00
+#define CLK_SPI_DISABLE_OFFSET	8
+
 struct spi_pci_desc {
 	int	(*setup)(struct dw_spi *);
 	u16	num_cs;
@@ -22,20 +31,50 @@
 	u32	max_freq;
 };
 
+static int spi_mid_init(struct dw_spi *dws)
+{
+	void __iomem *clk_reg;
+	u32 clk_cdiv;
+
+	clk_reg = ioremap(MRST_CLK_SPI_REG, 16);
+	if (!clk_reg)
+		return -ENOMEM;
+
+	/* Get SPI controller operating freq info */
+	clk_cdiv = readl(clk_reg + dws->bus_num * sizeof(u32));
+	clk_cdiv &= CLK_SPI_CDIV_MASK;
+	clk_cdiv >>= CLK_SPI_CDIV_OFFSET;
+	dws->max_freq = MRST_SPI_CLK_BASE / (clk_cdiv + 1);
+
+	iounmap(clk_reg);
+
+	dw_spi_dma_setup_mfld(dws);
+
+	return 0;
+}
+
+static int spi_generic_init(struct dw_spi *dws)
+{
+	dw_spi_dma_setup_generic(dws);
+
+	return 0;
+}
+
 static struct spi_pci_desc spi_pci_mid_desc_1 = {
-	.setup = dw_spi_mid_init,
+	.setup = spi_mid_init,
 	.num_cs = 5,
 	.bus_num = 0,
 };
 
 static struct spi_pci_desc spi_pci_mid_desc_2 = {
-	.setup = dw_spi_mid_init,
+	.setup = spi_mid_init,
 	.num_cs = 2,
 	.bus_num = 1,
 };
 
 static struct spi_pci_desc spi_pci_ehl_desc = {
-	.num_cs = 1,
+	.setup = spi_generic_init,
+	.num_cs = 2,
 	.bus_num = -1,
 	.max_freq = 100000000,
 };
@@ -57,13 +96,18 @@
 
 	/* Get basic io resource and map it */
 	dws->paddr = pci_resource_start(pdev, pci_bar);
+	pci_set_master(pdev);
 
 	ret = pcim_iomap_regions(pdev, 1 << pci_bar, pci_name(pdev));
 	if (ret)
 		return ret;
 
+	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+	if (ret < 0)
+		return ret;
+
 	dws->regs = pcim_iomap_table(pdev)[pci_bar];
-	dws->irq = pdev->irq;
+	dws->irq = pci_irq_vector(pdev, 0);
 
 	/*
 	 * Specific handling for platforms, like dma setup,
@@ -77,15 +121,16 @@
 		if (desc->setup) {
 			ret = desc->setup(dws);
 			if (ret)
-				return ret;
+				goto err_free_irq_vectors;
 		}
 	} else {
-		return -ENODEV;
+		ret = -ENODEV;
+		goto err_free_irq_vectors;
 	}
 
 	ret = dw_spi_add_host(&pdev->dev, dws);
 	if (ret)
-		return ret;
+		goto err_free_irq_vectors;
 
 	/* PCI hook and SPI hook use the same drv data */
 	pci_set_drvdata(pdev, dws);
@@ -93,14 +138,27 @@
 	dev_info(&pdev->dev, "found PCI SPI controller(ID: %04x:%04x)\n",
 		pdev->vendor, pdev->device);
 
+	pm_runtime_set_autosuspend_delay(&pdev->dev, 1000);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_put_autosuspend(&pdev->dev);
+	pm_runtime_allow(&pdev->dev);
+
 	return 0;
+
+err_free_irq_vectors:
+	pci_free_irq_vectors(pdev);
+	return ret;
 }
 
 static void spi_pci_remove(struct pci_dev *pdev)
 {
 	struct dw_spi *dws = pci_get_drvdata(pdev);
 
+	pm_runtime_forbid(&pdev->dev);
+	pm_runtime_get_noresume(&pdev->dev);
+
 	dw_spi_remove_host(dws);
+	pci_free_irq_vectors(pdev);
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw.c
deleted file mode 100644
index c2f9694..0000000
--- a/drivers/spi/spi-dw.c
+++ /dev/null
@@ -1,590 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Designware SPI core controller driver (refer pxa2xx_spi.c)
- *
- * Copyright (c) 2009, Intel Corporation.
- */
-
-#include <linux/dma-mapping.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/highmem.h>
-#include <linux/delay.h>
-#include <linux/slab.h>
-#include <linux/spi/spi.h>
-
-#include "spi-dw.h"
-
-#ifdef CONFIG_DEBUG_FS
-#include <linux/debugfs.h>
-#endif
-
-/* Slave spi_dev related */
-struct chip_data {
-	u8 tmode;		/* TR/TO/RO/EEPROM */
-	u8 type;		/* SPI/SSP/MicroWire */
-
-	u8 poll_mode;		/* 1 means use poll mode */
-
-	u16 clk_div;		/* baud rate divider */
-	u32 speed_hz;		/* baud rate */
-	void (*cs_control)(u32 command);
-};
-
-#ifdef CONFIG_DEBUG_FS
-#define SPI_REGS_BUFSIZE	1024
-static ssize_t dw_spi_show_regs(struct file *file, char __user *user_buf,
-		size_t count, loff_t *ppos)
-{
-	struct dw_spi *dws = file->private_data;
-	char *buf;
-	u32 len = 0;
-	ssize_t ret;
-
-	buf = kzalloc(SPI_REGS_BUFSIZE, GFP_KERNEL);
-	if (!buf)
-		return 0;
-
-	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
-			"%s registers:\n", dev_name(&dws->master->dev));
-	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
-			"=================================\n");
-	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
-			"CTRL0: \t\t0x%08x\n", dw_readl(dws, DW_SPI_CTRL0));
-	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
-			"CTRL1: \t\t0x%08x\n", dw_readl(dws, DW_SPI_CTRL1));
-	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
-			"SSIENR: \t0x%08x\n", dw_readl(dws, DW_SPI_SSIENR));
-	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
-			"SER: \t\t0x%08x\n", dw_readl(dws, DW_SPI_SER));
-	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
-			"BAUDR: \t\t0x%08x\n", dw_readl(dws, DW_SPI_BAUDR));
-	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
-			"TXFTLR: \t0x%08x\n", dw_readl(dws, DW_SPI_TXFLTR));
-	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
-			"RXFTLR: \t0x%08x\n", dw_readl(dws, DW_SPI_RXFLTR));
-	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
-			"TXFLR: \t\t0x%08x\n", dw_readl(dws, DW_SPI_TXFLR));
-	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
-			"RXFLR: \t\t0x%08x\n", dw_readl(dws, DW_SPI_RXFLR));
-	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
-			"SR: \t\t0x%08x\n", dw_readl(dws, DW_SPI_SR));
-	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
-			"IMR: \t\t0x%08x\n", dw_readl(dws, DW_SPI_IMR));
-	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
-			"ISR: \t\t0x%08x\n", dw_readl(dws, DW_SPI_ISR));
-	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
-			"DMACR: \t\t0x%08x\n", dw_readl(dws, DW_SPI_DMACR));
-	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
-			"DMATDLR: \t0x%08x\n", dw_readl(dws, DW_SPI_DMATDLR));
-	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
-			"DMARDLR: \t0x%08x\n", dw_readl(dws, DW_SPI_DMARDLR));
-	len += scnprintf(buf + len, SPI_REGS_BUFSIZE - len,
-			"=================================\n");
-
-	ret = simple_read_from_buffer(user_buf, count, ppos, buf, len);
-	kfree(buf);
-	return ret;
-}
-
-static const struct file_operations dw_spi_regs_ops = {
-	.owner		= THIS_MODULE,
-	.open		= simple_open,
-	.read		= dw_spi_show_regs,
-	.llseek		= default_llseek,
-};
-
-static int dw_spi_debugfs_init(struct dw_spi *dws)
-{
-	char name[32];
-
-	snprintf(name, 32, "dw_spi%d", dws->master->bus_num);
-	dws->debugfs = debugfs_create_dir(name, NULL);
-	if (!dws->debugfs)
-		return -ENOMEM;
-
-	debugfs_create_file("registers", S_IFREG | S_IRUGO,
-		dws->debugfs, (void *)dws, &dw_spi_regs_ops);
-	return 0;
-}
-
-static void dw_spi_debugfs_remove(struct dw_spi *dws)
-{
-	debugfs_remove_recursive(dws->debugfs);
-}
-
-#else
-static inline int dw_spi_debugfs_init(struct dw_spi *dws)
-{
-	return 0;
-}
-
-static inline void dw_spi_debugfs_remove(struct dw_spi *dws)
-{
-}
-#endif /* CONFIG_DEBUG_FS */
-
-void dw_spi_set_cs(struct spi_device *spi, bool enable)
-{
-	struct dw_spi *dws = spi_controller_get_devdata(spi->controller);
-	struct chip_data *chip = spi_get_ctldata(spi);
-	bool cs_high = !!(spi->mode & SPI_CS_HIGH);
-
-	/* Chip select logic is inverted from spi_set_cs() */
-	if (chip && chip->cs_control)
-		chip->cs_control(!enable);
-
-	/*
-	 * DW SPI controller demands any native CS being set in order to
-	 * proceed with data transfer. So in order to activate the SPI
-	 * communications we must set a corresponding bit in the Slave
-	 * Enable register no matter whether the SPI core is configured to
-	 * support active-high or active-low CS level.
-	 */
-	if (cs_high == enable)
-		dw_writel(dws, DW_SPI_SER, BIT(spi->chip_select));
-	else if (dws->cs_override)
-		dw_writel(dws, DW_SPI_SER, 0);
-}
-EXPORT_SYMBOL_GPL(dw_spi_set_cs);
-
-/* Return the max entries we can fill into tx fifo */
-static inline u32 tx_max(struct dw_spi *dws)
-{
-	u32 tx_left, tx_room, rxtx_gap;
-
-	tx_left = (dws->tx_end - dws->tx) / dws->n_bytes;
-	tx_room = dws->fifo_len - dw_readl(dws, DW_SPI_TXFLR);
-
-	/*
-	 * Another concern is about the tx/rx mismatch, we
-	 * though to use (dws->fifo_len - rxflr - txflr) as
-	 * one maximum value for tx, but it doesn't cover the
-	 * data which is out of tx/rx fifo and inside the
-	 * shift registers. So a control from sw point of
-	 * view is taken.
-	 */
-	rxtx_gap =  ((dws->rx_end - dws->rx) - (dws->tx_end - dws->tx))
-			/ dws->n_bytes;
-
-	return min3(tx_left, tx_room, (u32) (dws->fifo_len - rxtx_gap));
-}
-
-/* Return the max entries we should read out of rx fifo */
-static inline u32 rx_max(struct dw_spi *dws)
-{
-	u32 rx_left = (dws->rx_end - dws->rx) / dws->n_bytes;
-
-	return min_t(u32, rx_left, dw_readl(dws, DW_SPI_RXFLR));
-}
-
-static void dw_writer(struct dw_spi *dws)
-{
-	u32 max;
-	u16 txw = 0;
-
-	spin_lock(&dws->buf_lock);
-	max = tx_max(dws);
-	while (max--) {
-		/* Set the tx word if the transfer's original "tx" is not null */
-		if (dws->tx_end - dws->len) {
-			if (dws->n_bytes == 1)
-				txw = *(u8 *)(dws->tx);
-			else
-				txw = *(u16 *)(dws->tx);
-		}
-		dw_write_io_reg(dws, DW_SPI_DR, txw);
-		dws->tx += dws->n_bytes;
-	}
-	spin_unlock(&dws->buf_lock);
-}
-
-static void dw_reader(struct dw_spi *dws)
-{
-	u32 max;
-	u16 rxw;
-
-	spin_lock(&dws->buf_lock);
-	max = rx_max(dws);
-	while (max--) {
-		rxw = dw_read_io_reg(dws, DW_SPI_DR);
-		/* Care rx only if the transfer's original "rx" is not null */
-		if (dws->rx_end - dws->len) {
-			if (dws->n_bytes == 1)
-				*(u8 *)(dws->rx) = rxw;
-			else
-				*(u16 *)(dws->rx) = rxw;
-		}
-		dws->rx += dws->n_bytes;
-	}
-	spin_unlock(&dws->buf_lock);
-}
-
-static void int_error_stop(struct dw_spi *dws, const char *msg)
-{
-	spi_reset_chip(dws);
-
-	dev_err(&dws->master->dev, "%s\n", msg);
-	dws->master->cur_msg->status = -EIO;
-	spi_finalize_current_transfer(dws->master);
-}
-
-static irqreturn_t interrupt_transfer(struct dw_spi *dws)
-{
-	u16 irq_status = dw_readl(dws, DW_SPI_ISR);
-
-	/* Error handling */
-	if (irq_status & (SPI_INT_TXOI | SPI_INT_RXOI | SPI_INT_RXUI)) {
-		dw_readl(dws, DW_SPI_ICR);
-		int_error_stop(dws, "interrupt_transfer: fifo overrun/underrun");
-		return IRQ_HANDLED;
-	}
-
-	dw_reader(dws);
-	if (dws->rx_end == dws->rx) {
-		spi_mask_intr(dws, SPI_INT_TXEI);
-		spi_finalize_current_transfer(dws->master);
-		return IRQ_HANDLED;
-	}
-	if (irq_status & SPI_INT_TXEI) {
-		spi_mask_intr(dws, SPI_INT_TXEI);
-		dw_writer(dws);
-		/* Enable TX irq always, it will be disabled when RX finished */
-		spi_umask_intr(dws, SPI_INT_TXEI);
-	}
-
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t dw_spi_irq(int irq, void *dev_id)
-{
-	struct spi_controller *master = dev_id;
-	struct dw_spi *dws = spi_controller_get_devdata(master);
-	u16 irq_status = dw_readl(dws, DW_SPI_ISR) & 0x3f;
-
-	if (!irq_status)
-		return IRQ_NONE;
-
-	if (!master->cur_msg) {
-		spi_mask_intr(dws, SPI_INT_TXEI);
-		return IRQ_HANDLED;
-	}
-
-	return dws->transfer_handler(dws);
-}
-
-/* Must be called inside pump_transfers() */
-static int poll_transfer(struct dw_spi *dws)
-{
-	do {
-		dw_writer(dws);
-		dw_reader(dws);
-		cpu_relax();
-	} while (dws->rx_end > dws->rx);
-
-	return 0;
-}
-
-static int dw_spi_transfer_one(struct spi_controller *master,
-		struct spi_device *spi, struct spi_transfer *transfer)
-{
-	struct dw_spi *dws = spi_controller_get_devdata(master);
-	struct chip_data *chip = spi_get_ctldata(spi);
-	unsigned long flags;
-	u8 imask = 0;
-	u16 txlevel = 0;
-	u32 cr0;
-	int ret;
-
-	dws->dma_mapped = 0;
-	spin_lock_irqsave(&dws->buf_lock, flags);
-	dws->tx = (void *)transfer->tx_buf;
-	dws->tx_end = dws->tx + transfer->len;
-	dws->rx = transfer->rx_buf;
-	dws->rx_end = dws->rx + transfer->len;
-	dws->len = transfer->len;
-	spin_unlock_irqrestore(&dws->buf_lock, flags);
-
-	/* Ensure dw->rx and dw->rx_end are visible */
-	smp_mb();
-
-	spi_enable_chip(dws, 0);
-
-	/* Handle per transfer options for bpw and speed */
-	if (transfer->speed_hz != dws->current_freq) {
-		if (transfer->speed_hz != chip->speed_hz) {
-			/* clk_div doesn't support odd number */
-			chip->clk_div = (DIV_ROUND_UP(dws->max_freq, transfer->speed_hz) + 1) & 0xfffe;
-			chip->speed_hz = transfer->speed_hz;
-		}
-		dws->current_freq = transfer->speed_hz;
-		spi_set_clk(dws, chip->clk_div);
-	}
-
-	dws->n_bytes = DIV_ROUND_UP(transfer->bits_per_word, BITS_PER_BYTE);
-	dws->dma_width = DIV_ROUND_UP(transfer->bits_per_word, BITS_PER_BYTE);
-
-	/* Default SPI mode is SCPOL = 0, SCPH = 0 */
-	cr0 = (transfer->bits_per_word - 1)
-		| (chip->type << SPI_FRF_OFFSET)
-		| ((((spi->mode & SPI_CPOL) ? 1 : 0) << SPI_SCOL_OFFSET) |
-			(((spi->mode & SPI_CPHA) ? 1 : 0) << SPI_SCPH_OFFSET) |
-			(((spi->mode & SPI_LOOP) ? 1 : 0) << SPI_SRL_OFFSET))
-		| (chip->tmode << SPI_TMOD_OFFSET);
-
-	/*
-	 * Adjust transfer mode if necessary. Requires platform dependent
-	 * chipselect mechanism.
-	 */
-	if (chip->cs_control) {
-		if (dws->rx && dws->tx)
-			chip->tmode = SPI_TMOD_TR;
-		else if (dws->rx)
-			chip->tmode = SPI_TMOD_RO;
-		else
-			chip->tmode = SPI_TMOD_TO;
-
-		cr0 &= ~SPI_TMOD_MASK;
-		cr0 |= (chip->tmode << SPI_TMOD_OFFSET);
-	}
-
-	dw_writel(dws, DW_SPI_CTRL0, cr0);
-
-	/* Check if current transfer is a DMA transaction */
-	if (master->can_dma && master->can_dma(master, spi, transfer))
-		dws->dma_mapped = master->cur_msg_mapped;
-
-	/* For poll mode just disable all interrupts */
-	spi_mask_intr(dws, 0xff);
-
-	/*
-	 * Interrupt mode
-	 * we only need set the TXEI IRQ, as TX/RX always happen syncronizely
-	 */
-	if (dws->dma_mapped) {
-		ret = dws->dma_ops->dma_setup(dws, transfer);
-		if (ret < 0) {
-			spi_enable_chip(dws, 1);
-			return ret;
-		}
-	} else if (!chip->poll_mode) {
-		txlevel = min_t(u16, dws->fifo_len / 2, dws->len / dws->n_bytes);
-		dw_writel(dws, DW_SPI_TXFLTR, txlevel);
-
-		/* Set the interrupt mask */
-		imask |= SPI_INT_TXEI | SPI_INT_TXOI |
-			 SPI_INT_RXUI | SPI_INT_RXOI;
-		spi_umask_intr(dws, imask);
-
-		dws->transfer_handler = interrupt_transfer;
-	}
-
-	spi_enable_chip(dws, 1);
-
-	if (dws->dma_mapped)
-		return dws->dma_ops->dma_transfer(dws, transfer);
-
-	if (chip->poll_mode)
-		return poll_transfer(dws);
-
-	return 1;
-}
-
-static void dw_spi_handle_err(struct spi_controller *master,
-		struct spi_message *msg)
-{
-	struct dw_spi *dws = spi_controller_get_devdata(master);
-
-	if (dws->dma_mapped)
-		dws->dma_ops->dma_stop(dws);
-
-	spi_reset_chip(dws);
-}
-
-/* This may be called twice for each spi dev */
-static int dw_spi_setup(struct spi_device *spi)
-{
-	struct dw_spi_chip *chip_info = NULL;
-	struct chip_data *chip;
-
-	/* Only alloc on first setup */
-	chip = spi_get_ctldata(spi);
-	if (!chip) {
-		chip = kzalloc(sizeof(struct chip_data), GFP_KERNEL);
-		if (!chip)
-			return -ENOMEM;
-		spi_set_ctldata(spi, chip);
-	}
-
-	/*
-	 * Protocol drivers may change the chip settings, so...
-	 * if chip_info exists, use it
-	 */
-	chip_info = spi->controller_data;
-
-	/* chip_info doesn't always exist */
-	if (chip_info) {
-		if (chip_info->cs_control)
-			chip->cs_control = chip_info->cs_control;
-
-		chip->poll_mode = chip_info->poll_mode;
-		chip->type = chip_info->type;
-	}
-
-	chip->tmode = SPI_TMOD_TR;
-
-	return 0;
-}
-
-static void dw_spi_cleanup(struct spi_device *spi)
-{
-	struct chip_data *chip = spi_get_ctldata(spi);
-
-	kfree(chip);
-	spi_set_ctldata(spi, NULL);
-}
-
-/* Restart the controller, disable all interrupts, clean rx fifo */
-static void spi_hw_init(struct device *dev, struct dw_spi *dws)
-{
-	spi_reset_chip(dws);
-
-	/*
-	 * Try to detect the FIFO depth if not set by interface driver,
-	 * the depth could be from 2 to 256 from HW spec
-	 */
-	if (!dws->fifo_len) {
-		u32 fifo;
-
-		for (fifo = 1; fifo < 256; fifo++) {
-			dw_writel(dws, DW_SPI_TXFLTR, fifo);
-			if (fifo != dw_readl(dws, DW_SPI_TXFLTR))
-				break;
-		}
-		dw_writel(dws, DW_SPI_TXFLTR, 0);
-
-		dws->fifo_len = (fifo == 1) ? 0 : fifo;
-		dev_dbg(dev, "Detected FIFO size: %u bytes\n", dws->fifo_len);
-	}
-
-	/* enable HW fixup for explicit CS deselect for Amazon's alpine chip */
-	if (dws->cs_override)
-		dw_writel(dws, DW_SPI_CS_OVERRIDE, 0xF);
-}
-
-int dw_spi_add_host(struct device *dev, struct dw_spi *dws)
-{
-	struct spi_controller *master;
-	int ret;
-
-	BUG_ON(dws == NULL);
-
-	master = spi_alloc_master(dev, 0);
-	if (!master)
-		return -ENOMEM;
-
-	dws->master = master;
-	dws->type = SSI_MOTO_SPI;
-	dws->dma_inited = 0;
-	dws->dma_addr = (dma_addr_t)(dws->paddr + DW_SPI_DR);
-	spin_lock_init(&dws->buf_lock);
-
-	spi_controller_set_devdata(master, dws);
-
-	ret = request_irq(dws->irq, dw_spi_irq, IRQF_SHARED, dev_name(dev),
-			  master);
-	if (ret < 0) {
-		dev_err(dev, "can not get IRQ\n");
-		goto err_free_master;
-	}
-
-	master->use_gpio_descriptors = true;
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LOOP;
-	master->bits_per_word_mask =  SPI_BPW_RANGE_MASK(4, 16);
-	master->bus_num = dws->bus_num;
-	master->num_chipselect = dws->num_cs;
-	master->setup = dw_spi_setup;
-	master->cleanup = dw_spi_cleanup;
-	master->set_cs = dw_spi_set_cs;
-	master->transfer_one = dw_spi_transfer_one;
-	master->handle_err = dw_spi_handle_err;
-	master->max_speed_hz = dws->max_freq;
-	master->dev.of_node = dev->of_node;
-	master->dev.fwnode = dev->fwnode;
-	master->flags = SPI_MASTER_GPIO_SS;
-
-	if (dws->set_cs)
-		master->set_cs = dws->set_cs;
-
-	/* Basic HW init */
-	spi_hw_init(dev, dws);
-
-	if (dws->dma_ops && dws->dma_ops->dma_init) {
-		ret = dws->dma_ops->dma_init(dws);
-		if (ret) {
-			dev_warn(dev, "DMA init failed\n");
-			dws->dma_inited = 0;
-		} else {
-			master->can_dma = dws->dma_ops->can_dma;
-			master->flags |= SPI_CONTROLLER_MUST_TX;
-		}
-	}
-
-	ret = spi_register_controller(master);
-	if (ret) {
-		dev_err(&master->dev, "problem registering spi master\n");
-		goto err_dma_exit;
-	}
-
-	dw_spi_debugfs_init(dws);
-	return 0;
-
-err_dma_exit:
-	if (dws->dma_ops && dws->dma_ops->dma_exit)
-		dws->dma_ops->dma_exit(dws);
-	spi_enable_chip(dws, 0);
-	free_irq(dws->irq, master);
-err_free_master:
-	spi_controller_put(master);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(dw_spi_add_host);
-
-void dw_spi_remove_host(struct dw_spi *dws)
-{
-	dw_spi_debugfs_remove(dws);
-
-	spi_unregister_controller(dws->master);
-
-	if (dws->dma_ops && dws->dma_ops->dma_exit)
-		dws->dma_ops->dma_exit(dws);
-
-	spi_shutdown_chip(dws);
-
-	free_irq(dws->irq, dws->master);
-}
-EXPORT_SYMBOL_GPL(dw_spi_remove_host);
-
-int dw_spi_suspend_host(struct dw_spi *dws)
-{
-	int ret;
-
-	ret = spi_controller_suspend(dws->master);
-	if (ret)
-		return ret;
-
-	spi_shutdown_chip(dws);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(dw_spi_suspend_host);
-
-int dw_spi_resume_host(struct dw_spi *dws)
-{
-	spi_hw_init(&dws->master->dev, dws);
-	return spi_controller_resume(dws->master);
-}
-EXPORT_SYMBOL_GPL(dw_spi_resume_host);
-
-MODULE_AUTHOR("Feng Tang <feng.tang@intel.com>");
-MODULE_DESCRIPTION("Driver for DesignWare SPI controller core");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-dw.h b/drivers/spi/spi-dw.h
index f3a2f15..faf40cb 100644
--- a/drivers/spi/spi-dw.h
+++ b/drivers/spi/spi-dw.h
@@ -2,19 +2,23 @@
 #ifndef DW_SPI_HEADER_H
 #define DW_SPI_HEADER_H
 
+#include <linux/bits.h>
+#include <linux/completion.h>
+#include <linux/debugfs.h>
+#include <linux/irqreturn.h>
 #include <linux/io.h>
 #include <linux/scatterlist.h>
-#include <linux/gpio.h>
+#include <linux/spi/spi-mem.h>
 
 /* Register offsets */
-#define DW_SPI_CTRL0			0x00
-#define DW_SPI_CTRL1			0x04
+#define DW_SPI_CTRLR0			0x00
+#define DW_SPI_CTRLR1			0x04
 #define DW_SPI_SSIENR			0x08
 #define DW_SPI_MWCR			0x0c
 #define DW_SPI_SER			0x10
 #define DW_SPI_BAUDR			0x14
-#define DW_SPI_TXFLTR			0x18
-#define DW_SPI_RXFLTR			0x1c
+#define DW_SPI_TXFTLR			0x18
+#define DW_SPI_RXFTLR			0x1c
 #define DW_SPI_TXFLR			0x20
 #define DW_SPI_RXFLR			0x24
 #define DW_SPI_SR			0x28
@@ -32,6 +36,7 @@
 #define DW_SPI_IDR			0x58
 #define DW_SPI_VERSION			0x5c
 #define DW_SPI_DR			0x60
+#define DW_SPI_RX_SAMPLE_DLY		0xf0
 #define DW_SPI_CS_OVERRIDE		0xf4
 
 /* Bit fields in CTRLR0 */
@@ -58,6 +63,25 @@
 #define SPI_SRL_OFFSET			11
 #define SPI_CFS_OFFSET			12
 
+/* Bit fields in CTRLR0 based on DWC_ssi_databook.pdf v1.01a */
+#define DWC_SSI_CTRLR0_SRL_OFFSET	13
+#define DWC_SSI_CTRLR0_TMOD_OFFSET	10
+#define DWC_SSI_CTRLR0_TMOD_MASK	GENMASK(11, 10)
+#define DWC_SSI_CTRLR0_SCPOL_OFFSET	9
+#define DWC_SSI_CTRLR0_SCPH_OFFSET	8
+#define DWC_SSI_CTRLR0_FRF_OFFSET	6
+#define DWC_SSI_CTRLR0_DFS_OFFSET	0
+
+/*
+ * For Keem Bay, CTRLR0[31] is used to select controller mode.
+ * 0: SSI is slave
+ * 1: SSI is master
+ */
+#define DWC_SSI_CTRLR0_KEEMBAY_MST	BIT(31)
+
+/* Bit fields in CTRLR1 */
+#define SPI_NDF_MASK			GENMASK(15, 0)
+
 /* Bit fields in SR, 7 bits */
 #define SR_MASK				0x7f		/* cover 7 bits */
 #define SR_BUSY				(1 << 0)
@@ -80,8 +104,12 @@
 #define SPI_DMA_RDMAE			(1 << 0)
 #define SPI_DMA_TDMAE			(1 << 1)
 
-/* TX RX interrupt level threshold, max can be 256 */
-#define SPI_INT_THRESHOLD		32
+#define SPI_WAIT_RETRIES		5
+#define SPI_BUF_SIZE \
+	(sizeof_field(struct spi_mem_op, cmd.opcode) + \
+	 sizeof_field(struct spi_mem_op, addr.val) + 256)
+#define SPI_GET_BYTE(_val, _idx) \
+	((_val) >> (BITS_PER_BYTE * (_idx)) & 0xff)
 
 enum dw_ssi_type {
 	SSI_MOTO_SPI = 0,
@@ -89,9 +117,22 @@
 	SSI_NS_MICROWIRE,
 };
 
+/* DW SPI capabilities */
+#define DW_SPI_CAP_CS_OVERRIDE		BIT(0)
+#define DW_SPI_CAP_KEEMBAY_MST		BIT(1)
+#define DW_SPI_CAP_DWC_SSI		BIT(2)
+
+/* Slave spi_transfer/spi_mem_op related */
+struct dw_spi_cfg {
+	u8 tmode;
+	u8 dfs;
+	u32 ndf;
+	u32 freq;
+};
+
 struct dw_spi;
 struct dw_spi_dma_ops {
-	int (*dma_init)(struct dw_spi *dws);
+	int (*dma_init)(struct device *dev, struct dw_spi *dws);
 	void (*dma_exit)(struct dw_spi *dws);
 	int (*dma_setup)(struct dw_spi *dws, struct spi_transfer *xfer);
 	bool (*can_dma)(struct spi_controller *master, struct spi_device *spi,
@@ -102,47 +143,51 @@
 
 struct dw_spi {
 	struct spi_controller	*master;
-	enum dw_ssi_type	type;
 
 	void __iomem		*regs;
 	unsigned long		paddr;
 	int			irq;
 	u32			fifo_len;	/* depth of the FIFO buffer */
+	u32			max_mem_freq;	/* max mem-ops bus freq */
 	u32			max_freq;	/* max bus freq supported */
 
-	int			cs_override;
+	u32			caps;		/* DW SPI capabilities */
+
 	u32			reg_io_width;	/* DR I/O width in bytes */
 	u16			bus_num;
 	u16			num_cs;		/* supported slave numbers */
 	void (*set_cs)(struct spi_device *spi, bool enable);
 
 	/* Current message transfer state info */
-	size_t			len;
 	void			*tx;
-	void			*tx_end;
-	spinlock_t		buf_lock;
+	unsigned int		tx_len;
 	void			*rx;
-	void			*rx_end;
+	unsigned int		rx_len;
+	u8			buf[SPI_BUF_SIZE];
 	int			dma_mapped;
 	u8			n_bytes;	/* current is a 1/2 bytes op */
-	u32			dma_width;
 	irqreturn_t		(*transfer_handler)(struct dw_spi *dws);
 	u32			current_freq;	/* frequency in hz */
+	u32			cur_rx_sample_dly;
+	u32			def_rx_sample_dly_ns;
+
+	/* Custom memory operations */
+	struct spi_controller_mem_ops mem_ops;
 
 	/* DMA info */
-	int			dma_inited;
 	struct dma_chan		*txchan;
+	u32			txburst;
 	struct dma_chan		*rxchan;
+	u32			rxburst;
+	u32			dma_sg_burst;
 	unsigned long		dma_chan_busy;
 	dma_addr_t		dma_addr; /* phy address of the Data register */
 	const struct dw_spi_dma_ops *dma_ops;
-	void			*dma_tx;
-	void			*dma_rx;
+	struct completion	dma_completion;
 
-	/* Bus interface info */
-	void			*priv;
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *debugfs;
+	struct debugfs_regset32 regset;
 #endif
 };
 
@@ -151,29 +196,19 @@
 	return __raw_readl(dws->regs + offset);
 }
 
-static inline u16 dw_readw(struct dw_spi *dws, u32 offset)
-{
-	return __raw_readw(dws->regs + offset);
-}
-
 static inline void dw_writel(struct dw_spi *dws, u32 offset, u32 val)
 {
 	__raw_writel(val, dws->regs + offset);
 }
 
-static inline void dw_writew(struct dw_spi *dws, u32 offset, u16 val)
-{
-	__raw_writew(val, dws->regs + offset);
-}
-
 static inline u32 dw_read_io_reg(struct dw_spi *dws, u32 offset)
 {
 	switch (dws->reg_io_width) {
 	case 2:
-		return dw_readw(dws, offset);
+		return readw_relaxed(dws->regs + offset);
 	case 4:
 	default:
-		return dw_readl(dws, offset);
+		return readl_relaxed(dws->regs + offset);
 	}
 }
 
@@ -181,11 +216,11 @@
 {
 	switch (dws->reg_io_width) {
 	case 2:
-		dw_writew(dws, offset, val);
+		writew_relaxed(val, dws->regs + offset);
 		break;
 	case 4:
 	default:
-		dw_writel(dws, offset, val);
+		writel_relaxed(val, dws->regs + offset);
 		break;
 	}
 }
@@ -219,14 +254,16 @@
 }
 
 /*
- * This does disable the SPI controller, interrupts, and re-enable the
- * controller back. Transmit and receive FIFO buffers are cleared when the
- * device is disabled.
+ * This disables the SPI controller, interrupts, clears the interrupts status
+ * and CS, then re-enables the controller back. Transmit and receive FIFO
+ * buffers are cleared when the device is disabled.
  */
 static inline void spi_reset_chip(struct dw_spi *dws)
 {
 	spi_enable_chip(dws, 0);
 	spi_mask_intr(dws, 0xff);
+	dw_readl(dws, DW_SPI_ICR);
+	dw_writel(dws, DW_SPI_SER, 0);
 	spi_enable_chip(dws, 1);
 }
 
@@ -236,24 +273,25 @@
 	spi_set_clk(dws, 0);
 }
 
-/*
- * Each SPI slave device to work with dw_api controller should
- * has such a structure claiming its working mode (poll or PIO/DMA),
- * which can be save in the "controller_data" member of the
- * struct spi_device.
- */
-struct dw_spi_chip {
-	u8 poll_mode;	/* 1 for controller polling mode */
-	u8 type;	/* SPI/SSP/MicroWire */
-	void (*cs_control)(u32 command);
-};
-
 extern void dw_spi_set_cs(struct spi_device *spi, bool enable);
+extern void dw_spi_update_config(struct dw_spi *dws, struct spi_device *spi,
+				 struct dw_spi_cfg *cfg);
+extern int dw_spi_check_status(struct dw_spi *dws, bool raw);
 extern int dw_spi_add_host(struct device *dev, struct dw_spi *dws);
 extern void dw_spi_remove_host(struct dw_spi *dws);
 extern int dw_spi_suspend_host(struct dw_spi *dws);
 extern int dw_spi_resume_host(struct dw_spi *dws);
 
-/* platform related setup */
-extern int dw_spi_mid_init(struct dw_spi *dws); /* Intel MID platforms */
+#ifdef CONFIG_SPI_DW_DMA
+
+extern void dw_spi_dma_setup_mfld(struct dw_spi *dws);
+extern void dw_spi_dma_setup_generic(struct dw_spi *dws);
+
+#else
+
+static inline void dw_spi_dma_setup_mfld(struct dw_spi *dws) {}
+static inline void dw_spi_dma_setup_generic(struct dw_spi *dws) {}
+
+#endif /* !CONFIG_SPI_DW_DMA */
+
 #endif /* DW_SPI_HEADER_H */
diff --git a/drivers/spi/spi-efm32.c b/drivers/spi/spi-efm32.c
index 64d4c44..ea6e4a7 100644
--- a/drivers/spi/spi-efm32.c
+++ b/drivers/spi/spi-efm32.c
@@ -6,14 +6,13 @@
 #include <linux/io.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
-#include <linux/gpio.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/module.h>
-#include <linux/of_gpio.h>
 #include <linux/platform_data/efm32-spi.h>
+#include <linux/of.h>
 
 #define DRIVER_NAME "efm32-spi"
 
@@ -82,9 +81,6 @@
 	const u8 *tx_buf;
 	u8 *rx_buf;
 	unsigned tx_len, rx_len;
-
-	/* chip selects */
-	unsigned csgpio[];
 };
 
 #define ddata_to_dev(ddata)	(&(ddata->bitbang.master->dev))
@@ -102,14 +98,6 @@
 	return readl_relaxed(ddata->base + offset);
 }
 
-static void efm32_spi_chipselect(struct spi_device *spi, int is_on)
-{
-	struct efm32_spi_ddata *ddata = spi_master_get_devdata(spi->master);
-	int value = !(spi->mode & SPI_CS_HIGH) == !(is_on == BITBANG_CS_ACTIVE);
-
-	gpio_set_value(ddata->csgpio[spi->chip_select], value);
-}
-
 static int efm32_spi_setup_transfer(struct spi_device *spi,
 		struct spi_transfer *t)
 {
@@ -320,17 +308,11 @@
 	int ret;
 	struct spi_master *master;
 	struct device_node *np = pdev->dev.of_node;
-	int num_cs, i;
 
 	if (!np)
 		return -EINVAL;
 
-	num_cs = of_gpio_named_count(np, "cs-gpios");
-	if (num_cs < 0)
-		return num_cs;
-
-	master = spi_alloc_master(&pdev->dev,
-			sizeof(*ddata) + num_cs * sizeof(unsigned));
+	master = spi_alloc_master(&pdev->dev, sizeof(*ddata));
 	if (!master) {
 		dev_dbg(&pdev->dev,
 				"failed to allocate spi master controller\n");
@@ -340,14 +322,13 @@
 
 	master->dev.of_node = pdev->dev.of_node;
 
-	master->num_chipselect = num_cs;
 	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 16);
+	master->use_gpio_descriptors = true;
 
 	ddata = spi_master_get_devdata(master);
 
 	ddata->bitbang.master = master;
-	ddata->bitbang.chipselect = efm32_spi_chipselect;
 	ddata->bitbang.setup_transfer = efm32_spi_setup_transfer;
 	ddata->bitbang.txrx_bufs = efm32_spi_txrx_bufs;
 
@@ -361,25 +342,6 @@
 		goto err;
 	}
 
-	for (i = 0; i < num_cs; ++i) {
-		ret = of_get_named_gpio(np, "cs-gpios", i);
-		if (ret < 0) {
-			dev_err(&pdev->dev, "failed to get csgpio#%u (%d)\n",
-					i, ret);
-			goto err;
-		}
-		ddata->csgpio[i] = ret;
-		dev_dbg(&pdev->dev, "csgpio#%u = %u\n", i, ddata->csgpio[i]);
-		ret = devm_gpio_request_one(&pdev->dev, ddata->csgpio[i],
-				GPIOF_OUT_INIT_LOW, DRIVER_NAME);
-		if (ret < 0) {
-			dev_err(&pdev->dev,
-					"failed to configure csgpio#%u (%d)\n",
-					i, ret);
-			goto err;
-		}
-	}
-
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res) {
 		ret = -ENODEV;
diff --git a/drivers/spi/spi-ep93xx.c b/drivers/spi/spi-ep93xx.c
index 4e1ccd4..aa67655 100644
--- a/drivers/spi/spi-ep93xx.c
+++ b/drivers/spi/spi-ep93xx.c
@@ -10,7 +10,7 @@
  *
  * For more information about the SPI controller see documentation on Cirrus
  * Logic web site:
- *     http://www.cirrus.com/en/pubs/manual/EP93xx_Users_Guide_UM1.pdf
+ *     https://www.cirrus.com/en/pubs/manual/EP93xx_Users_Guide_UM1.pdf
  */
 
 #include <linux/io.h>
@@ -31,7 +31,8 @@
 #include <linux/platform_data/spi-ep93xx.h>
 
 #define SSPCR0			0x0000
-#define SSPCR0_MODE_SHIFT	6
+#define SSPCR0_SPO		BIT(6)
+#define SSPCR0_SPH		BIT(7)
 #define SSPCR0_SCR_SHIFT	8
 
 #define SSPCR1			0x0004
@@ -159,7 +160,10 @@
 		return err;
 
 	cr0 = div_scr << SSPCR0_SCR_SHIFT;
-	cr0 |= (spi->mode & (SPI_CPHA | SPI_CPOL)) << SSPCR0_MODE_SHIFT;
+	if (spi->mode & SPI_CPOL)
+		cr0 |= SSPCR0_SPO;
+	if (spi->mode & SPI_CPHA)
+		cr0 |= SSPCR0_SPH;
 	cr0 |= dss;
 
 	dev_dbg(&master->dev, "setup: mode %d, cpsr %d, scr %d, dss %d\n",
@@ -210,7 +214,7 @@
 
 /**
  * ep93xx_spi_read_write() - perform next RX/TX transfer
- * @espi: ep93xx SPI controller struct
+ * @master: SPI master
  *
  * This function transfers next bytes (or half-words) to/from RX/TX FIFOs. If
  * called several times, the whole transfer will be completed. Returns
diff --git a/drivers/spi/spi-falcon.c b/drivers/spi/spi-falcon.c
index 00f46c8..d3336a6 100644
--- a/drivers/spi/spi-falcon.c
+++ b/drivers/spi/spi-falcon.c
@@ -377,7 +377,7 @@
 
 		m->actual_length += t->len;
 
-		WARN_ON(t->delay_usecs || t->cs_change);
+		WARN_ON(t->delay_usecs || t->delay.value || t->cs_change);
 		spi_flags = 0;
 	}
 
diff --git a/drivers/spi/spi-fsi.c b/drivers/spi/spi-fsi.c
new file mode 100644
index 0000000..3920cd3
--- /dev/null
+++ b/drivers/spi/spi-fsi.c
@@ -0,0 +1,635 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (C) IBM Corporation 2020
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/fsi.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/spi/spi.h>
+
+#define FSI_ENGID_SPI			0x23
+#define FSI_MBOX_ROOT_CTRL_8		0x2860
+#define  FSI_MBOX_ROOT_CTRL_8_SPI_MUX	 0xf0000000
+
+#define FSI2SPI_DATA0			0x00
+#define FSI2SPI_DATA1			0x04
+#define FSI2SPI_CMD			0x08
+#define  FSI2SPI_CMD_WRITE		 BIT(31)
+#define FSI2SPI_RESET			0x18
+#define FSI2SPI_STATUS			0x1c
+#define  FSI2SPI_STATUS_ANY_ERROR	 BIT(31)
+#define FSI2SPI_IRQ			0x20
+
+#define SPI_FSI_BASE			0x70000
+#define SPI_FSI_INIT_TIMEOUT_MS		1000
+#define SPI_FSI_MAX_XFR_SIZE		2048
+#define SPI_FSI_MAX_XFR_SIZE_RESTRICTED	32
+
+#define SPI_FSI_ERROR			0x0
+#define SPI_FSI_COUNTER_CFG		0x1
+#define  SPI_FSI_COUNTER_CFG_LOOPS(x)	 (((u64)(x) & 0xffULL) << 32)
+#define  SPI_FSI_COUNTER_CFG_N2_RX	 BIT_ULL(8)
+#define  SPI_FSI_COUNTER_CFG_N2_TX	 BIT_ULL(9)
+#define  SPI_FSI_COUNTER_CFG_N2_IMPLICIT BIT_ULL(10)
+#define  SPI_FSI_COUNTER_CFG_N2_RELOAD	 BIT_ULL(11)
+#define SPI_FSI_CFG1			0x2
+#define SPI_FSI_CLOCK_CFG		0x3
+#define  SPI_FSI_CLOCK_CFG_MM_ENABLE	 BIT_ULL(32)
+#define  SPI_FSI_CLOCK_CFG_ECC_DISABLE	 (BIT_ULL(35) | BIT_ULL(33))
+#define  SPI_FSI_CLOCK_CFG_RESET1	 (BIT_ULL(36) | BIT_ULL(38))
+#define  SPI_FSI_CLOCK_CFG_RESET2	 (BIT_ULL(37) | BIT_ULL(39))
+#define  SPI_FSI_CLOCK_CFG_MODE		 (BIT_ULL(41) | BIT_ULL(42))
+#define  SPI_FSI_CLOCK_CFG_SCK_RECV_DEL	 GENMASK_ULL(51, 44)
+#define   SPI_FSI_CLOCK_CFG_SCK_NO_DEL	  BIT_ULL(51)
+#define  SPI_FSI_CLOCK_CFG_SCK_DIV	 GENMASK_ULL(63, 52)
+#define SPI_FSI_MMAP			0x4
+#define SPI_FSI_DATA_TX			0x5
+#define SPI_FSI_DATA_RX			0x6
+#define SPI_FSI_SEQUENCE		0x7
+#define  SPI_FSI_SEQUENCE_STOP		 0x00
+#define  SPI_FSI_SEQUENCE_SEL_SLAVE(x)	 (0x10 | ((x) & 0xf))
+#define  SPI_FSI_SEQUENCE_SHIFT_OUT(x)	 (0x30 | ((x) & 0xf))
+#define  SPI_FSI_SEQUENCE_SHIFT_IN(x)	 (0x40 | ((x) & 0xf))
+#define  SPI_FSI_SEQUENCE_COPY_DATA_TX	 0xc0
+#define  SPI_FSI_SEQUENCE_BRANCH(x)	 (0xe0 | ((x) & 0xf))
+#define SPI_FSI_STATUS			0x8
+#define  SPI_FSI_STATUS_ERROR		 \
+	(GENMASK_ULL(31, 21) | GENMASK_ULL(15, 12))
+#define  SPI_FSI_STATUS_SEQ_STATE	 GENMASK_ULL(55, 48)
+#define   SPI_FSI_STATUS_SEQ_STATE_IDLE	  BIT_ULL(48)
+#define  SPI_FSI_STATUS_TDR_UNDERRUN	 BIT_ULL(57)
+#define  SPI_FSI_STATUS_TDR_OVERRUN	 BIT_ULL(58)
+#define  SPI_FSI_STATUS_TDR_FULL	 BIT_ULL(59)
+#define  SPI_FSI_STATUS_RDR_UNDERRUN	 BIT_ULL(61)
+#define  SPI_FSI_STATUS_RDR_OVERRUN	 BIT_ULL(62)
+#define  SPI_FSI_STATUS_RDR_FULL	 BIT_ULL(63)
+#define  SPI_FSI_STATUS_ANY_ERROR	 \
+	(SPI_FSI_STATUS_ERROR | \
+	 SPI_FSI_STATUS_TDR_OVERRUN | SPI_FSI_STATUS_RDR_UNDERRUN | \
+	 SPI_FSI_STATUS_RDR_OVERRUN)
+#define SPI_FSI_PORT_CTRL		0x9
+
+struct fsi_spi {
+	struct device *dev;	/* SPI controller device */
+	struct fsi_device *fsi;	/* FSI2SPI CFAM engine device */
+	u32 base;
+	size_t max_xfr_size;
+	bool restricted;
+};
+
+struct fsi_spi_sequence {
+	int bit;
+	u64 data;
+};
+
+static int fsi_spi_check_mux(struct fsi_device *fsi, struct device *dev)
+{
+	int rc;
+	u32 root_ctrl_8;
+	__be32 root_ctrl_8_be;
+
+	rc = fsi_slave_read(fsi->slave, FSI_MBOX_ROOT_CTRL_8, &root_ctrl_8_be,
+			    sizeof(root_ctrl_8_be));
+	if (rc)
+		return rc;
+
+	root_ctrl_8 = be32_to_cpu(root_ctrl_8_be);
+	dev_dbg(dev, "Root control register 8: %08x\n", root_ctrl_8);
+	if ((root_ctrl_8 & FSI_MBOX_ROOT_CTRL_8_SPI_MUX) ==
+	     FSI_MBOX_ROOT_CTRL_8_SPI_MUX)
+		return 0;
+
+	return -ENOLINK;
+}
+
+static int fsi_spi_check_status(struct fsi_spi *ctx)
+{
+	int rc;
+	u32 sts;
+	__be32 sts_be;
+
+	rc = fsi_device_read(ctx->fsi, FSI2SPI_STATUS, &sts_be,
+			     sizeof(sts_be));
+	if (rc)
+		return rc;
+
+	sts = be32_to_cpu(sts_be);
+	if (sts & FSI2SPI_STATUS_ANY_ERROR) {
+		dev_err(ctx->dev, "Error with FSI2SPI interface: %08x.\n", sts);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int fsi_spi_read_reg(struct fsi_spi *ctx, u32 offset, u64 *value)
+{
+	int rc;
+	__be32 cmd_be;
+	__be32 data_be;
+	u32 cmd = offset + ctx->base;
+
+	*value = 0ULL;
+
+	if (cmd & FSI2SPI_CMD_WRITE)
+		return -EINVAL;
+
+	cmd_be = cpu_to_be32(cmd);
+	rc = fsi_device_write(ctx->fsi, FSI2SPI_CMD, &cmd_be, sizeof(cmd_be));
+	if (rc)
+		return rc;
+
+	rc = fsi_spi_check_status(ctx);
+	if (rc)
+		return rc;
+
+	rc = fsi_device_read(ctx->fsi, FSI2SPI_DATA0, &data_be,
+			     sizeof(data_be));
+	if (rc)
+		return rc;
+
+	*value |= (u64)be32_to_cpu(data_be) << 32;
+
+	rc = fsi_device_read(ctx->fsi, FSI2SPI_DATA1, &data_be,
+			     sizeof(data_be));
+	if (rc)
+		return rc;
+
+	*value |= (u64)be32_to_cpu(data_be);
+	dev_dbg(ctx->dev, "Read %02x[%016llx].\n", offset, *value);
+
+	return 0;
+}
+
+static int fsi_spi_write_reg(struct fsi_spi *ctx, u32 offset, u64 value)
+{
+	int rc;
+	__be32 cmd_be;
+	__be32 data_be;
+	u32 cmd = offset + ctx->base;
+
+	if (cmd & FSI2SPI_CMD_WRITE)
+		return -EINVAL;
+
+	dev_dbg(ctx->dev, "Write %02x[%016llx].\n", offset, value);
+
+	data_be = cpu_to_be32(upper_32_bits(value));
+	rc = fsi_device_write(ctx->fsi, FSI2SPI_DATA0, &data_be,
+			      sizeof(data_be));
+	if (rc)
+		return rc;
+
+	data_be = cpu_to_be32(lower_32_bits(value));
+	rc = fsi_device_write(ctx->fsi, FSI2SPI_DATA1, &data_be,
+			      sizeof(data_be));
+	if (rc)
+		return rc;
+
+	cmd_be = cpu_to_be32(cmd | FSI2SPI_CMD_WRITE);
+	rc = fsi_device_write(ctx->fsi, FSI2SPI_CMD, &cmd_be, sizeof(cmd_be));
+	if (rc)
+		return rc;
+
+	return fsi_spi_check_status(ctx);
+}
+
+static int fsi_spi_data_in(u64 in, u8 *rx, int len)
+{
+	int i;
+	int num_bytes = min(len, 8);
+
+	for (i = 0; i < num_bytes; ++i)
+		rx[i] = (u8)(in >> (8 * ((num_bytes - 1) - i)));
+
+	return num_bytes;
+}
+
+static int fsi_spi_data_out(u64 *out, const u8 *tx, int len)
+{
+	int i;
+	int num_bytes = min(len, 8);
+	u8 *out_bytes = (u8 *)out;
+
+	/* Unused bytes of the tx data should be 0. */
+	*out = 0ULL;
+
+	for (i = 0; i < num_bytes; ++i)
+		out_bytes[8 - (i + 1)] = tx[i];
+
+	return num_bytes;
+}
+
+static int fsi_spi_reset(struct fsi_spi *ctx)
+{
+	int rc;
+
+	dev_dbg(ctx->dev, "Resetting SPI controller.\n");
+
+	rc = fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
+			       SPI_FSI_CLOCK_CFG_RESET1);
+	if (rc)
+		return rc;
+
+	rc = fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
+			       SPI_FSI_CLOCK_CFG_RESET2);
+	if (rc)
+		return rc;
+
+	return fsi_spi_write_reg(ctx, SPI_FSI_STATUS, 0ULL);
+}
+
+static int fsi_spi_sequence_add(struct fsi_spi_sequence *seq, u8 val)
+{
+	/*
+	 * Add the next byte of instruction to the 8-byte sequence register.
+	 * Then decrement the counter so that the next instruction will go in
+	 * the right place. Return the index of the slot we just filled in the
+	 * sequence register.
+	 */
+	seq->data |= (u64)val << seq->bit;
+	seq->bit -= 8;
+
+	return ((64 - seq->bit) / 8) - 2;
+}
+
+static void fsi_spi_sequence_init(struct fsi_spi_sequence *seq)
+{
+	seq->bit = 56;
+	seq->data = 0ULL;
+}
+
+static int fsi_spi_sequence_transfer(struct fsi_spi *ctx,
+				     struct fsi_spi_sequence *seq,
+				     struct spi_transfer *transfer)
+{
+	bool docfg = false;
+	int loops;
+	int idx;
+	int rc;
+	u8 val = 0;
+	u8 len = min(transfer->len, 8U);
+	u8 rem = transfer->len % len;
+	u64 cfg = 0ULL;
+
+	loops = transfer->len / len;
+
+	if (transfer->tx_buf) {
+		val = SPI_FSI_SEQUENCE_SHIFT_OUT(len);
+		idx = fsi_spi_sequence_add(seq, val);
+
+		if (rem)
+			rem = SPI_FSI_SEQUENCE_SHIFT_OUT(rem);
+	} else if (transfer->rx_buf) {
+		val = SPI_FSI_SEQUENCE_SHIFT_IN(len);
+		idx = fsi_spi_sequence_add(seq, val);
+
+		if (rem)
+			rem = SPI_FSI_SEQUENCE_SHIFT_IN(rem);
+	} else {
+		return -EINVAL;
+	}
+
+	if (ctx->restricted) {
+		const int eidx = rem ? 5 : 6;
+
+		while (loops > 1 && idx <= eidx) {
+			idx = fsi_spi_sequence_add(seq, val);
+			loops--;
+			docfg = true;
+		}
+
+		if (loops > 1) {
+			dev_warn(ctx->dev, "No sequencer slots; aborting.\n");
+			return -EINVAL;
+		}
+	}
+
+	if (loops > 1) {
+		fsi_spi_sequence_add(seq, SPI_FSI_SEQUENCE_BRANCH(idx));
+		docfg = true;
+	}
+
+	if (docfg) {
+		cfg = SPI_FSI_COUNTER_CFG_LOOPS(loops - 1);
+		if (transfer->rx_buf)
+			cfg |= SPI_FSI_COUNTER_CFG_N2_RX |
+				SPI_FSI_COUNTER_CFG_N2_TX |
+				SPI_FSI_COUNTER_CFG_N2_IMPLICIT |
+				SPI_FSI_COUNTER_CFG_N2_RELOAD;
+
+		rc = fsi_spi_write_reg(ctx, SPI_FSI_COUNTER_CFG, cfg);
+		if (rc)
+			return rc;
+	} else {
+		fsi_spi_write_reg(ctx, SPI_FSI_COUNTER_CFG, 0ULL);
+	}
+
+	if (rem)
+		fsi_spi_sequence_add(seq, rem);
+
+	return 0;
+}
+
+static int fsi_spi_transfer_data(struct fsi_spi *ctx,
+				 struct spi_transfer *transfer)
+{
+	int rc = 0;
+	u64 status = 0ULL;
+	u64 cfg = 0ULL;
+
+	if (transfer->tx_buf) {
+		int nb;
+		int sent = 0;
+		u64 out = 0ULL;
+		const u8 *tx = transfer->tx_buf;
+
+		while (transfer->len > sent) {
+			nb = fsi_spi_data_out(&out, &tx[sent],
+					      (int)transfer->len - sent);
+
+			rc = fsi_spi_write_reg(ctx, SPI_FSI_DATA_TX, out);
+			if (rc)
+				return rc;
+
+			do {
+				rc = fsi_spi_read_reg(ctx, SPI_FSI_STATUS,
+						      &status);
+				if (rc)
+					return rc;
+
+				if (status & SPI_FSI_STATUS_ANY_ERROR) {
+					rc = fsi_spi_reset(ctx);
+					if (rc)
+						return rc;
+
+					return -EREMOTEIO;
+				}
+			} while (status & SPI_FSI_STATUS_TDR_FULL);
+
+			sent += nb;
+		}
+	} else if (transfer->rx_buf) {
+		int recv = 0;
+		u64 in = 0ULL;
+		u8 *rx = transfer->rx_buf;
+
+		rc = fsi_spi_read_reg(ctx, SPI_FSI_COUNTER_CFG, &cfg);
+		if (rc)
+			return rc;
+
+		if (cfg & SPI_FSI_COUNTER_CFG_N2_IMPLICIT) {
+			rc = fsi_spi_write_reg(ctx, SPI_FSI_DATA_TX, 0);
+			if (rc)
+				return rc;
+		}
+
+		while (transfer->len > recv) {
+			do {
+				rc = fsi_spi_read_reg(ctx, SPI_FSI_STATUS,
+						      &status);
+				if (rc)
+					return rc;
+
+				if (status & SPI_FSI_STATUS_ANY_ERROR) {
+					rc = fsi_spi_reset(ctx);
+					if (rc)
+						return rc;
+
+					return -EREMOTEIO;
+				}
+			} while (!(status & SPI_FSI_STATUS_RDR_FULL));
+
+			rc = fsi_spi_read_reg(ctx, SPI_FSI_DATA_RX, &in);
+			if (rc)
+				return rc;
+
+			recv += fsi_spi_data_in(in, &rx[recv],
+						(int)transfer->len - recv);
+		}
+	}
+
+	return 0;
+}
+
+static int fsi_spi_transfer_init(struct fsi_spi *ctx)
+{
+	int rc;
+	bool reset = false;
+	unsigned long end;
+	u64 seq_state;
+	u64 clock_cfg = 0ULL;
+	u64 status = 0ULL;
+	u64 wanted_clock_cfg = SPI_FSI_CLOCK_CFG_ECC_DISABLE |
+		SPI_FSI_CLOCK_CFG_SCK_NO_DEL |
+		FIELD_PREP(SPI_FSI_CLOCK_CFG_SCK_DIV, 19);
+
+	end = jiffies + msecs_to_jiffies(SPI_FSI_INIT_TIMEOUT_MS);
+	do {
+		if (time_after(jiffies, end))
+			return -ETIMEDOUT;
+
+		rc = fsi_spi_read_reg(ctx, SPI_FSI_STATUS, &status);
+		if (rc)
+			return rc;
+
+		seq_state = status & SPI_FSI_STATUS_SEQ_STATE;
+
+		if (status & (SPI_FSI_STATUS_ANY_ERROR |
+			      SPI_FSI_STATUS_TDR_FULL |
+			      SPI_FSI_STATUS_RDR_FULL)) {
+			if (reset)
+				return -EIO;
+
+			rc = fsi_spi_reset(ctx);
+			if (rc)
+				return rc;
+
+			reset = true;
+			continue;
+		}
+	} while (seq_state && (seq_state != SPI_FSI_STATUS_SEQ_STATE_IDLE));
+
+	rc = fsi_spi_read_reg(ctx, SPI_FSI_CLOCK_CFG, &clock_cfg);
+	if (rc)
+		return rc;
+
+	if ((clock_cfg & (SPI_FSI_CLOCK_CFG_MM_ENABLE |
+			  SPI_FSI_CLOCK_CFG_ECC_DISABLE |
+			  SPI_FSI_CLOCK_CFG_MODE |
+			  SPI_FSI_CLOCK_CFG_SCK_RECV_DEL |
+			  SPI_FSI_CLOCK_CFG_SCK_DIV)) != wanted_clock_cfg)
+		rc = fsi_spi_write_reg(ctx, SPI_FSI_CLOCK_CFG,
+				       wanted_clock_cfg);
+
+	return rc;
+}
+
+static int fsi_spi_transfer_one_message(struct spi_controller *ctlr,
+					struct spi_message *mesg)
+{
+	int rc;
+	u8 seq_slave = SPI_FSI_SEQUENCE_SEL_SLAVE(mesg->spi->chip_select + 1);
+	struct spi_transfer *transfer;
+	struct fsi_spi *ctx = spi_controller_get_devdata(ctlr);
+
+	rc = fsi_spi_check_mux(ctx->fsi, ctx->dev);
+	if (rc)
+		goto error;
+
+	list_for_each_entry(transfer, &mesg->transfers, transfer_list) {
+		struct fsi_spi_sequence seq;
+		struct spi_transfer *next = NULL;
+
+		/* Sequencer must do shift out (tx) first. */
+		if (!transfer->tx_buf ||
+		    transfer->len > (ctx->max_xfr_size + 8)) {
+			rc = -EINVAL;
+			goto error;
+		}
+
+		dev_dbg(ctx->dev, "Start tx of %d bytes.\n", transfer->len);
+
+		rc = fsi_spi_transfer_init(ctx);
+		if (rc < 0)
+			goto error;
+
+		fsi_spi_sequence_init(&seq);
+		fsi_spi_sequence_add(&seq, seq_slave);
+
+		rc = fsi_spi_sequence_transfer(ctx, &seq, transfer);
+		if (rc)
+			goto error;
+
+		if (!list_is_last(&transfer->transfer_list,
+				  &mesg->transfers)) {
+			next = list_next_entry(transfer, transfer_list);
+
+			/* Sequencer can only do shift in (rx) after tx. */
+			if (next->rx_buf) {
+				if (next->len > ctx->max_xfr_size) {
+					rc = -EINVAL;
+					goto error;
+				}
+
+				dev_dbg(ctx->dev, "Sequence rx of %d bytes.\n",
+					next->len);
+
+				rc = fsi_spi_sequence_transfer(ctx, &seq,
+							       next);
+				if (rc)
+					goto error;
+			} else {
+				next = NULL;
+			}
+		}
+
+		fsi_spi_sequence_add(&seq, SPI_FSI_SEQUENCE_SEL_SLAVE(0));
+
+		rc = fsi_spi_write_reg(ctx, SPI_FSI_SEQUENCE, seq.data);
+		if (rc)
+			goto error;
+
+		rc = fsi_spi_transfer_data(ctx, transfer);
+		if (rc)
+			goto error;
+
+		if (next) {
+			rc = fsi_spi_transfer_data(ctx, next);
+			if (rc)
+				goto error;
+
+			transfer = next;
+		}
+	}
+
+error:
+	mesg->status = rc;
+	spi_finalize_current_message(ctlr);
+
+	return rc;
+}
+
+static size_t fsi_spi_max_transfer_size(struct spi_device *spi)
+{
+	struct fsi_spi *ctx = spi_controller_get_devdata(spi->controller);
+
+	return ctx->max_xfr_size;
+}
+
+static int fsi_spi_probe(struct device *dev)
+{
+	int rc;
+	struct device_node *np;
+	int num_controllers_registered = 0;
+	struct fsi_device *fsi = to_fsi_dev(dev);
+
+	rc = fsi_spi_check_mux(fsi, dev);
+	if (rc)
+		return -ENODEV;
+
+	for_each_available_child_of_node(dev->of_node, np) {
+		u32 base;
+		struct fsi_spi *ctx;
+		struct spi_controller *ctlr;
+
+		if (of_property_read_u32(np, "reg", &base))
+			continue;
+
+		ctlr = spi_alloc_master(dev, sizeof(*ctx));
+		if (!ctlr)
+			break;
+
+		ctlr->dev.of_node = np;
+		ctlr->num_chipselect = of_get_available_child_count(np) ?: 1;
+		ctlr->flags = SPI_CONTROLLER_HALF_DUPLEX;
+		ctlr->max_transfer_size = fsi_spi_max_transfer_size;
+		ctlr->transfer_one_message = fsi_spi_transfer_one_message;
+
+		ctx = spi_controller_get_devdata(ctlr);
+		ctx->dev = &ctlr->dev;
+		ctx->fsi = fsi;
+		ctx->base = base + SPI_FSI_BASE;
+
+		if (of_device_is_compatible(np, "ibm,fsi2spi-restricted")) {
+			ctx->restricted = true;
+			ctx->max_xfr_size = SPI_FSI_MAX_XFR_SIZE_RESTRICTED;
+		} else {
+			ctx->restricted = false;
+			ctx->max_xfr_size = SPI_FSI_MAX_XFR_SIZE;
+		}
+
+		rc = devm_spi_register_controller(dev, ctlr);
+		if (rc)
+			spi_controller_put(ctlr);
+		else
+			num_controllers_registered++;
+	}
+
+	if (!num_controllers_registered)
+		return -ENODEV;
+
+	return 0;
+}
+
+static const struct fsi_device_id fsi_spi_ids[] = {
+	{ FSI_ENGID_SPI, FSI_VERSION_ANY },
+	{ }
+};
+MODULE_DEVICE_TABLE(fsi, fsi_spi_ids);
+
+static struct fsi_driver fsi_spi_driver = {
+	.id_table = fsi_spi_ids,
+	.drv = {
+		.name = "spi-fsi",
+		.bus = &fsi_bus_type,
+		.probe = fsi_spi_probe,
+	},
+};
+module_fsi_driver(fsi_spi_driver);
+
+MODULE_AUTHOR("Eddie James <eajames@linux.ibm.com>");
+MODULE_DESCRIPTION("FSI attached SPI controller");
+MODULE_LICENSE("GPL");
diff --git a/drivers/spi/spi-fsl-cpm.c b/drivers/spi/spi-fsl-cpm.c
index 858f054..ee90588 100644
--- a/drivers/spi/spi-fsl-cpm.c
+++ b/drivers/spi/spi-fsl-cpm.c
@@ -226,7 +226,7 @@
 	case 1:
 		kfree(fsl_dummy_rx);
 		fsl_dummy_rx = NULL;
-		/* fall through */
+		fallthrough;
 	default:
 		fsl_dummy_rx_refcnt--;
 		break;
@@ -294,7 +294,7 @@
 		switch (mspi->subblock) {
 		default:
 			dev_warn(dev, "cell-index unspecified, assuming SPI1\n");
-			/* fall through */
+			fallthrough;
 		case 0:
 			mspi->subblock = QE_CR_SUBBLOCK_SPI1;
 			break;
@@ -392,7 +392,8 @@
 	dma_unmap_single(dev, mspi->dma_dummy_rx, SPI_MRBLR, DMA_FROM_DEVICE);
 	dma_unmap_single(dev, mspi->dma_dummy_tx, PAGE_SIZE, DMA_TO_DEVICE);
 	cpm_muram_free(cpm_muram_offset(mspi->tx_bd));
-	cpm_muram_free(cpm_muram_offset(mspi->pram));
+	if (!(mspi->flags & SPI_CPM1))
+		cpm_muram_free(cpm_muram_offset(mspi->pram));
 	fsl_spi_free_dummy_rx();
 }
 EXPORT_SYMBOL_GPL(fsl_spi_cpm_free);
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 3e02006..fd004c9 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -21,16 +21,9 @@
 
 #define DRIVER_NAME			"fsl-dspi"
 
-#ifdef CONFIG_M5441x
-#define DSPI_FIFO_SIZE			16
-#else
-#define DSPI_FIFO_SIZE			4
-#endif
-#define DSPI_DMA_BUFSIZE		(DSPI_FIFO_SIZE * 1024)
-
 #define SPI_MCR				0x00
 #define SPI_MCR_MASTER			BIT(31)
-#define SPI_MCR_PCSIS			(0x3F << 16)
+#define SPI_MCR_PCSIS(x)		((x) << 16)
 #define SPI_MCR_CLR_TXF			BIT(11)
 #define SPI_MCR_CLR_RXF			BIT(10)
 #define SPI_MCR_XSPI			BIT(3)
@@ -60,7 +53,6 @@
 
 #define SPI_SR				0x2c
 #define SPI_SR_TCFQF			BIT(31)
-#define SPI_SR_EOQF			BIT(28)
 #define SPI_SR_TFUF			BIT(27)
 #define SPI_SR_TFFF			BIT(25)
 #define SPI_SR_CMDTCF			BIT(23)
@@ -69,7 +61,7 @@
 #define SPI_SR_TFIWF			BIT(18)
 #define SPI_SR_RFDF			BIT(17)
 #define SPI_SR_CMDFFF			BIT(16)
-#define SPI_SR_CLEAR			(SPI_SR_TCFQF | SPI_SR_EOQF | \
+#define SPI_SR_CLEAR			(SPI_SR_TCFQF | \
 					SPI_SR_TFUF | SPI_SR_TFFF | \
 					SPI_SR_CMDTCF | SPI_SR_SPEF | \
 					SPI_SR_RFOF | SPI_SR_TFIWF | \
@@ -82,7 +74,7 @@
 
 #define SPI_RSER			0x30
 #define SPI_RSER_TCFQE			BIT(31)
-#define SPI_RSER_EOQFE			BIT(28)
+#define SPI_RSER_CMDTCFE		BIT(23)
 
 #define SPI_PUSHR			0x34
 #define SPI_PUSHR_CMD_CONT		BIT(15)
@@ -113,54 +105,94 @@
 #define SPI_FRAME_BITS(bits)		SPI_CTAR_FMSZ((bits) - 1)
 #define SPI_FRAME_EBITS(bits)		SPI_CTARE_FMSZE(((bits) - 1) >> 4)
 
-/* Register offsets for regmap_pushr */
-#define PUSHR_CMD			0x0
-#define PUSHR_TX			0x2
-
 #define DMA_COMPLETION_TIMEOUT		msecs_to_jiffies(3000)
 
 struct chip_data {
 	u32			ctar_val;
-	u16			void_write_data;
 };
 
 enum dspi_trans_mode {
-	DSPI_EOQ_MODE = 0,
-	DSPI_TCFQ_MODE,
+	DSPI_XSPI_MODE,
 	DSPI_DMA_MODE,
 };
 
 struct fsl_dspi_devtype_data {
 	enum dspi_trans_mode	trans_mode;
 	u8			max_clock_factor;
-	bool			xspi_mode;
+	int			fifo_size;
 };
 
-static const struct fsl_dspi_devtype_data vf610_data = {
-	.trans_mode		= DSPI_DMA_MODE,
-	.max_clock_factor	= 2,
+enum {
+	LS1021A,
+	LS1012A,
+	LS1028A,
+	LS1043A,
+	LS1046A,
+	LS2080A,
+	LS2085A,
+	LX2160A,
+	MCF5441X,
+	VF610,
 };
 
-static const struct fsl_dspi_devtype_data ls1021a_v1_data = {
-	.trans_mode		= DSPI_TCFQ_MODE,
-	.max_clock_factor	= 8,
-	.xspi_mode		= true,
-};
-
-static const struct fsl_dspi_devtype_data ls2085a_data = {
-	.trans_mode		= DSPI_TCFQ_MODE,
-	.max_clock_factor	= 8,
-};
-
-static const struct fsl_dspi_devtype_data coldfire_data = {
-	.trans_mode		= DSPI_EOQ_MODE,
-	.max_clock_factor	= 8,
+static const struct fsl_dspi_devtype_data devtype_data[] = {
+	[VF610] = {
+		.trans_mode		= DSPI_DMA_MODE,
+		.max_clock_factor	= 2,
+		.fifo_size		= 4,
+	},
+	[LS1021A] = {
+		/* Has A-011218 DMA erratum */
+		.trans_mode		= DSPI_XSPI_MODE,
+		.max_clock_factor	= 8,
+		.fifo_size		= 4,
+	},
+	[LS1012A] = {
+		/* Has A-011218 DMA erratum */
+		.trans_mode		= DSPI_XSPI_MODE,
+		.max_clock_factor	= 8,
+		.fifo_size		= 16,
+	},
+	[LS1028A] = {
+		.trans_mode		= DSPI_XSPI_MODE,
+		.max_clock_factor	= 8,
+		.fifo_size		= 4,
+	},
+	[LS1043A] = {
+		/* Has A-011218 DMA erratum */
+		.trans_mode		= DSPI_XSPI_MODE,
+		.max_clock_factor	= 8,
+		.fifo_size		= 16,
+	},
+	[LS1046A] = {
+		/* Has A-011218 DMA erratum */
+		.trans_mode		= DSPI_XSPI_MODE,
+		.max_clock_factor	= 8,
+		.fifo_size		= 16,
+	},
+	[LS2080A] = {
+		.trans_mode		= DSPI_XSPI_MODE,
+		.max_clock_factor	= 8,
+		.fifo_size		= 4,
+	},
+	[LS2085A] = {
+		.trans_mode		= DSPI_XSPI_MODE,
+		.max_clock_factor	= 8,
+		.fifo_size		= 4,
+	},
+	[LX2160A] = {
+		.trans_mode		= DSPI_XSPI_MODE,
+		.max_clock_factor	= 8,
+		.fifo_size		= 4,
+	},
+	[MCF5441X] = {
+		.trans_mode		= DSPI_DMA_MODE,
+		.max_clock_factor	= 8,
+		.fifo_size		= 16,
+	},
 };
 
 struct fsl_dspi_dma {
-	/* Length of transfer in words of DSPI_FIFO_SIZE */
-	u32					curr_xfer_len;
-
 	u32					*tx_dma_buf;
 	struct dma_chan				*chan_tx;
 	dma_addr_t				tx_dma_phys;
@@ -186,38 +218,123 @@
 	struct spi_transfer			*cur_transfer;
 	struct spi_message			*cur_msg;
 	struct chip_data			*cur_chip;
+	size_t					progress;
 	size_t					len;
 	const void				*tx;
 	void					*rx;
-	void					*rx_end;
-	u16					void_write_data;
 	u16					tx_cmd;
-	u8					bits_per_word;
-	u8					bytes_per_word;
 	const struct fsl_dspi_devtype_data	*devtype_data;
 
 	struct completion			xfer_done;
 
 	struct fsl_dspi_dma			*dma;
+
+	int					oper_word_size;
+	int					oper_bits_per_word;
+
+	int					words_in_flight;
+
+	/*
+	 * Offsets for CMD and TXDATA within SPI_PUSHR when accessed
+	 * individually (in XSPI mode)
+	 */
+	int					pushr_cmd;
+	int					pushr_tx;
+
+	void (*host_to_dev)(struct fsl_dspi *dspi, u32 *txdata);
+	void (*dev_to_host)(struct fsl_dspi *dspi, u32 rxdata);
 };
 
+static void dspi_native_host_to_dev(struct fsl_dspi *dspi, u32 *txdata)
+{
+	switch (dspi->oper_word_size) {
+	case 1:
+		*txdata = *(u8 *)dspi->tx;
+		break;
+	case 2:
+		*txdata = *(u16 *)dspi->tx;
+		break;
+	case 4:
+		*txdata = *(u32 *)dspi->tx;
+		break;
+	}
+	dspi->tx += dspi->oper_word_size;
+}
+
+static void dspi_native_dev_to_host(struct fsl_dspi *dspi, u32 rxdata)
+{
+	switch (dspi->oper_word_size) {
+	case 1:
+		*(u8 *)dspi->rx = rxdata;
+		break;
+	case 2:
+		*(u16 *)dspi->rx = rxdata;
+		break;
+	case 4:
+		*(u32 *)dspi->rx = rxdata;
+		break;
+	}
+	dspi->rx += dspi->oper_word_size;
+}
+
+static void dspi_8on32_host_to_dev(struct fsl_dspi *dspi, u32 *txdata)
+{
+	*txdata = cpu_to_be32(*(u32 *)dspi->tx);
+	dspi->tx += sizeof(u32);
+}
+
+static void dspi_8on32_dev_to_host(struct fsl_dspi *dspi, u32 rxdata)
+{
+	*(u32 *)dspi->rx = be32_to_cpu(rxdata);
+	dspi->rx += sizeof(u32);
+}
+
+static void dspi_8on16_host_to_dev(struct fsl_dspi *dspi, u32 *txdata)
+{
+	*txdata = cpu_to_be16(*(u16 *)dspi->tx);
+	dspi->tx += sizeof(u16);
+}
+
+static void dspi_8on16_dev_to_host(struct fsl_dspi *dspi, u32 rxdata)
+{
+	*(u16 *)dspi->rx = be16_to_cpu(rxdata);
+	dspi->rx += sizeof(u16);
+}
+
+static void dspi_16on32_host_to_dev(struct fsl_dspi *dspi, u32 *txdata)
+{
+	u16 hi = *(u16 *)dspi->tx;
+	u16 lo = *(u16 *)(dspi->tx + 2);
+
+	*txdata = (u32)hi << 16 | lo;
+	dspi->tx += sizeof(u32);
+}
+
+static void dspi_16on32_dev_to_host(struct fsl_dspi *dspi, u32 rxdata)
+{
+	u16 hi = rxdata & 0xffff;
+	u16 lo = rxdata >> 16;
+
+	*(u16 *)dspi->rx = lo;
+	*(u16 *)(dspi->rx + 2) = hi;
+	dspi->rx += sizeof(u32);
+}
+
+/*
+ * Pop one word from the TX buffer for pushing into the
+ * PUSHR register (TX FIFO)
+ */
 static u32 dspi_pop_tx(struct fsl_dspi *dspi)
 {
 	u32 txdata = 0;
 
-	if (dspi->tx) {
-		if (dspi->bytes_per_word == 1)
-			txdata = *(u8 *)dspi->tx;
-		else if (dspi->bytes_per_word == 2)
-			txdata = *(u16 *)dspi->tx;
-		else  /* dspi->bytes_per_word == 4 */
-			txdata = *(u32 *)dspi->tx;
-		dspi->tx += dspi->bytes_per_word;
-	}
-	dspi->len -= dspi->bytes_per_word;
+	if (dspi->tx)
+		dspi->host_to_dev(dspi, &txdata);
+	dspi->len -= dspi->oper_word_size;
 	return txdata;
 }
 
+/* Prepare one TX FIFO entry (txdata plus cmd) */
 static u32 dspi_pop_tx_pushr(struct fsl_dspi *dspi)
 {
 	u16 cmd = dspi->tx_cmd, data = dspi_pop_tx(dspi);
@@ -230,21 +347,12 @@
 	return cmd << 16 | data;
 }
 
+/* Push one word to the RX buffer from the POPR register (RX FIFO) */
 static void dspi_push_rx(struct fsl_dspi *dspi, u32 rxdata)
 {
 	if (!dspi->rx)
 		return;
-
-	/* Mask off undefined bits */
-	rxdata &= (1 << dspi->bits_per_word) - 1;
-
-	if (dspi->bytes_per_word == 1)
-		*(u8 *)dspi->rx = rxdata;
-	else if (dspi->bytes_per_word == 2)
-		*(u16 *)dspi->rx = rxdata;
-	else /* dspi->bytes_per_word == 4 */
-		*(u32 *)dspi->rx = rxdata;
-	dspi->rx += dspi->bytes_per_word;
+	dspi->dev_to_host(dspi, rxdata);
 }
 
 static void dspi_tx_dma_callback(void *arg)
@@ -262,7 +370,7 @@
 	int i;
 
 	if (dspi->rx) {
-		for (i = 0; i < dma->curr_xfer_len; i++)
+		for (i = 0; i < dspi->words_in_flight; i++)
 			dspi_push_rx(dspi, dspi->dma->rx_dma_buf[i]);
 	}
 
@@ -276,12 +384,12 @@
 	int time_left;
 	int i;
 
-	for (i = 0; i < dma->curr_xfer_len; i++)
+	for (i = 0; i < dspi->words_in_flight; i++)
 		dspi->dma->tx_dma_buf[i] = dspi_pop_tx_pushr(dspi);
 
 	dma->tx_desc = dmaengine_prep_slave_single(dma->chan_tx,
 					dma->tx_dma_phys,
-					dma->curr_xfer_len *
+					dspi->words_in_flight *
 					DMA_SLAVE_BUSWIDTH_4_BYTES,
 					DMA_MEM_TO_DEV,
 					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
@@ -299,7 +407,7 @@
 
 	dma->rx_desc = dmaengine_prep_slave_single(dma->chan_rx,
 					dma->rx_dma_phys,
-					dma->curr_xfer_len *
+					dspi->words_in_flight *
 					DMA_SLAVE_BUSWIDTH_4_BYTES,
 					DMA_DEV_TO_MEM,
 					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
@@ -347,45 +455,42 @@
 	return 0;
 }
 
+static void dspi_setup_accel(struct fsl_dspi *dspi);
+
 static int dspi_dma_xfer(struct fsl_dspi *dspi)
 {
 	struct spi_message *message = dspi->cur_msg;
 	struct device *dev = &dspi->pdev->dev;
-	struct fsl_dspi_dma *dma = dspi->dma;
-	int curr_remaining_bytes;
-	int bytes_per_buffer;
 	int ret = 0;
 
-	curr_remaining_bytes = dspi->len;
-	bytes_per_buffer = DSPI_DMA_BUFSIZE / DSPI_FIFO_SIZE;
-	while (curr_remaining_bytes) {
-		/* Check if current transfer fits the DMA buffer */
-		dma->curr_xfer_len = curr_remaining_bytes
-			/ dspi->bytes_per_word;
-		if (dma->curr_xfer_len > bytes_per_buffer)
-			dma->curr_xfer_len = bytes_per_buffer;
+	/*
+	 * dspi->len gets decremented by dspi_pop_tx_pushr in
+	 * dspi_next_xfer_dma_submit
+	 */
+	while (dspi->len) {
+		/* Figure out operational bits-per-word for this chunk */
+		dspi_setup_accel(dspi);
+
+		dspi->words_in_flight = dspi->len / dspi->oper_word_size;
+		if (dspi->words_in_flight > dspi->devtype_data->fifo_size)
+			dspi->words_in_flight = dspi->devtype_data->fifo_size;
+
+		message->actual_length += dspi->words_in_flight *
+					  dspi->oper_word_size;
 
 		ret = dspi_next_xfer_dma_submit(dspi);
 		if (ret) {
 			dev_err(dev, "DMA transfer failed\n");
-			goto exit;
-
-		} else {
-			const int len =
-				dma->curr_xfer_len * dspi->bytes_per_word;
-			curr_remaining_bytes -= len;
-			message->actual_length += len;
-			if (curr_remaining_bytes < 0)
-				curr_remaining_bytes = 0;
+			break;
 		}
 	}
 
-exit:
 	return ret;
 }
 
 static int dspi_request_dma(struct fsl_dspi *dspi, phys_addr_t phy_addr)
 {
+	int dma_bufsize = dspi->devtype_data->fifo_size * 2;
 	struct device *dev = &dspi->pdev->dev;
 	struct dma_slave_config cfg;
 	struct fsl_dspi_dma *dma;
@@ -395,29 +500,31 @@
 	if (!dma)
 		return -ENOMEM;
 
-	dma->chan_rx = dma_request_slave_channel(dev, "rx");
-	if (!dma->chan_rx) {
+	dma->chan_rx = dma_request_chan(dev, "rx");
+	if (IS_ERR(dma->chan_rx)) {
 		dev_err(dev, "rx dma channel not available\n");
-		ret = -ENODEV;
+		ret = PTR_ERR(dma->chan_rx);
 		return ret;
 	}
 
-	dma->chan_tx = dma_request_slave_channel(dev, "tx");
-	if (!dma->chan_tx) {
+	dma->chan_tx = dma_request_chan(dev, "tx");
+	if (IS_ERR(dma->chan_tx)) {
 		dev_err(dev, "tx dma channel not available\n");
-		ret = -ENODEV;
+		ret = PTR_ERR(dma->chan_tx);
 		goto err_tx_channel;
 	}
 
-	dma->tx_dma_buf = dma_alloc_coherent(dev, DSPI_DMA_BUFSIZE,
-					     &dma->tx_dma_phys, GFP_KERNEL);
+	dma->tx_dma_buf = dma_alloc_coherent(dma->chan_tx->device->dev,
+					     dma_bufsize, &dma->tx_dma_phys,
+					     GFP_KERNEL);
 	if (!dma->tx_dma_buf) {
 		ret = -ENOMEM;
 		goto err_tx_dma_buf;
 	}
 
-	dma->rx_dma_buf = dma_alloc_coherent(dev, DSPI_DMA_BUFSIZE,
-					     &dma->rx_dma_phys, GFP_KERNEL);
+	dma->rx_dma_buf = dma_alloc_coherent(dma->chan_rx->device->dev,
+					     dma_bufsize, &dma->rx_dma_phys,
+					     GFP_KERNEL);
 	if (!dma->rx_dma_buf) {
 		ret = -ENOMEM;
 		goto err_rx_dma_buf;
@@ -454,11 +561,11 @@
 	return 0;
 
 err_slave_config:
-	dma_free_coherent(dev, DSPI_DMA_BUFSIZE,
-			dma->rx_dma_buf, dma->rx_dma_phys);
+	dma_free_coherent(dma->chan_rx->device->dev,
+			  dma_bufsize, dma->rx_dma_buf, dma->rx_dma_phys);
 err_rx_dma_buf:
-	dma_free_coherent(dev, DSPI_DMA_BUFSIZE,
-			dma->tx_dma_buf, dma->tx_dma_phys);
+	dma_free_coherent(dma->chan_tx->device->dev,
+			  dma_bufsize, dma->tx_dma_buf, dma->tx_dma_phys);
 err_tx_dma_buf:
 	dma_release_channel(dma->chan_tx);
 err_tx_channel:
@@ -472,21 +579,21 @@
 
 static void dspi_release_dma(struct fsl_dspi *dspi)
 {
+	int dma_bufsize = dspi->devtype_data->fifo_size * 2;
 	struct fsl_dspi_dma *dma = dspi->dma;
-	struct device *dev = &dspi->pdev->dev;
 
 	if (!dma)
 		return;
 
 	if (dma->chan_tx) {
-		dma_unmap_single(dev, dma->tx_dma_phys,
-				 DSPI_DMA_BUFSIZE, DMA_TO_DEVICE);
+		dma_free_coherent(dma->chan_tx->device->dev, dma_bufsize,
+				  dma->tx_dma_buf, dma->tx_dma_phys);
 		dma_release_channel(dma->chan_tx);
 	}
 
 	if (dma->chan_rx) {
-		dma_unmap_single(dev, dma->rx_dma_phys,
-				 DSPI_DMA_BUFSIZE, DMA_FROM_DEVICE);
+		dma_free_coherent(dma->chan_rx->device->dev, dma_bufsize,
+				  dma->rx_dma_buf, dma->rx_dma_phys);
 		dma_release_channel(dma->chan_rx);
 	}
 }
@@ -562,48 +669,66 @@
 	}
 }
 
-static void fifo_write(struct fsl_dspi *dspi)
+static void dspi_pushr_cmd_write(struct fsl_dspi *dspi, u16 cmd)
 {
-	regmap_write(dspi->regmap, SPI_PUSHR, dspi_pop_tx_pushr(dspi));
-}
-
-static void cmd_fifo_write(struct fsl_dspi *dspi)
-{
-	u16 cmd = dspi->tx_cmd;
-
-	if (dspi->len > 0)
+	/*
+	 * The only time when the PCS doesn't need continuation after this word
+	 * is when it's last. We need to look ahead, because we actually call
+	 * dspi_pop_tx (the function that decrements dspi->len) _after_
+	 * dspi_pushr_cmd_write with XSPI mode. As for how much in advance? One
+	 * word is enough. If there's more to transmit than that,
+	 * dspi_xspi_write will know to split the FIFO writes in 2, and
+	 * generate a new PUSHR command with the final word that will have PCS
+	 * deasserted (not continued) here.
+	 */
+	if (dspi->len > dspi->oper_word_size)
 		cmd |= SPI_PUSHR_CMD_CONT;
-	regmap_write(dspi->regmap_pushr, PUSHR_CMD, cmd);
+	regmap_write(dspi->regmap_pushr, dspi->pushr_cmd, cmd);
 }
 
-static void tx_fifo_write(struct fsl_dspi *dspi, u16 txdata)
+static void dspi_pushr_txdata_write(struct fsl_dspi *dspi, u16 txdata)
 {
-	regmap_write(dspi->regmap_pushr, PUSHR_TX, txdata);
+	regmap_write(dspi->regmap_pushr, dspi->pushr_tx, txdata);
 }
 
-static void dspi_tcfq_write(struct fsl_dspi *dspi)
+static void dspi_xspi_fifo_write(struct fsl_dspi *dspi, int num_words)
 {
-	/* Clear transfer count */
-	dspi->tx_cmd |= SPI_PUSHR_CMD_CTCNT;
+	int num_bytes = num_words * dspi->oper_word_size;
+	u16 tx_cmd = dspi->tx_cmd;
 
-	if (dspi->devtype_data->xspi_mode && dspi->bits_per_word > 16) {
-		/* Write the CMD FIFO entry first, and then the two
-		 * corresponding TX FIFO entries.
-		 */
+	/*
+	 * If the PCS needs to de-assert (i.e. we're at the end of the buffer
+	 * and cs_change does not want the PCS to stay on), then we need a new
+	 * PUSHR command, since this one (for the body of the buffer)
+	 * necessarily has the CONT bit set.
+	 * So send one word less during this go, to force a split and a command
+	 * with a single word next time, when CONT will be unset.
+	 */
+	if (!(dspi->tx_cmd & SPI_PUSHR_CMD_CONT) && num_bytes == dspi->len)
+		tx_cmd |= SPI_PUSHR_CMD_EOQ;
+
+	/* Update CTARE */
+	regmap_write(dspi->regmap, SPI_CTARE(0),
+		     SPI_FRAME_EBITS(dspi->oper_bits_per_word) |
+		     SPI_CTARE_DTCP(num_words));
+
+	/*
+	 * Write the CMD FIFO entry first, and then the two
+	 * corresponding TX FIFO entries (or one...).
+	 */
+	dspi_pushr_cmd_write(dspi, tx_cmd);
+
+	/* Fill TX FIFO with as many transfers as possible */
+	while (num_words--) {
 		u32 data = dspi_pop_tx(dspi);
 
-		cmd_fifo_write(dspi);
-		tx_fifo_write(dspi, data & 0xFFFF);
-		tx_fifo_write(dspi, data >> 16);
-	} else {
-		/* Write one entry to both TX FIFO and CMD FIFO
-		 * simultaneously.
-		 */
-		fifo_write(dspi);
+		dspi_pushr_txdata_write(dspi, data & 0xFFFF);
+		if (dspi->oper_bits_per_word > 16)
+			dspi_pushr_txdata_write(dspi, data >> 16);
 	}
 }
 
-static u32 fifo_read(struct fsl_dspi *dspi)
+static u32 dspi_popr_read(struct fsl_dspi *dspi)
 {
 	u32 rxdata = 0;
 
@@ -611,68 +736,130 @@
 	return rxdata;
 }
 
-static void dspi_tcfq_read(struct fsl_dspi *dspi)
+static void dspi_fifo_read(struct fsl_dspi *dspi)
 {
-	dspi_push_rx(dspi, fifo_read(dspi));
-}
-
-static void dspi_eoq_write(struct fsl_dspi *dspi)
-{
-	int fifo_size = DSPI_FIFO_SIZE;
-	u16 xfer_cmd = dspi->tx_cmd;
-
-	/* Fill TX FIFO with as many transfers as possible */
-	while (dspi->len && fifo_size--) {
-		dspi->tx_cmd = xfer_cmd;
-		/* Request EOQF for last transfer in FIFO */
-		if (dspi->len == dspi->bytes_per_word || fifo_size == 0)
-			dspi->tx_cmd |= SPI_PUSHR_CMD_EOQ;
-		/* Clear transfer count for first transfer in FIFO */
-		if (fifo_size == (DSPI_FIFO_SIZE - 1))
-			dspi->tx_cmd |= SPI_PUSHR_CMD_CTCNT;
-		/* Write combined TX FIFO and CMD FIFO entry */
-		fifo_write(dspi);
-	}
-}
-
-static void dspi_eoq_read(struct fsl_dspi *dspi)
-{
-	int fifo_size = DSPI_FIFO_SIZE;
+	int num_fifo_entries = dspi->words_in_flight;
 
 	/* Read one FIFO entry and push to rx buffer */
-	while ((dspi->rx < dspi->rx_end) && fifo_size--)
-		dspi_push_rx(dspi, fifo_read(dspi));
+	while (num_fifo_entries--)
+		dspi_push_rx(dspi, dspi_popr_read(dspi));
+}
+
+static void dspi_setup_accel(struct fsl_dspi *dspi)
+{
+	struct spi_transfer *xfer = dspi->cur_transfer;
+	bool odd = !!(dspi->len & 1);
+
+	/* No accel for frames not multiple of 8 bits at the moment */
+	if (xfer->bits_per_word % 8)
+		goto no_accel;
+
+	if (!odd && dspi->len <= dspi->devtype_data->fifo_size * 2) {
+		dspi->oper_bits_per_word = 16;
+	} else if (odd && dspi->len <= dspi->devtype_data->fifo_size) {
+		dspi->oper_bits_per_word = 8;
+	} else {
+		/* Start off with maximum supported by hardware */
+		if (dspi->devtype_data->trans_mode == DSPI_XSPI_MODE)
+			dspi->oper_bits_per_word = 32;
+		else
+			dspi->oper_bits_per_word = 16;
+
+		/*
+		 * And go down only if the buffer can't be sent with
+		 * words this big
+		 */
+		do {
+			if (dspi->len >= DIV_ROUND_UP(dspi->oper_bits_per_word, 8))
+				break;
+
+			dspi->oper_bits_per_word /= 2;
+		} while (dspi->oper_bits_per_word > 8);
+	}
+
+	if (xfer->bits_per_word == 8 && dspi->oper_bits_per_word == 32) {
+		dspi->dev_to_host = dspi_8on32_dev_to_host;
+		dspi->host_to_dev = dspi_8on32_host_to_dev;
+	} else if (xfer->bits_per_word == 8 && dspi->oper_bits_per_word == 16) {
+		dspi->dev_to_host = dspi_8on16_dev_to_host;
+		dspi->host_to_dev = dspi_8on16_host_to_dev;
+	} else if (xfer->bits_per_word == 16 && dspi->oper_bits_per_word == 32) {
+		dspi->dev_to_host = dspi_16on32_dev_to_host;
+		dspi->host_to_dev = dspi_16on32_host_to_dev;
+	} else {
+no_accel:
+		dspi->dev_to_host = dspi_native_dev_to_host;
+		dspi->host_to_dev = dspi_native_host_to_dev;
+		dspi->oper_bits_per_word = xfer->bits_per_word;
+	}
+
+	dspi->oper_word_size = DIV_ROUND_UP(dspi->oper_bits_per_word, 8);
+
+	/*
+	 * Update CTAR here (code is common for XSPI and DMA modes).
+	 * We will update CTARE in the portion specific to XSPI, when we
+	 * also know the preload value (DTCP).
+	 */
+	regmap_write(dspi->regmap, SPI_CTAR(0),
+		     dspi->cur_chip->ctar_val |
+		     SPI_FRAME_BITS(dspi->oper_bits_per_word));
+}
+
+static void dspi_fifo_write(struct fsl_dspi *dspi)
+{
+	int num_fifo_entries = dspi->devtype_data->fifo_size;
+	struct spi_transfer *xfer = dspi->cur_transfer;
+	struct spi_message *msg = dspi->cur_msg;
+	int num_words, num_bytes;
+
+	dspi_setup_accel(dspi);
+
+	/* In XSPI mode each 32-bit word occupies 2 TX FIFO entries */
+	if (dspi->oper_word_size == 4)
+		num_fifo_entries /= 2;
+
+	/*
+	 * Integer division intentionally trims off odd (or non-multiple of 4)
+	 * numbers of bytes at the end of the buffer, which will be sent next
+	 * time using a smaller oper_word_size.
+	 */
+	num_words = dspi->len / dspi->oper_word_size;
+	if (num_words > num_fifo_entries)
+		num_words = num_fifo_entries;
+
+	/* Update total number of bytes that were transferred */
+	num_bytes = num_words * dspi->oper_word_size;
+	msg->actual_length += num_bytes;
+	dspi->progress += num_bytes / DIV_ROUND_UP(xfer->bits_per_word, 8);
+
+	/*
+	 * Update shared variable for use in the next interrupt (both in
+	 * dspi_fifo_read and in dspi_fifo_write).
+	 */
+	dspi->words_in_flight = num_words;
+
+	spi_take_timestamp_pre(dspi->ctlr, xfer, dspi->progress, !dspi->irq);
+
+	dspi_xspi_fifo_write(dspi, num_words);
+	/*
+	 * Everything after this point is in a potential race with the next
+	 * interrupt, so we must never use dspi->words_in_flight again since it
+	 * might already be modified by the next dspi_fifo_write.
+	 */
+
+	spi_take_timestamp_post(dspi->ctlr, dspi->cur_transfer,
+				dspi->progress, !dspi->irq);
 }
 
 static int dspi_rxtx(struct fsl_dspi *dspi)
 {
-	struct spi_message *msg = dspi->cur_msg;
-	enum dspi_trans_mode trans_mode;
-	u16 spi_tcnt;
-	u32 spi_tcr;
-
-	/* Get transfer counter (in number of SPI transfers). It was
-	 * reset to 0 when transfer(s) were started.
-	 */
-	regmap_read(dspi->regmap, SPI_TCR, &spi_tcr);
-	spi_tcnt = SPI_TCR_GET_TCNT(spi_tcr);
-	/* Update total number of bytes that were transferred */
-	msg->actual_length += spi_tcnt * dspi->bytes_per_word;
-
-	trans_mode = dspi->devtype_data->trans_mode;
-	if (trans_mode == DSPI_EOQ_MODE)
-		dspi_eoq_read(dspi);
-	else if (trans_mode == DSPI_TCFQ_MODE)
-		dspi_tcfq_read(dspi);
+	dspi_fifo_read(dspi);
 
 	if (!dspi->len)
 		/* Success! */
 		return 0;
 
-	if (trans_mode == DSPI_EOQ_MODE)
-		dspi_eoq_write(dspi);
-	else if (trans_mode == DSPI_TCFQ_MODE)
-		dspi_tcfq_write(dspi);
+	dspi_fifo_write(dspi);
 
 	return -EINPROGRESS;
 }
@@ -686,7 +873,7 @@
 		regmap_read(dspi->regmap, SPI_SR, &spi_sr);
 		regmap_write(dspi->regmap, SPI_SR, spi_sr);
 
-		if (spi_sr & (SPI_SR_EOQF | SPI_SR_TCFQF))
+		if (spi_sr & SPI_SR_CMDTCF)
 			break;
 	} while (--tries);
 
@@ -704,7 +891,7 @@
 	regmap_read(dspi->regmap, SPI_SR, &spi_sr);
 	regmap_write(dspi->regmap, SPI_SR, spi_sr);
 
-	if (!(spi_sr & (SPI_SR_EOQF | SPI_SR_TCFQF)))
+	if (!(spi_sr & SPI_SR_CMDTCF))
 		return IRQ_NONE;
 
 	if (dspi_rxtx(dspi) == 0)
@@ -718,7 +905,6 @@
 {
 	struct fsl_dspi *dspi = spi_controller_get_devdata(ctlr);
 	struct spi_device *spi = message->spi;
-	enum dspi_trans_mode trans_mode;
 	struct spi_transfer *transfer;
 	int status = 0;
 
@@ -748,69 +934,38 @@
 				dspi->tx_cmd |= SPI_PUSHR_CMD_CONT;
 		}
 
-		dspi->void_write_data = dspi->cur_chip->void_write_data;
-
 		dspi->tx = transfer->tx_buf;
 		dspi->rx = transfer->rx_buf;
-		dspi->rx_end = dspi->rx + transfer->len;
 		dspi->len = transfer->len;
-		/* Validated transfer specific frame size (defaults applied) */
-		dspi->bits_per_word = transfer->bits_per_word;
-		if (transfer->bits_per_word <= 8)
-			dspi->bytes_per_word = 1;
-		else if (transfer->bits_per_word <= 16)
-			dspi->bytes_per_word = 2;
-		else
-			dspi->bytes_per_word = 4;
+		dspi->progress = 0;
 
 		regmap_update_bits(dspi->regmap, SPI_MCR,
 				   SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF,
 				   SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF);
-		regmap_write(dspi->regmap, SPI_CTAR(0),
-			     dspi->cur_chip->ctar_val |
-			     SPI_FRAME_BITS(transfer->bits_per_word));
-		if (dspi->devtype_data->xspi_mode)
-			regmap_write(dspi->regmap, SPI_CTARE(0),
-				     SPI_FRAME_EBITS(transfer->bits_per_word) |
-				     SPI_CTARE_DTCP(1));
 
-		trans_mode = dspi->devtype_data->trans_mode;
-		switch (trans_mode) {
-		case DSPI_EOQ_MODE:
-			regmap_write(dspi->regmap, SPI_RSER, SPI_RSER_EOQFE);
-			dspi_eoq_write(dspi);
-			break;
-		case DSPI_TCFQ_MODE:
-			regmap_write(dspi->regmap, SPI_RSER, SPI_RSER_TCFQE);
-			dspi_tcfq_write(dspi);
-			break;
-		case DSPI_DMA_MODE:
-			regmap_write(dspi->regmap, SPI_RSER,
-				     SPI_RSER_TFFFE | SPI_RSER_TFFFD |
-				     SPI_RSER_RFDFE | SPI_RSER_RFDFD);
+		spi_take_timestamp_pre(dspi->ctlr, dspi->cur_transfer,
+				       dspi->progress, !dspi->irq);
+
+		if (dspi->devtype_data->trans_mode == DSPI_DMA_MODE) {
 			status = dspi_dma_xfer(dspi);
+		} else {
+			dspi_fifo_write(dspi);
+
+			if (dspi->irq) {
+				wait_for_completion(&dspi->xfer_done);
+				reinit_completion(&dspi->xfer_done);
+			} else {
+				do {
+					status = dspi_poll(dspi);
+				} while (status == -EINPROGRESS);
+			}
+		}
+		if (status)
 			break;
-		default:
-			dev_err(&dspi->pdev->dev, "unsupported trans_mode %u\n",
-				trans_mode);
-			status = -EINVAL;
-			goto out;
-		}
 
-		if (!dspi->irq) {
-			do {
-				status = dspi_poll(dspi);
-			} while (status == -EINPROGRESS);
-		} else if (trans_mode != DSPI_DMA_MODE) {
-			wait_for_completion(&dspi->xfer_done);
-			reinit_completion(&dspi->xfer_done);
-		}
-
-		if (transfer->delay_usecs)
-			udelay(transfer->delay_usecs);
+		spi_transfer_delay_exec(transfer);
 	}
 
-out:
 	message->status = status;
 	spi_finalize_current_message(ctlr);
 
@@ -848,8 +1003,6 @@
 		sck_cs_delay = pdata->sck_cs_delay;
 	}
 
-	chip->void_write_data = 0;
-
 	clkrate = clk_get_rate(dspi->clk);
 	hz_to_spi_baud(&pbr, &br, spi->max_speed_hz, clkrate);
 
@@ -893,9 +1046,34 @@
 }
 
 static const struct of_device_id fsl_dspi_dt_ids[] = {
-	{ .compatible = "fsl,vf610-dspi", .data = &vf610_data, },
-	{ .compatible = "fsl,ls1021a-v1.0-dspi", .data = &ls1021a_v1_data, },
-	{ .compatible = "fsl,ls2085a-dspi", .data = &ls2085a_data, },
+	{
+		.compatible = "fsl,vf610-dspi",
+		.data = &devtype_data[VF610],
+	}, {
+		.compatible = "fsl,ls1021a-v1.0-dspi",
+		.data = &devtype_data[LS1021A],
+	}, {
+		.compatible = "fsl,ls1012a-dspi",
+		.data = &devtype_data[LS1012A],
+	}, {
+		.compatible = "fsl,ls1028a-dspi",
+		.data = &devtype_data[LS1028A],
+	}, {
+		.compatible = "fsl,ls1043a-dspi",
+		.data = &devtype_data[LS1043A],
+	}, {
+		.compatible = "fsl,ls1046a-dspi",
+		.data = &devtype_data[LS1046A],
+	}, {
+		.compatible = "fsl,ls2080a-dspi",
+		.data = &devtype_data[LS2080A],
+	}, {
+		.compatible = "fsl,ls2085a-dspi",
+		.data = &devtype_data[LS2085A],
+	}, {
+		.compatible = "fsl,lx2160a-dspi",
+		.data = &devtype_data[LX2160A],
+	},
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, fsl_dspi_dt_ids);
@@ -903,12 +1081,11 @@
 #ifdef CONFIG_PM_SLEEP
 static int dspi_suspend(struct device *dev)
 {
-	struct spi_controller *ctlr = dev_get_drvdata(dev);
-	struct fsl_dspi *dspi = spi_controller_get_devdata(ctlr);
+	struct fsl_dspi *dspi = dev_get_drvdata(dev);
 
 	if (dspi->irq)
 		disable_irq(dspi->irq);
-	spi_controller_suspend(ctlr);
+	spi_controller_suspend(dspi->ctlr);
 	clk_disable_unprepare(dspi->clk);
 
 	pinctrl_pm_select_sleep_state(dev);
@@ -918,8 +1095,7 @@
 
 static int dspi_resume(struct device *dev)
 {
-	struct spi_controller *ctlr = dev_get_drvdata(dev);
-	struct fsl_dspi *dspi = spi_controller_get_devdata(ctlr);
+	struct fsl_dspi *dspi = dev_get_drvdata(dev);
 	int ret;
 
 	pinctrl_pm_select_default_state(dev);
@@ -927,7 +1103,7 @@
 	ret = clk_prepare_enable(dspi->clk);
 	if (ret)
 		return ret;
-	spi_controller_resume(ctlr);
+	spi_controller_resume(dspi->ctlr);
 	if (dspi->irq)
 		enable_irq(dspi->irq);
 
@@ -985,20 +1161,58 @@
 	},
 };
 
-static void dspi_init(struct fsl_dspi *dspi)
+static int dspi_init(struct fsl_dspi *dspi)
 {
-	unsigned int mcr = SPI_MCR_PCSIS;
+	unsigned int mcr;
 
-	if (dspi->devtype_data->xspi_mode)
+	/* Set idle states for all chip select signals to high */
+	mcr = SPI_MCR_PCSIS(GENMASK(dspi->ctlr->max_native_cs - 1, 0));
+
+	if (dspi->devtype_data->trans_mode == DSPI_XSPI_MODE)
 		mcr |= SPI_MCR_XSPI;
 	if (!spi_controller_is_slave(dspi->ctlr))
 		mcr |= SPI_MCR_MASTER;
 
 	regmap_write(dspi->regmap, SPI_MCR, mcr);
 	regmap_write(dspi->regmap, SPI_SR, SPI_SR_CLEAR);
-	if (dspi->devtype_data->xspi_mode)
-		regmap_write(dspi->regmap, SPI_CTARE(0),
-			     SPI_CTARE_FMSZE(0) | SPI_CTARE_DTCP(1));
+
+	switch (dspi->devtype_data->trans_mode) {
+	case DSPI_XSPI_MODE:
+		regmap_write(dspi->regmap, SPI_RSER, SPI_RSER_CMDTCFE);
+		break;
+	case DSPI_DMA_MODE:
+		regmap_write(dspi->regmap, SPI_RSER,
+			     SPI_RSER_TFFFE | SPI_RSER_TFFFD |
+			     SPI_RSER_RFDFE | SPI_RSER_RFDFD);
+		break;
+	default:
+		dev_err(&dspi->pdev->dev, "unsupported trans_mode %u\n",
+			dspi->devtype_data->trans_mode);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int dspi_slave_abort(struct spi_master *master)
+{
+	struct fsl_dspi *dspi = spi_master_get_devdata(master);
+
+	/*
+	 * Terminate all pending DMA transactions for the SPI working
+	 * in SLAVE mode.
+	 */
+	if (dspi->devtype_data->trans_mode == DSPI_DMA_MODE) {
+		dmaengine_terminate_sync(dspi->dma->chan_rx);
+		dmaengine_terminate_sync(dspi->dma->chan_tx);
+	}
+
+	/* Clear the internal DSPI RX and TX FIFO buffers */
+	regmap_update_bits(dspi->regmap, SPI_MCR,
+			   SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF,
+			   SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF);
+
+	return 0;
 }
 
 static int dspi_probe(struct platform_device *pdev)
@@ -1007,16 +1221,23 @@
 	const struct regmap_config *regmap_config;
 	struct fsl_dspi_platform_data *pdata;
 	struct spi_controller *ctlr;
-	int ret, cs_num, bus_num;
+	int ret, cs_num, bus_num = -1;
 	struct fsl_dspi *dspi;
 	struct resource *res;
 	void __iomem *base;
+	bool big_endian;
 
-	ctlr = spi_alloc_master(&pdev->dev, sizeof(struct fsl_dspi));
+	dspi = devm_kzalloc(&pdev->dev, sizeof(*dspi), GFP_KERNEL);
+	if (!dspi)
+		return -ENOMEM;
+
+	ctlr = spi_alloc_master(&pdev->dev, 0);
 	if (!ctlr)
 		return -ENOMEM;
 
-	dspi = spi_controller_get_devdata(ctlr);
+	spi_controller_set_devdata(ctlr, dspi);
+	platform_set_drvdata(pdev, dspi);
+
 	dspi->pdev = pdev;
 	dspi->ctlr = ctlr;
 
@@ -1025,14 +1246,17 @@
 	ctlr->dev.of_node = pdev->dev.of_node;
 
 	ctlr->cleanup = dspi_cleanup;
+	ctlr->slave_abort = dspi_slave_abort;
 	ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST;
 
 	pdata = dev_get_platdata(&pdev->dev);
 	if (pdata) {
-		ctlr->num_chipselect = pdata->cs_num;
+		ctlr->num_chipselect = ctlr->max_native_cs = pdata->cs_num;
 		ctlr->bus_num = pdata->bus_num;
 
-		dspi->devtype_data = &coldfire_data;
+		/* Only Coldfire uses platform data */
+		dspi->devtype_data = &devtype_data[MCF5441X];
+		big_endian = true;
 	} else {
 
 		ret = of_property_read_u32(np, "spi-num-chipselects", &cs_num);
@@ -1040,13 +1264,9 @@
 			dev_err(&pdev->dev, "can't get spi-num-chipselects\n");
 			goto out_ctlr_put;
 		}
-		ctlr->num_chipselect = cs_num;
+		ctlr->num_chipselect = ctlr->max_native_cs = cs_num;
 
-		ret = of_property_read_u32(np, "bus-num", &bus_num);
-		if (ret < 0) {
-			dev_err(&pdev->dev, "can't get bus-num\n");
-			goto out_ctlr_put;
-		}
+		of_property_read_u32(np, "bus-num", &bus_num);
 		ctlr->bus_num = bus_num;
 
 		if (of_property_read_bool(np, "spi-slave"))
@@ -1058,9 +1278,18 @@
 			ret = -EFAULT;
 			goto out_ctlr_put;
 		}
+
+		big_endian = of_device_is_big_endian(np);
+	}
+	if (big_endian) {
+		dspi->pushr_cmd = 0;
+		dspi->pushr_tx = 2;
+	} else {
+		dspi->pushr_cmd = 2;
+		dspi->pushr_tx = 0;
 	}
 
-	if (dspi->devtype_data->xspi_mode)
+	if (dspi->devtype_data->trans_mode == DSPI_XSPI_MODE)
 		ctlr->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
 	else
 		ctlr->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 16);
@@ -1072,7 +1301,7 @@
 		goto out_ctlr_put;
 	}
 
-	if (dspi->devtype_data->xspi_mode)
+	if (dspi->devtype_data->trans_mode == DSPI_XSPI_MODE)
 		regmap_config = &dspi_xspi_regmap_config[0];
 	else
 		regmap_config = &dspi_regmap_config;
@@ -1084,7 +1313,7 @@
 		goto out_ctlr_put;
 	}
 
-	if (dspi->devtype_data->xspi_mode) {
+	if (dspi->devtype_data->trans_mode == DSPI_XSPI_MODE) {
 		dspi->regmap_pushr = devm_regmap_init_mmio(
 			&pdev->dev, base + SPI_PUSHR,
 			&dspi_xspi_regmap_config[1]);
@@ -1107,7 +1336,9 @@
 	if (ret)
 		goto out_ctlr_put;
 
-	dspi_init(dspi);
+	ret = dspi_init(dspi);
+	if (ret)
+		goto out_clk_put;
 
 	dspi->irq = platform_get_irq(pdev, 0);
 	if (dspi->irq <= 0) {
@@ -1117,6 +1348,8 @@
 		goto poll_mode;
 	}
 
+	init_completion(&dspi->xfer_done);
+
 	ret = request_threaded_irq(dspi->irq, dspi_interrupt, NULL,
 				   IRQF_SHARED, pdev->name, dspi);
 	if (ret < 0) {
@@ -1124,9 +1357,8 @@
 		goto out_clk_put;
 	}
 
-	init_completion(&dspi->xfer_done);
-
 poll_mode:
+
 	if (dspi->devtype_data->trans_mode == DSPI_DMA_MODE) {
 		ret = dspi_request_dma(dspi, res->start);
 		if (ret < 0) {
@@ -1138,7 +1370,8 @@
 	ctlr->max_speed_hz =
 		clk_get_rate(dspi->clk) / dspi->devtype_data->max_clock_factor;
 
-	platform_set_drvdata(pdev, ctlr);
+	if (dspi->devtype_data->trans_mode != DSPI_DMA_MODE)
+		ctlr->ptp_sts_supported = true;
 
 	ret = spi_register_controller(ctlr);
 	if (ret != 0) {
@@ -1163,8 +1396,7 @@
 
 static int dspi_remove(struct platform_device *pdev)
 {
-	struct spi_controller *ctlr = platform_get_drvdata(pdev);
-	struct fsl_dspi *dspi = spi_controller_get_devdata(ctlr);
+	struct fsl_dspi *dspi = platform_get_drvdata(pdev);
 
 	/* Disconnect from the SPI framework */
 	spi_unregister_controller(dspi->ctlr);
diff --git a/drivers/spi/spi-fsl-espi.c b/drivers/spi/spi-fsl-espi.c
index 215bf66..cf2b947 100644
--- a/drivers/spi/spi-fsl-espi.c
+++ b/drivers/spi/spi-fsl-espi.c
@@ -427,8 +427,7 @@
 
 	ret = fsl_espi_bufs(spi, trans);
 
-	if (trans->delay_usecs)
-		udelay(trans->delay_usecs);
+	spi_transfer_delay_exec(trans);
 
 	return ret;
 }
@@ -437,6 +436,7 @@
 			       struct spi_message *m)
 {
 	unsigned int delay_usecs = 0, rx_nbits = 0;
+	unsigned int delay_nsecs = 0, delay_nsecs1 = 0;
 	struct spi_transfer *t, trans = {};
 	int ret;
 
@@ -445,8 +445,16 @@
 		goto out;
 
 	list_for_each_entry(t, &m->transfers, transfer_list) {
-		if (t->delay_usecs > delay_usecs)
-			delay_usecs = t->delay_usecs;
+		if (t->delay_usecs) {
+			if (t->delay_usecs > delay_usecs) {
+				delay_usecs = t->delay_usecs;
+				delay_nsecs = delay_usecs * 1000;
+			}
+		} else {
+			delay_nsecs1 = spi_delay_to_ns(&t->delay, t);
+			if (delay_nsecs1 > delay_nsecs)
+				delay_nsecs = delay_nsecs1;
+		}
 		if (t->rx_nbits > rx_nbits)
 			rx_nbits = t->rx_nbits;
 	}
@@ -457,7 +465,8 @@
 	trans.len = m->frame_length;
 	trans.speed_hz = t->speed_hz;
 	trans.bits_per_word = t->bits_per_word;
-	trans.delay_usecs = delay_usecs;
+	trans.delay.value = delay_nsecs;
+	trans.delay.unit = SPI_DELAY_UNIT_NSECS;
 	trans.rx_nbits = rx_nbits;
 
 	if (trans.len)
@@ -722,7 +731,7 @@
 	if (ret < 0)
 		goto err_pm;
 
-	dev_info(dev, "at 0x%p (irq = %u)\n", espi->reg_base, irq);
+	dev_info(dev, "irq = %u\n", irq);
 
 	pm_runtime_mark_last_busy(dev);
 	pm_runtime_put_autosuspend(dev);
diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c
index 58b2da9..5d98611 100644
--- a/drivers/spi/spi-fsl-lpspi.c
+++ b/drivers/spi/spi-fsl-lpspi.c
@@ -11,7 +11,6 @@
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
-#include <linux/gpio.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/irq.h>
@@ -19,11 +18,9 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/of_gpio.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
 #include <linux/platform_data/dma-imx.h>
-#include <linux/platform_data/spi-imx.h>
 #include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/spi/spi.h>
@@ -86,8 +83,6 @@
 #define TCR_RXMSK	BIT(19)
 #define TCR_TXMSK	BIT(18)
 
-static int clkdivs[] = {1, 2, 4, 8, 16, 32, 64, 128};
-
 struct lpspi_config {
 	u8 bpw;
 	u8 chip_select;
@@ -103,6 +98,7 @@
 	struct clk *clk_ipg;
 	struct clk *clk_per;
 	bool is_slave;
+	bool is_only_cs1;
 	bool is_first_byte;
 
 	void *rx_buf;
@@ -124,8 +120,6 @@
 	bool usedma;
 	struct completion dma_rx_completion;
 	struct completion dma_tx_completion;
-
-	int chipselect[0];
 };
 
 static const struct of_device_id fsl_lpspi_dt_ids[] = {
@@ -188,14 +182,13 @@
 
 	bytes_per_word = fsl_lpspi_bytes_per_word(transfer->bits_per_word);
 
-	switch (bytes_per_word)
-	{
-		case 1:
-		case 2:
-		case 4:
-			break;
-		default:
-			return false;
+	switch (bytes_per_word) {
+	case 1:
+	case 2:
+	case 4:
+		break;
+	default:
+		return false;
 	}
 
 	return true;
@@ -227,20 +220,6 @@
 	return 0;
 }
 
-static int fsl_lpspi_prepare_message(struct spi_controller *controller,
-				     struct spi_message *msg)
-{
-	struct fsl_lpspi_data *fsl_lpspi =
-					spi_controller_get_devdata(controller);
-	struct spi_device *spi = msg->spi;
-	int gpio = fsl_lpspi->chipselect[spi->chip_select];
-
-	if (gpio_is_valid(gpio))
-		gpio_direction_output(gpio, spi->mode & SPI_CS_HIGH ? 0 : 1);
-
-	return 0;
-}
-
 static void fsl_lpspi_write_tx_fifo(struct fsl_lpspi_data *fsl_lpspi)
 {
 	u8 txfifo_cnt;
@@ -279,10 +258,9 @@
 
 	temp |= fsl_lpspi->config.bpw - 1;
 	temp |= (fsl_lpspi->config.mode & 0x3) << 30;
+	temp |= (fsl_lpspi->config.chip_select & 0x3) << 24;
 	if (!fsl_lpspi->is_slave) {
 		temp |= fsl_lpspi->config.prescale << 27;
-		temp |= (fsl_lpspi->config.chip_select & 0x3) << 24;
-
 		/*
 		 * Set TCR_CONT will keep SS asserted after current transfer.
 		 * For the first transfer, clear TCR_CONTC to assert SS.
@@ -331,15 +309,14 @@
 	}
 
 	for (prescale = 0; prescale < 8; prescale++) {
-		scldiv = perclk_rate /
-			 (clkdivs[prescale] * config.speed_hz) - 2;
+		scldiv = perclk_rate / config.speed_hz / (1 << prescale) - 2;
 		if (scldiv < 256) {
 			fsl_lpspi->config.prescale = prescale;
 			break;
 		}
 	}
 
-	if (prescale == 8 && scldiv >= 256)
+	if (scldiv >= 256)
 		return -EINVAL;
 
 	writel(scldiv | (scldiv << 8) | ((scldiv >> 1) << 16),
@@ -444,7 +421,10 @@
 	fsl_lpspi->config.mode = spi->mode;
 	fsl_lpspi->config.bpw = t->bits_per_word;
 	fsl_lpspi->config.speed_hz = t->speed_hz;
-	fsl_lpspi->config.chip_select = spi->chip_select;
+	if (fsl_lpspi->is_only_cs1)
+		fsl_lpspi->config.chip_select = 1;
+	else
+		fsl_lpspi->config.chip_select = spi->chip_select;
 
 	if (!fsl_lpspi->config.speed_hz)
 		fsl_lpspi->config.speed_hz = spi->max_speed_hz;
@@ -469,9 +449,9 @@
 		fsl_lpspi->watermark = fsl_lpspi->txfifosize;
 
 	if (fsl_lpspi_can_dma(controller, spi, t))
-		fsl_lpspi->usedma = 1;
+		fsl_lpspi->usedma = true;
 	else
-		fsl_lpspi->usedma = 0;
+		fsl_lpspi->usedma = false;
 
 	return fsl_lpspi_config(fsl_lpspi);
 }
@@ -675,7 +655,7 @@
 	int ret;
 
 	/* Prepare for TX DMA: */
-	controller->dma_tx = dma_request_slave_channel_reason(dev, "tx");
+	controller->dma_tx = dma_request_chan(dev, "tx");
 	if (IS_ERR(controller->dma_tx)) {
 		ret = PTR_ERR(controller->dma_tx);
 		dev_dbg(dev, "can't get the TX DMA channel, error %d!\n", ret);
@@ -684,7 +664,7 @@
 	}
 
 	/* Prepare for RX DMA: */
-	controller->dma_rx = dma_request_slave_channel_reason(dev, "rx");
+	controller->dma_rx = dma_request_chan(dev, "rx");
 	if (IS_ERR(controller->dma_rx)) {
 		ret = PTR_ERR(controller->dma_rx);
 		dev_dbg(dev, "can't get the RX DMA channel, error %d\n", ret);
@@ -779,7 +759,7 @@
 
 	if (temp_SR & SR_FCF && (temp_IER & IER_FCIE)) {
 		writel(SR_FCF, fsl_lpspi->base + IMX7ULP_SR);
-			complete(&fsl_lpspi->xfer_done);
+		complete(&fsl_lpspi->xfer_done);
 		return IRQ_HANDLED;
 	}
 
@@ -835,13 +815,10 @@
 
 static int fsl_lpspi_probe(struct platform_device *pdev)
 {
-	struct device_node *np = pdev->dev.of_node;
 	struct fsl_lpspi_data *fsl_lpspi;
 	struct spi_controller *controller;
-	struct spi_imx_master *lpspi_platform_info =
-		dev_get_platdata(&pdev->dev);
 	struct resource *res;
-	int i, ret, irq;
+	int ret, irq;
 	u32 temp;
 	bool is_slave;
 
@@ -861,6 +838,8 @@
 	fsl_lpspi = spi_controller_get_devdata(controller);
 	fsl_lpspi->dev = &pdev->dev;
 	fsl_lpspi->is_slave = is_slave;
+	fsl_lpspi->is_only_cs1 = of_property_read_bool((&pdev->dev)->of_node,
+						"fsl,spi-only-use-cs1-sel");
 
 	controller->bits_per_word_mask = SPI_BPW_RANGE_MASK(8, 32);
 	controller->transfer_one = fsl_lpspi_transfer_one;
@@ -871,35 +850,8 @@
 	controller->dev.of_node = pdev->dev.of_node;
 	controller->bus_num = pdev->id;
 	controller->slave_abort = fsl_lpspi_slave_abort;
-
-	ret = devm_spi_register_controller(&pdev->dev, controller);
-	if (ret < 0) {
-		dev_err(&pdev->dev, "spi_register_controller error.\n");
-		goto out_controller_put;
-	}
-
-	if (!fsl_lpspi->is_slave) {
-		for (i = 0; i < controller->num_chipselect; i++) {
-			int cs_gpio = of_get_named_gpio(np, "cs-gpios", i);
-
-			if (!gpio_is_valid(cs_gpio) && lpspi_platform_info)
-				cs_gpio = lpspi_platform_info->chipselect[i];
-
-			fsl_lpspi->chipselect[i] = cs_gpio;
-			if (!gpio_is_valid(cs_gpio))
-				continue;
-
-			ret = devm_gpio_request(&pdev->dev,
-						fsl_lpspi->chipselect[i],
-						DRIVER_NAME);
-			if (ret) {
-				dev_err(&pdev->dev, "can't get cs gpios\n");
-				goto out_controller_put;
-			}
-		}
-		controller->cs_gpios = fsl_lpspi->chipselect;
-		controller->prepare_message = fsl_lpspi_prepare_message;
-	}
+	if (!fsl_lpspi->is_slave)
+		controller->use_gpio_descriptors = true;
 
 	init_completion(&fsl_lpspi->xfer_done);
 
@@ -944,7 +896,7 @@
 	ret = pm_runtime_get_sync(fsl_lpspi->dev);
 	if (ret < 0) {
 		dev_err(fsl_lpspi->dev, "failed to enable clock\n");
-		goto out_controller_put;
+		goto out_pm_get;
 	}
 
 	temp = readl(fsl_lpspi->base + IMX7ULP_PARAM);
@@ -953,13 +905,26 @@
 
 	ret = fsl_lpspi_dma_init(&pdev->dev, fsl_lpspi, controller);
 	if (ret == -EPROBE_DEFER)
-		goto out_controller_put;
+		goto out_pm_get;
 
 	if (ret < 0)
 		dev_err(&pdev->dev, "dma setup error %d, use pio\n", ret);
 
+	ret = devm_spi_register_controller(&pdev->dev, controller);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "spi_register_controller error.\n");
+		goto out_pm_get;
+	}
+
+	pm_runtime_mark_last_busy(fsl_lpspi->dev);
+	pm_runtime_put_autosuspend(fsl_lpspi->dev);
+
 	return 0;
 
+out_pm_get:
+	pm_runtime_dont_use_autosuspend(fsl_lpspi->dev);
+	pm_runtime_put_sync(fsl_lpspi->dev);
+	pm_runtime_disable(fsl_lpspi->dev);
 out_controller_put:
 	spi_controller_put(controller);
 
@@ -976,8 +941,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int fsl_lpspi_suspend(struct device *dev)
+static int __maybe_unused fsl_lpspi_suspend(struct device *dev)
 {
 	int ret;
 
@@ -986,7 +950,7 @@
 	return ret;
 }
 
-static int fsl_lpspi_resume(struct device *dev)
+static int __maybe_unused fsl_lpspi_resume(struct device *dev)
 {
 	int ret;
 
@@ -1000,7 +964,6 @@
 
 	return 0;
 }
-#endif /* CONFIG_PM_SLEEP */
 
 static const struct dev_pm_ops fsl_lpspi_pm_ops = {
 	SET_RUNTIME_PM_OPS(fsl_lpspi_runtime_suspend,
diff --git a/drivers/spi/spi-fsl-qspi.c b/drivers/spi/spi-fsl-qspi.c
index 43078ba..9851551 100644
--- a/drivers/spi/spi-fsl-qspi.c
+++ b/drivers/spi/spi-fsl-qspi.c
@@ -15,7 +15,7 @@
  *     Yogesh Gaur <yogeshnarayan.gaur@nxp.com>
  *     Suresh Gupta <suresh.gupta@nxp.com>
  *
- * Based on the original fsl-quadspi.c spi-nor driver:
+ * Based on the original fsl-quadspi.c SPI NOR driver:
  * Author: Freescale Semiconductor, Inc.
  *
  */
@@ -68,6 +68,11 @@
 #define QUADSPI_FLSHCR_TCSH_MASK	GENMASK(11, 8)
 #define QUADSPI_FLSHCR_TDH_MASK		GENMASK(17, 16)
 
+#define QUADSPI_BUF0CR                  0x10
+#define QUADSPI_BUF1CR                  0x14
+#define QUADSPI_BUF2CR                  0x18
+#define QUADSPI_BUFXCR_INVALID_MSTRID   0xe
+
 #define QUADSPI_BUF3CR			0x1c
 #define QUADSPI_BUF3CR_ALLMST_MASK	BIT(31)
 #define QUADSPI_BUF3CR_ADATSZ(x)	((x) << 8)
@@ -195,6 +200,7 @@
 struct fsl_qspi_devtype_data {
 	unsigned int rxfifo;
 	unsigned int txfifo;
+	int invalid_mstrid;
 	unsigned int ahb_buf_size;
 	unsigned int quirks;
 	bool little_endian;
@@ -203,6 +209,7 @@
 static const struct fsl_qspi_devtype_data vybrid_data = {
 	.rxfifo = SZ_128,
 	.txfifo = SZ_64,
+	.invalid_mstrid = QUADSPI_BUFXCR_INVALID_MSTRID,
 	.ahb_buf_size = SZ_1K,
 	.quirks = QUADSPI_QUIRK_SWAP_ENDIAN,
 	.little_endian = true,
@@ -211,6 +218,7 @@
 static const struct fsl_qspi_devtype_data imx6sx_data = {
 	.rxfifo = SZ_128,
 	.txfifo = SZ_512,
+	.invalid_mstrid = QUADSPI_BUFXCR_INVALID_MSTRID,
 	.ahb_buf_size = SZ_1K,
 	.quirks = QUADSPI_QUIRK_4X_INT_CLK | QUADSPI_QUIRK_TKT245618,
 	.little_endian = true,
@@ -219,6 +227,7 @@
 static const struct fsl_qspi_devtype_data imx7d_data = {
 	.rxfifo = SZ_128,
 	.txfifo = SZ_512,
+	.invalid_mstrid = QUADSPI_BUFXCR_INVALID_MSTRID,
 	.ahb_buf_size = SZ_1K,
 	.quirks = QUADSPI_QUIRK_TKT253890 | QUADSPI_QUIRK_4X_INT_CLK |
 		  QUADSPI_QUIRK_USE_TDH_SETTING,
@@ -228,6 +237,7 @@
 static const struct fsl_qspi_devtype_data imx6ul_data = {
 	.rxfifo = SZ_128,
 	.txfifo = SZ_512,
+	.invalid_mstrid = QUADSPI_BUFXCR_INVALID_MSTRID,
 	.ahb_buf_size = SZ_1K,
 	.quirks = QUADSPI_QUIRK_TKT253890 | QUADSPI_QUIRK_4X_INT_CLK |
 		  QUADSPI_QUIRK_USE_TDH_SETTING,
@@ -237,6 +247,7 @@
 static const struct fsl_qspi_devtype_data ls1021a_data = {
 	.rxfifo = SZ_128,
 	.txfifo = SZ_64,
+	.invalid_mstrid = QUADSPI_BUFXCR_INVALID_MSTRID,
 	.ahb_buf_size = SZ_1K,
 	.quirks = 0,
 	.little_endian = false,
@@ -246,6 +257,7 @@
 	.rxfifo = SZ_128,
 	.txfifo = SZ_64,
 	.ahb_buf_size = SZ_1K,
+	.invalid_mstrid = 0x0,
 	.quirks = QUADSPI_QUIRK_TKT253890 | QUADSPI_QUIRK_BASE_INTERNAL,
 	.little_endian = true,
 };
@@ -472,7 +484,7 @@
 	}
 
 	if (needs_wakeup_wait_mode(q))
-		pm_qos_add_request(&q->pm_qos_req, PM_QOS_CPU_DMA_LATENCY, 0);
+		cpu_latency_qos_add_request(&q->pm_qos_req, 0);
 
 	return 0;
 }
@@ -480,7 +492,7 @@
 static void fsl_qspi_clk_disable_unprep(struct fsl_qspi *q)
 {
 	if (needs_wakeup_wait_mode(q))
-		pm_qos_remove_request(&q->pm_qos_req);
+		cpu_latency_qos_remove_request(&q->pm_qos_req);
 
 	clk_disable_unprepare(q->clk);
 	clk_disable_unprepare(q->clk_en);
@@ -633,6 +645,7 @@
 	void __iomem *base = q->iobase;
 	u32 addr_offset = 0;
 	int err = 0;
+	int invalid_mstrid = q->devtype_data->invalid_mstrid;
 
 	mutex_lock(&q->lock);
 
@@ -656,6 +669,10 @@
 	qspi_writel(q, QUADSPI_SPTRCLR_BFPTRC | QUADSPI_SPTRCLR_IPPTRC,
 		    base + QUADSPI_SPTRCLR);
 
+	qspi_writel(q, invalid_mstrid, base + QUADSPI_BUF0CR);
+	qspi_writel(q, invalid_mstrid, base + QUADSPI_BUF1CR);
+	qspi_writel(q, invalid_mstrid, base + QUADSPI_BUF2CR);
+
 	fsl_qspi_prepare_lut(q, op);
 
 	/*
@@ -859,14 +876,15 @@
 
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
 					"QuadSPI-memory");
-	q->ahb_addr = devm_ioremap_resource(dev, res);
-	if (IS_ERR(q->ahb_addr)) {
-		ret = PTR_ERR(q->ahb_addr);
+	q->memmap_phy = res->start;
+	/* Since there are 4 cs, map size required is 4 times ahb_buf_size */
+	q->ahb_addr = devm_ioremap(dev, q->memmap_phy,
+				   (q->devtype_data->ahb_buf_size * 4));
+	if (!q->ahb_addr) {
+		ret = -ENOMEM;
 		goto err_put_ctrl;
 	}
 
-	q->memmap_phy = res->start;
-
 	/* find the clocks */
 	q->clk_en = devm_clk_get(dev, "qspi_en");
 	if (IS_ERR(q->clk_en)) {
diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c
index 02b999d..bdf94cc 100644
--- a/drivers/spi/spi-fsl-spi.c
+++ b/drivers/spi/spi-fsl-spi.c
@@ -90,7 +90,7 @@
 {
 	struct mpc8xxx_spi *mspi = spi_master_get_devdata(spi->master);
 	struct spi_mpc8xxx_cs *cs = spi->controller_state;
-	struct fsl_spi_reg *reg_base = mspi->reg_base;
+	struct fsl_spi_reg __iomem *reg_base = mspi->reg_base;
 	__be32 __iomem *mode = &reg_base->mode;
 	unsigned long flags;
 
@@ -115,14 +115,13 @@
 {
 	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(spi->master);
 	struct fsl_spi_platform_data *pdata;
-	bool pol = spi->mode & SPI_CS_HIGH;
 	struct spi_mpc8xxx_cs	*cs = spi->controller_state;
 
 	pdata = spi->dev.parent->parent->platform_data;
 
 	if (value == BITBANG_CS_INACTIVE) {
 		if (pdata->cs_control)
-			pdata->cs_control(spi, !pol);
+			pdata->cs_control(spi, false);
 	}
 
 	if (value == BITBANG_CS_ACTIVE) {
@@ -134,7 +133,7 @@
 		fsl_spi_change_mode(spi);
 
 		if (pdata->cs_control)
-			pdata->cs_control(spi, pol);
+			pdata->cs_control(spi, true);
 	}
 }
 
@@ -291,7 +290,7 @@
 				struct spi_transfer *t, unsigned int len)
 {
 	u32 word;
-	struct fsl_spi_reg *reg_base = mspi->reg_base;
+	struct fsl_spi_reg __iomem *reg_base = mspi->reg_base;
 
 	mspi->count = len;
 
@@ -309,7 +308,7 @@
 			    bool is_dma_mapped)
 {
 	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(spi->master);
-	struct fsl_spi_reg *reg_base;
+	struct fsl_spi_reg __iomem *reg_base;
 	unsigned int len = t->len;
 	u8 bits_per_word;
 	int ret;
@@ -416,8 +415,7 @@
 		}
 		m->actual_length += t->len;
 
-		if (t->delay_usecs)
-			udelay(t->delay_usecs);
+		spi_transfer_delay_exec(t);
 
 		if (cs_change) {
 			ndelay(nsecs);
@@ -441,7 +439,7 @@
 static int fsl_spi_setup(struct spi_device *spi)
 {
 	struct mpc8xxx_spi *mpc8xxx_spi;
-	struct fsl_spi_reg *reg_base;
+	struct fsl_spi_reg __iomem *reg_base;
 	bool initial_setup = false;
 	int retval;
 	u32 hw_mode;
@@ -500,7 +498,7 @@
 
 static void fsl_spi_cpu_irq(struct mpc8xxx_spi *mspi, u32 events)
 {
-	struct fsl_spi_reg *reg_base = mspi->reg_base;
+	struct fsl_spi_reg __iomem *reg_base = mspi->reg_base;
 
 	/* We need handle RX first */
 	if (events & SPIE_NE) {
@@ -535,7 +533,7 @@
 	struct mpc8xxx_spi *mspi = context_data;
 	irqreturn_t ret = IRQ_NONE;
 	u32 events;
-	struct fsl_spi_reg *reg_base = mspi->reg_base;
+	struct fsl_spi_reg __iomem *reg_base = mspi->reg_base;
 
 	/* Get interrupt events(tx/rx) */
 	events = mpc8xxx_spi_read_reg(&reg_base->event);
@@ -555,7 +553,7 @@
 static void fsl_spi_grlib_cs_control(struct spi_device *spi, bool on)
 {
 	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(spi->master);
-	struct fsl_spi_reg *reg_base = mpc8xxx_spi->reg_base;
+	struct fsl_spi_reg __iomem *reg_base = mpc8xxx_spi->reg_base;
 	u32 slvsel;
 	u16 cs = spi->chip_select;
 
@@ -573,7 +571,7 @@
 	struct fsl_spi_platform_data *pdata = dev_get_platdata(dev);
 	struct spi_master *master = dev_get_drvdata(dev);
 	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(master);
-	struct fsl_spi_reg *reg_base = mpc8xxx_spi->reg_base;
+	struct fsl_spi_reg __iomem *reg_base = mpc8xxx_spi->reg_base;
 	int mbits;
 	u32 capabilities;
 
@@ -593,13 +591,13 @@
 	pdata->cs_control = fsl_spi_grlib_cs_control;
 }
 
-static struct spi_master * fsl_spi_probe(struct device *dev,
+static struct spi_master *fsl_spi_probe(struct device *dev,
 		struct resource *mem, unsigned int irq)
 {
 	struct fsl_spi_platform_data *pdata = dev_get_platdata(dev);
 	struct spi_master *master;
 	struct mpc8xxx_spi *mpc8xxx_spi;
-	struct fsl_spi_reg *reg_base;
+	struct fsl_spi_reg __iomem *reg_base;
 	u32 regval;
 	int ret = 0;
 
@@ -701,7 +699,7 @@
 
 		if (WARN_ON_ONCE(!pinfo->immr_spi_cs))
 			return;
-		iowrite32be(on ? SPI_BOOT_SEL_BIT : 0, pinfo->immr_spi_cs);
+		iowrite32be(on ? 0 : SPI_BOOT_SEL_BIT, pinfo->immr_spi_cs);
 	}
 }
 
@@ -711,8 +709,13 @@
 	struct device_node *np = ofdev->dev.of_node;
 	struct spi_master *master;
 	struct resource mem;
-	int irq = 0, type;
-	int ret = -ENOMEM;
+	int irq, type;
+	int ret;
+	bool spisel_boot = false;
+#if IS_ENABLED(CONFIG_FSL_SOC)
+	struct mpc8xxx_spi_probe_info *pinfo = NULL;
+#endif
+
 
 	ret = of_mpc8xxx_spi_probe(ofdev);
 	if (ret)
@@ -721,17 +724,14 @@
 	type = fsl_spi_get_type(&ofdev->dev);
 	if (type == TYPE_FSL) {
 		struct fsl_spi_platform_data *pdata = dev_get_platdata(dev);
-		bool spisel_boot = false;
 #if IS_ENABLED(CONFIG_FSL_SOC)
-		struct mpc8xxx_spi_probe_info *pinfo = to_of_pinfo(pdata);
+		pinfo = to_of_pinfo(pdata);
 
 		spisel_boot = of_property_read_bool(np, "fsl,spisel_boot");
 		if (spisel_boot) {
 			pinfo->immr_spi_cs = ioremap(get_immrbase() + IMMR_SPI_CS_OFFSET, 4);
-			if (!pinfo->immr_spi_cs) {
-				ret = -ENOMEM;
-				goto err;
-			}
+			if (!pinfo->immr_spi_cs)
+				return -ENOMEM;
 		}
 #endif
 		/*
@@ -754,23 +754,23 @@
 
 	ret = of_address_to_resource(np, 0, &mem);
 	if (ret)
-		goto err;
+		goto unmap_out;
 
 	irq = platform_get_irq(ofdev, 0);
 	if (irq < 0) {
 		ret = irq;
-		goto err;
+		goto unmap_out;
 	}
 
 	master = fsl_spi_probe(dev, &mem, irq);
-	if (IS_ERR(master)) {
-		ret = PTR_ERR(master);
-		goto err;
-	}
 
-	return 0;
+	return PTR_ERR_OR_ZERO(master);
 
-err:
+unmap_out:
+#if IS_ENABLED(CONFIG_FSL_SOC)
+	if (spisel_boot)
+		iounmap(pinfo->immr_spi_cs);
+#endif
 	return ret;
 }
 
diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
index 01b53d8..01ef79f 100644
--- a/drivers/spi/spi-geni-qcom.c
+++ b/drivers/spi/spi-geni-qcom.c
@@ -6,8 +6,8 @@
 #include <linux/io.h>
 #include <linux/log2.h>
 #include <linux/module.h>
-#include <linux/of.h>
 #include <linux/platform_device.h>
+#include <linux/pm_opp.h>
 #include <linux/pm_runtime.h>
 #include <linux/qcom-geni-se.h>
 #include <linux/spi/spi.h>
@@ -52,7 +52,6 @@
 /* M_CMD OP codes for SPI */
 #define SPI_TX_ONLY		1
 #define SPI_RX_ONLY		2
-#define SPI_FULL_DUPLEX		3
 #define SPI_TX_RX		7
 #define SPI_CS_ASSERT		8
 #define SPI_CS_DEASSERT		9
@@ -64,29 +63,27 @@
 #define TIMESTAMP_AFTER		BIT(3)
 #define POST_CMD_DELAY		BIT(4)
 
-enum spi_m_cmd_opcode {
-	CMD_NONE,
-	CMD_XFER,
-	CMD_CS,
-	CMD_CANCEL,
-};
-
 struct spi_geni_master {
 	struct geni_se se;
 	struct device *dev;
 	u32 tx_fifo_depth;
 	u32 fifo_width_bits;
 	u32 tx_wm;
+	u32 last_mode;
 	unsigned long cur_speed_hz;
+	unsigned long cur_sclk_hz;
 	unsigned int cur_bits_per_word;
 	unsigned int tx_rem_bytes;
 	unsigned int rx_rem_bytes;
 	const struct spi_transfer *cur_xfer;
-	struct completion xfer_done;
+	struct completion cs_done;
+	struct completion cancel_done;
+	struct completion abort_done;
 	unsigned int oversampling;
 	spinlock_t lock;
-	enum spi_m_cmd_opcode cur_mcmd;
 	int irq;
+	bool cs_flag;
+	bool abort_failed;
 };
 
 static int get_spi_clk_cfg(unsigned int speed_hz,
@@ -96,7 +93,6 @@
 {
 	unsigned long sclk_freq;
 	unsigned int actual_hz;
-	struct geni_se *se = &mas->se;
 	int ret;
 
 	ret = geni_se_clk_freq_match(&mas->se,
@@ -113,9 +109,12 @@
 
 	dev_dbg(mas->dev, "req %u=>%u sclk %lu, idx %d, div %d\n", speed_hz,
 				actual_hz, sclk_freq, *clk_idx, *clk_div);
-	ret = clk_set_rate(se->clk, sclk_freq);
+	ret = dev_pm_opp_set_rate(mas->dev, sclk_freq);
 	if (ret)
-		dev_err(mas->dev, "clk_set_rate failed %d\n", ret);
+		dev_err(mas->dev, "dev_pm_opp_set_rate failed %d\n", ret);
+	else
+		mas->cur_sclk_hz = sclk_freq;
+
 	return ret;
 }
 
@@ -123,26 +122,69 @@
 				struct spi_message *msg)
 {
 	struct spi_geni_master *mas = spi_master_get_devdata(spi);
-	unsigned long time_left, flags;
+	unsigned long time_left;
 	struct geni_se *se = &mas->se;
 
-	spin_lock_irqsave(&mas->lock, flags);
-	reinit_completion(&mas->xfer_done);
-	mas->cur_mcmd = CMD_CANCEL;
-	geni_se_cancel_m_cmd(se);
+	spin_lock_irq(&mas->lock);
+	reinit_completion(&mas->cancel_done);
 	writel(0, se->base + SE_GENI_TX_WATERMARK_REG);
-	spin_unlock_irqrestore(&mas->lock, flags);
-	time_left = wait_for_completion_timeout(&mas->xfer_done, HZ);
+	mas->cur_xfer = NULL;
+	geni_se_cancel_m_cmd(se);
+	spin_unlock_irq(&mas->lock);
+
+	time_left = wait_for_completion_timeout(&mas->cancel_done, HZ);
 	if (time_left)
 		return;
 
-	spin_lock_irqsave(&mas->lock, flags);
-	reinit_completion(&mas->xfer_done);
+	spin_lock_irq(&mas->lock);
+	reinit_completion(&mas->abort_done);
 	geni_se_abort_m_cmd(se);
-	spin_unlock_irqrestore(&mas->lock, flags);
-	time_left = wait_for_completion_timeout(&mas->xfer_done, HZ);
-	if (!time_left)
+	spin_unlock_irq(&mas->lock);
+
+	time_left = wait_for_completion_timeout(&mas->abort_done, HZ);
+	if (!time_left) {
 		dev_err(mas->dev, "Failed to cancel/abort m_cmd\n");
+
+		/*
+		 * No need for a lock since SPI core has a lock and we never
+		 * access this from an interrupt.
+		 */
+		mas->abort_failed = true;
+	}
+}
+
+static bool spi_geni_is_abort_still_pending(struct spi_geni_master *mas)
+{
+	struct geni_se *se = &mas->se;
+	u32 m_irq, m_irq_en;
+
+	if (!mas->abort_failed)
+		return false;
+
+	/*
+	 * The only known case where a transfer times out and then a cancel
+	 * times out then an abort times out is if something is blocking our
+	 * interrupt handler from running.  Avoid starting any new transfers
+	 * until that sorts itself out.
+	 */
+	spin_lock_irq(&mas->lock);
+	m_irq = readl(se->base + SE_GENI_M_IRQ_STATUS);
+	m_irq_en = readl(se->base + SE_GENI_M_IRQ_EN);
+	spin_unlock_irq(&mas->lock);
+
+	if (m_irq & m_irq_en) {
+		dev_err(mas->dev, "Interrupts pending after abort: %#010x\n",
+			m_irq & m_irq_en);
+		return true;
+	}
+
+	/*
+	 * If we're here the problem resolved itself so no need to check more
+	 * on future transfers.
+	 */
+	mas->abort_failed = false;
+
+	return false;
 }
 
 static void spi_geni_set_cs(struct spi_device *slv, bool set_flag)
@@ -152,21 +194,34 @@
 	struct geni_se *se = &mas->se;
 	unsigned long time_left;
 
-	reinit_completion(&mas->xfer_done);
-	pm_runtime_get_sync(mas->dev);
 	if (!(slv->mode & SPI_CS_HIGH))
 		set_flag = !set_flag;
 
-	mas->cur_mcmd = CMD_CS;
+	if (set_flag == mas->cs_flag)
+		return;
+
+	pm_runtime_get_sync(mas->dev);
+
+	if (spi_geni_is_abort_still_pending(mas)) {
+		dev_err(mas->dev, "Can't set chip select\n");
+		goto exit;
+	}
+
+	mas->cs_flag = set_flag;
+
+	spin_lock_irq(&mas->lock);
+	reinit_completion(&mas->cs_done);
 	if (set_flag)
 		geni_se_setup_m_cmd(se, SPI_CS_ASSERT, 0);
 	else
 		geni_se_setup_m_cmd(se, SPI_CS_DEASSERT, 0);
+	spin_unlock_irq(&mas->lock);
 
-	time_left = wait_for_completion_timeout(&mas->xfer_done, HZ);
+	time_left = wait_for_completion_timeout(&mas->cs_done, HZ);
 	if (!time_left)
 		handle_fifo_timeout(spi, NULL);
 
+exit:
 	pm_runtime_put(mas->dev);
 }
 
@@ -178,8 +233,6 @@
 	struct geni_se *se = &mas->se;
 	u32 word_len;
 
-	word_len = readl(se->base + SE_SPI_WORD_LEN);
-
 	/*
 	 * If bits_per_word isn't a byte aligned value, set the packing to be
 	 * 1 SPI word per FIFO word.
@@ -188,64 +241,86 @@
 		pack_words = mas->fifo_width_bits / bits_per_word;
 	else
 		pack_words = 1;
-	word_len &= ~WORD_LEN_MSK;
-	word_len |= ((bits_per_word - MIN_WORD_LEN) & WORD_LEN_MSK);
 	geni_se_config_packing(&mas->se, bits_per_word, pack_words, msb_first,
 								true, true);
+	word_len = (bits_per_word - MIN_WORD_LEN) & WORD_LEN_MSK;
 	writel(word_len, se->base + SE_SPI_WORD_LEN);
 }
 
+static int geni_spi_set_clock_and_bw(struct spi_geni_master *mas,
+					unsigned long clk_hz)
+{
+	u32 clk_sel, m_clk_cfg, idx, div;
+	struct geni_se *se = &mas->se;
+	int ret;
+
+	if (clk_hz == mas->cur_speed_hz)
+		return 0;
+
+	ret = get_spi_clk_cfg(clk_hz, mas, &idx, &div);
+	if (ret) {
+		dev_err(mas->dev, "Err setting clk to %lu: %d\n", clk_hz, ret);
+		return ret;
+	}
+
+	/*
+	 * SPI core clock gets configured with the requested frequency
+	 * or the frequency closer to the requested frequency.
+	 * For that reason requested frequency is stored in the
+	 * cur_speed_hz and referred in the consecutive transfer instead
+	 * of calling clk_get_rate() API.
+	 */
+	mas->cur_speed_hz = clk_hz;
+
+	clk_sel = idx & CLK_SEL_MSK;
+	m_clk_cfg = (div << CLK_DIV_SHFT) | SER_CLK_EN;
+	writel(clk_sel, se->base + SE_GENI_CLK_SEL);
+	writel(m_clk_cfg, se->base + GENI_SER_M_CLK_CFG);
+
+	/* Set BW quota for CPU as driver supports FIFO mode only. */
+	se->icc_paths[CPU_TO_GENI].avg_bw = Bps_to_icc(mas->cur_speed_hz);
+	ret = geni_icc_set_bw(se);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
 static int setup_fifo_params(struct spi_device *spi_slv,
 					struct spi_master *spi)
 {
 	struct spi_geni_master *mas = spi_master_get_devdata(spi);
 	struct geni_se *se = &mas->se;
-	u32 loopback_cfg, cpol, cpha, demux_output_inv;
-	u32 demux_sel, clk_sel, m_clk_cfg, idx, div;
-	int ret;
+	u32 loopback_cfg = 0, cpol = 0, cpha = 0, demux_output_inv = 0;
+	u32 demux_sel;
 
-	loopback_cfg = readl(se->base + SE_SPI_LOOPBACK);
-	cpol = readl(se->base + SE_SPI_CPOL);
-	cpha = readl(se->base + SE_SPI_CPHA);
-	demux_output_inv = 0;
-	loopback_cfg &= ~LOOPBACK_MSK;
-	cpol &= ~CPOL;
-	cpha &= ~CPHA;
+	if (mas->last_mode != spi_slv->mode) {
+		if (spi_slv->mode & SPI_LOOP)
+			loopback_cfg = LOOPBACK_ENABLE;
 
-	if (spi_slv->mode & SPI_LOOP)
-		loopback_cfg |= LOOPBACK_ENABLE;
+		if (spi_slv->mode & SPI_CPOL)
+			cpol = CPOL;
 
-	if (spi_slv->mode & SPI_CPOL)
-		cpol |= CPOL;
+		if (spi_slv->mode & SPI_CPHA)
+			cpha = CPHA;
 
-	if (spi_slv->mode & SPI_CPHA)
-		cpha |= CPHA;
+		if (spi_slv->mode & SPI_CS_HIGH)
+			demux_output_inv = BIT(spi_slv->chip_select);
 
-	if (spi_slv->mode & SPI_CS_HIGH)
-		demux_output_inv = BIT(spi_slv->chip_select);
+		demux_sel = spi_slv->chip_select;
+		mas->cur_bits_per_word = spi_slv->bits_per_word;
 
-	demux_sel = spi_slv->chip_select;
-	mas->cur_speed_hz = spi_slv->max_speed_hz;
-	mas->cur_bits_per_word = spi_slv->bits_per_word;
+		spi_setup_word_len(mas, spi_slv->mode, spi_slv->bits_per_word);
+		writel(loopback_cfg, se->base + SE_SPI_LOOPBACK);
+		writel(demux_sel, se->base + SE_SPI_DEMUX_SEL);
+		writel(cpha, se->base + SE_SPI_CPHA);
+		writel(cpol, se->base + SE_SPI_CPOL);
+		writel(demux_output_inv, se->base + SE_SPI_DEMUX_OUTPUT_INV);
 
-	ret = get_spi_clk_cfg(mas->cur_speed_hz, mas, &idx, &div);
-	if (ret) {
-		dev_err(mas->dev, "Err setting clks ret(%d) for %ld\n",
-							ret, mas->cur_speed_hz);
-		return ret;
+		mas->last_mode = spi_slv->mode;
 	}
 
-	clk_sel = idx & CLK_SEL_MSK;
-	m_clk_cfg = (div << CLK_DIV_SHFT) | SER_CLK_EN;
-	spi_setup_word_len(mas, spi_slv->mode, spi_slv->bits_per_word);
-	writel(loopback_cfg, se->base + SE_SPI_LOOPBACK);
-	writel(demux_sel, se->base + SE_SPI_DEMUX_SEL);
-	writel(cpha, se->base + SE_SPI_CPHA);
-	writel(cpol, se->base + SE_SPI_CPOL);
-	writel(demux_output_inv, se->base + SE_SPI_DEMUX_OUTPUT_INV);
-	writel(clk_sel, se->base + SE_GENI_CLK_SEL);
-	writel(m_clk_cfg, se->base + GENI_SER_M_CLK_CFG);
-	return 0;
+	return geni_spi_set_clock_and_bw(mas, spi_slv->max_speed_hz);
 }
 
 static int spi_geni_prepare_message(struct spi_master *spi,
@@ -253,9 +328,10 @@
 {
 	int ret;
 	struct spi_geni_master *mas = spi_master_get_devdata(spi);
-	struct geni_se *se = &mas->se;
 
-	geni_se_select_mode(se, GENI_SE_FIFO);
+	if (spi_geni_is_abort_still_pending(mas))
+		return -EBUSY;
+
 	ret = setup_fifo_params(spi_msg->spi, spi);
 	if (ret)
 		dev_err(mas->dev, "Couldn't select mode %d\n", ret);
@@ -266,6 +342,7 @@
 {
 	struct geni_se *se = &mas->se;
 	unsigned int proto, major, minor, ver;
+	u32 spi_tx_cfg;
 
 	pm_runtime_get_sync(mas->dev);
 
@@ -284,7 +361,7 @@
 	 * Hardware programming guide suggests to configure
 	 * RX FIFO RFR level to fifo_depth-2.
 	 */
-	geni_se_init(se, 0x0, mas->tx_fifo_depth - 2);
+	geni_se_init(se, mas->tx_fifo_depth - 3, mas->tx_fifo_depth - 2);
 	/* Transmit an entire FIFO worth of data per IRQ */
 	mas->tx_wm = 1;
 	ver = geni_se_get_qup_hw_version(se);
@@ -296,103 +373,17 @@
 	else
 		mas->oversampling = 1;
 
+	geni_se_select_mode(se, GENI_SE_FIFO);
+
+	/* We always control CS manually */
+	spi_tx_cfg = readl(se->base + SE_SPI_TRANS_CFG);
+	spi_tx_cfg &= ~CS_TOGGLE;
+	writel(spi_tx_cfg, se->base + SE_SPI_TRANS_CFG);
+
 	pm_runtime_put(mas->dev);
 	return 0;
 }
 
-static void setup_fifo_xfer(struct spi_transfer *xfer,
-				struct spi_geni_master *mas,
-				u16 mode, struct spi_master *spi)
-{
-	u32 m_cmd = 0;
-	u32 spi_tx_cfg, len;
-	struct geni_se *se = &mas->se;
-
-	spi_tx_cfg = readl(se->base + SE_SPI_TRANS_CFG);
-	if (xfer->bits_per_word != mas->cur_bits_per_word) {
-		spi_setup_word_len(mas, mode, xfer->bits_per_word);
-		mas->cur_bits_per_word = xfer->bits_per_word;
-	}
-
-	/* Speed and bits per word can be overridden per transfer */
-	if (xfer->speed_hz != mas->cur_speed_hz) {
-		int ret;
-		u32 clk_sel, m_clk_cfg;
-		unsigned int idx, div;
-
-		ret = get_spi_clk_cfg(xfer->speed_hz, mas, &idx, &div);
-		if (ret) {
-			dev_err(mas->dev, "Err setting clks:%d\n", ret);
-			return;
-		}
-		/*
-		 * SPI core clock gets configured with the requested frequency
-		 * or the frequency closer to the requested frequency.
-		 * For that reason requested frequency is stored in the
-		 * cur_speed_hz and referred in the consecutive transfer instead
-		 * of calling clk_get_rate() API.
-		 */
-		mas->cur_speed_hz = xfer->speed_hz;
-		clk_sel = idx & CLK_SEL_MSK;
-		m_clk_cfg = (div << CLK_DIV_SHFT) | SER_CLK_EN;
-		writel(clk_sel, se->base + SE_GENI_CLK_SEL);
-		writel(m_clk_cfg, se->base + GENI_SER_M_CLK_CFG);
-	}
-
-	mas->tx_rem_bytes = 0;
-	mas->rx_rem_bytes = 0;
-	if (xfer->tx_buf && xfer->rx_buf)
-		m_cmd = SPI_FULL_DUPLEX;
-	else if (xfer->tx_buf)
-		m_cmd = SPI_TX_ONLY;
-	else if (xfer->rx_buf)
-		m_cmd = SPI_RX_ONLY;
-
-	spi_tx_cfg &= ~CS_TOGGLE;
-
-	if (!(mas->cur_bits_per_word % MIN_WORD_LEN))
-		len = xfer->len * BITS_PER_BYTE / mas->cur_bits_per_word;
-	else
-		len = xfer->len / (mas->cur_bits_per_word / BITS_PER_BYTE + 1);
-	len &= TRANS_LEN_MSK;
-
-	mas->cur_xfer = xfer;
-	if (m_cmd & SPI_TX_ONLY) {
-		mas->tx_rem_bytes = xfer->len;
-		writel(len, se->base + SE_SPI_TX_TRANS_LEN);
-	}
-
-	if (m_cmd & SPI_RX_ONLY) {
-		writel(len, se->base + SE_SPI_RX_TRANS_LEN);
-		mas->rx_rem_bytes = xfer->len;
-	}
-	writel(spi_tx_cfg, se->base + SE_SPI_TRANS_CFG);
-	mas->cur_mcmd = CMD_XFER;
-	geni_se_setup_m_cmd(se, m_cmd, FRAGMENTATION);
-
-	/*
-	 * TX_WATERMARK_REG should be set after SPI configuration and
-	 * setting up GENI SE engine, as driver starts data transfer
-	 * for the watermark interrupt.
-	 */
-	if (m_cmd & SPI_TX_ONLY)
-		writel(mas->tx_wm, se->base + SE_GENI_TX_WATERMARK_REG);
-}
-
-static int spi_geni_transfer_one(struct spi_master *spi,
-				struct spi_device *slv,
-				struct spi_transfer *xfer)
-{
-	struct spi_geni_master *mas = spi_master_get_devdata(spi);
-
-	/* Terminate and return success for 0 byte length transfer */
-	if (!xfer->len)
-		return 0;
-
-	setup_fifo_xfer(xfer, mas, slv->mode, spi);
-	return 1;
-}
-
 static unsigned int geni_byte_per_fifo_word(struct spi_geni_master *mas)
 {
 	/*
@@ -407,7 +398,7 @@
 	return mas->fifo_width_bits / BITS_PER_BYTE;
 }
 
-static void geni_spi_handle_tx(struct spi_geni_master *mas)
+static bool geni_spi_handle_tx(struct spi_geni_master *mas)
 {
 	struct geni_se *se = &mas->se;
 	unsigned int max_bytes;
@@ -415,6 +406,12 @@
 	unsigned int bytes_per_fifo_word = geni_byte_per_fifo_word(mas);
 	unsigned int i = 0;
 
+	/* Stop the watermark IRQ if nothing to send */
+	if (!mas->cur_xfer) {
+		writel(0, se->base + SE_GENI_TX_WATERMARK_REG);
+		return false;
+	}
+
 	max_bytes = (mas->tx_fifo_depth - mas->tx_wm) * bytes_per_fifo_word;
 	if (mas->tx_rem_bytes < max_bytes)
 		max_bytes = mas->tx_rem_bytes;
@@ -432,8 +429,11 @@
 		iowrite32_rep(se->base + SE_GENI_TX_FIFOn, &fifo_word, 1);
 	}
 	mas->tx_rem_bytes -= max_bytes;
-	if (!mas->tx_rem_bytes)
+	if (!mas->tx_rem_bytes) {
 		writel(0, se->base + SE_GENI_TX_WATERMARK_REG);
+		return false;
+	}
+	return true;
 }
 
 static void geni_spi_handle_rx(struct spi_geni_master *mas)
@@ -454,6 +454,14 @@
 		if (rx_last_byte_valid && rx_last_byte_valid < 4)
 			rx_bytes -= bytes_per_fifo_word - rx_last_byte_valid;
 	}
+
+	/* Clear out the FIFO and bail if nowhere to put it */
+	if (!mas->cur_xfer) {
+		for (i = 0; i < DIV_ROUND_UP(rx_bytes, bytes_per_fifo_word); i++)
+			readl(se->base + SE_GENI_RX_FIFOn);
+		return;
+	}
+
 	if (mas->rx_rem_bytes < rx_bytes)
 		rx_bytes = mas->rx_rem_bytes;
 
@@ -472,19 +480,115 @@
 	mas->rx_rem_bytes -= rx_bytes;
 }
 
+static void setup_fifo_xfer(struct spi_transfer *xfer,
+				struct spi_geni_master *mas,
+				u16 mode, struct spi_master *spi)
+{
+	u32 m_cmd = 0;
+	u32 len;
+	struct geni_se *se = &mas->se;
+	int ret;
+
+	/*
+	 * Ensure that our interrupt handler isn't still running from some
+	 * prior command before we start messing with the hardware behind
+	 * its back.  We don't need to _keep_ the lock here since we're only
+	 * worried about racing with out interrupt handler.  The SPI core
+	 * already handles making sure that we're not trying to do two
+	 * transfers at once or setting a chip select and doing a transfer
+	 * concurrently.
+	 *
+	 * NOTE: we actually _can't_ hold the lock here because possibly we
+	 * might call clk_set_rate() which needs to be able to sleep.
+	 */
+	spin_lock_irq(&mas->lock);
+	spin_unlock_irq(&mas->lock);
+
+	if (xfer->bits_per_word != mas->cur_bits_per_word) {
+		spi_setup_word_len(mas, mode, xfer->bits_per_word);
+		mas->cur_bits_per_word = xfer->bits_per_word;
+	}
+
+	/* Speed and bits per word can be overridden per transfer */
+	ret = geni_spi_set_clock_and_bw(mas, xfer->speed_hz);
+	if (ret)
+		return;
+
+	mas->tx_rem_bytes = 0;
+	mas->rx_rem_bytes = 0;
+
+	if (!(mas->cur_bits_per_word % MIN_WORD_LEN))
+		len = xfer->len * BITS_PER_BYTE / mas->cur_bits_per_word;
+	else
+		len = xfer->len / (mas->cur_bits_per_word / BITS_PER_BYTE + 1);
+	len &= TRANS_LEN_MSK;
+
+	mas->cur_xfer = xfer;
+	if (xfer->tx_buf) {
+		m_cmd |= SPI_TX_ONLY;
+		mas->tx_rem_bytes = xfer->len;
+		writel(len, se->base + SE_SPI_TX_TRANS_LEN);
+	}
+
+	if (xfer->rx_buf) {
+		m_cmd |= SPI_RX_ONLY;
+		writel(len, se->base + SE_SPI_RX_TRANS_LEN);
+		mas->rx_rem_bytes = xfer->len;
+	}
+
+	/*
+	 * Lock around right before we start the transfer since our
+	 * interrupt could come in at any time now.
+	 */
+	spin_lock_irq(&mas->lock);
+	geni_se_setup_m_cmd(se, m_cmd, FRAGMENTATION);
+
+	/*
+	 * TX_WATERMARK_REG should be set after SPI configuration and
+	 * setting up GENI SE engine, as driver starts data transfer
+	 * for the watermark interrupt.
+	 */
+	if (m_cmd & SPI_TX_ONLY) {
+		if (geni_spi_handle_tx(mas))
+			writel(mas->tx_wm, se->base + SE_GENI_TX_WATERMARK_REG);
+	}
+	spin_unlock_irq(&mas->lock);
+}
+
+static int spi_geni_transfer_one(struct spi_master *spi,
+				struct spi_device *slv,
+				struct spi_transfer *xfer)
+{
+	struct spi_geni_master *mas = spi_master_get_devdata(spi);
+
+	if (spi_geni_is_abort_still_pending(mas))
+		return -EBUSY;
+
+	/* Terminate and return success for 0 byte length transfer */
+	if (!xfer->len)
+		return 0;
+
+	setup_fifo_xfer(xfer, mas, slv->mode, spi);
+	return 1;
+}
+
 static irqreturn_t geni_spi_isr(int irq, void *data)
 {
 	struct spi_master *spi = data;
 	struct spi_geni_master *mas = spi_master_get_devdata(spi);
 	struct geni_se *se = &mas->se;
 	u32 m_irq;
-	unsigned long flags;
 
-	if (mas->cur_mcmd == CMD_NONE)
+	m_irq = readl(se->base + SE_GENI_M_IRQ_STATUS);
+	if (!m_irq)
 		return IRQ_NONE;
 
-	spin_lock_irqsave(&mas->lock, flags);
-	m_irq = readl(se->base + SE_GENI_M_IRQ_STATUS);
+	if (m_irq & (M_CMD_OVERRUN_EN | M_ILLEGAL_CMD_EN | M_CMD_FAILURE_EN |
+		     M_RX_FIFO_RD_ERR_EN | M_RX_FIFO_WR_ERR_EN |
+		     M_TX_FIFO_RD_ERR_EN | M_TX_FIFO_WR_ERR_EN))
+		dev_warn(mas->dev, "Unexpected IRQ err status %#010x\n", m_irq);
+
+	spin_lock(&mas->lock);
 
 	if ((m_irq & M_RX_FIFO_WATERMARK_EN) || (m_irq & M_RX_FIFO_LAST_EN))
 		geni_spi_handle_rx(mas);
@@ -493,39 +597,57 @@
 		geni_spi_handle_tx(mas);
 
 	if (m_irq & M_CMD_DONE_EN) {
-		if (mas->cur_mcmd == CMD_XFER)
+		if (mas->cur_xfer) {
 			spi_finalize_current_transfer(spi);
-		else if (mas->cur_mcmd == CMD_CS)
-			complete(&mas->xfer_done);
-		mas->cur_mcmd = CMD_NONE;
-		/*
-		 * If this happens, then a CMD_DONE came before all the Tx
-		 * buffer bytes were sent out. This is unusual, log this
-		 * condition and disable the WM interrupt to prevent the
-		 * system from stalling due an interrupt storm.
-		 * If this happens when all Rx bytes haven't been received, log
-		 * the condition.
-		 * The only known time this can happen is if bits_per_word != 8
-		 * and some registers that expect xfer lengths in num spi_words
-		 * weren't written correctly.
-		 */
-		if (mas->tx_rem_bytes) {
-			writel(0, se->base + SE_GENI_TX_WATERMARK_REG);
-			dev_err(mas->dev, "Premature done. tx_rem = %d bpw%d\n",
-				mas->tx_rem_bytes, mas->cur_bits_per_word);
+			mas->cur_xfer = NULL;
+			/*
+			 * If this happens, then a CMD_DONE came before all the
+			 * Tx buffer bytes were sent out. This is unusual, log
+			 * this condition and disable the WM interrupt to
+			 * prevent the system from stalling due an interrupt
+			 * storm.
+			 *
+			 * If this happens when all Rx bytes haven't been
+			 * received, log the condition. The only known time
+			 * this can happen is if bits_per_word != 8 and some
+			 * registers that expect xfer lengths in num spi_words
+			 * weren't written correctly.
+			 */
+			if (mas->tx_rem_bytes) {
+				writel(0, se->base + SE_GENI_TX_WATERMARK_REG);
+				dev_err(mas->dev, "Premature done. tx_rem = %d bpw%d\n",
+					mas->tx_rem_bytes, mas->cur_bits_per_word);
+			}
+			if (mas->rx_rem_bytes)
+				dev_err(mas->dev, "Premature done. rx_rem = %d bpw%d\n",
+					mas->rx_rem_bytes, mas->cur_bits_per_word);
+		} else {
+			complete(&mas->cs_done);
 		}
-		if (mas->rx_rem_bytes)
-			dev_err(mas->dev, "Premature done. rx_rem = %d bpw%d\n",
-				mas->rx_rem_bytes, mas->cur_bits_per_word);
 	}
 
-	if ((m_irq & M_CMD_CANCEL_EN) || (m_irq & M_CMD_ABORT_EN)) {
-		mas->cur_mcmd = CMD_NONE;
-		complete(&mas->xfer_done);
-	}
+	if (m_irq & M_CMD_CANCEL_EN)
+		complete(&mas->cancel_done);
+	if (m_irq & M_CMD_ABORT_EN)
+		complete(&mas->abort_done);
 
+	/*
+	 * It's safe or a good idea to Ack all of our our interrupts at the
+	 * end of the function. Specifically:
+	 * - M_CMD_DONE_EN / M_RX_FIFO_LAST_EN: Edge triggered interrupts and
+	 *   clearing Acks. Clearing at the end relies on nobody else having
+	 *   started a new transfer yet or else we could be clearing _their_
+	 *   done bit, but everyone grabs the spinlock before starting a new
+	 *   transfer.
+	 * - M_RX_FIFO_WATERMARK_EN / M_TX_FIFO_WATERMARK_EN: These appear
+	 *   to be "latched level" interrupts so it's important to clear them
+	 *   _after_ you've handled the condition and always safe to do so
+	 *   since they'll re-assert if they're still happening.
+	 */
 	writel(m_irq, se->base + SE_GENI_M_IRQ_CLEAR);
-	spin_unlock_irqrestore(&mas->lock, flags);
+
+	spin_unlock(&mas->lock);
+
 	return IRQ_HANDLED;
 }
 
@@ -536,6 +658,7 @@
 	struct spi_geni_master *mas;
 	void __iomem *base;
 	struct clk *clk;
+	struct device *dev = &pdev->dev;
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
@@ -545,28 +668,34 @@
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
-	clk = devm_clk_get(&pdev->dev, "se");
-	if (IS_ERR(clk)) {
-		dev_err(&pdev->dev, "Err getting SE Core clk %ld\n",
-						PTR_ERR(clk));
+	clk = devm_clk_get(dev, "se");
+	if (IS_ERR(clk))
 		return PTR_ERR(clk);
-	}
 
-	spi = devm_spi_alloc_master(&pdev->dev, sizeof(*mas));
+	spi = devm_spi_alloc_master(dev, sizeof(*mas));
 	if (!spi)
 		return -ENOMEM;
 
 	platform_set_drvdata(pdev, spi);
 	mas = spi_master_get_devdata(spi);
 	mas->irq = irq;
-	mas->dev = &pdev->dev;
-	mas->se.dev = &pdev->dev;
-	mas->se.wrapper = dev_get_drvdata(pdev->dev.parent);
+	mas->dev = dev;
+	mas->se.dev = dev;
+	mas->se.wrapper = dev_get_drvdata(dev->parent);
 	mas->se.base = base;
 	mas->se.clk = clk;
+	mas->se.opp_table = dev_pm_opp_set_clkname(&pdev->dev, "se");
+	if (IS_ERR(mas->se.opp_table))
+		return PTR_ERR(mas->se.opp_table);
+	/* OPP table is optional */
+	ret = dev_pm_opp_of_add_table(&pdev->dev);
+	if (ret && ret != -ENODEV) {
+		dev_err(&pdev->dev, "invalid OPP table in device tree\n");
+		goto put_clkname;
+	}
 
 	spi->bus_num = -1;
-	spi->dev.of_node = pdev->dev.of_node;
+	spi->dev.of_node = dev->of_node;
 	spi->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LOOP | SPI_CS_HIGH;
 	spi->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 32);
 	spi->num_chipselect = 4;
@@ -577,16 +706,30 @@
 	spi->handle_err = handle_fifo_timeout;
 	spi->set_cs = spi_geni_set_cs;
 
-	init_completion(&mas->xfer_done);
+	init_completion(&mas->cs_done);
+	init_completion(&mas->cancel_done);
+	init_completion(&mas->abort_done);
 	spin_lock_init(&mas->lock);
-	pm_runtime_enable(&pdev->dev);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_set_autosuspend_delay(&pdev->dev, 250);
+	pm_runtime_enable(dev);
+
+	ret = geni_icc_get(&mas->se, NULL);
+	if (ret)
+		goto spi_geni_probe_runtime_disable;
+	/* Set the bus quota to a reasonable value for register access */
+	mas->se.icc_paths[GENI_TO_CORE].avg_bw = Bps_to_icc(CORE_2X_50_MHZ);
+	mas->se.icc_paths[CPU_TO_GENI].avg_bw = GENI_DEFAULT_BW;
+
+	ret = geni_icc_set_bw(&mas->se);
+	if (ret)
+		goto spi_geni_probe_runtime_disable;
 
 	ret = spi_geni_init(mas);
 	if (ret)
 		goto spi_geni_probe_runtime_disable;
 
-	ret = request_irq(mas->irq, geni_spi_isr,
-			IRQF_TRIGGER_HIGH, "spi_geni", spi);
+	ret = request_irq(mas->irq, geni_spi_isr, 0, dev_name(dev), spi);
 	if (ret)
 		goto spi_geni_probe_runtime_disable;
 
@@ -598,7 +741,10 @@
 spi_geni_probe_free_irq:
 	free_irq(mas->irq, spi);
 spi_geni_probe_runtime_disable:
-	pm_runtime_disable(&pdev->dev);
+	pm_runtime_disable(dev);
+	dev_pm_opp_of_remove_table(&pdev->dev);
+put_clkname:
+	dev_pm_opp_put_clkname(mas->se.opp_table);
 	return ret;
 }
 
@@ -612,6 +758,8 @@
 
 	free_irq(mas->irq, spi);
 	pm_runtime_disable(&pdev->dev);
+	dev_pm_opp_of_remove_table(&pdev->dev);
+	dev_pm_opp_put_clkname(mas->se.opp_table);
 	return 0;
 }
 
@@ -619,16 +767,33 @@
 {
 	struct spi_master *spi = dev_get_drvdata(dev);
 	struct spi_geni_master *mas = spi_master_get_devdata(spi);
+	int ret;
 
-	return geni_se_resources_off(&mas->se);
+	/* Drop the performance state vote */
+	dev_pm_opp_set_rate(dev, 0);
+
+	ret = geni_se_resources_off(&mas->se);
+	if (ret)
+		return ret;
+
+	return geni_icc_disable(&mas->se);
 }
 
 static int __maybe_unused spi_geni_runtime_resume(struct device *dev)
 {
 	struct spi_master *spi = dev_get_drvdata(dev);
 	struct spi_geni_master *mas = spi_master_get_devdata(spi);
+	int ret;
 
-	return geni_se_resources_on(&mas->se);
+	ret = geni_icc_enable(&mas->se);
+	if (ret)
+		return ret;
+
+	ret = geni_se_resources_on(&mas->se);
+	if (ret)
+		return ret;
+
+	return dev_pm_opp_set_rate(mas->dev, mas->cur_sclk_hz);
 }
 
 static int __maybe_unused spi_geni_suspend(struct device *dev)
diff --git a/drivers/spi/spi-gpio.c b/drivers/spi/spi-gpio.c
index e7dc1fa..0584f4d 100644
--- a/drivers/spi/spi-gpio.c
+++ b/drivers/spi/spi-gpio.c
@@ -357,15 +357,12 @@
 	struct spi_gpio			*spi_gpio;
 	struct device			*dev = &pdev->dev;
 	struct spi_bitbang		*bb;
-	const struct of_device_id	*of_id;
-
-	of_id = of_match_device(spi_gpio_dt_ids, &pdev->dev);
 
 	master = devm_spi_alloc_master(dev, sizeof(*spi_gpio));
 	if (!master)
 		return -ENOMEM;
 
-	if (of_id)
+	if (pdev->dev.of_node)
 		status = spi_gpio_probe_dt(pdev, master);
 	else
 		status = spi_gpio_probe_pdata(pdev, master);
diff --git a/drivers/spi/spi-hisi-sfc-v3xx.c b/drivers/spi/spi-hisi-sfc-v3xx.c
new file mode 100644
index 0000000..4650b48
--- /dev/null
+++ b/drivers/spi/spi-hisi-sfc-v3xx.c
@@ -0,0 +1,524 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// HiSilicon SPI NOR V3XX Flash Controller Driver for hi16xx chipsets
+//
+// Copyright (c) 2019 HiSilicon Technologies Co., Ltd.
+// Author: John Garry <john.garry@huawei.com>
+
+#include <linux/acpi.h>
+#include <linux/bitops.h>
+#include <linux/completion.h>
+#include <linux/dmi.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+
+#define HISI_SFC_V3XX_VERSION (0x1f8)
+
+#define HISI_SFC_V3XX_RAW_INT_STAT (0x120)
+#define HISI_SFC_V3XX_INT_STAT (0x124)
+#define HISI_SFC_V3XX_INT_MASK (0x128)
+#define HISI_SFC_V3XX_INT_CLR (0x12c)
+#define HISI_SFC_V3XX_CMD_CFG (0x300)
+#define HISI_SFC_V3XX_CMD_CFG_DATA_CNT_OFF 9
+#define HISI_SFC_V3XX_CMD_CFG_RW_MSK BIT(8)
+#define HISI_SFC_V3XX_CMD_CFG_DATA_EN_MSK BIT(7)
+#define HISI_SFC_V3XX_CMD_CFG_DUMMY_CNT_OFF 4
+#define HISI_SFC_V3XX_CMD_CFG_ADDR_EN_MSK BIT(3)
+#define HISI_SFC_V3XX_CMD_CFG_CS_SEL_OFF 1
+#define HISI_SFC_V3XX_CMD_CFG_START_MSK BIT(0)
+#define HISI_SFC_V3XX_CMD_INS (0x308)
+#define HISI_SFC_V3XX_CMD_ADDR (0x30c)
+#define HISI_SFC_V3XX_CMD_DATABUF0 (0x400)
+
+/* Common definition of interrupt bit masks */
+#define HISI_SFC_V3XX_INT_MASK_ALL (0x1ff)	/* all the masks */
+#define HISI_SFC_V3XX_INT_MASK_CPLT BIT(0)	/* command execution complete */
+#define HISI_SFC_V3XX_INT_MASK_PP_ERR BIT(2)	/* page progrom error */
+#define HISI_SFC_V3XX_INT_MASK_IACCES BIT(5)	/* error visiting inaccessible/
+						 * protected address
+						 */
+
+/* IO Mode definition in HISI_SFC_V3XX_CMD_CFG */
+#define HISI_SFC_V3XX_STD (0 << 17)
+#define HISI_SFC_V3XX_DIDO (1 << 17)
+#define HISI_SFC_V3XX_DIO (2 << 17)
+#define HISI_SFC_V3XX_FULL_DIO (3 << 17)
+#define HISI_SFC_V3XX_QIQO (5 << 17)
+#define HISI_SFC_V3XX_QIO (6 << 17)
+#define HISI_SFC_V3XX_FULL_QIO (7 << 17)
+
+/*
+ * The IO modes lookup table. hisi_sfc_v3xx_io_modes[(z - 1) / 2][y / 2][x / 2]
+ * stands for x-y-z mode, as described in SFDP terminology. -EIO indicates
+ * an invalid mode.
+ */
+static const int hisi_sfc_v3xx_io_modes[2][3][3] = {
+	{
+		{ HISI_SFC_V3XX_DIDO, HISI_SFC_V3XX_DIDO, HISI_SFC_V3XX_DIDO },
+		{ HISI_SFC_V3XX_DIO, HISI_SFC_V3XX_FULL_DIO, -EIO },
+		{ -EIO, -EIO, -EIO },
+	},
+	{
+		{ HISI_SFC_V3XX_QIQO, HISI_SFC_V3XX_QIQO, HISI_SFC_V3XX_QIQO },
+		{ -EIO, -EIO, -EIO },
+		{ HISI_SFC_V3XX_QIO, -EIO, HISI_SFC_V3XX_FULL_QIO },
+	},
+};
+
+struct hisi_sfc_v3xx_host {
+	struct device *dev;
+	void __iomem *regbase;
+	int max_cmd_dword;
+	struct completion *completion;
+	int irq;
+};
+
+static void hisi_sfc_v3xx_disable_int(struct hisi_sfc_v3xx_host *host)
+{
+	writel(0, host->regbase + HISI_SFC_V3XX_INT_MASK);
+}
+
+static void hisi_sfc_v3xx_enable_int(struct hisi_sfc_v3xx_host *host)
+{
+	writel(HISI_SFC_V3XX_INT_MASK_ALL, host->regbase + HISI_SFC_V3XX_INT_MASK);
+}
+
+static void hisi_sfc_v3xx_clear_int(struct hisi_sfc_v3xx_host *host)
+{
+	writel(HISI_SFC_V3XX_INT_MASK_ALL, host->regbase + HISI_SFC_V3XX_INT_CLR);
+}
+
+/*
+ * The interrupt status register indicates whether an error occurs
+ * after per operation. Check it, and clear the interrupts for
+ * next time judgement.
+ */
+static int hisi_sfc_v3xx_handle_completion(struct hisi_sfc_v3xx_host *host)
+{
+	u32 reg;
+
+	reg = readl(host->regbase + HISI_SFC_V3XX_RAW_INT_STAT);
+	hisi_sfc_v3xx_clear_int(host);
+
+	if (reg & HISI_SFC_V3XX_INT_MASK_IACCES) {
+		dev_err(host->dev, "fail to access protected address\n");
+		return -EIO;
+	}
+
+	if (reg & HISI_SFC_V3XX_INT_MASK_PP_ERR) {
+		dev_err(host->dev, "page program operation failed\n");
+		return -EIO;
+	}
+
+	/*
+	 * The other bits of the interrupt registers is not currently
+	 * used and probably not be triggered in this driver. When it
+	 * happens, we regard it as an unsupported error here.
+	 */
+	if (!(reg & HISI_SFC_V3XX_INT_MASK_CPLT)) {
+		dev_err(host->dev, "unsupported error occurred, status=0x%x\n", reg);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+#define HISI_SFC_V3XX_WAIT_TIMEOUT_US		1000000
+#define HISI_SFC_V3XX_WAIT_POLL_INTERVAL_US	10
+
+static int hisi_sfc_v3xx_wait_cmd_idle(struct hisi_sfc_v3xx_host *host)
+{
+	u32 reg;
+
+	return readl_poll_timeout(host->regbase + HISI_SFC_V3XX_CMD_CFG, reg,
+				  !(reg & HISI_SFC_V3XX_CMD_CFG_START_MSK),
+				  HISI_SFC_V3XX_WAIT_POLL_INTERVAL_US,
+				  HISI_SFC_V3XX_WAIT_TIMEOUT_US);
+}
+
+static int hisi_sfc_v3xx_adjust_op_size(struct spi_mem *mem,
+					struct spi_mem_op *op)
+{
+	struct spi_device *spi = mem->spi;
+	struct hisi_sfc_v3xx_host *host;
+	uintptr_t addr = (uintptr_t)op->data.buf.in;
+	int max_byte_count;
+
+	host = spi_controller_get_devdata(spi->master);
+
+	max_byte_count = host->max_cmd_dword * 4;
+
+	if (!IS_ALIGNED(addr, 4) && op->data.nbytes >= 4)
+		op->data.nbytes = 4 - (addr % 4);
+	else if (op->data.nbytes > max_byte_count)
+		op->data.nbytes = max_byte_count;
+
+	return 0;
+}
+
+/*
+ * The controller only supports Standard SPI mode, Duall mode and
+ * Quad mode. Double sanitize the ops here to avoid OOB access.
+ */
+static bool hisi_sfc_v3xx_supports_op(struct spi_mem *mem,
+				      const struct spi_mem_op *op)
+{
+	if (op->data.buswidth > 4 || op->dummy.buswidth > 4 ||
+	    op->addr.buswidth > 4 || op->cmd.buswidth > 4)
+		return false;
+
+	return spi_mem_default_supports_op(mem, op);
+}
+
+/*
+ * memcpy_{to,from}io doesn't gurantee 32b accesses - which we require for the
+ * DATABUF registers -so use __io{read,write}32_copy when possible. For
+ * trailing bytes, copy them byte-by-byte from the DATABUF register, as we
+ * can't clobber outside the source/dest buffer.
+ *
+ * For efficient data read/write, we try to put any start 32b unaligned data
+ * into a separate transaction in hisi_sfc_v3xx_adjust_op_size().
+ */
+static void hisi_sfc_v3xx_read_databuf(struct hisi_sfc_v3xx_host *host,
+				       u8 *to, unsigned int len)
+{
+	void __iomem *from;
+	int i;
+
+	from = host->regbase + HISI_SFC_V3XX_CMD_DATABUF0;
+
+	if (IS_ALIGNED((uintptr_t)to, 4)) {
+		int words = len / 4;
+
+		__ioread32_copy(to, from, words);
+
+		len -= words * 4;
+		if (len) {
+			u32 val;
+
+			to += words * 4;
+			from += words * 4;
+
+			val = __raw_readl(from);
+
+			for (i = 0; i < len; i++, val >>= 8, to++)
+				*to = (u8)val;
+		}
+	} else {
+		for (i = 0; i < DIV_ROUND_UP(len, 4); i++, from += 4) {
+			u32 val = __raw_readl(from);
+			int j;
+
+			for (j = 0; j < 4 && (j + (i * 4) < len);
+			     to++, val >>= 8, j++)
+				*to = (u8)val;
+		}
+	}
+}
+
+static void hisi_sfc_v3xx_write_databuf(struct hisi_sfc_v3xx_host *host,
+					const u8 *from, unsigned int len)
+{
+	void __iomem *to;
+	int i;
+
+	to = host->regbase + HISI_SFC_V3XX_CMD_DATABUF0;
+
+	if (IS_ALIGNED((uintptr_t)from, 4)) {
+		int words = len / 4;
+
+		__iowrite32_copy(to, from, words);
+
+		len -= words * 4;
+		if (len) {
+			u32 val = 0;
+
+			to += words * 4;
+			from += words * 4;
+
+			for (i = 0; i < len; i++, from++)
+				val |= *from << i * 8;
+			__raw_writel(val, to);
+		}
+
+	} else {
+		for (i = 0; i < DIV_ROUND_UP(len, 4); i++, to += 4) {
+			u32 val = 0;
+			int j;
+
+			for (j = 0; j < 4 && (j + (i * 4) < len);
+			     from++, j++)
+				val |= *from << j * 8;
+			__raw_writel(val, to);
+		}
+	}
+}
+
+static int hisi_sfc_v3xx_start_bus(struct hisi_sfc_v3xx_host *host,
+				   const struct spi_mem_op *op,
+				   u8 chip_select)
+{
+	int len = op->data.nbytes, buswidth_mode;
+	u32 config = 0;
+
+	if (op->addr.nbytes)
+		config |= HISI_SFC_V3XX_CMD_CFG_ADDR_EN_MSK;
+
+	if (op->data.buswidth == 0 || op->data.buswidth == 1) {
+		buswidth_mode = HISI_SFC_V3XX_STD;
+	} else {
+		int data_idx, addr_idx, cmd_idx;
+
+		data_idx = (op->data.buswidth - 1) / 2;
+		addr_idx = op->addr.buswidth / 2;
+		cmd_idx = op->cmd.buswidth / 2;
+		buswidth_mode = hisi_sfc_v3xx_io_modes[data_idx][addr_idx][cmd_idx];
+	}
+	if (buswidth_mode < 0)
+		return buswidth_mode;
+	config |= buswidth_mode;
+
+	if (op->data.dir != SPI_MEM_NO_DATA) {
+		config |= (len - 1) << HISI_SFC_V3XX_CMD_CFG_DATA_CNT_OFF;
+		config |= HISI_SFC_V3XX_CMD_CFG_DATA_EN_MSK;
+	}
+
+	if (op->data.dir == SPI_MEM_DATA_IN)
+		config |= HISI_SFC_V3XX_CMD_CFG_RW_MSK;
+
+	config |= op->dummy.nbytes << HISI_SFC_V3XX_CMD_CFG_DUMMY_CNT_OFF |
+		  chip_select << HISI_SFC_V3XX_CMD_CFG_CS_SEL_OFF |
+		  HISI_SFC_V3XX_CMD_CFG_START_MSK;
+
+	writel(op->addr.val, host->regbase + HISI_SFC_V3XX_CMD_ADDR);
+	writel(op->cmd.opcode, host->regbase + HISI_SFC_V3XX_CMD_INS);
+
+	writel(config, host->regbase + HISI_SFC_V3XX_CMD_CFG);
+
+	return 0;
+}
+
+static int hisi_sfc_v3xx_generic_exec_op(struct hisi_sfc_v3xx_host *host,
+					 const struct spi_mem_op *op,
+					 u8 chip_select)
+{
+	DECLARE_COMPLETION_ONSTACK(done);
+	int ret;
+
+	if (host->irq) {
+		host->completion = &done;
+		hisi_sfc_v3xx_enable_int(host);
+	}
+
+	if (op->data.dir == SPI_MEM_DATA_OUT)
+		hisi_sfc_v3xx_write_databuf(host, op->data.buf.out, op->data.nbytes);
+
+	ret = hisi_sfc_v3xx_start_bus(host, op, chip_select);
+	if (ret)
+		return ret;
+
+	if (host->irq) {
+		ret = wait_for_completion_timeout(host->completion,
+						  usecs_to_jiffies(HISI_SFC_V3XX_WAIT_TIMEOUT_US));
+		if (!ret)
+			ret = -ETIMEDOUT;
+		else
+			ret = 0;
+
+		hisi_sfc_v3xx_disable_int(host);
+		host->completion = NULL;
+	} else {
+		ret = hisi_sfc_v3xx_wait_cmd_idle(host);
+	}
+	if (hisi_sfc_v3xx_handle_completion(host) || ret)
+		return -EIO;
+
+	if (op->data.dir == SPI_MEM_DATA_IN)
+		hisi_sfc_v3xx_read_databuf(host, op->data.buf.in, op->data.nbytes);
+
+	return 0;
+}
+
+static int hisi_sfc_v3xx_exec_op(struct spi_mem *mem,
+				 const struct spi_mem_op *op)
+{
+	struct hisi_sfc_v3xx_host *host;
+	struct spi_device *spi = mem->spi;
+	u8 chip_select = spi->chip_select;
+
+	host = spi_controller_get_devdata(spi->master);
+
+	return hisi_sfc_v3xx_generic_exec_op(host, op, chip_select);
+}
+
+static const struct spi_controller_mem_ops hisi_sfc_v3xx_mem_ops = {
+	.adjust_op_size = hisi_sfc_v3xx_adjust_op_size,
+	.supports_op = hisi_sfc_v3xx_supports_op,
+	.exec_op = hisi_sfc_v3xx_exec_op,
+};
+
+static irqreturn_t hisi_sfc_v3xx_isr(int irq, void *data)
+{
+	struct hisi_sfc_v3xx_host *host = data;
+
+	hisi_sfc_v3xx_disable_int(host);
+
+	complete(host->completion);
+
+	return IRQ_HANDLED;
+}
+
+static int hisi_sfc_v3xx_buswidth_override_bits;
+
+/*
+ * ACPI FW does not allow us to currently set the device buswidth, so quirk it
+ * depending on the board.
+ */
+static int __init hisi_sfc_v3xx_dmi_quirk(const struct dmi_system_id *d)
+{
+	hisi_sfc_v3xx_buswidth_override_bits = SPI_RX_QUAD | SPI_TX_QUAD;
+
+	return 0;
+}
+
+static const struct dmi_system_id hisi_sfc_v3xx_dmi_quirk_table[]  = {
+	{
+	.callback = hisi_sfc_v3xx_dmi_quirk,
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "Huawei"),
+		DMI_MATCH(DMI_PRODUCT_NAME, "D06"),
+	},
+	},
+	{
+	.callback = hisi_sfc_v3xx_dmi_quirk,
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "Huawei"),
+		DMI_MATCH(DMI_PRODUCT_NAME, "TaiShan 2280 V2"),
+	},
+	},
+	{
+	.callback = hisi_sfc_v3xx_dmi_quirk,
+	.matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "Huawei"),
+		DMI_MATCH(DMI_PRODUCT_NAME, "TaiShan 200 (Model 2280)"),
+	},
+	},
+	{}
+};
+
+static int hisi_sfc_v3xx_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct hisi_sfc_v3xx_host *host;
+	struct spi_controller *ctlr;
+	u32 version;
+	int ret;
+
+	ctlr = spi_alloc_master(&pdev->dev, sizeof(*host));
+	if (!ctlr)
+		return -ENOMEM;
+
+	ctlr->mode_bits = SPI_RX_DUAL | SPI_RX_QUAD |
+			  SPI_TX_DUAL | SPI_TX_QUAD;
+
+	ctlr->buswidth_override_bits = hisi_sfc_v3xx_buswidth_override_bits;
+
+	host = spi_controller_get_devdata(ctlr);
+	host->dev = dev;
+
+	platform_set_drvdata(pdev, host);
+
+	host->regbase = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(host->regbase)) {
+		ret = PTR_ERR(host->regbase);
+		goto err_put_master;
+	}
+
+	host->irq = platform_get_irq_optional(pdev, 0);
+	if (host->irq == -EPROBE_DEFER) {
+		ret = -EPROBE_DEFER;
+		goto err_put_master;
+	}
+
+	hisi_sfc_v3xx_disable_int(host);
+
+	if (host->irq > 0) {
+		ret = devm_request_irq(dev, host->irq, hisi_sfc_v3xx_isr, 0,
+				       "hisi-sfc-v3xx", host);
+
+		if (ret) {
+			dev_err(dev, "failed to request irq%d, ret = %d\n", host->irq, ret);
+			host->irq = 0;
+		}
+	} else {
+		host->irq = 0;
+	}
+
+	ctlr->bus_num = -1;
+	ctlr->num_chipselect = 1;
+	ctlr->mem_ops = &hisi_sfc_v3xx_mem_ops;
+
+	version = readl(host->regbase + HISI_SFC_V3XX_VERSION);
+
+	switch (version) {
+	case 0x351:
+		host->max_cmd_dword = 64;
+		break;
+	default:
+		host->max_cmd_dword = 16;
+		break;
+	}
+
+	ret = devm_spi_register_controller(dev, ctlr);
+	if (ret)
+		goto err_put_master;
+
+	dev_info(&pdev->dev, "hw version 0x%x, %s mode.\n",
+		 version, host->irq ? "irq" : "polling");
+
+	return 0;
+
+err_put_master:
+	spi_master_put(ctlr);
+	return ret;
+}
+
+#if IS_ENABLED(CONFIG_ACPI)
+static const struct acpi_device_id hisi_sfc_v3xx_acpi_ids[] = {
+	{"HISI0341", 0},
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, hisi_sfc_v3xx_acpi_ids);
+#endif
+
+static struct platform_driver hisi_sfc_v3xx_spi_driver = {
+	.driver = {
+		.name	= "hisi-sfc-v3xx",
+		.acpi_match_table = ACPI_PTR(hisi_sfc_v3xx_acpi_ids),
+	},
+	.probe	= hisi_sfc_v3xx_probe,
+};
+
+static int __init hisi_sfc_v3xx_spi_init(void)
+{
+	dmi_check_system(hisi_sfc_v3xx_dmi_quirk_table);
+
+	return platform_driver_register(&hisi_sfc_v3xx_spi_driver);
+}
+
+static void __exit hisi_sfc_v3xx_spi_exit(void)
+{
+	platform_driver_unregister(&hisi_sfc_v3xx_spi_driver);
+}
+
+module_init(hisi_sfc_v3xx_spi_init);
+module_exit(hisi_sfc_v3xx_spi_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("John Garry <john.garry@huawei.com>");
+MODULE_DESCRIPTION("HiSilicon SPI NOR V3XX Flash Controller Driver for hi16xx chipsets");
diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c
index e9ef809..5f05d51 100644
--- a/drivers/spi/spi-img-spfi.c
+++ b/drivers/spi/spi-img-spfi.c
@@ -9,7 +9,6 @@
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/dmaengine.h>
-#include <linux/gpio.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/irq.h>
@@ -102,10 +101,6 @@
 	bool rx_dma_busy;
 };
 
-struct img_spfi_device_data {
-	bool gpio_requested;
-};
-
 static inline u32 spfi_readl(struct img_spfi *spfi, u32 reg)
 {
 	return readl(spfi->regs + reg);
@@ -442,54 +437,6 @@
 	return 0;
 }
 
-static int img_spfi_setup(struct spi_device *spi)
-{
-	int ret = -EINVAL;
-	struct img_spfi_device_data *spfi_data = spi_get_ctldata(spi);
-
-	if (!spfi_data) {
-		spfi_data = kzalloc(sizeof(*spfi_data), GFP_KERNEL);
-		if (!spfi_data)
-			return -ENOMEM;
-		spfi_data->gpio_requested = false;
-		spi_set_ctldata(spi, spfi_data);
-	}
-	if (!spfi_data->gpio_requested) {
-		ret = gpio_request_one(spi->cs_gpio,
-				       (spi->mode & SPI_CS_HIGH) ?
-				       GPIOF_OUT_INIT_LOW : GPIOF_OUT_INIT_HIGH,
-				       dev_name(&spi->dev));
-		if (ret)
-			dev_err(&spi->dev, "can't request chipselect gpio %d\n",
-				spi->cs_gpio);
-		else
-			spfi_data->gpio_requested = true;
-	} else {
-		if (gpio_is_valid(spi->cs_gpio)) {
-			int mode = ((spi->mode & SPI_CS_HIGH) ?
-				    GPIOF_OUT_INIT_LOW : GPIOF_OUT_INIT_HIGH);
-
-			ret = gpio_direction_output(spi->cs_gpio, mode);
-			if (ret)
-				dev_err(&spi->dev, "chipselect gpio %d setup failed (%d)\n",
-					spi->cs_gpio, ret);
-		}
-	}
-	return ret;
-}
-
-static void img_spfi_cleanup(struct spi_device *spi)
-{
-	struct img_spfi_device_data *spfi_data = spi_get_ctldata(spi);
-
-	if (spfi_data) {
-		if (spfi_data->gpio_requested)
-			gpio_free(spi->cs_gpio);
-		kfree(spfi_data);
-		spi_set_ctldata(spi, NULL);
-	}
-}
-
 static void img_spfi_config(struct spi_master *master, struct spi_device *spi,
 			    struct spi_transfer *xfer)
 {
@@ -659,15 +606,28 @@
 			master->max_speed_hz = max_speed_hz;
 	}
 
-	master->setup = img_spfi_setup;
-	master->cleanup = img_spfi_cleanup;
 	master->transfer_one = img_spfi_transfer_one;
 	master->prepare_message = img_spfi_prepare;
 	master->unprepare_message = img_spfi_unprepare;
 	master->handle_err = img_spfi_handle_err;
+	master->use_gpio_descriptors = true;
 
-	spfi->tx_ch = dma_request_slave_channel(spfi->dev, "tx");
-	spfi->rx_ch = dma_request_slave_channel(spfi->dev, "rx");
+	spfi->tx_ch = dma_request_chan(spfi->dev, "tx");
+	if (IS_ERR(spfi->tx_ch)) {
+		ret = PTR_ERR(spfi->tx_ch);
+		spfi->tx_ch = NULL;
+		if (ret == -EPROBE_DEFER)
+			goto disable_pm;
+	}
+
+	spfi->rx_ch = dma_request_chan(spfi->dev, "rx");
+	if (IS_ERR(spfi->rx_ch)) {
+		ret = PTR_ERR(spfi->rx_ch);
+		spfi->rx_ch = NULL;
+		if (ret == -EPROBE_DEFER)
+			goto disable_pm;
+	}
+
 	if (!spfi->tx_ch || !spfi->rx_ch) {
 		if (spfi->tx_ch)
 			dma_release_channel(spfi->tx_ch);
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index 474d5a7..0e3bc0b 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -8,23 +8,23 @@
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
-#include <linux/gpio.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/irq.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/slab.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
 #include <linux/types.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/of_gpio.h>
+#include <linux/property.h>
 
 #include <linux/platform_data/dma-imx.h>
-#include <linux/platform_data/spi-imx.h>
 
 #define DRIVER_NAME "spi_imx"
 
@@ -32,6 +32,8 @@
 module_param(use_dma, bool, 0644);
 MODULE_PARM_DESC(use_dma, "Enable usage of DMA when available (default)");
 
+#define MXC_RPM_TIMEOUT		2000 /* 2000ms */
+
 #define MXC_CSPIRXDATA		0x00
 #define MXC_CSPITXDATA		0x04
 #define MXC_CSPICTRL		0x08
@@ -70,6 +72,7 @@
 	void (*reset)(struct spi_imx_data *);
 	void (*setup_wml)(struct spi_imx_data *);
 	void (*disable)(struct spi_imx_data *);
+	void (*disable_dma)(struct spi_imx_data *);
 	bool has_dmamode;
 	bool has_slavemode;
 	unsigned int fifo_size;
@@ -222,7 +225,7 @@
 {
 	struct spi_imx_data *spi_imx = spi_master_get_devdata(master);
 
-	if (!use_dma)
+	if (!use_dma || master->fallback)
 		return false;
 
 	if (!master->dma_rx)
@@ -484,6 +487,11 @@
 	writel(reg, spi_imx->base + MX51_ECSPI_CTRL);
 }
 
+static void mx51_disable_dma(struct spi_imx_data *spi_imx)
+{
+	writel(0, spi_imx->base + MX51_ECSPI_DMA);
+}
+
 static void mx51_ecspi_disable(struct spi_imx_data *spi_imx)
 {
 	u32 ctrl;
@@ -728,7 +736,7 @@
 		reg |= MX31_CSPICTRL_POL;
 	if (spi->mode & SPI_CS_HIGH)
 		reg |= MX31_CSPICTRL_SSPOL;
-	if (!gpio_is_valid(spi->cs_gpio))
+	if (!spi->cs_gpiod)
 		reg |= (spi->chip_select) <<
 			(is_imx35_cspi(spi_imx) ? MX35_CSPICTRL_CS_SHIFT :
 						  MX31_CSPICTRL_CS_SHIFT);
@@ -828,7 +836,7 @@
 		reg |= MX21_CSPICTRL_POL;
 	if (spi->mode & SPI_CS_HIGH)
 		reg |= MX21_CSPICTRL_SSPOL;
-	if (!gpio_is_valid(spi->cs_gpio))
+	if (!spi->cs_gpiod)
 		reg |= spi->chip_select << MX21_CSPICTRL_CS_SHIFT;
 
 	writel(reg, spi_imx->base + MXC_CSPICTRL);
@@ -996,6 +1004,7 @@
 	.rx_available = mx51_ecspi_rx_available,
 	.reset = mx51_ecspi_reset,
 	.setup_wml = mx51_setup_wml,
+	.disable_dma = mx51_disable_dma,
 	.fifo_size = 64,
 	.has_dmamode = true,
 	.dynamic_burst = true,
@@ -1010,6 +1019,7 @@
 	.prepare_transfer = mx51_ecspi_prepare_transfer,
 	.trigger = mx51_ecspi_trigger,
 	.rx_available = mx51_ecspi_rx_available,
+	.disable_dma = mx51_disable_dma,
 	.reset = mx51_ecspi_reset,
 	.fifo_size = 64,
 	.has_dmamode = true,
@@ -1057,20 +1067,6 @@
 };
 MODULE_DEVICE_TABLE(of, spi_imx_dt_ids);
 
-static void spi_imx_chipselect(struct spi_device *spi, int is_active)
-{
-	int active = is_active != BITBANG_CS_INACTIVE;
-	int dev_is_lowactive = !(spi->mode & SPI_CS_HIGH);
-
-	if (spi->mode & SPI_NO_CS)
-		return;
-
-	if (!gpio_is_valid(spi->cs_gpio))
-		return;
-
-	gpio_set_value(spi->cs_gpio, dev_is_lowactive ^ active);
-}
-
 static void spi_imx_set_burst_len(struct spi_imx_data *spi_imx, int n_bits)
 {
 	u32 ctrl;
@@ -1249,9 +1245,9 @@
 	}
 
 	if (spi_imx_can_dma(spi_imx->bitbang.master, spi, t))
-		spi_imx->usedma = 1;
+		spi_imx->usedma = true;
 	else
-		spi_imx->usedma = 0;
+		spi_imx->usedma = false;
 
 	if (is_imx53_ecspi(spi_imx) && spi_imx->slave_mode) {
 		spi_imx->rx = mx53_ecspi_rx_slave;
@@ -1291,7 +1287,7 @@
 	spi_imx->wml = spi_imx->devtype_data->fifo_size / 2;
 
 	/* Prepare for TX DMA: */
-	master->dma_tx = dma_request_slave_channel_reason(dev, "tx");
+	master->dma_tx = dma_request_chan(dev, "tx");
 	if (IS_ERR(master->dma_tx)) {
 		ret = PTR_ERR(master->dma_tx);
 		dev_dbg(dev, "can't get the TX DMA channel, error %d!\n", ret);
@@ -1300,7 +1296,7 @@
 	}
 
 	/* Prepare for RX : */
-	master->dma_rx = dma_request_slave_channel_reason(dev, "rx");
+	master->dma_rx = dma_request_chan(dev, "rx");
 	if (IS_ERR(master->dma_rx)) {
 		ret = PTR_ERR(master->dma_rx);
 		dev_dbg(dev, "can't get the RX DMA channel, error %d\n", ret);
@@ -1375,11 +1371,12 @@
 
 	ret = spi_imx_dma_configure(master);
 	if (ret)
-		return ret;
+		goto dma_failure_no_start;
 
 	if (!spi_imx->devtype_data->setup_wml) {
 		dev_err(spi_imx->dev, "No setup_wml()?\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto dma_failure_no_start;
 	}
 	spi_imx->devtype_data->setup_wml(spi_imx);
 
@@ -1390,8 +1387,10 @@
 	desc_rx = dmaengine_prep_slave_sg(master->dma_rx,
 				rx->sgl, rx->nents, DMA_DEV_TO_MEM,
 				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
-	if (!desc_rx)
-		return -EINVAL;
+	if (!desc_rx) {
+		ret = -EINVAL;
+		goto dma_failure_no_start;
+	}
 
 	desc_rx->callback = spi_imx_dma_rx_callback;
 	desc_rx->callback_param = (void *)spi_imx;
@@ -1404,6 +1403,7 @@
 				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	if (!desc_tx) {
 		dmaengine_terminate_all(master->dma_tx);
+		dmaengine_terminate_all(master->dma_rx);
 		return -EINVAL;
 	}
 
@@ -1435,6 +1435,10 @@
 	}
 
 	return transfer->len;
+/* fallback to pio */
+dma_failure_no_start:
+	transfer->error |= SPI_TRANS_FAIL_NO_START;
+	return ret;
 }
 
 static int spi_imx_pio_transfer(struct spi_device *spi,
@@ -1518,6 +1522,8 @@
 {
 	struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
 
+	transfer->effective_speed_hz = spi_imx->spi_bus_clk;
+
 	/* flush rxfifo before transfer */
 	while (spi_imx->devtype_data->rx_available(spi_imx))
 		readl(spi_imx->base + MXC_CSPIRXDATA);
@@ -1527,8 +1533,8 @@
 
 	if (spi_imx->usedma)
 		return spi_imx_dma_transfer(spi_imx, transfer);
-	else
-		return spi_imx_pio_transfer(spi, transfer);
+
+	return spi_imx_pio_transfer(spi, transfer);
 }
 
 static int spi_imx_setup(struct spi_device *spi)
@@ -1536,15 +1542,6 @@
 	dev_dbg(&spi->dev, "%s: mode %d, %u bpw, %d hz\n", __func__,
 		 spi->mode, spi->bits_per_word, spi->max_speed_hz);
 
-	if (spi->mode & SPI_NO_CS)
-		return 0;
-
-	if (gpio_is_valid(spi->cs_gpio))
-		gpio_direction_output(spi->cs_gpio,
-				      spi->mode & SPI_CS_HIGH ? 0 : 1);
-
-	spi_imx_chipselect(spi, BITBANG_CS_INACTIVE);
-
 	return 0;
 }
 
@@ -1558,20 +1555,17 @@
 	struct spi_imx_data *spi_imx = spi_master_get_devdata(master);
 	int ret;
 
-	ret = clk_enable(spi_imx->clk_per);
-	if (ret)
-		return ret;
-
-	ret = clk_enable(spi_imx->clk_ipg);
-	if (ret) {
-		clk_disable(spi_imx->clk_per);
+	ret = pm_runtime_get_sync(spi_imx->dev);
+	if (ret < 0) {
+		pm_runtime_put_noidle(spi_imx->dev);
+		dev_err(spi_imx->dev, "failed to enable clock\n");
 		return ret;
 	}
 
 	ret = spi_imx->devtype_data->prepare_message(spi_imx, msg);
 	if (ret) {
-		clk_disable(spi_imx->clk_ipg);
-		clk_disable(spi_imx->clk_per);
+		pm_runtime_mark_last_busy(spi_imx->dev);
+		pm_runtime_put_autosuspend(spi_imx->dev);
 	}
 
 	return ret;
@@ -1582,8 +1576,8 @@
 {
 	struct spi_imx_data *spi_imx = spi_master_get_devdata(master);
 
-	clk_disable(spi_imx->clk_ipg);
-	clk_disable(spi_imx->clk_per);
+	pm_runtime_mark_last_busy(spi_imx->dev);
+	pm_runtime_put_autosuspend(spi_imx->dev);
 	return 0;
 }
 
@@ -1602,20 +1596,14 @@
 	struct device_node *np = pdev->dev.of_node;
 	const struct of_device_id *of_id =
 			of_match_device(spi_imx_dt_ids, &pdev->dev);
-	struct spi_imx_master *mxc_platform_info =
-			dev_get_platdata(&pdev->dev);
 	struct spi_master *master;
 	struct spi_imx_data *spi_imx;
 	struct resource *res;
-	int i, ret, irq, spi_drctl;
+	int ret, irq, spi_drctl;
 	const struct spi_imx_devtype_data *devtype_data = of_id ? of_id->data :
 		(struct spi_imx_devtype_data *)pdev->id_entry->driver_data;
 	bool slave_mode;
-
-	if (!np && !mxc_platform_info) {
-		dev_err(&pdev->dev, "can't get the platform data\n");
-		return -EINVAL;
-	}
+	u32 val;
 
 	slave_mode = devtype_data->has_slavemode &&
 			of_property_read_bool(np, "spi-slave");
@@ -1638,6 +1626,7 @@
 
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(1, 32);
 	master->bus_num = np ? -1 : pdev->id;
+	master->use_gpio_descriptors = true;
 
 	spi_imx = spi_master_get_devdata(master);
 	spi_imx->bitbang.master = master;
@@ -1646,28 +1635,17 @@
 
 	spi_imx->devtype_data = devtype_data;
 
-	/* Get number of chip selects, either platform data or OF */
-	if (mxc_platform_info) {
-		master->num_chipselect = mxc_platform_info->num_chipselect;
-		if (mxc_platform_info->chipselect) {
-			master->cs_gpios = devm_kcalloc(&master->dev,
-				master->num_chipselect, sizeof(int),
-				GFP_KERNEL);
-			if (!master->cs_gpios)
-				return -ENOMEM;
+	/*
+	 * Get number of chip selects from device properties. This can be
+	 * coming from device tree or boardfiles, if it is not defined,
+	 * a default value of 3 chip selects will be used, as all the legacy
+	 * board files have <= 3 chip selects.
+	 */
+	if (!device_property_read_u32(&pdev->dev, "num-cs", &val))
+		master->num_chipselect = val;
+	else
+		master->num_chipselect = 3;
 
-			for (i = 0; i < master->num_chipselect; i++)
-				master->cs_gpios[i] = mxc_platform_info->chipselect[i];
-		}
-	} else {
-		u32 num_cs;
-
-		if (!of_property_read_u32(np, "num-cs", &num_cs))
-			master->num_chipselect = num_cs;
-		/* If not preset, default value of 1 is used */
-	}
-
-	spi_imx->bitbang.chipselect = spi_imx_chipselect;
 	spi_imx->bitbang.setup_transfer = spi_imx_setupxfer;
 	spi_imx->bitbang.txrx_bufs = spi_imx_transfer;
 	spi_imx->bitbang.master->setup = spi_imx_setup;
@@ -1726,6 +1704,12 @@
 	if (ret)
 		goto out_put_per;
 
+	pm_runtime_set_autosuspend_delay(spi_imx->dev, MXC_RPM_TIMEOUT);
+	pm_runtime_use_autosuspend(spi_imx->dev);
+	pm_runtime_get_noresume(spi_imx->dev);
+	pm_runtime_set_active(spi_imx->dev);
+	pm_runtime_enable(spi_imx->dev);
+
 	spi_imx->spi_clk = clk_get_rate(spi_imx->clk_per);
 	/*
 	 * Only validated on i.mx35 and i.mx6 now, can remove the constraint
@@ -1734,10 +1718,10 @@
 	if (spi_imx->devtype_data->has_dmamode) {
 		ret = spi_imx_sdma_init(&pdev->dev, spi_imx, master);
 		if (ret == -EPROBE_DEFER)
-			goto out_clk_put;
+			goto out_runtime_pm_put;
 
 		if (ret < 0)
-			dev_err(&pdev->dev, "dma setup error %d, use pio\n",
+			dev_dbg(&pdev->dev, "dma setup error %d, use pio\n",
 				ret);
 	}
 
@@ -1748,36 +1732,23 @@
 	master->dev.of_node = pdev->dev.of_node;
 	ret = spi_bitbang_start(&spi_imx->bitbang);
 	if (ret) {
-		dev_err(&pdev->dev, "bitbang start failed with %d\n", ret);
-		goto out_clk_put;
+		dev_err_probe(&pdev->dev, ret, "bitbang start failed\n");
+		goto out_bitbang_start;
 	}
 
-	/* Request GPIO CS lines, if any */
-	if (!spi_imx->slave_mode && master->cs_gpios) {
-		for (i = 0; i < master->num_chipselect; i++) {
-			if (!gpio_is_valid(master->cs_gpios[i]))
-				continue;
+	pm_runtime_mark_last_busy(spi_imx->dev);
+	pm_runtime_put_autosuspend(spi_imx->dev);
 
-			ret = devm_gpio_request(&pdev->dev,
-						master->cs_gpios[i],
-						DRIVER_NAME);
-			if (ret) {
-				dev_err(&pdev->dev, "Can't get CS GPIO %i\n",
-					master->cs_gpios[i]);
-				goto out_spi_bitbang;
-			}
-		}
-	}
-
-	dev_info(&pdev->dev, "probed\n");
-
-	clk_disable(spi_imx->clk_ipg);
-	clk_disable(spi_imx->clk_per);
 	return ret;
 
-out_spi_bitbang:
-	spi_bitbang_stop(&spi_imx->bitbang);
-out_clk_put:
+out_bitbang_start:
+	if (spi_imx->devtype_data->has_dmamode)
+		spi_imx_sdma_exit(spi_imx);
+out_runtime_pm_put:
+	pm_runtime_dont_use_autosuspend(spi_imx->dev);
+	pm_runtime_set_suspended(&pdev->dev);
+	pm_runtime_disable(spi_imx->dev);
+
 	clk_disable_unprepare(spi_imx->clk_ipg);
 out_put_per:
 	clk_disable_unprepare(spi_imx->clk_per);
@@ -1795,30 +1766,83 @@
 
 	spi_bitbang_stop(&spi_imx->bitbang);
 
-	ret = clk_enable(spi_imx->clk_per);
-	if (ret)
-		return ret;
-
-	ret = clk_enable(spi_imx->clk_ipg);
-	if (ret) {
-		clk_disable(spi_imx->clk_per);
+	ret = pm_runtime_get_sync(spi_imx->dev);
+	if (ret < 0) {
+		pm_runtime_put_noidle(spi_imx->dev);
+		dev_err(spi_imx->dev, "failed to enable clock\n");
 		return ret;
 	}
 
 	writel(0, spi_imx->base + MXC_CSPICTRL);
-	clk_disable_unprepare(spi_imx->clk_ipg);
-	clk_disable_unprepare(spi_imx->clk_per);
+
+	pm_runtime_dont_use_autosuspend(spi_imx->dev);
+	pm_runtime_put_sync(spi_imx->dev);
+	pm_runtime_disable(spi_imx->dev);
+
 	spi_imx_sdma_exit(spi_imx);
 	spi_master_put(master);
 
 	return 0;
 }
 
+static int __maybe_unused spi_imx_runtime_resume(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct spi_imx_data *spi_imx;
+	int ret;
+
+	spi_imx = spi_master_get_devdata(master);
+
+	ret = clk_prepare_enable(spi_imx->clk_per);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(spi_imx->clk_ipg);
+	if (ret) {
+		clk_disable_unprepare(spi_imx->clk_per);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int __maybe_unused spi_imx_runtime_suspend(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct spi_imx_data *spi_imx;
+
+	spi_imx = spi_master_get_devdata(master);
+
+	clk_disable_unprepare(spi_imx->clk_per);
+	clk_disable_unprepare(spi_imx->clk_ipg);
+
+	return 0;
+}
+
+static int __maybe_unused spi_imx_suspend(struct device *dev)
+{
+	pinctrl_pm_select_sleep_state(dev);
+	return 0;
+}
+
+static int __maybe_unused spi_imx_resume(struct device *dev)
+{
+	pinctrl_pm_select_default_state(dev);
+	return 0;
+}
+
+static const struct dev_pm_ops imx_spi_pm = {
+	SET_RUNTIME_PM_OPS(spi_imx_runtime_suspend,
+				spi_imx_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(spi_imx_suspend, spi_imx_resume)
+};
+
 static struct platform_driver spi_imx_driver = {
 	.driver = {
 		   .name = DRIVER_NAME,
 		   .of_match_table = spi_imx_dt_ids,
-		   },
+		   .pm = &imx_spi_pm,
+	},
 	.id_table = spi_imx_devtype,
 	.probe = spi_imx_probe,
 	.remove = spi_imx_remove,
diff --git a/drivers/spi/spi-jcore.c b/drivers/spi/spi-jcore.c
index cc49fa4..bba10f0 100644
--- a/drivers/spi/spi-jcore.c
+++ b/drivers/spi/spi-jcore.c
@@ -170,7 +170,7 @@
 	if (!devm_request_mem_region(&pdev->dev, res->start,
 				     resource_size(res), pdev->name))
 		goto exit_busy;
-	hw->base = devm_ioremap_nocache(&pdev->dev, res->start,
+	hw->base = devm_ioremap(&pdev->dev, res->start,
 					resource_size(res));
 	if (!hw->base)
 		goto exit_busy;
diff --git a/drivers/spi/spi-lantiq-ssc.c b/drivers/spi/spi-lantiq-ssc.c
index f9bc170..bcb5260 100644
--- a/drivers/spi/spi-lantiq-ssc.c
+++ b/drivers/spi/spi-lantiq-ssc.c
@@ -15,7 +15,6 @@
 #include <linux/completion.h>
 #include <linux/spinlock.h>
 #include <linux/err.h>
-#include <linux/gpio.h>
 #include <linux/pm_runtime.h>
 #include <linux/spi/spi.h>
 
@@ -50,8 +49,6 @@
 #define LTQ_SPI_RXCNT		0x84
 #define LTQ_SPI_DMACON		0xec
 #define LTQ_SPI_IRNEN		0xf4
-#define LTQ_SPI_IRNICR		0xf8
-#define LTQ_SPI_IRNCR		0xfc
 
 #define LTQ_SPI_CLC_SMC_S	16	/* Clock divider for sleep mode */
 #define LTQ_SPI_CLC_SMC_M	(0xFF << LTQ_SPI_CLC_SMC_S)
@@ -61,9 +58,7 @@
 #define LTQ_SPI_CLC_DISR	BIT(0)	/* Disable request bit */
 
 #define LTQ_SPI_ID_TXFS_S	24	/* Implemented TX FIFO size */
-#define LTQ_SPI_ID_TXFS_M	(0x3F << LTQ_SPI_ID_TXFS_S)
 #define LTQ_SPI_ID_RXFS_S	16	/* Implemented RX FIFO size */
-#define LTQ_SPI_ID_RXFS_M	(0x3F << LTQ_SPI_ID_RXFS_S)
 #define LTQ_SPI_ID_MOD_S	8	/* Module ID */
 #define LTQ_SPI_ID_MOD_M	(0xff << LTQ_SPI_ID_MOD_S)
 #define LTQ_SPI_ID_CFG_S	5	/* DMA interface support */
@@ -126,19 +121,15 @@
 					 LTQ_SPI_WHBSTATE_CLRTUE)
 
 #define LTQ_SPI_RXFCON_RXFITL_S	8	/* FIFO interrupt trigger level */
-#define LTQ_SPI_RXFCON_RXFITL_M	(0x3F << LTQ_SPI_RXFCON_RXFITL_S)
 #define LTQ_SPI_RXFCON_RXFLU	BIT(1)	/* FIFO flush */
 #define LTQ_SPI_RXFCON_RXFEN	BIT(0)	/* FIFO enable */
 
 #define LTQ_SPI_TXFCON_TXFITL_S	8	/* FIFO interrupt trigger level */
-#define LTQ_SPI_TXFCON_TXFITL_M	(0x3F << LTQ_SPI_TXFCON_TXFITL_S)
 #define LTQ_SPI_TXFCON_TXFLU	BIT(1)	/* FIFO flush */
 #define LTQ_SPI_TXFCON_TXFEN	BIT(0)	/* FIFO enable */
 
 #define LTQ_SPI_FSTAT_RXFFL_S	0
-#define LTQ_SPI_FSTAT_RXFFL_M	(0x3f << LTQ_SPI_FSTAT_RXFFL_S)
 #define LTQ_SPI_FSTAT_TXFFL_S	8
-#define LTQ_SPI_FSTAT_TXFFL_M	(0x3f << LTQ_SPI_FSTAT_TXFFL_S)
 
 #define LTQ_SPI_GPOCON_ISCSBN_S	8
 #define LTQ_SPI_GPOCON_INVOUTN_S	0
@@ -158,9 +149,16 @@
 #define LTQ_SPI_IRNEN_T_XRX	BIT(0)	/* Receive end interrupt request */
 #define LTQ_SPI_IRNEN_ALL	0x1F
 
+struct lantiq_ssc_spi;
+
 struct lantiq_ssc_hwcfg {
-	unsigned int irnen_r;
-	unsigned int irnen_t;
+	int (*cfg_irq)(struct platform_device *pdev, struct lantiq_ssc_spi *spi);
+	unsigned int	irnen_r;
+	unsigned int	irnen_t;
+	unsigned int	irncr;
+	unsigned int	irnicr;
+	bool		irq_ack;
+	u32		fifo_size_mask;
 };
 
 struct lantiq_ssc_spi {
@@ -210,16 +208,18 @@
 
 static unsigned int tx_fifo_level(const struct lantiq_ssc_spi *spi)
 {
+	const struct lantiq_ssc_hwcfg *hwcfg = spi->hwcfg;
 	u32 fstat = lantiq_ssc_readl(spi, LTQ_SPI_FSTAT);
 
-	return (fstat & LTQ_SPI_FSTAT_TXFFL_M) >> LTQ_SPI_FSTAT_TXFFL_S;
+	return (fstat >> LTQ_SPI_FSTAT_TXFFL_S) & hwcfg->fifo_size_mask;
 }
 
 static unsigned int rx_fifo_level(const struct lantiq_ssc_spi *spi)
 {
+	const struct lantiq_ssc_hwcfg *hwcfg = spi->hwcfg;
 	u32 fstat = lantiq_ssc_readl(spi, LTQ_SPI_FSTAT);
 
-	return fstat & LTQ_SPI_FSTAT_RXFFL_M;
+	return (fstat >> LTQ_SPI_FSTAT_RXFFL_S) & hwcfg->fifo_size_mask;
 }
 
 static unsigned int tx_fifo_free(const struct lantiq_ssc_spi *spi)
@@ -392,7 +392,7 @@
 	u32 gpocon;
 
 	/* GPIOs are used for CS */
-	if (gpio_is_valid(spidev->cs_gpio))
+	if (spidev->cs_gpiod)
 		return 0;
 
 	dev_dbg(spi->dev, "using internal chipselect %u\n", cs);
@@ -623,6 +623,12 @@
 static irqreturn_t lantiq_ssc_xmit_interrupt(int irq, void *data)
 {
 	struct lantiq_ssc_spi *spi = data;
+	const struct lantiq_ssc_hwcfg *hwcfg = spi->hwcfg;
+	u32 val = lantiq_ssc_readl(spi, hwcfg->irncr);
+
+	spin_lock(&spi->lock);
+	if (hwcfg->irq_ack)
+		lantiq_ssc_writel(spi, val, hwcfg->irncr);
 
 	if (spi->tx) {
 		if (spi->rx && spi->rx_todo)
@@ -645,10 +651,12 @@
 		}
 	}
 
+	spin_unlock(&spi->lock);
 	return IRQ_HANDLED;
 
 completed:
 	queue_work(spi->wq, &spi->work);
+	spin_unlock(&spi->lock);
 
 	return IRQ_HANDLED;
 }
@@ -656,11 +664,17 @@
 static irqreturn_t lantiq_ssc_err_interrupt(int irq, void *data)
 {
 	struct lantiq_ssc_spi *spi = data;
+	const struct lantiq_ssc_hwcfg *hwcfg = spi->hwcfg;
 	u32 stat = lantiq_ssc_readl(spi, LTQ_SPI_STAT);
+	u32 val = lantiq_ssc_readl(spi, hwcfg->irncr);
 
 	if (!(stat & LTQ_SPI_STAT_ERRORS))
 		return IRQ_NONE;
 
+	spin_lock(&spi->lock);
+	if (hwcfg->irq_ack)
+		lantiq_ssc_writel(spi, val, hwcfg->irncr);
+
 	if (stat & LTQ_SPI_STAT_RUE)
 		dev_err(spi->dev, "receive underflow error\n");
 	if (stat & LTQ_SPI_STAT_TUE)
@@ -681,6 +695,25 @@
 	if (spi->master->cur_msg)
 		spi->master->cur_msg->status = -EIO;
 	queue_work(spi->wq, &spi->work);
+	spin_unlock(&spi->lock);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t intel_lgm_ssc_isr(int irq, void *data)
+{
+	struct lantiq_ssc_spi *spi = data;
+	const struct lantiq_ssc_hwcfg *hwcfg = spi->hwcfg;
+	u32 val = lantiq_ssc_readl(spi, hwcfg->irncr);
+
+	if (!(val & LTQ_SPI_IRNEN_ALL))
+		return IRQ_NONE;
+
+	if (val & LTQ_SPI_IRNEN_E)
+		return lantiq_ssc_err_interrupt(irq, data);
+
+	if ((val & hwcfg->irnen_t) || (val & hwcfg->irnen_r))
+		return lantiq_ssc_xmit_interrupt(irq, data);
 
 	return IRQ_HANDLED;
 }
@@ -785,20 +818,84 @@
 	return transfer_start(spi, spidev, t);
 }
 
+static int intel_lgm_cfg_irq(struct platform_device *pdev, struct lantiq_ssc_spi *spi)
+{
+	int irq;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	return devm_request_irq(&pdev->dev, irq, intel_lgm_ssc_isr, 0, "spi", spi);
+}
+
+static int lantiq_cfg_irq(struct platform_device *pdev, struct lantiq_ssc_spi *spi)
+{
+	int irq, err;
+
+	irq = platform_get_irq_byname(pdev, LTQ_SPI_RX_IRQ_NAME);
+	if (irq < 0)
+		return irq;
+
+	err = devm_request_irq(&pdev->dev, irq, lantiq_ssc_xmit_interrupt,
+			       0, LTQ_SPI_RX_IRQ_NAME, spi);
+	if (err)
+		return err;
+
+	irq = platform_get_irq_byname(pdev, LTQ_SPI_TX_IRQ_NAME);
+	if (irq < 0)
+		return irq;
+
+	err = devm_request_irq(&pdev->dev, irq, lantiq_ssc_xmit_interrupt,
+			       0, LTQ_SPI_TX_IRQ_NAME, spi);
+
+	if (err)
+		return err;
+
+	irq = platform_get_irq_byname(pdev, LTQ_SPI_ERR_IRQ_NAME);
+	if (irq < 0)
+		return irq;
+
+	err = devm_request_irq(&pdev->dev, irq, lantiq_ssc_err_interrupt,
+			       0, LTQ_SPI_ERR_IRQ_NAME, spi);
+	return err;
+}
+
 static const struct lantiq_ssc_hwcfg lantiq_ssc_xway = {
-	.irnen_r = LTQ_SPI_IRNEN_R_XWAY,
-	.irnen_t = LTQ_SPI_IRNEN_T_XWAY,
+	.cfg_irq	= lantiq_cfg_irq,
+	.irnen_r	= LTQ_SPI_IRNEN_R_XWAY,
+	.irnen_t	= LTQ_SPI_IRNEN_T_XWAY,
+	.irnicr		= 0xF8,
+	.irncr		= 0xFC,
+	.fifo_size_mask	= GENMASK(5, 0),
+	.irq_ack	= false,
 };
 
 static const struct lantiq_ssc_hwcfg lantiq_ssc_xrx = {
-	.irnen_r = LTQ_SPI_IRNEN_R_XRX,
-	.irnen_t = LTQ_SPI_IRNEN_T_XRX,
+	.cfg_irq	= lantiq_cfg_irq,
+	.irnen_r	= LTQ_SPI_IRNEN_R_XRX,
+	.irnen_t	= LTQ_SPI_IRNEN_T_XRX,
+	.irnicr		= 0xF8,
+	.irncr		= 0xFC,
+	.fifo_size_mask	= GENMASK(5, 0),
+	.irq_ack	= false,
+};
+
+static const struct lantiq_ssc_hwcfg intel_ssc_lgm = {
+	.cfg_irq	= intel_lgm_cfg_irq,
+	.irnen_r	= LTQ_SPI_IRNEN_R_XRX,
+	.irnen_t	= LTQ_SPI_IRNEN_T_XRX,
+	.irnicr		= 0xFC,
+	.irncr		= 0xF8,
+	.fifo_size_mask	= GENMASK(7, 0),
+	.irq_ack	= true,
 };
 
 static const struct of_device_id lantiq_ssc_match[] = {
 	{ .compatible = "lantiq,ase-spi", .data = &lantiq_ssc_xway, },
 	{ .compatible = "lantiq,falcon-spi", .data = &lantiq_ssc_xrx, },
 	{ .compatible = "lantiq,xrx100-spi", .data = &lantiq_ssc_xrx, },
+	{ .compatible = "intel,lgm-spi", .data = &intel_ssc_lgm, },
 	{},
 };
 MODULE_DEVICE_TABLE(of, lantiq_ssc_match);
@@ -807,13 +904,12 @@
 {
 	struct device *dev = &pdev->dev;
 	struct spi_master *master;
-	struct resource *res;
 	struct lantiq_ssc_spi *spi;
 	const struct lantiq_ssc_hwcfg *hwcfg;
 	const struct of_device_id *match;
-	int err, rx_irq, tx_irq, err_irq;
 	u32 id, supports_dma, revision;
 	unsigned int num_cs;
+	int err;
 
 	match = of_match_device(lantiq_ssc_match, dev);
 	if (!match) {
@@ -822,24 +918,6 @@
 	}
 	hwcfg = match->data;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(dev, "failed to get resources\n");
-		return -ENXIO;
-	}
-
-	rx_irq = platform_get_irq_byname(pdev, LTQ_SPI_RX_IRQ_NAME);
-	if (rx_irq < 0)
-		return -ENXIO;
-
-	tx_irq = platform_get_irq_byname(pdev, LTQ_SPI_TX_IRQ_NAME);
-	if (tx_irq < 0)
-		return -ENXIO;
-
-	err_irq = platform_get_irq_byname(pdev, LTQ_SPI_ERR_IRQ_NAME);
-	if (err_irq < 0)
-		return -ENXIO;
-
 	master = spi_alloc_master(dev, sizeof(struct lantiq_ssc_spi));
 	if (!master)
 		return -ENOMEM;
@@ -849,25 +927,13 @@
 	spi->dev = dev;
 	spi->hwcfg = hwcfg;
 	platform_set_drvdata(pdev, spi);
-
-	spi->regbase = devm_ioremap_resource(dev, res);
+	spi->regbase = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(spi->regbase)) {
 		err = PTR_ERR(spi->regbase);
 		goto err_master_put;
 	}
 
-	err = devm_request_irq(dev, rx_irq, lantiq_ssc_xmit_interrupt,
-			       0, LTQ_SPI_RX_IRQ_NAME, spi);
-	if (err)
-		goto err_master_put;
-
-	err = devm_request_irq(dev, tx_irq, lantiq_ssc_xmit_interrupt,
-			       0, LTQ_SPI_TX_IRQ_NAME, spi);
-	if (err)
-		goto err_master_put;
-
-	err = devm_request_irq(dev, err_irq, lantiq_ssc_err_interrupt,
-			       0, LTQ_SPI_ERR_IRQ_NAME, spi);
+	err = hwcfg->cfg_irq(pdev, spi);
 	if (err)
 		goto err_master_put;
 
@@ -906,6 +972,7 @@
 
 	master->dev.of_node = pdev->dev.of_node;
 	master->num_chipselect = num_cs;
+	master->use_gpio_descriptors = true;
 	master->setup = lantiq_ssc_setup;
 	master->set_cs = lantiq_ssc_set_cs;
 	master->handle_err = lantiq_ssc_handle_err;
@@ -925,8 +992,8 @@
 	INIT_WORK(&spi->work, lantiq_ssc_bussy_work);
 
 	id = lantiq_ssc_readl(spi, LTQ_SPI_ID);
-	spi->tx_fifo_size = (id & LTQ_SPI_ID_TXFS_M) >> LTQ_SPI_ID_TXFS_S;
-	spi->rx_fifo_size = (id & LTQ_SPI_ID_RXFS_M) >> LTQ_SPI_ID_RXFS_S;
+	spi->tx_fifo_size = (id >> LTQ_SPI_ID_TXFS_S) & hwcfg->fifo_size_mask;
+	spi->rx_fifo_size = (id >> LTQ_SPI_ID_RXFS_S) & hwcfg->fifo_size_mask;
 	supports_dma = (id & LTQ_SPI_ID_CFG_M) >> LTQ_SPI_ID_CFG_S;
 	revision = id & LTQ_SPI_ID_REV_M;
 
diff --git a/drivers/spi/spi-loopback-test.c b/drivers/spi/spi-loopback-test.c
index 69a9df2..4d4f77a 100644
--- a/drivers/spi/spi-loopback-test.c
+++ b/drivers/spi/spi-loopback-test.c
@@ -298,12 +298,18 @@
 			{
 				.tx_buf = TX(0),
 				.rx_buf = RX(0),
-				.delay_usecs = 1000,
+				.delay = {
+					.value = 1000,
+					.unit = SPI_DELAY_UNIT_USECS,
+				},
 			},
 			{
 				.tx_buf = TX(0),
 				.rx_buf = RX(0),
-				.delay_usecs = 1000,
+				.delay = {
+					.value = 1000,
+					.unit = SPI_DELAY_UNIT_USECS,
+				},
 			},
 		},
 	},
@@ -448,7 +454,8 @@
 	u8 *end;
 };
 
-static int rx_ranges_cmp(void *priv, struct list_head *a, struct list_head *b)
+static int rx_ranges_cmp(void *priv, const struct list_head *a,
+			 const struct list_head *b)
 {
 	struct rx_ranges *rx_a = list_entry(a, struct rx_ranges, list);
 	struct rx_ranges *rx_b = list_entry(b, struct rx_ranges, list);
@@ -537,7 +544,7 @@
 		unsigned long long nbits = (unsigned long long)BITS_PER_BYTE *
 					   xfer->len;
 
-		delay_usecs += xfer->delay_usecs;
+		delay_usecs += xfer->delay.value;
 		if (!xfer->speed_hz)
 			continue;
 		estimated_time += div_u64(nbits * NSEC_PER_SEC, xfer->speed_hz);
@@ -879,10 +886,10 @@
 /**
  * spi_test_execute_msg - default implementation to run a test
  *
- * spi: @spi_device on which to run the @spi_message
- * test: the test to execute, which already contains @msg
- * tx:   the tx buffer allocated for the test sequence
- * rx:   the rx buffer allocated for the test sequence
+ * @spi: @spi_device on which to run the @spi_message
+ * @test: the test to execute, which already contains @msg
+ * @tx:   the tx buffer allocated for the test sequence
+ * @rx:   the rx buffer allocated for the test sequence
  *
  * Returns: error code of spi_sync as well as basic error checking
  */
@@ -951,10 +958,10 @@
  *                     including all the relevant iterations on:
  *                     length and buffer alignment
  *
- * spi:  the spi_device to send the messages to
- * test: the test which we need to execute
- * tx:   the tx buffer allocated for the test sequence
- * rx:   the rx buffer allocated for the test sequence
+ * @spi:  the spi_device to send the messages to
+ * @test: the test which we need to execute
+ * @tx:   the tx buffer allocated for the test sequence
+ * @rx:   the rx buffer allocated for the test sequence
  *
  * Returns: status code of spi_sync or other failures
  */
diff --git a/drivers/spi/spi-mem.c b/drivers/spi/spi-mem.c
index 33115bc..4682f49 100644
--- a/drivers/spi/spi-mem.c
+++ b/drivers/spi/spi-mem.c
@@ -156,6 +156,12 @@
 				   op->data.dir == SPI_MEM_DATA_OUT))
 		return false;
 
+	if (op->cmd.dtr || op->addr.dtr || op->dummy.dtr || op->data.dtr)
+		return false;
+
+	if (op->cmd.nbytes != 1)
+		return false;
+
 	return true;
 }
 EXPORT_SYMBOL_GPL(spi_mem_default_supports_op);
@@ -170,7 +176,7 @@
 
 static int spi_mem_check_op(const struct spi_mem_op *op)
 {
-	if (!op->cmd.buswidth)
+	if (!op->cmd.buswidth || !op->cmd.nbytes)
 		return -EINVAL;
 
 	if ((op->addr.nbytes && !op->addr.buswidth) ||
@@ -289,7 +295,7 @@
 	if (!spi_mem_internal_supports_op(mem, op))
 		return -ENOTSUPP;
 
-	if (ctlr->mem_ops) {
+	if (ctlr->mem_ops && !mem->spi->cs_gpiod) {
 		ret = spi_mem_access_start(mem);
 		if (ret)
 			return ret;
@@ -307,8 +313,7 @@
 			return ret;
 	}
 
-	tmpbufsize = sizeof(op->cmd.opcode) + op->addr.nbytes +
-		     op->dummy.nbytes;
+	tmpbufsize = op->cmd.nbytes + op->addr.nbytes + op->dummy.nbytes;
 
 	/*
 	 * Allocate a buffer to transmit the CMD, ADDR cycles with kmalloc() so
@@ -323,7 +328,7 @@
 
 	tmpbuf[0] = op->cmd.opcode;
 	xfers[xferpos].tx_buf = tmpbuf;
-	xfers[xferpos].len = sizeof(op->cmd.opcode);
+	xfers[xferpos].len = op->cmd.nbytes;
 	xfers[xferpos].tx_nbits = op->cmd.buswidth;
 	spi_message_add_tail(&xfers[xferpos], &msg);
 	xferpos++;
@@ -421,12 +426,12 @@
 	struct spi_controller *ctlr = mem->spi->controller;
 	size_t len;
 
-	len = sizeof(op->cmd.opcode) + op->addr.nbytes + op->dummy.nbytes;
-
 	if (ctlr->mem_ops && ctlr->mem_ops->adjust_op_size)
 		return ctlr->mem_ops->adjust_op_size(mem, op);
 
 	if (!ctlr->mem_ops || !ctlr->mem_ops->exec_op) {
+		len = op->cmd.nbytes + op->addr.nbytes + op->dummy.nbytes;
+
 		if (len > spi_max_transfer_size(mem->spi))
 			return -EINVAL;
 
@@ -490,7 +495,7 @@
  * This function is creating a direct mapping descriptor which can then be used
  * to access the memory using spi_mem_dirmap_read() or spi_mem_dirmap_write().
  * If the SPI controller driver does not support direct mapping, this function
- * fallback to an implementation using spi_mem_exec_op(), so that the caller
+ * falls back to an implementation using spi_mem_exec_op(), so that the caller
  * doesn't have to bother implementing a fallback on his own.
  *
  * Return: a valid pointer in case of success, and ERR_PTR() otherwise.
diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c
index 3c841ae..0bc7daa 100644
--- a/drivers/spi/spi-meson-spicc.c
+++ b/drivers/spi/spi-meson-spicc.c
@@ -9,17 +9,18 @@
 
 #include <linux/bitfield.h>
 #include <linux/clk.h>
+#include <linux/clk-provider.h>
 #include <linux/device.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/spi/spi.h>
 #include <linux/types.h>
 #include <linux/interrupt.h>
 #include <linux/reset.h>
-#include <linux/gpio.h>
 
 /*
  * The Meson SPICC controller could support DMA based transfers, but is not
@@ -34,7 +35,6 @@
  *   to have a CS go down over the full transfer
  */
 
-#define SPICC_MAX_FREQ	30000000
 #define SPICC_MAX_BURST	128
 
 /* Register Map */
@@ -106,7 +106,21 @@
 #define SPICC_SWAP_RO		BIT(14) /* RX FIFO Data Swap Read-Only */
 #define SPICC_SWAP_W1		BIT(15) /* RX FIFO Data Swap Write-Only */
 #define SPICC_DLYCTL_RO_MASK	GENMASK(20, 15) /* Delay Control Read-Only */
-#define SPICC_DLYCTL_W1_MASK	GENMASK(21, 16) /* Delay Control Write-Only */
+#define SPICC_MO_DELAY_MASK	GENMASK(17, 16) /* Master Output Delay */
+#define SPICC_MO_NO_DELAY	0
+#define SPICC_MO_DELAY_1_CYCLE	1
+#define SPICC_MO_DELAY_2_CYCLE	2
+#define SPICC_MO_DELAY_3_CYCLE	3
+#define SPICC_MI_DELAY_MASK	GENMASK(19, 18) /* Master Input Delay */
+#define SPICC_MI_NO_DELAY	0
+#define SPICC_MI_DELAY_1_CYCLE	1
+#define SPICC_MI_DELAY_2_CYCLE	2
+#define SPICC_MI_DELAY_3_CYCLE	3
+#define SPICC_MI_CAP_DELAY_MASK	GENMASK(21, 20) /* Master Capture Delay */
+#define SPICC_CAP_AHEAD_2_CYCLE	0
+#define SPICC_CAP_AHEAD_1_CYCLE	1
+#define SPICC_CAP_NO_DELAY	2
+#define SPICC_CAP_DELAY_1_CYCLE	3
 #define SPICC_FIFORST_RO_MASK	GENMASK(22, 21) /* FIFO Softreset Read-Only */
 #define SPICC_FIFORST_W1_MASK	GENMASK(23, 22) /* FIFO Softreset Write-Only */
 
@@ -114,31 +128,59 @@
 
 #define SPICC_DWADDR	0x24	/* Write Address of DMA */
 
+#define SPICC_ENH_CTL0	0x38	/* Enhanced Feature */
+#define SPICC_ENH_CLK_CS_DELAY_MASK	GENMASK(15, 0)
+#define SPICC_ENH_DATARATE_MASK		GENMASK(23, 16)
+#define SPICC_ENH_DATARATE_EN		BIT(24)
+#define SPICC_ENH_MOSI_OEN		BIT(25)
+#define SPICC_ENH_CLK_OEN		BIT(26)
+#define SPICC_ENH_CS_OEN		BIT(27)
+#define SPICC_ENH_CLK_CS_DELAY_EN	BIT(28)
+#define SPICC_ENH_MAIN_CLK_AO		BIT(29)
+
 #define writel_bits_relaxed(mask, val, addr) \
 	writel_relaxed((readl_relaxed(addr) & ~(mask)) | (val), addr)
 
-#define SPICC_BURST_MAX	16
-#define SPICC_FIFO_HALF 10
+struct meson_spicc_data {
+	unsigned int			max_speed_hz;
+	unsigned int			min_speed_hz;
+	unsigned int			fifo_size;
+	bool				has_oen;
+	bool				has_enhance_clk_div;
+	bool				has_pclk;
+};
 
 struct meson_spicc_device {
 	struct spi_master		*master;
 	struct platform_device		*pdev;
 	void __iomem			*base;
 	struct clk			*core;
+	struct clk			*pclk;
+	struct clk			*clk;
 	struct spi_message		*message;
 	struct spi_transfer		*xfer;
+	const struct meson_spicc_data	*data;
 	u8				*tx_buf;
 	u8				*rx_buf;
 	unsigned int			bytes_per_word;
 	unsigned long			tx_remain;
-	unsigned long			txb_remain;
 	unsigned long			rx_remain;
-	unsigned long			rxb_remain;
 	unsigned long			xfer_remain;
-	bool				is_burst_end;
-	bool				is_last_burst;
 };
 
+static void meson_spicc_oen_enable(struct meson_spicc_device *spicc)
+{
+	u32 conf;
+
+	if (!spicc->data->has_oen)
+		return;
+
+	conf = readl_relaxed(spicc->base + SPICC_ENH_CTL0) |
+		SPICC_ENH_MOSI_OEN | SPICC_ENH_CLK_OEN | SPICC_ENH_CS_OEN;
+
+	writel_relaxed(conf, spicc->base + SPICC_ENH_CTL0);
+}
+
 static inline bool meson_spicc_txfull(struct meson_spicc_device *spicc)
 {
 	return !!FIELD_GET(SPICC_TF,
@@ -147,7 +189,7 @@
 
 static inline bool meson_spicc_rxready(struct meson_spicc_device *spicc)
 {
-	return FIELD_GET(SPICC_RH | SPICC_RR | SPICC_RF_EN,
+	return FIELD_GET(SPICC_RH | SPICC_RR | SPICC_RF,
 			 readl_relaxed(spicc->base + SPICC_STATREG));
 }
 
@@ -202,34 +244,22 @@
 			       spicc->base + SPICC_TXDATA);
 }
 
-static inline u32 meson_spicc_setup_rx_irq(struct meson_spicc_device *spicc,
-					   u32 irq_ctrl)
+static inline void meson_spicc_setup_burst(struct meson_spicc_device *spicc)
 {
-	if (spicc->rx_remain > SPICC_FIFO_HALF)
-		irq_ctrl |= SPICC_RH_EN;
-	else
-		irq_ctrl |= SPICC_RR_EN;
 
-	return irq_ctrl;
-}
-
-static inline void meson_spicc_setup_burst(struct meson_spicc_device *spicc,
-					   unsigned int burst_len)
-{
+	unsigned int burst_len = min_t(unsigned int,
+				       spicc->xfer_remain /
+				       spicc->bytes_per_word,
+				       spicc->data->fifo_size);
 	/* Setup Xfer variables */
 	spicc->tx_remain = burst_len;
 	spicc->rx_remain = burst_len;
 	spicc->xfer_remain -= burst_len * spicc->bytes_per_word;
-	spicc->is_burst_end = false;
-	if (burst_len < SPICC_BURST_MAX || !spicc->xfer_remain)
-		spicc->is_last_burst = true;
-	else
-		spicc->is_last_burst = false;
 
 	/* Setup burst length */
 	writel_bits_relaxed(SPICC_BURSTLENGTH_MASK,
 			FIELD_PREP(SPICC_BURSTLENGTH_MASK,
-				burst_len),
+				burst_len - 1),
 			spicc->base + SPICC_CONREG);
 
 	/* Fill TX FIFO */
@@ -239,97 +269,71 @@
 static irqreturn_t meson_spicc_irq(int irq, void *data)
 {
 	struct meson_spicc_device *spicc = (void *) data;
-	u32 ctrl = readl_relaxed(spicc->base + SPICC_INTREG);
-	u32 stat = readl_relaxed(spicc->base + SPICC_STATREG) & ctrl;
 
-	ctrl &= ~(SPICC_RH_EN | SPICC_RR_EN);
+	writel_bits_relaxed(SPICC_TC, SPICC_TC, spicc->base + SPICC_STATREG);
 
 	/* Empty RX FIFO */
 	meson_spicc_rx(spicc);
 
-	/* Enable TC interrupt since we transferred everything */
-	if (!spicc->tx_remain && !spicc->rx_remain) {
-		spicc->is_burst_end = true;
+	if (!spicc->xfer_remain) {
+		/* Disable all IRQs */
+		writel(0, spicc->base + SPICC_INTREG);
 
-		/* Enable TC interrupt */
-		ctrl |= SPICC_TC_EN;
+		spi_finalize_current_transfer(spicc->master);
 
-		/* Reload IRQ status */
-		stat = readl_relaxed(spicc->base + SPICC_STATREG) & ctrl;
+		return IRQ_HANDLED;
 	}
 
-	/* Check transfer complete */
-	if ((stat & SPICC_TC) && spicc->is_burst_end) {
-		unsigned int burst_len;
+	/* Setup burst */
+	meson_spicc_setup_burst(spicc);
 
-		/* Clear TC bit */
-		writel_relaxed(SPICC_TC, spicc->base + SPICC_STATREG);
-
-		/* Disable TC interrupt */
-		ctrl &= ~SPICC_TC_EN;
-
-		if (spicc->is_last_burst) {
-			/* Disable all IRQs */
-			writel(0, spicc->base + SPICC_INTREG);
-
-			spi_finalize_current_transfer(spicc->master);
-
-			return IRQ_HANDLED;
-		}
-
-		burst_len = min_t(unsigned int,
-				  spicc->xfer_remain / spicc->bytes_per_word,
-				  SPICC_BURST_MAX);
-
-		/* Setup burst */
-		meson_spicc_setup_burst(spicc, burst_len);
-
-		/* Restart burst */
-		writel_bits_relaxed(SPICC_XCH, SPICC_XCH,
-				    spicc->base + SPICC_CONREG);
-	}
-
-	/* Setup RX interrupt trigger */
-	ctrl = meson_spicc_setup_rx_irq(spicc, ctrl);
-
-	/* Reconfigure interrupts */
-	writel(ctrl, spicc->base + SPICC_INTREG);
+	/* Start burst */
+	writel_bits_relaxed(SPICC_XCH, SPICC_XCH, spicc->base + SPICC_CONREG);
 
 	return IRQ_HANDLED;
 }
 
-static u32 meson_spicc_setup_speed(struct meson_spicc_device *spicc, u32 conf,
-				   u32 speed)
+static void meson_spicc_auto_io_delay(struct meson_spicc_device *spicc)
 {
-	unsigned long parent, value;
-	unsigned int i, div;
+	u32 div, hz;
+	u32 mi_delay, cap_delay;
+	u32 conf;
 
-	parent = clk_get_rate(spicc->core);
-
-	/* Find closest inferior/equal possible speed */
-	for (i = 0 ; i < 7 ; ++i) {
-		/* 2^(data_rate+2) */
-		value = parent >> (i + 2);
-
-		if (value <= speed)
-			break;
+	if (spicc->data->has_enhance_clk_div) {
+		div = FIELD_GET(SPICC_ENH_DATARATE_MASK,
+				readl_relaxed(spicc->base + SPICC_ENH_CTL0));
+		div++;
+		div <<= 1;
+	} else {
+		div = FIELD_GET(SPICC_DATARATE_MASK,
+				readl_relaxed(spicc->base + SPICC_CONREG));
+		div += 2;
+		div = 1 << div;
 	}
 
-	/* If provided speed it lower than max divider, use max divider */
-	if (i > 7) {
-		div = 7;
-		dev_warn_once(&spicc->pdev->dev, "unable to get close to speed %u\n",
-			      speed);
-	} else
-		div = i;
+	mi_delay = SPICC_MI_NO_DELAY;
+	cap_delay = SPICC_CAP_AHEAD_2_CYCLE;
+	hz = clk_get_rate(spicc->clk);
 
-	dev_dbg(&spicc->pdev->dev, "parent %lu, speed %u -> %lu (%u)\n",
-		parent, speed, value, div);
+	if (hz >= 100000000)
+		cap_delay = SPICC_CAP_DELAY_1_CYCLE;
+	else if (hz >= 80000000)
+		cap_delay = SPICC_CAP_NO_DELAY;
+	else if (hz >= 40000000)
+		cap_delay = SPICC_CAP_AHEAD_1_CYCLE;
+	else if (div >= 16)
+		mi_delay = SPICC_MI_DELAY_3_CYCLE;
+	else if (div >= 8)
+		mi_delay = SPICC_MI_DELAY_2_CYCLE;
+	else if (div >= 6)
+		mi_delay = SPICC_MI_DELAY_1_CYCLE;
 
-	conf &= ~SPICC_DATARATE_MASK;
-	conf |= FIELD_PREP(SPICC_DATARATE_MASK, div);
-
-	return conf;
+	conf = readl_relaxed(spicc->base + SPICC_TESTREG);
+	conf &= ~(SPICC_MO_DELAY_MASK | SPICC_MI_DELAY_MASK
+		  | SPICC_MI_CAP_DELAY_MASK);
+	conf |= FIELD_PREP(SPICC_MI_DELAY_MASK, mi_delay);
+	conf |= FIELD_PREP(SPICC_MI_CAP_DELAY_MASK, cap_delay);
+	writel_relaxed(conf, spicc->base + SPICC_TESTREG);
 }
 
 static void meson_spicc_setup_xfer(struct meson_spicc_device *spicc,
@@ -340,9 +344,6 @@
 	/* Read original configuration */
 	conf = conf_orig = readl_relaxed(spicc->base + SPICC_CONREG);
 
-	/* Select closest divider */
-	conf = meson_spicc_setup_speed(spicc, conf, xfer->speed_hz);
-
 	/* Setup word width */
 	conf &= ~SPICC_BITLENGTH_MASK;
 	conf |= FIELD_PREP(SPICC_BITLENGTH_MASK,
@@ -351,6 +352,30 @@
 	/* Ignore if unchanged */
 	if (conf != conf_orig)
 		writel_relaxed(conf, spicc->base + SPICC_CONREG);
+
+	clk_set_rate(spicc->clk, xfer->speed_hz);
+
+	meson_spicc_auto_io_delay(spicc);
+
+	writel_relaxed(0, spicc->base + SPICC_DMAREG);
+}
+
+static void meson_spicc_reset_fifo(struct meson_spicc_device *spicc)
+{
+	if (spicc->data->has_oen)
+		writel_bits_relaxed(SPICC_ENH_MAIN_CLK_AO,
+				    SPICC_ENH_MAIN_CLK_AO,
+				    spicc->base + SPICC_ENH_CTL0);
+
+	writel_bits_relaxed(SPICC_FIFORST_W1_MASK, SPICC_FIFORST_W1_MASK,
+			    spicc->base + SPICC_TESTREG);
+
+	while (meson_spicc_rxready(spicc))
+		readl_relaxed(spicc->base + SPICC_RXDATA);
+
+	if (spicc->data->has_oen)
+		writel_bits_relaxed(SPICC_ENH_MAIN_CLK_AO, 0,
+				    spicc->base + SPICC_ENH_CTL0);
 }
 
 static int meson_spicc_transfer_one(struct spi_master *master,
@@ -358,8 +383,6 @@
 				    struct spi_transfer *xfer)
 {
 	struct meson_spicc_device *spicc = spi_master_get_devdata(master);
-	unsigned int burst_len;
-	u32 irq = 0;
 
 	/* Store current transfer */
 	spicc->xfer = xfer;
@@ -373,22 +396,22 @@
 	spicc->bytes_per_word =
 	   DIV_ROUND_UP(spicc->xfer->bits_per_word, 8);
 
+	if (xfer->len % spicc->bytes_per_word)
+		return -EINVAL;
+
 	/* Setup transfer parameters */
 	meson_spicc_setup_xfer(spicc, xfer);
 
-	burst_len = min_t(unsigned int,
-			  spicc->xfer_remain / spicc->bytes_per_word,
-			  SPICC_BURST_MAX);
+	meson_spicc_reset_fifo(spicc);
 
-	meson_spicc_setup_burst(spicc, burst_len);
-
-	irq = meson_spicc_setup_rx_irq(spicc, irq);
+	/* Setup burst */
+	meson_spicc_setup_burst(spicc);
 
 	/* Start burst */
 	writel_bits_relaxed(SPICC_XCH, SPICC_XCH, spicc->base + SPICC_CONREG);
 
 	/* Enable interrupts */
-	writel_relaxed(irq, spicc->base + SPICC_INTREG);
+	writel_relaxed(SPICC_TC_EN, spicc->base + SPICC_INTREG);
 
 	return 1;
 }
@@ -445,7 +468,7 @@
 	/* Setup no wait cycles by default */
 	writel_relaxed(0, spicc->base + SPICC_PERIODREG);
 
-	writel_bits_relaxed(BIT(24), BIT(24), spicc->base + SPICC_TESTREG);
+	writel_bits_relaxed(SPICC_LBC_W1, 0, spicc->base + SPICC_TESTREG);
 
 	return 0;
 }
@@ -457,9 +480,6 @@
 	/* Disable all IRQs */
 	writel(0, spicc->base + SPICC_INTREG);
 
-	/* Disable controller */
-	writel_bits_relaxed(SPICC_ENABLE, 0, spicc->base + SPICC_CONREG);
-
 	device_reset_optional(&spicc->pdev->dev);
 
 	return 0;
@@ -467,43 +487,178 @@
 
 static int meson_spicc_setup(struct spi_device *spi)
 {
-	int ret = 0;
-
 	if (!spi->controller_state)
 		spi->controller_state = spi_master_get_devdata(spi->master);
-	else if (gpio_is_valid(spi->cs_gpio))
-		goto out_gpio;
-	else if (spi->cs_gpio == -ENOENT)
-		return 0;
 
-	if (gpio_is_valid(spi->cs_gpio)) {
-		ret = gpio_request(spi->cs_gpio, dev_name(&spi->dev));
-		if (ret) {
-			dev_err(&spi->dev, "failed to request cs gpio\n");
-			return ret;
-		}
-	}
-
-out_gpio:
-	ret = gpio_direction_output(spi->cs_gpio,
-			!(spi->mode & SPI_CS_HIGH));
-
-	return ret;
+	return 0;
 }
 
 static void meson_spicc_cleanup(struct spi_device *spi)
 {
-	if (gpio_is_valid(spi->cs_gpio))
-		gpio_free(spi->cs_gpio);
-
 	spi->controller_state = NULL;
 }
 
+/*
+ * The Clock Mux
+ *            x-----------------x   x------------x    x------\
+ *        |---| pow2 fixed div  |---| pow2 div   |----|      |
+ *        |   x-----------------x   x------------x    |      |
+ * src ---|                                           | mux  |-- out
+ *        |   x-----------------x   x------------x    |      |
+ *        |---| enh fixed div   |---| enh div    |0---|      |
+ *            x-----------------x   x------------x    x------/
+ *
+ * Clk path for GX series:
+ *    src -> pow2 fixed div -> pow2 div -> out
+ *
+ * Clk path for AXG series:
+ *    src -> pow2 fixed div -> pow2 div -> mux -> out
+ *    src -> enh fixed div -> enh div -> mux -> out
+ *
+ * Clk path for G12A series:
+ *    pclk -> pow2 fixed div -> pow2 div -> mux -> out
+ *    pclk -> enh fixed div -> enh div -> mux -> out
+ */
+
+static int meson_spicc_clk_init(struct meson_spicc_device *spicc)
+{
+	struct device *dev = &spicc->pdev->dev;
+	struct clk_fixed_factor *pow2_fixed_div, *enh_fixed_div;
+	struct clk_divider *pow2_div, *enh_div;
+	struct clk_mux *mux;
+	struct clk_init_data init;
+	struct clk *clk;
+	struct clk_parent_data parent_data[2];
+	char name[64];
+
+	memset(&init, 0, sizeof(init));
+	memset(&parent_data, 0, sizeof(parent_data));
+
+	init.parent_data = parent_data;
+
+	/* algorithm for pow2 div: rate = freq / 4 / (2 ^ N) */
+
+	pow2_fixed_div = devm_kzalloc(dev, sizeof(*pow2_fixed_div), GFP_KERNEL);
+	if (!pow2_fixed_div)
+		return -ENOMEM;
+
+	snprintf(name, sizeof(name), "%s#pow2_fixed_div", dev_name(dev));
+	init.name = name;
+	init.ops = &clk_fixed_factor_ops;
+	init.flags = 0;
+	if (spicc->data->has_pclk)
+		parent_data[0].hw = __clk_get_hw(spicc->pclk);
+	else
+		parent_data[0].hw = __clk_get_hw(spicc->core);
+	init.num_parents = 1;
+
+	pow2_fixed_div->mult = 1,
+	pow2_fixed_div->div = 4,
+	pow2_fixed_div->hw.init = &init;
+
+	clk = devm_clk_register(dev, &pow2_fixed_div->hw);
+	if (WARN_ON(IS_ERR(clk)))
+		return PTR_ERR(clk);
+
+	pow2_div = devm_kzalloc(dev, sizeof(*pow2_div), GFP_KERNEL);
+	if (!pow2_div)
+		return -ENOMEM;
+
+	snprintf(name, sizeof(name), "%s#pow2_div", dev_name(dev));
+	init.name = name;
+	init.ops = &clk_divider_ops;
+	init.flags = CLK_SET_RATE_PARENT;
+	parent_data[0].hw = &pow2_fixed_div->hw;
+	init.num_parents = 1;
+
+	pow2_div->shift = 16,
+	pow2_div->width = 3,
+	pow2_div->flags = CLK_DIVIDER_POWER_OF_TWO,
+	pow2_div->reg = spicc->base + SPICC_CONREG;
+	pow2_div->hw.init = &init;
+
+	clk = devm_clk_register(dev, &pow2_div->hw);
+	if (WARN_ON(IS_ERR(clk)))
+		return PTR_ERR(clk);
+
+	if (!spicc->data->has_enhance_clk_div) {
+		spicc->clk = clk;
+		return 0;
+	}
+
+	/* algorithm for enh div: rate = freq / 2 / (N + 1) */
+
+	enh_fixed_div = devm_kzalloc(dev, sizeof(*enh_fixed_div), GFP_KERNEL);
+	if (!enh_fixed_div)
+		return -ENOMEM;
+
+	snprintf(name, sizeof(name), "%s#enh_fixed_div", dev_name(dev));
+	init.name = name;
+	init.ops = &clk_fixed_factor_ops;
+	init.flags = 0;
+	if (spicc->data->has_pclk)
+		parent_data[0].hw = __clk_get_hw(spicc->pclk);
+	else
+		parent_data[0].hw = __clk_get_hw(spicc->core);
+	init.num_parents = 1;
+
+	enh_fixed_div->mult = 1,
+	enh_fixed_div->div = 2,
+	enh_fixed_div->hw.init = &init;
+
+	clk = devm_clk_register(dev, &enh_fixed_div->hw);
+	if (WARN_ON(IS_ERR(clk)))
+		return PTR_ERR(clk);
+
+	enh_div = devm_kzalloc(dev, sizeof(*enh_div), GFP_KERNEL);
+	if (!enh_div)
+		return -ENOMEM;
+
+	snprintf(name, sizeof(name), "%s#enh_div", dev_name(dev));
+	init.name = name;
+	init.ops = &clk_divider_ops;
+	init.flags = CLK_SET_RATE_PARENT;
+	parent_data[0].hw = &enh_fixed_div->hw;
+	init.num_parents = 1;
+
+	enh_div->shift	= 16,
+	enh_div->width	= 8,
+	enh_div->reg = spicc->base + SPICC_ENH_CTL0;
+	enh_div->hw.init = &init;
+
+	clk = devm_clk_register(dev, &enh_div->hw);
+	if (WARN_ON(IS_ERR(clk)))
+		return PTR_ERR(clk);
+
+	mux = devm_kzalloc(dev, sizeof(*mux), GFP_KERNEL);
+	if (!mux)
+		return -ENOMEM;
+
+	snprintf(name, sizeof(name), "%s#sel", dev_name(dev));
+	init.name = name;
+	init.ops = &clk_mux_ops;
+	parent_data[0].hw = &pow2_div->hw;
+	parent_data[1].hw = &enh_div->hw;
+	init.num_parents = 2;
+	init.flags = CLK_SET_RATE_PARENT;
+
+	mux->mask = 0x1,
+	mux->shift = 24,
+	mux->reg = spicc->base + SPICC_ENH_CTL0;
+	mux->hw.init = &init;
+
+	spicc->clk = devm_clk_register(dev, &mux->hw);
+	if (WARN_ON(IS_ERR(spicc->clk)))
+		return PTR_ERR(spicc->clk);
+
+	return 0;
+}
+
 static int meson_spicc_probe(struct platform_device *pdev)
 {
 	struct spi_master *master;
 	struct meson_spicc_device *spicc;
-	int ret, irq, rate;
+	int ret, irq;
 
 	master = spi_alloc_master(&pdev->dev, sizeof(*spicc));
 	if (!master) {
@@ -513,6 +668,13 @@
 	spicc = spi_master_get_devdata(master);
 	spicc->master = master;
 
+	spicc->data = of_device_get_match_data(&pdev->dev);
+	if (!spicc->data) {
+		dev_err(&pdev->dev, "failed to get match data\n");
+		ret = -EINVAL;
+		goto out_master;
+	}
+
 	spicc->pdev = pdev;
 	platform_set_drvdata(pdev, spicc);
 
@@ -523,10 +685,19 @@
 		goto out_master;
 	}
 
+	/* Set master mode and enable controller */
+	writel_relaxed(SPICC_ENABLE | SPICC_MODE_MASTER,
+		       spicc->base + SPICC_CONREG);
+
 	/* Disable all IRQs */
 	writel_relaxed(0, spicc->base + SPICC_INTREG);
 
 	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		ret = irq;
+		goto out_master;
+	}
+
 	ret = devm_request_irq(&pdev->dev, irq, meson_spicc_irq,
 			       0, NULL, spicc);
 	if (ret) {
@@ -541,12 +712,26 @@
 		goto out_master;
 	}
 
+	if (spicc->data->has_pclk) {
+		spicc->pclk = devm_clk_get(&pdev->dev, "pclk");
+		if (IS_ERR(spicc->pclk)) {
+			dev_err(&pdev->dev, "pclk clock request failed\n");
+			ret = PTR_ERR(spicc->pclk);
+			goto out_master;
+		}
+	}
+
 	ret = clk_prepare_enable(spicc->core);
 	if (ret) {
 		dev_err(&pdev->dev, "core clock enable failed\n");
 		goto out_master;
 	}
-	rate = clk_get_rate(spicc->core);
+
+	ret = clk_prepare_enable(spicc->pclk);
+	if (ret) {
+		dev_err(&pdev->dev, "pclk clock enable failed\n");
+		goto out_core_clk;
+	}
 
 	device_reset_optional(&pdev->dev);
 
@@ -558,18 +743,22 @@
 				     SPI_BPW_MASK(16) |
 				     SPI_BPW_MASK(8);
 	master->flags = (SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX);
-	master->min_speed_hz = rate >> 9;
+	master->min_speed_hz = spicc->data->min_speed_hz;
+	master->max_speed_hz = spicc->data->max_speed_hz;
 	master->setup = meson_spicc_setup;
 	master->cleanup = meson_spicc_cleanup;
 	master->prepare_message = meson_spicc_prepare_message;
 	master->unprepare_transfer_hardware = meson_spicc_unprepare_transfer;
 	master->transfer_one = meson_spicc_transfer_one;
+	master->use_gpio_descriptors = true;
 
-	/* Setup max rate according to the Meson GX datasheet */
-	if ((rate >> 2) > SPICC_MAX_FREQ)
-		master->max_speed_hz = SPICC_MAX_FREQ;
-	else
-		master->max_speed_hz = rate >> 2;
+	meson_spicc_oen_enable(spicc);
+
+	ret = meson_spicc_clk_init(spicc);
+	if (ret) {
+		dev_err(&pdev->dev, "clock registration failed\n");
+		goto out_clk;
+	}
 
 	ret = devm_spi_register_master(&pdev->dev, master);
 	if (ret) {
@@ -580,6 +769,9 @@
 	return 0;
 
 out_clk:
+	clk_disable_unprepare(spicc->pclk);
+
+out_core_clk:
 	clk_disable_unprepare(spicc->core);
 
 out_master:
@@ -596,15 +788,49 @@
 	writel(0, spicc->base + SPICC_CONREG);
 
 	clk_disable_unprepare(spicc->core);
+	clk_disable_unprepare(spicc->pclk);
 
 	spi_master_put(spicc->master);
 
 	return 0;
 }
 
+static const struct meson_spicc_data meson_spicc_gx_data = {
+	.max_speed_hz		= 30000000,
+	.min_speed_hz		= 325000,
+	.fifo_size		= 16,
+};
+
+static const struct meson_spicc_data meson_spicc_axg_data = {
+	.max_speed_hz		= 80000000,
+	.min_speed_hz		= 325000,
+	.fifo_size		= 16,
+	.has_oen		= true,
+	.has_enhance_clk_div	= true,
+};
+
+static const struct meson_spicc_data meson_spicc_g12a_data = {
+	.max_speed_hz		= 166666666,
+	.min_speed_hz		= 50000,
+	.fifo_size		= 15,
+	.has_oen		= true,
+	.has_enhance_clk_div	= true,
+	.has_pclk		= true,
+};
+
 static const struct of_device_id meson_spicc_of_match[] = {
-	{ .compatible = "amlogic,meson-gx-spicc", },
-	{ .compatible = "amlogic,meson-axg-spicc", },
+	{
+		.compatible	= "amlogic,meson-gx-spicc",
+		.data		= &meson_spicc_gx_data,
+	},
+	{
+		.compatible = "amlogic,meson-axg-spicc",
+		.data		= &meson_spicc_axg_data,
+	},
+	{
+		.compatible = "amlogic,meson-g12a-spicc",
+		.data		= &meson_spicc_g12a_data,
+	},
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, meson_spicc_of_match);
diff --git a/drivers/spi/spi-meson-spifc.c b/drivers/spi/spi-meson-spifc.c
index c7b0399..c8ed781 100644
--- a/drivers/spi/spi-meson-spifc.c
+++ b/drivers/spi/spi-meson-spifc.c
@@ -70,7 +70,7 @@
  * @master:	the SPI master
  * @regmap:	regmap for device registers
  * @clk:	input clock of the built-in baud rate generator
- * @device:	the device structure
+ * @dev:	the device structure
  */
 struct meson_spifc {
 	struct spi_master *master;
@@ -349,6 +349,7 @@
 	return 0;
 out_clk:
 	clk_disable_unprepare(spifc->clk);
+	pm_runtime_disable(spifc->dev);
 out_err:
 	spi_master_put(master);
 	return ret;
diff --git a/drivers/spi/spi-mpc512x-psc.c b/drivers/spi/spi-mpc512x-psc.c
index a337b84..ea1b079 100644
--- a/drivers/spi/spi-mpc512x-psc.c
+++ b/drivers/spi/spi-mpc512x-psc.c
@@ -311,8 +311,7 @@
 			break;
 		m->actual_length += t->len;
 
-		if (t->delay_usecs)
-			udelay(t->delay_usecs);
+		spi_transfer_delay_exec(t);
 
 		if (cs_change)
 			mpc512x_psc_spi_deactivate_cs(spi);
diff --git a/drivers/spi/spi-mpc52xx-psc.c b/drivers/spi/spi-mpc52xx-psc.c
index c7e478b..17935e7 100644
--- a/drivers/spi/spi-mpc52xx-psc.c
+++ b/drivers/spi/spi-mpc52xx-psc.c
@@ -234,8 +234,7 @@
 				break;
 			m->actual_length += t->len;
 
-			if (t->delay_usecs)
-				udelay(t->delay_usecs);
+			spi_transfer_delay_exec(t);
 
 			if (cs_change)
 				mpc52xx_psc_spi_deactivate_cs(spi);
diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c
index cafb773..92a09df 100644
--- a/drivers/spi/spi-mt65xx.c
+++ b/drivers/spi/spi-mt65xx.c
@@ -140,7 +140,6 @@
  * supplies it.
  */
 static const struct mtk_chip_config mtk_default_chip_info = {
-	.cs_pol = 0,
 	.sample_sel = 0,
 };
 
@@ -172,6 +171,9 @@
 	{ .compatible = "mediatek,mt8183-spi",
 		.data = (void *)&mt8183_compat,
 	},
+	{ .compatible = "mediatek,mt8192-spi",
+		.data = (void *)&mt6765_compat,
+	},
 	{}
 };
 MODULE_DEVICE_TABLE(of, mtk_spi_of_match);
@@ -231,10 +233,12 @@
 #endif
 
 	if (mdata->dev_comp->enhance_timing) {
-		if (chip_config->cs_pol)
+		/* set CS polarity */
+		if (spi->mode & SPI_CS_HIGH)
 			reg_val |= SPI_CMD_CS_POL;
 		else
 			reg_val &= ~SPI_CMD_CS_POL;
+
 		if (chip_config->sample_sel)
 			reg_val |= SPI_CMD_SAMPLE_SEL;
 		else
@@ -265,6 +269,9 @@
 	u32 reg_val;
 	struct mtk_spi *mdata = spi_master_get_devdata(spi->master);
 
+	if (spi->mode & SPI_CS_HIGH)
+		enable = !enable;
+
 	reg_val = readl(mdata->base + SPI_CMD_REG);
 	if (!enable) {
 		reg_val |= SPI_CMD_PAUSE_EN;
@@ -533,7 +540,7 @@
 	else
 		mdata->state = MTK_SPI_IDLE;
 
-	if (!master->can_dma(master, master->cur_msg->spi, trans)) {
+	if (!master->can_dma(master, NULL, trans)) {
 		if (trans->rx_buf) {
 			cnt = mdata->xfer_len / 4;
 			ioread32_rep(mdata->base + SPI_RX_DATA_REG,
@@ -621,7 +628,6 @@
 	struct spi_master *master;
 	struct mtk_spi *mdata;
 	const struct of_device_id *of_id;
-	struct resource *res;
 	int i, irq, ret, addr_bits;
 
 	master = spi_alloc_master(&pdev->dev, sizeof(*mdata));
@@ -649,6 +655,10 @@
 
 	mdata = spi_master_get_devdata(master);
 	mdata->dev_comp = of_id->data;
+
+	if (mdata->dev_comp->enhance_timing)
+		master->mode_bits |= SPI_CS_HIGH;
+
 	if (mdata->dev_comp->must_tx)
 		master->flags = SPI_MASTER_MUST_TX;
 
@@ -684,15 +694,7 @@
 	}
 
 	platform_set_drvdata(pdev, master);
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		ret = -ENODEV;
-		dev_err(&pdev->dev, "failed to determine base address\n");
-		goto err_put_master;
-	}
-
-	mdata->base = devm_ioremap_resource(&pdev->dev, res);
+	mdata->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(mdata->base)) {
 		ret = PTR_ERR(mdata->base);
 		goto err_put_master;
diff --git a/drivers/spi/spi-mtk-nor.c b/drivers/spi/spi-mtk-nor.c
new file mode 100644
index 0000000..288f6c2
--- /dev/null
+++ b/drivers/spi/spi-mtk-nor.c
@@ -0,0 +1,922 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Mediatek SPI NOR controller driver
+//
+// Copyright (C) 2020 Chuanhong Guo <gch981213@gmail.com>
+
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+#include <linux/string.h>
+
+#define DRIVER_NAME "mtk-spi-nor"
+
+#define MTK_NOR_REG_CMD			0x00
+#define MTK_NOR_CMD_WRITE		BIT(4)
+#define MTK_NOR_CMD_PROGRAM		BIT(2)
+#define MTK_NOR_CMD_READ		BIT(0)
+#define MTK_NOR_CMD_MASK		GENMASK(5, 0)
+
+#define MTK_NOR_REG_PRG_CNT		0x04
+#define MTK_NOR_PRG_CNT_MAX		56
+#define MTK_NOR_REG_RDATA		0x0c
+
+#define MTK_NOR_REG_RADR0		0x10
+#define MTK_NOR_REG_RADR(n)		(MTK_NOR_REG_RADR0 + 4 * (n))
+#define MTK_NOR_REG_RADR3		0xc8
+
+#define MTK_NOR_REG_WDATA		0x1c
+
+#define MTK_NOR_REG_PRGDATA0		0x20
+#define MTK_NOR_REG_PRGDATA(n)		(MTK_NOR_REG_PRGDATA0 + 4 * (n))
+#define MTK_NOR_REG_PRGDATA_MAX		5
+
+#define MTK_NOR_REG_SHIFT0		0x38
+#define MTK_NOR_REG_SHIFT(n)		(MTK_NOR_REG_SHIFT0 + 4 * (n))
+#define MTK_NOR_REG_SHIFT_MAX		9
+
+#define MTK_NOR_REG_CFG1		0x60
+#define MTK_NOR_FAST_READ		BIT(0)
+
+#define MTK_NOR_REG_CFG2		0x64
+#define MTK_NOR_WR_CUSTOM_OP_EN		BIT(4)
+#define MTK_NOR_WR_BUF_EN		BIT(0)
+
+#define MTK_NOR_REG_PP_DATA		0x98
+
+#define MTK_NOR_REG_IRQ_STAT		0xa8
+#define MTK_NOR_REG_IRQ_EN		0xac
+#define MTK_NOR_IRQ_DMA			BIT(7)
+#define MTK_NOR_IRQ_MASK		GENMASK(7, 0)
+
+#define MTK_NOR_REG_CFG3		0xb4
+#define MTK_NOR_DISABLE_WREN		BIT(7)
+#define MTK_NOR_DISABLE_SR_POLL		BIT(5)
+
+#define MTK_NOR_REG_WP			0xc4
+#define MTK_NOR_ENABLE_SF_CMD		0x30
+
+#define MTK_NOR_REG_BUSCFG		0xcc
+#define MTK_NOR_4B_ADDR			BIT(4)
+#define MTK_NOR_QUAD_ADDR		BIT(3)
+#define MTK_NOR_QUAD_READ		BIT(2)
+#define MTK_NOR_DUAL_ADDR		BIT(1)
+#define MTK_NOR_DUAL_READ		BIT(0)
+#define MTK_NOR_BUS_MODE_MASK		GENMASK(4, 0)
+
+#define MTK_NOR_REG_DMA_CTL		0x718
+#define MTK_NOR_DMA_START		BIT(0)
+
+#define MTK_NOR_REG_DMA_FADR		0x71c
+#define MTK_NOR_REG_DMA_DADR		0x720
+#define MTK_NOR_REG_DMA_END_DADR	0x724
+#define MTK_NOR_REG_DMA_DADR_HB		0x738
+#define MTK_NOR_REG_DMA_END_DADR_HB	0x73c
+
+#define MTK_NOR_PRG_MAX_SIZE		6
+// Reading DMA src/dst addresses have to be 16-byte aligned
+#define MTK_NOR_DMA_ALIGN		16
+#define MTK_NOR_DMA_ALIGN_MASK		(MTK_NOR_DMA_ALIGN - 1)
+// and we allocate a bounce buffer if destination address isn't aligned.
+#define MTK_NOR_BOUNCE_BUF_SIZE		PAGE_SIZE
+
+// Buffered page program can do one 128-byte transfer
+#define MTK_NOR_PP_SIZE			128
+
+#define CLK_TO_US(sp, clkcnt)		DIV_ROUND_UP(clkcnt, sp->spi_freq / 1000000)
+
+struct mtk_nor {
+	struct spi_controller *ctlr;
+	struct device *dev;
+	void __iomem *base;
+	u8 *buffer;
+	dma_addr_t buffer_dma;
+	struct clk *spi_clk;
+	struct clk *ctlr_clk;
+	unsigned int spi_freq;
+	bool wbuf_en;
+	bool has_irq;
+	bool high_dma;
+	struct completion op_done;
+};
+
+static inline void mtk_nor_rmw(struct mtk_nor *sp, u32 reg, u32 set, u32 clr)
+{
+	u32 val = readl(sp->base + reg);
+
+	val &= ~clr;
+	val |= set;
+	writel(val, sp->base + reg);
+}
+
+static inline int mtk_nor_cmd_exec(struct mtk_nor *sp, u32 cmd, ulong clk)
+{
+	ulong delay = CLK_TO_US(sp, clk);
+	u32 reg;
+	int ret;
+
+	writel(cmd, sp->base + MTK_NOR_REG_CMD);
+	ret = readl_poll_timeout(sp->base + MTK_NOR_REG_CMD, reg, !(reg & cmd),
+				 delay / 3, (delay + 1) * 200);
+	if (ret < 0)
+		dev_err(sp->dev, "command %u timeout.\n", cmd);
+	return ret;
+}
+
+static void mtk_nor_set_addr(struct mtk_nor *sp, const struct spi_mem_op *op)
+{
+	u32 addr = op->addr.val;
+	int i;
+
+	for (i = 0; i < 3; i++) {
+		writeb(addr & 0xff, sp->base + MTK_NOR_REG_RADR(i));
+		addr >>= 8;
+	}
+	if (op->addr.nbytes == 4) {
+		writeb(addr & 0xff, sp->base + MTK_NOR_REG_RADR3);
+		mtk_nor_rmw(sp, MTK_NOR_REG_BUSCFG, MTK_NOR_4B_ADDR, 0);
+	} else {
+		mtk_nor_rmw(sp, MTK_NOR_REG_BUSCFG, 0, MTK_NOR_4B_ADDR);
+	}
+}
+
+static bool need_bounce(struct mtk_nor *sp, const struct spi_mem_op *op)
+{
+	return ((uintptr_t)op->data.buf.in & MTK_NOR_DMA_ALIGN_MASK);
+}
+
+static bool mtk_nor_match_read(const struct spi_mem_op *op)
+{
+	int dummy = 0;
+
+	if (op->dummy.buswidth)
+		dummy = op->dummy.nbytes * BITS_PER_BYTE / op->dummy.buswidth;
+
+	if ((op->data.buswidth == 2) || (op->data.buswidth == 4)) {
+		if (op->addr.buswidth == 1)
+			return dummy == 8;
+		else if (op->addr.buswidth == 2)
+			return dummy == 4;
+		else if (op->addr.buswidth == 4)
+			return dummy == 6;
+	} else if ((op->addr.buswidth == 1) && (op->data.buswidth == 1)) {
+		if (op->cmd.opcode == 0x03)
+			return dummy == 0;
+		else if (op->cmd.opcode == 0x0b)
+			return dummy == 8;
+	}
+	return false;
+}
+
+static bool mtk_nor_match_prg(const struct spi_mem_op *op)
+{
+	int tx_len, rx_len, prg_len, prg_left;
+
+	// prg mode is spi-only.
+	if ((op->cmd.buswidth > 1) || (op->addr.buswidth > 1) ||
+	    (op->dummy.buswidth > 1) || (op->data.buswidth > 1))
+		return false;
+
+	tx_len = op->cmd.nbytes + op->addr.nbytes;
+
+	if (op->data.dir == SPI_MEM_DATA_OUT) {
+		// count dummy bytes only if we need to write data after it
+		tx_len += op->dummy.nbytes;
+
+		// leave at least one byte for data
+		if (tx_len > MTK_NOR_REG_PRGDATA_MAX)
+			return false;
+
+		// if there's no addr, meaning adjust_op_size is impossible,
+		// check data length as well.
+		if ((!op->addr.nbytes) &&
+		    (tx_len + op->data.nbytes > MTK_NOR_REG_PRGDATA_MAX + 1))
+			return false;
+	} else if (op->data.dir == SPI_MEM_DATA_IN) {
+		if (tx_len > MTK_NOR_REG_PRGDATA_MAX + 1)
+			return false;
+
+		rx_len = op->data.nbytes;
+		prg_left = MTK_NOR_PRG_CNT_MAX / 8 - tx_len - op->dummy.nbytes;
+		if (prg_left > MTK_NOR_REG_SHIFT_MAX + 1)
+			prg_left = MTK_NOR_REG_SHIFT_MAX + 1;
+		if (rx_len > prg_left) {
+			if (!op->addr.nbytes)
+				return false;
+			rx_len = prg_left;
+		}
+
+		prg_len = tx_len + op->dummy.nbytes + rx_len;
+		if (prg_len > MTK_NOR_PRG_CNT_MAX / 8)
+			return false;
+	} else {
+		prg_len = tx_len + op->dummy.nbytes;
+		if (prg_len > MTK_NOR_PRG_CNT_MAX / 8)
+			return false;
+	}
+	return true;
+}
+
+static void mtk_nor_adj_prg_size(struct spi_mem_op *op)
+{
+	int tx_len, tx_left, prg_left;
+
+	tx_len = op->cmd.nbytes + op->addr.nbytes;
+	if (op->data.dir == SPI_MEM_DATA_OUT) {
+		tx_len += op->dummy.nbytes;
+		tx_left = MTK_NOR_REG_PRGDATA_MAX + 1 - tx_len;
+		if (op->data.nbytes > tx_left)
+			op->data.nbytes = tx_left;
+	} else if (op->data.dir == SPI_MEM_DATA_IN) {
+		prg_left = MTK_NOR_PRG_CNT_MAX / 8 - tx_len - op->dummy.nbytes;
+		if (prg_left > MTK_NOR_REG_SHIFT_MAX + 1)
+			prg_left = MTK_NOR_REG_SHIFT_MAX + 1;
+		if (op->data.nbytes > prg_left)
+			op->data.nbytes = prg_left;
+	}
+}
+
+static int mtk_nor_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op)
+{
+	struct mtk_nor *sp = spi_controller_get_devdata(mem->spi->master);
+
+	if (!op->data.nbytes)
+		return 0;
+
+	if ((op->addr.nbytes == 3) || (op->addr.nbytes == 4)) {
+		if ((op->data.dir == SPI_MEM_DATA_IN) &&
+		    mtk_nor_match_read(op)) {
+			// limit size to prevent timeout calculation overflow
+			if (op->data.nbytes > 0x400000)
+				op->data.nbytes = 0x400000;
+
+			if ((op->addr.val & MTK_NOR_DMA_ALIGN_MASK) ||
+			    (op->data.nbytes < MTK_NOR_DMA_ALIGN))
+				op->data.nbytes = 1;
+			else if (!need_bounce(sp, op))
+				op->data.nbytes &= ~MTK_NOR_DMA_ALIGN_MASK;
+			else if (op->data.nbytes > MTK_NOR_BOUNCE_BUF_SIZE)
+				op->data.nbytes = MTK_NOR_BOUNCE_BUF_SIZE;
+			return 0;
+		} else if (op->data.dir == SPI_MEM_DATA_OUT) {
+			if (op->data.nbytes >= MTK_NOR_PP_SIZE)
+				op->data.nbytes = MTK_NOR_PP_SIZE;
+			else
+				op->data.nbytes = 1;
+			return 0;
+		}
+	}
+
+	mtk_nor_adj_prg_size(op);
+	return 0;
+}
+
+static bool mtk_nor_supports_op(struct spi_mem *mem,
+				const struct spi_mem_op *op)
+{
+	if (!spi_mem_default_supports_op(mem, op))
+		return false;
+
+	if (op->cmd.buswidth != 1)
+		return false;
+
+	if ((op->addr.nbytes == 3) || (op->addr.nbytes == 4)) {
+		switch(op->data.dir) {
+		case SPI_MEM_DATA_IN:
+			if (mtk_nor_match_read(op))
+				return true;
+			break;
+		case SPI_MEM_DATA_OUT:
+			if ((op->addr.buswidth == 1) &&
+			    (op->dummy.nbytes == 0) &&
+			    (op->data.buswidth == 1))
+				return true;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return mtk_nor_match_prg(op);
+}
+
+static void mtk_nor_setup_bus(struct mtk_nor *sp, const struct spi_mem_op *op)
+{
+	u32 reg = 0;
+
+	if (op->addr.nbytes == 4)
+		reg |= MTK_NOR_4B_ADDR;
+
+	if (op->data.buswidth == 4) {
+		reg |= MTK_NOR_QUAD_READ;
+		writeb(op->cmd.opcode, sp->base + MTK_NOR_REG_PRGDATA(4));
+		if (op->addr.buswidth == 4)
+			reg |= MTK_NOR_QUAD_ADDR;
+	} else if (op->data.buswidth == 2) {
+		reg |= MTK_NOR_DUAL_READ;
+		writeb(op->cmd.opcode, sp->base + MTK_NOR_REG_PRGDATA(3));
+		if (op->addr.buswidth == 2)
+			reg |= MTK_NOR_DUAL_ADDR;
+	} else {
+		if (op->cmd.opcode == 0x0b)
+			mtk_nor_rmw(sp, MTK_NOR_REG_CFG1, MTK_NOR_FAST_READ, 0);
+		else
+			mtk_nor_rmw(sp, MTK_NOR_REG_CFG1, 0, MTK_NOR_FAST_READ);
+	}
+	mtk_nor_rmw(sp, MTK_NOR_REG_BUSCFG, reg, MTK_NOR_BUS_MODE_MASK);
+}
+
+static int mtk_nor_dma_exec(struct mtk_nor *sp, u32 from, unsigned int length,
+			    dma_addr_t dma_addr)
+{
+	int ret = 0;
+	ulong delay;
+	u32 reg;
+
+	writel(from, sp->base + MTK_NOR_REG_DMA_FADR);
+	writel(dma_addr, sp->base + MTK_NOR_REG_DMA_DADR);
+	writel(dma_addr + length, sp->base + MTK_NOR_REG_DMA_END_DADR);
+
+	if (sp->high_dma) {
+		writel(upper_32_bits(dma_addr),
+		       sp->base + MTK_NOR_REG_DMA_DADR_HB);
+		writel(upper_32_bits(dma_addr + length),
+		       sp->base + MTK_NOR_REG_DMA_END_DADR_HB);
+	}
+
+	if (sp->has_irq) {
+		reinit_completion(&sp->op_done);
+		mtk_nor_rmw(sp, MTK_NOR_REG_IRQ_EN, MTK_NOR_IRQ_DMA, 0);
+	}
+
+	mtk_nor_rmw(sp, MTK_NOR_REG_DMA_CTL, MTK_NOR_DMA_START, 0);
+
+	delay = CLK_TO_US(sp, (length + 5) * BITS_PER_BYTE);
+
+	if (sp->has_irq) {
+		if (!wait_for_completion_timeout(&sp->op_done,
+						 (delay + 1) * 100))
+			ret = -ETIMEDOUT;
+	} else {
+		ret = readl_poll_timeout(sp->base + MTK_NOR_REG_DMA_CTL, reg,
+					 !(reg & MTK_NOR_DMA_START), delay / 3,
+					 (delay + 1) * 100);
+	}
+
+	if (ret < 0)
+		dev_err(sp->dev, "dma read timeout.\n");
+
+	return ret;
+}
+
+static int mtk_nor_read_bounce(struct mtk_nor *sp, const struct spi_mem_op *op)
+{
+	unsigned int rdlen;
+	int ret;
+
+	if (op->data.nbytes & MTK_NOR_DMA_ALIGN_MASK)
+		rdlen = (op->data.nbytes + MTK_NOR_DMA_ALIGN) & ~MTK_NOR_DMA_ALIGN_MASK;
+	else
+		rdlen = op->data.nbytes;
+
+	ret = mtk_nor_dma_exec(sp, op->addr.val, rdlen, sp->buffer_dma);
+
+	if (!ret)
+		memcpy(op->data.buf.in, sp->buffer, op->data.nbytes);
+
+	return ret;
+}
+
+static int mtk_nor_read_dma(struct mtk_nor *sp, const struct spi_mem_op *op)
+{
+	int ret;
+	dma_addr_t dma_addr;
+
+	if (need_bounce(sp, op))
+		return mtk_nor_read_bounce(sp, op);
+
+	dma_addr = dma_map_single(sp->dev, op->data.buf.in,
+				  op->data.nbytes, DMA_FROM_DEVICE);
+
+	if (dma_mapping_error(sp->dev, dma_addr))
+		return -EINVAL;
+
+	ret = mtk_nor_dma_exec(sp, op->addr.val, op->data.nbytes, dma_addr);
+
+	dma_unmap_single(sp->dev, dma_addr, op->data.nbytes, DMA_FROM_DEVICE);
+
+	return ret;
+}
+
+static int mtk_nor_read_pio(struct mtk_nor *sp, const struct spi_mem_op *op)
+{
+	u8 *buf = op->data.buf.in;
+	int ret;
+
+	ret = mtk_nor_cmd_exec(sp, MTK_NOR_CMD_READ, 6 * BITS_PER_BYTE);
+	if (!ret)
+		buf[0] = readb(sp->base + MTK_NOR_REG_RDATA);
+	return ret;
+}
+
+static int mtk_nor_write_buffer_enable(struct mtk_nor *sp)
+{
+	int ret;
+	u32 val;
+
+	if (sp->wbuf_en)
+		return 0;
+
+	val = readl(sp->base + MTK_NOR_REG_CFG2);
+	writel(val | MTK_NOR_WR_BUF_EN, sp->base + MTK_NOR_REG_CFG2);
+	ret = readl_poll_timeout(sp->base + MTK_NOR_REG_CFG2, val,
+				 val & MTK_NOR_WR_BUF_EN, 0, 10000);
+	if (!ret)
+		sp->wbuf_en = true;
+	return ret;
+}
+
+static int mtk_nor_write_buffer_disable(struct mtk_nor *sp)
+{
+	int ret;
+	u32 val;
+
+	if (!sp->wbuf_en)
+		return 0;
+	val = readl(sp->base + MTK_NOR_REG_CFG2);
+	writel(val & ~MTK_NOR_WR_BUF_EN, sp->base + MTK_NOR_REG_CFG2);
+	ret = readl_poll_timeout(sp->base + MTK_NOR_REG_CFG2, val,
+				 !(val & MTK_NOR_WR_BUF_EN), 0, 10000);
+	if (!ret)
+		sp->wbuf_en = false;
+	return ret;
+}
+
+static int mtk_nor_pp_buffered(struct mtk_nor *sp, const struct spi_mem_op *op)
+{
+	const u8 *buf = op->data.buf.out;
+	u32 val;
+	int ret, i;
+
+	ret = mtk_nor_write_buffer_enable(sp);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < op->data.nbytes; i += 4) {
+		val = buf[i + 3] << 24 | buf[i + 2] << 16 | buf[i + 1] << 8 |
+		      buf[i];
+		writel(val, sp->base + MTK_NOR_REG_PP_DATA);
+	}
+	return mtk_nor_cmd_exec(sp, MTK_NOR_CMD_WRITE,
+				(op->data.nbytes + 5) * BITS_PER_BYTE);
+}
+
+static int mtk_nor_pp_unbuffered(struct mtk_nor *sp,
+				 const struct spi_mem_op *op)
+{
+	const u8 *buf = op->data.buf.out;
+	int ret;
+
+	ret = mtk_nor_write_buffer_disable(sp);
+	if (ret < 0)
+		return ret;
+	writeb(buf[0], sp->base + MTK_NOR_REG_WDATA);
+	return mtk_nor_cmd_exec(sp, MTK_NOR_CMD_WRITE, 6 * BITS_PER_BYTE);
+}
+
+static int mtk_nor_spi_mem_prg(struct mtk_nor *sp, const struct spi_mem_op *op)
+{
+	int rx_len = 0;
+	int reg_offset = MTK_NOR_REG_PRGDATA_MAX;
+	int tx_len, prg_len;
+	int i, ret;
+	void __iomem *reg;
+	u8 bufbyte;
+
+	tx_len = op->cmd.nbytes + op->addr.nbytes;
+
+	// count dummy bytes only if we need to write data after it
+	if (op->data.dir == SPI_MEM_DATA_OUT)
+		tx_len += op->dummy.nbytes + op->data.nbytes;
+	else if (op->data.dir == SPI_MEM_DATA_IN)
+		rx_len = op->data.nbytes;
+
+	prg_len = op->cmd.nbytes + op->addr.nbytes + op->dummy.nbytes +
+		  op->data.nbytes;
+
+	// an invalid op may reach here if the caller calls exec_op without
+	// adjust_op_size. return -EINVAL instead of -ENOTSUPP so that
+	// spi-mem won't try this op again with generic spi transfers.
+	if ((tx_len > MTK_NOR_REG_PRGDATA_MAX + 1) ||
+	    (rx_len > MTK_NOR_REG_SHIFT_MAX + 1) ||
+	    (prg_len > MTK_NOR_PRG_CNT_MAX / 8))
+		return -EINVAL;
+
+	// fill tx data
+	for (i = op->cmd.nbytes; i > 0; i--, reg_offset--) {
+		reg = sp->base + MTK_NOR_REG_PRGDATA(reg_offset);
+		bufbyte = (op->cmd.opcode >> ((i - 1) * BITS_PER_BYTE)) & 0xff;
+		writeb(bufbyte, reg);
+	}
+
+	for (i = op->addr.nbytes; i > 0; i--, reg_offset--) {
+		reg = sp->base + MTK_NOR_REG_PRGDATA(reg_offset);
+		bufbyte = (op->addr.val >> ((i - 1) * BITS_PER_BYTE)) & 0xff;
+		writeb(bufbyte, reg);
+	}
+
+	if (op->data.dir == SPI_MEM_DATA_OUT) {
+		for (i = 0; i < op->dummy.nbytes; i++, reg_offset--) {
+			reg = sp->base + MTK_NOR_REG_PRGDATA(reg_offset);
+			writeb(0, reg);
+		}
+
+		for (i = 0; i < op->data.nbytes; i++, reg_offset--) {
+			reg = sp->base + MTK_NOR_REG_PRGDATA(reg_offset);
+			writeb(((const u8 *)(op->data.buf.out))[i], reg);
+		}
+	}
+
+	for (; reg_offset >= 0; reg_offset--) {
+		reg = sp->base + MTK_NOR_REG_PRGDATA(reg_offset);
+		writeb(0, reg);
+	}
+
+	// trigger op
+	writel(prg_len * BITS_PER_BYTE, sp->base + MTK_NOR_REG_PRG_CNT);
+	ret = mtk_nor_cmd_exec(sp, MTK_NOR_CMD_PROGRAM,
+			       prg_len * BITS_PER_BYTE);
+	if (ret)
+		return ret;
+
+	// fetch read data
+	reg_offset = 0;
+	if (op->data.dir == SPI_MEM_DATA_IN) {
+		for (i = op->data.nbytes - 1; i >= 0; i--, reg_offset++) {
+			reg = sp->base + MTK_NOR_REG_SHIFT(reg_offset);
+			((u8 *)(op->data.buf.in))[i] = readb(reg);
+		}
+	}
+
+	return 0;
+}
+
+static int mtk_nor_exec_op(struct spi_mem *mem, const struct spi_mem_op *op)
+{
+	struct mtk_nor *sp = spi_controller_get_devdata(mem->spi->master);
+	int ret;
+
+	if ((op->data.nbytes == 0) ||
+	    ((op->addr.nbytes != 3) && (op->addr.nbytes != 4)))
+		return mtk_nor_spi_mem_prg(sp, op);
+
+	if (op->data.dir == SPI_MEM_DATA_OUT) {
+		mtk_nor_set_addr(sp, op);
+		writeb(op->cmd.opcode, sp->base + MTK_NOR_REG_PRGDATA0);
+		if (op->data.nbytes == MTK_NOR_PP_SIZE)
+			return mtk_nor_pp_buffered(sp, op);
+		return mtk_nor_pp_unbuffered(sp, op);
+	}
+
+	if ((op->data.dir == SPI_MEM_DATA_IN) && mtk_nor_match_read(op)) {
+		ret = mtk_nor_write_buffer_disable(sp);
+		if (ret < 0)
+			return ret;
+		mtk_nor_setup_bus(sp, op);
+		if (op->data.nbytes == 1) {
+			mtk_nor_set_addr(sp, op);
+			return mtk_nor_read_pio(sp, op);
+		} else {
+			return mtk_nor_read_dma(sp, op);
+		}
+	}
+
+	return mtk_nor_spi_mem_prg(sp, op);
+}
+
+static int mtk_nor_setup(struct spi_device *spi)
+{
+	struct mtk_nor *sp = spi_controller_get_devdata(spi->master);
+
+	if (spi->max_speed_hz && (spi->max_speed_hz < sp->spi_freq)) {
+		dev_err(&spi->dev, "spi clock should be %u Hz.\n",
+			sp->spi_freq);
+		return -EINVAL;
+	}
+	spi->max_speed_hz = sp->spi_freq;
+
+	return 0;
+}
+
+static int mtk_nor_transfer_one_message(struct spi_controller *master,
+					struct spi_message *m)
+{
+	struct mtk_nor *sp = spi_controller_get_devdata(master);
+	struct spi_transfer *t = NULL;
+	unsigned long trx_len = 0;
+	int stat = 0;
+	int reg_offset = MTK_NOR_REG_PRGDATA_MAX;
+	void __iomem *reg;
+	const u8 *txbuf;
+	u8 *rxbuf;
+	int i;
+
+	list_for_each_entry(t, &m->transfers, transfer_list) {
+		txbuf = t->tx_buf;
+		for (i = 0; i < t->len; i++, reg_offset--) {
+			reg = sp->base + MTK_NOR_REG_PRGDATA(reg_offset);
+			if (txbuf)
+				writeb(txbuf[i], reg);
+			else
+				writeb(0, reg);
+		}
+		trx_len += t->len;
+	}
+
+	writel(trx_len * BITS_PER_BYTE, sp->base + MTK_NOR_REG_PRG_CNT);
+
+	stat = mtk_nor_cmd_exec(sp, MTK_NOR_CMD_PROGRAM,
+				trx_len * BITS_PER_BYTE);
+	if (stat < 0)
+		goto msg_done;
+
+	reg_offset = trx_len - 1;
+	list_for_each_entry(t, &m->transfers, transfer_list) {
+		rxbuf = t->rx_buf;
+		for (i = 0; i < t->len; i++, reg_offset--) {
+			reg = sp->base + MTK_NOR_REG_SHIFT(reg_offset);
+			if (rxbuf)
+				rxbuf[i] = readb(reg);
+		}
+	}
+
+	m->actual_length = trx_len;
+msg_done:
+	m->status = stat;
+	spi_finalize_current_message(master);
+
+	return 0;
+}
+
+static void mtk_nor_disable_clk(struct mtk_nor *sp)
+{
+	clk_disable_unprepare(sp->spi_clk);
+	clk_disable_unprepare(sp->ctlr_clk);
+}
+
+static int mtk_nor_enable_clk(struct mtk_nor *sp)
+{
+	int ret;
+
+	ret = clk_prepare_enable(sp->spi_clk);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(sp->ctlr_clk);
+	if (ret) {
+		clk_disable_unprepare(sp->spi_clk);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void mtk_nor_init(struct mtk_nor *sp)
+{
+	writel(0, sp->base + MTK_NOR_REG_IRQ_EN);
+	writel(MTK_NOR_IRQ_MASK, sp->base + MTK_NOR_REG_IRQ_STAT);
+
+	writel(MTK_NOR_ENABLE_SF_CMD, sp->base + MTK_NOR_REG_WP);
+	mtk_nor_rmw(sp, MTK_NOR_REG_CFG2, MTK_NOR_WR_CUSTOM_OP_EN, 0);
+	mtk_nor_rmw(sp, MTK_NOR_REG_CFG3,
+		    MTK_NOR_DISABLE_WREN | MTK_NOR_DISABLE_SR_POLL, 0);
+}
+
+static irqreturn_t mtk_nor_irq_handler(int irq, void *data)
+{
+	struct mtk_nor *sp = data;
+	u32 irq_status, irq_enabled;
+
+	irq_status = readl(sp->base + MTK_NOR_REG_IRQ_STAT);
+	irq_enabled = readl(sp->base + MTK_NOR_REG_IRQ_EN);
+	// write status back to clear interrupt
+	writel(irq_status, sp->base + MTK_NOR_REG_IRQ_STAT);
+
+	if (!(irq_status & irq_enabled))
+		return IRQ_NONE;
+
+	if (irq_status & MTK_NOR_IRQ_DMA) {
+		complete(&sp->op_done);
+		writel(0, sp->base + MTK_NOR_REG_IRQ_EN);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static size_t mtk_max_msg_size(struct spi_device *spi)
+{
+	return MTK_NOR_PRG_MAX_SIZE;
+}
+
+static const struct spi_controller_mem_ops mtk_nor_mem_ops = {
+	.adjust_op_size = mtk_nor_adjust_op_size,
+	.supports_op = mtk_nor_supports_op,
+	.exec_op = mtk_nor_exec_op
+};
+
+static const struct of_device_id mtk_nor_match[] = {
+	{ .compatible = "mediatek,mt8192-nor", .data = (void *)36 },
+	{ .compatible = "mediatek,mt8173-nor", .data = (void *)32 },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, mtk_nor_match);
+
+static int mtk_nor_probe(struct platform_device *pdev)
+{
+	struct spi_controller *ctlr;
+	struct mtk_nor *sp;
+	void __iomem *base;
+	struct clk *spi_clk, *ctlr_clk;
+	int ret, irq;
+	unsigned long dma_bits;
+
+	base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	spi_clk = devm_clk_get(&pdev->dev, "spi");
+	if (IS_ERR(spi_clk))
+		return PTR_ERR(spi_clk);
+
+	ctlr_clk = devm_clk_get(&pdev->dev, "sf");
+	if (IS_ERR(ctlr_clk))
+		return PTR_ERR(ctlr_clk);
+
+	dma_bits = (unsigned long)of_device_get_match_data(&pdev->dev);
+	if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(dma_bits))) {
+		dev_err(&pdev->dev, "failed to set dma mask(%lu)\n", dma_bits);
+		return -EINVAL;
+	}
+
+	ctlr = devm_spi_alloc_master(&pdev->dev, sizeof(*sp));
+	if (!ctlr) {
+		dev_err(&pdev->dev, "failed to allocate spi controller\n");
+		return -ENOMEM;
+	}
+
+	ctlr->bits_per_word_mask = SPI_BPW_MASK(8);
+	ctlr->dev.of_node = pdev->dev.of_node;
+	ctlr->max_message_size = mtk_max_msg_size;
+	ctlr->mem_ops = &mtk_nor_mem_ops;
+	ctlr->mode_bits = SPI_RX_DUAL | SPI_RX_QUAD | SPI_TX_DUAL | SPI_TX_QUAD;
+	ctlr->num_chipselect = 1;
+	ctlr->setup = mtk_nor_setup;
+	ctlr->transfer_one_message = mtk_nor_transfer_one_message;
+	ctlr->auto_runtime_pm = true;
+
+	dev_set_drvdata(&pdev->dev, ctlr);
+
+	sp = spi_controller_get_devdata(ctlr);
+	sp->base = base;
+	sp->has_irq = false;
+	sp->wbuf_en = false;
+	sp->ctlr = ctlr;
+	sp->dev = &pdev->dev;
+	sp->spi_clk = spi_clk;
+	sp->ctlr_clk = ctlr_clk;
+	sp->high_dma = (dma_bits > 32);
+	sp->buffer = dmam_alloc_coherent(&pdev->dev,
+				MTK_NOR_BOUNCE_BUF_SIZE + MTK_NOR_DMA_ALIGN,
+				&sp->buffer_dma, GFP_KERNEL);
+	if (!sp->buffer)
+		return -ENOMEM;
+
+	if ((uintptr_t)sp->buffer & MTK_NOR_DMA_ALIGN_MASK) {
+		dev_err(sp->dev, "misaligned allocation of internal buffer.\n");
+		return -ENOMEM;
+	}
+
+	ret = mtk_nor_enable_clk(sp);
+	if (ret < 0)
+		return ret;
+
+	sp->spi_freq = clk_get_rate(sp->spi_clk);
+
+	mtk_nor_init(sp);
+
+	irq = platform_get_irq_optional(pdev, 0);
+
+	if (irq < 0) {
+		dev_warn(sp->dev, "IRQ not available.");
+	} else {
+		ret = devm_request_irq(sp->dev, irq, mtk_nor_irq_handler, 0,
+				       pdev->name, sp);
+		if (ret < 0) {
+			dev_warn(sp->dev, "failed to request IRQ.");
+		} else {
+			init_completion(&sp->op_done);
+			sp->has_irq = true;
+		}
+	}
+
+	pm_runtime_set_autosuspend_delay(&pdev->dev, -1);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_get_noresume(&pdev->dev);
+
+	ret = devm_spi_register_controller(&pdev->dev, ctlr);
+	if (ret < 0)
+		goto err_probe;
+
+	pm_runtime_mark_last_busy(&pdev->dev);
+	pm_runtime_put_autosuspend(&pdev->dev);
+
+	dev_info(&pdev->dev, "spi frequency: %d Hz\n", sp->spi_freq);
+
+	return 0;
+
+err_probe:
+	pm_runtime_disable(&pdev->dev);
+	pm_runtime_set_suspended(&pdev->dev);
+	pm_runtime_dont_use_autosuspend(&pdev->dev);
+
+	mtk_nor_disable_clk(sp);
+
+	return ret;
+}
+
+static int mtk_nor_remove(struct platform_device *pdev)
+{
+	struct spi_controller *ctlr = dev_get_drvdata(&pdev->dev);
+	struct mtk_nor *sp = spi_controller_get_devdata(ctlr);
+
+	pm_runtime_disable(&pdev->dev);
+	pm_runtime_set_suspended(&pdev->dev);
+	pm_runtime_dont_use_autosuspend(&pdev->dev);
+
+	mtk_nor_disable_clk(sp);
+
+	return 0;
+}
+
+static int __maybe_unused mtk_nor_runtime_suspend(struct device *dev)
+{
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct mtk_nor *sp = spi_controller_get_devdata(ctlr);
+
+	mtk_nor_disable_clk(sp);
+
+	return 0;
+}
+
+static int __maybe_unused mtk_nor_runtime_resume(struct device *dev)
+{
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct mtk_nor *sp = spi_controller_get_devdata(ctlr);
+
+	return mtk_nor_enable_clk(sp);
+}
+
+static int __maybe_unused mtk_nor_suspend(struct device *dev)
+{
+	return pm_runtime_force_suspend(dev);
+}
+
+static int __maybe_unused mtk_nor_resume(struct device *dev)
+{
+	return pm_runtime_force_resume(dev);
+}
+
+static const struct dev_pm_ops mtk_nor_pm_ops = {
+	SET_RUNTIME_PM_OPS(mtk_nor_runtime_suspend,
+			   mtk_nor_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(mtk_nor_suspend, mtk_nor_resume)
+};
+
+static struct platform_driver mtk_nor_driver = {
+	.driver = {
+		.name = DRIVER_NAME,
+		.of_match_table = mtk_nor_match,
+		.pm = &mtk_nor_pm_ops,
+	},
+	.probe = mtk_nor_probe,
+	.remove = mtk_nor_remove,
+};
+
+module_platform_driver(mtk_nor_driver);
+
+MODULE_DESCRIPTION("Mediatek SPI NOR controller driver");
+MODULE_AUTHOR("Chuanhong Guo <gch981213@gmail.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:" DRIVER_NAME);
diff --git a/drivers/spi/spi-mux.c b/drivers/spi/spi-mux.c
new file mode 100644
index 0000000..9708b78
--- /dev/null
+++ b/drivers/spi/spi-mux.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// General Purpose SPI multiplexer
+
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mux/consumer.h>
+#include <linux/slab.h>
+#include <linux/spi/spi.h>
+
+#define SPI_MUX_NO_CS	((unsigned int)-1)
+
+/**
+ * DOC: Driver description
+ *
+ * This driver supports a MUX on an SPI bus. This can be useful when you need
+ * more chip selects than the hardware peripherals support, or than are
+ * available in a particular board setup.
+ *
+ * The driver will create an additional SPI controller. Devices added under the
+ * mux will be handled as 'chip selects' on this controller.
+ */
+
+/**
+ * struct spi_mux_priv - the basic spi_mux structure
+ * @spi:		pointer to the device struct attached to the parent
+ *			spi controller
+ * @current_cs:		The current chip select set in the mux
+ * @child_msg_complete: The mux replaces the complete callback in the child's
+ *			message to its own callback; this field is used by the
+ *			driver to store the child's callback during a transfer
+ * @child_msg_context:	Used to store the child's context to the callback
+ * @child_msg_dev:	Used to store the spi_device pointer to the child
+ * @mux:		mux_control structure used to provide chip selects for
+ *			downstream spi devices
+ */
+struct spi_mux_priv {
+	struct spi_device	*spi;
+	unsigned int		current_cs;
+
+	void			(*child_msg_complete)(void *context);
+	void			*child_msg_context;
+	struct spi_device	*child_msg_dev;
+	struct mux_control	*mux;
+};
+
+/* should not get called when the parent controller is doing a transfer */
+static int spi_mux_select(struct spi_device *spi)
+{
+	struct spi_mux_priv *priv = spi_controller_get_devdata(spi->controller);
+	int ret;
+
+	ret = mux_control_select(priv->mux, spi->chip_select);
+	if (ret)
+		return ret;
+
+	if (priv->current_cs == spi->chip_select)
+		return 0;
+
+	dev_dbg(&priv->spi->dev, "setting up the mux for cs %d\n",
+		spi->chip_select);
+
+	/* copy the child device's settings except for the cs */
+	priv->spi->max_speed_hz = spi->max_speed_hz;
+	priv->spi->mode = spi->mode;
+	priv->spi->bits_per_word = spi->bits_per_word;
+
+	priv->current_cs = spi->chip_select;
+
+	return 0;
+}
+
+static int spi_mux_setup(struct spi_device *spi)
+{
+	struct spi_mux_priv *priv = spi_controller_get_devdata(spi->controller);
+
+	/*
+	 * can be called multiple times, won't do a valid setup now but we will
+	 * change the settings when we do a transfer (necessary because we
+	 * can't predict from which device it will be anyway)
+	 */
+	return spi_setup(priv->spi);
+}
+
+static void spi_mux_complete_cb(void *context)
+{
+	struct spi_mux_priv *priv = (struct spi_mux_priv *)context;
+	struct spi_controller *ctlr = spi_get_drvdata(priv->spi);
+	struct spi_message *m = ctlr->cur_msg;
+
+	m->complete = priv->child_msg_complete;
+	m->context = priv->child_msg_context;
+	m->spi = priv->child_msg_dev;
+	spi_finalize_current_message(ctlr);
+	mux_control_deselect(priv->mux);
+}
+
+static int spi_mux_transfer_one_message(struct spi_controller *ctlr,
+						struct spi_message *m)
+{
+	struct spi_mux_priv *priv = spi_controller_get_devdata(ctlr);
+	struct spi_device *spi = m->spi;
+	int ret;
+
+	ret = spi_mux_select(spi);
+	if (ret)
+		return ret;
+
+	/*
+	 * Replace the complete callback, context and spi_device with our own
+	 * pointers. Save originals
+	 */
+	priv->child_msg_complete = m->complete;
+	priv->child_msg_context = m->context;
+	priv->child_msg_dev = m->spi;
+
+	m->complete = spi_mux_complete_cb;
+	m->context = priv;
+	m->spi = priv->spi;
+
+	/* do the transfer */
+	return spi_async(priv->spi, m);
+}
+
+static int spi_mux_probe(struct spi_device *spi)
+{
+	struct spi_controller *ctlr;
+	struct spi_mux_priv *priv;
+	int ret;
+
+	ctlr = spi_alloc_master(&spi->dev, sizeof(*priv));
+	if (!ctlr)
+		return -ENOMEM;
+
+	spi_set_drvdata(spi, ctlr);
+	priv = spi_controller_get_devdata(ctlr);
+	priv->spi = spi;
+
+	priv->mux = devm_mux_control_get(&spi->dev, NULL);
+	if (IS_ERR(priv->mux)) {
+		ret = dev_err_probe(&spi->dev, PTR_ERR(priv->mux),
+				    "failed to get control-mux\n");
+		goto err_put_ctlr;
+	}
+
+	priv->current_cs = SPI_MUX_NO_CS;
+
+	/* supported modes are the same as our parent's */
+	ctlr->mode_bits = spi->controller->mode_bits;
+	ctlr->flags = spi->controller->flags;
+	ctlr->transfer_one_message = spi_mux_transfer_one_message;
+	ctlr->setup = spi_mux_setup;
+	ctlr->num_chipselect = mux_control_states(priv->mux);
+	ctlr->bus_num = -1;
+	ctlr->dev.of_node = spi->dev.of_node;
+
+	ret = devm_spi_register_controller(&spi->dev, ctlr);
+	if (ret)
+		goto err_put_ctlr;
+
+	return 0;
+
+err_put_ctlr:
+	spi_controller_put(ctlr);
+
+	return ret;
+}
+
+static const struct spi_device_id spi_mux_id[] = {
+	{ "spi-mux" },
+	{ }
+};
+MODULE_DEVICE_TABLE(spi, spi_mux_id);
+
+static const struct of_device_id spi_mux_of_match[] = {
+	{ .compatible = "spi-mux" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, spi_mux_of_match);
+
+static struct spi_driver spi_mux_driver = {
+	.probe  = spi_mux_probe,
+	.driver = {
+		.name   = "spi-mux",
+		.of_match_table = spi_mux_of_match,
+	},
+	.id_table = spi_mux_id,
+};
+
+module_spi_driver(spi_mux_driver);
+
+MODULE_DESCRIPTION("SPI multiplexer");
+MODULE_LICENSE("GPL");
diff --git a/drivers/spi/spi-mxic.c b/drivers/spi/spi-mxic.c
index eba706d..96b4182 100644
--- a/drivers/spi/spi-mxic.c
+++ b/drivers/spi/spi-mxic.c
@@ -145,8 +145,8 @@
 #define LWR_SUSP_CTRL_EN	BIT(31)
 
 #define DMAS_CTRL		0x9c
-#define DMAS_CTRL_DIR_READ	BIT(31)
-#define DMAS_CTRL_EN		BIT(30)
+#define DMAS_CTRL_EN		BIT(31)
+#define DMAS_CTRL_DIR_READ	BIT(30)
 
 #define DATA_STROB		0xa0
 #define DATA_STROB_EDO_EN	BIT(2)
@@ -275,7 +275,7 @@
 	writel(0, mxic->regs + HC_EN);
 	writel(0, mxic->regs + LRD_CFG);
 	writel(0, mxic->regs + LRD_CTRL);
-	writel(HC_CFG_NIO(1) | HC_CFG_TYPE(0, HC_CFG_TYPE_SPI_NAND) |
+	writel(HC_CFG_NIO(1) | HC_CFG_TYPE(0, HC_CFG_TYPE_SPI_NOR) |
 	       HC_CFG_SLV_ACT(0) | HC_CFG_MAN_CS_EN | HC_CFG_IDLE_SIO_LVL(1),
 	       mxic->regs + HC_CFG);
 }
@@ -346,7 +346,7 @@
 	if (op->addr.nbytes > 7)
 		return false;
 
-	return true;
+	return spi_mem_default_supports_op(mem, op);
 }
 
 static int mxic_spi_mem_exec_op(struct spi_mem *mem,
@@ -356,6 +356,7 @@
 	int nio = 1, i, ret;
 	u32 ss_ctrl;
 	u8 addr[8];
+	u8 opcode = op->cmd.opcode;
 
 	ret = mxic_spi_set_freq(mxic, mem->spi->max_speed_hz);
 	if (ret)
@@ -393,7 +394,7 @@
 	writel(readl(mxic->regs + HC_CFG) | HC_CFG_MAN_CS_ASSERT,
 	       mxic->regs + HC_CFG);
 
-	ret = mxic_spi_data_xfer(mxic, &op->cmd.opcode, NULL, 1);
+	ret = mxic_spi_data_xfer(mxic, &opcode, NULL, 1);
 	if (ret)
 		goto out;
 
diff --git a/drivers/spi/spi-mxs.c b/drivers/spi/spi-mxs.c
index 34856c9..435309b 100644
--- a/drivers/spi/spi-mxs.c
+++ b/drivers/spi/spi-mxs.c
@@ -22,7 +22,6 @@
 #include <linux/ioport.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/of_gpio.h>
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
@@ -32,7 +31,7 @@
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/completion.h>
-#include <linux/gpio.h>
+#include <linux/pinctrl/consumer.h>
 #include <linux/regulator/consumer.h>
 #include <linux/pm_runtime.h>
 #include <linux/module.h>
@@ -590,10 +589,10 @@
 	if (ret)
 		goto out_master_free;
 
-	ssp->dmach = dma_request_slave_channel(&pdev->dev, "rx-tx");
-	if (!ssp->dmach) {
+	ssp->dmach = dma_request_chan(&pdev->dev, "rx-tx");
+	if (IS_ERR(ssp->dmach)) {
 		dev_err(ssp->dev, "Failed to request DMA\n");
-		ret = -ENODEV;
+		ret = PTR_ERR(ssp->dmach);
 		goto out_master_free;
 	}
 
diff --git a/drivers/spi/spi-npcm-fiu.c b/drivers/spi/spi-npcm-fiu.c
index 1e770d6..b62471a 100644
--- a/drivers/spi/spi-npcm-fiu.c
+++ b/drivers/spi/spi-npcm-fiu.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2019 Nuvoton Technology corporation.
 
+#include <linux/bits.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/device.h>
@@ -177,7 +178,6 @@
 #define MAP_SIZE_16MB			0x1000000
 #define MAP_SIZE_8MB			0x800000
 
-#define NUM_BITS_IN_BYTE		8
 #define FIU_DRD_MAX_DUMMY_NUMBER	3
 #define NPCM_MAX_CHIP_NUM		4
 #define CHUNK_SIZE			16
@@ -252,8 +252,8 @@
 	fiu->drd_op.addr.buswidth = op->addr.buswidth;
 	regmap_update_bits(fiu->regmap, NPCM_FIU_DRD_CFG,
 			   NPCM_FIU_DRD_CFG_DBW,
-			   ((op->dummy.nbytes * ilog2(op->addr.buswidth))
-			    / NUM_BITS_IN_BYTE) << NPCM_FIU_DRD_DBW_SHIFT);
+			   ((op->dummy.nbytes * ilog2(op->addr.buswidth)) / BITS_PER_BYTE)
+			   << NPCM_FIU_DRD_DBW_SHIFT);
 	fiu->drd_op.dummy.nbytes = op->dummy.nbytes;
 	regmap_update_bits(fiu->regmap, NPCM_FIU_DRD_CFG,
 			   NPCM_FIU_DRD_CFG_RDCMD, op->cmd.opcode);
@@ -603,7 +603,7 @@
 
 	if (!chip->flash_region_mapped_ptr) {
 		chip->flash_region_mapped_ptr =
-			devm_ioremap_nocache(fiu->dev, (fiu->res_mem->start +
+			devm_ioremap(fiu->dev, (fiu->res_mem->start +
 							(fiu->info->max_map_size *
 						    desc->mem->spi->chip_select)),
 					     (u32)desc->info.length);
diff --git a/drivers/spi/spi-npcm-pspi.c b/drivers/spi/spi-npcm-pspi.c
index b191d57..56d10c4 100644
--- a/drivers/spi/spi-npcm-pspi.c
+++ b/drivers/spi/spi-npcm-pspi.c
@@ -10,8 +10,7 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/spi/spi.h>
-#include <linux/gpio.h>
-#include <linux/of_gpio.h>
+#include <linux/reset.h>
 
 #include <asm/unaligned.h>
 
@@ -20,7 +19,7 @@
 
 struct npcm_pspi {
 	struct completion xfer_done;
-	struct regmap *rst_regmap;
+	struct reset_control *reset;
 	struct spi_master *master;
 	unsigned int tx_bytes;
 	unsigned int rx_bytes;
@@ -59,12 +58,6 @@
 #define NPCM_PSPI_MIN_CLK_DIVIDER	4
 #define NPCM_PSPI_DEFAULT_CLK		25000000
 
-/* reset register */
-#define NPCM7XX_IPSRST2_OFFSET	0x24
-
-#define NPCM7XX_PSPI1_RESET	BIT(22)
-#define NPCM7XX_PSPI2_RESET	BIT(23)
-
 static inline unsigned int bytes_per_word(unsigned int bits)
 {
 	return bits <= 8 ? 1 : 2;
@@ -178,6 +171,13 @@
 		priv->mode = spi->mode;
 	}
 
+	/*
+	 * If transfer is even length, and 8 bits per word transfer,
+	 * then implement 16 bits-per-word transfer.
+	 */
+	if (priv->bits_per_word == 8 && !(t->len & 0x1))
+		t->bits_per_word = 16;
+
 	if (!priv->is_save_param || priv->bits_per_word != t->bits_per_word) {
 		npcm_pspi_set_transfer_size(priv, t->bits_per_word);
 		priv->bits_per_word = t->bits_per_word;
@@ -195,6 +195,7 @@
 static void npcm_pspi_send(struct npcm_pspi *priv)
 {
 	int wsize;
+	u16 val;
 
 	wsize = min(bytes_per_word(priv->bits_per_word), priv->tx_bytes);
 	priv->tx_bytes -= wsize;
@@ -204,17 +205,18 @@
 
 	switch (wsize) {
 	case 1:
-		iowrite8(*priv->tx_buf, NPCM_PSPI_DATA + priv->base);
+		val = *priv->tx_buf++;
+		iowrite8(val, NPCM_PSPI_DATA + priv->base);
 		break;
 	case 2:
-		iowrite16(*priv->tx_buf, NPCM_PSPI_DATA + priv->base);
+		val = *priv->tx_buf++;
+		val = *priv->tx_buf++ | (val << 8);
+		iowrite16(val, NPCM_PSPI_DATA + priv->base);
 		break;
 	default:
 		WARN_ON_ONCE(1);
 		return;
 	}
-
-	priv->tx_buf += wsize;
 }
 
 static void npcm_pspi_recv(struct npcm_pspi *priv)
@@ -230,18 +232,17 @@
 
 	switch (rsize) {
 	case 1:
-		val = ioread8(priv->base + NPCM_PSPI_DATA);
+		*priv->rx_buf++ = ioread8(priv->base + NPCM_PSPI_DATA);
 		break;
 	case 2:
 		val = ioread16(priv->base + NPCM_PSPI_DATA);
+		*priv->rx_buf++ = (val >> 8);
+		*priv->rx_buf++ = val & 0xff;
 		break;
 	default:
 		WARN_ON_ONCE(1);
 		return;
 	}
-
-	*priv->rx_buf = val;
-	priv->rx_buf += rsize;
 }
 
 static int npcm_pspi_transfer_one(struct spi_master *master,
@@ -285,15 +286,14 @@
 
 static void npcm_pspi_reset_hw(struct npcm_pspi *priv)
 {
-	regmap_write(priv->rst_regmap, NPCM7XX_IPSRST2_OFFSET,
-		     NPCM7XX_PSPI1_RESET << priv->id);
-	regmap_write(priv->rst_regmap, NPCM7XX_IPSRST2_OFFSET, 0x0);
+	reset_control_assert(priv->reset);
+	udelay(5);
+	reset_control_deassert(priv->reset);
 }
 
 static irqreturn_t npcm_pspi_handler(int irq, void *dev_id)
 {
 	struct npcm_pspi *priv = dev_id;
-	u16 val;
 	u8 stat;
 
 	stat = ioread8(priv->base + NPCM_PSPI_STAT);
@@ -303,7 +303,7 @@
 
 	if (priv->tx_buf) {
 		if (stat & NPCM_PSPI_STAT_RBF) {
-			val = ioread8(NPCM_PSPI_DATA + priv->base);
+			ioread8(NPCM_PSPI_DATA + priv->base);
 			if (priv->tx_bytes == 0) {
 				npcm_pspi_disable(priv);
 				complete(&priv->xfer_done);
@@ -342,20 +342,9 @@
 	struct npcm_pspi *priv;
 	struct spi_master *master;
 	unsigned long clk_hz;
-	struct device_node *np = pdev->dev.of_node;
-	int num_cs, i;
-	int csgpio;
 	int irq;
 	int ret;
 
-	num_cs = of_gpio_named_count(np, "cs-gpios");
-	if (num_cs < 0)
-		return num_cs;
-
-	pdev->id = of_alias_get_id(np, "spi");
-	if (pdev->id < 0)
-		pdev->id = 0;
-
 	master = spi_alloc_master(&pdev->dev, sizeof(*priv));
 	if (!master)
 		return -ENOMEM;
@@ -365,7 +354,6 @@
 	priv = spi_master_get_devdata(master);
 	priv->master = master;
 	priv->is_save_param = false;
-	priv->id = pdev->id;
 
 	priv->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->base)) {
@@ -390,11 +378,10 @@
 		goto out_disable_clk;
 	}
 
-	priv->rst_regmap =
-		syscon_regmap_lookup_by_compatible("nuvoton,npcm750-rst");
-	if (IS_ERR(priv->rst_regmap)) {
-		dev_err(&pdev->dev, "failed to find nuvoton,npcm750-rst\n");
-		return PTR_ERR(priv->rst_regmap);
+	priv->reset = devm_reset_control_get(&pdev->dev, NULL);
+	if (IS_ERR(priv->reset)) {
+		ret = PTR_ERR(priv->reset);
+		goto out_disable_clk;
 	}
 
 	/* reset SPI-HW block */
@@ -415,31 +402,14 @@
 	master->min_speed_hz = DIV_ROUND_UP(clk_hz, NPCM_PSPI_MAX_CLK_DIVIDER);
 	master->mode_bits = SPI_CPHA | SPI_CPOL;
 	master->dev.of_node = pdev->dev.of_node;
-	master->bus_num = pdev->id;
+	master->bus_num = -1;
 	master->bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16);
 	master->transfer_one = npcm_pspi_transfer_one;
 	master->prepare_transfer_hardware =
 		npcm_pspi_prepare_transfer_hardware;
 	master->unprepare_transfer_hardware =
 		npcm_pspi_unprepare_transfer_hardware;
-	master->num_chipselect = num_cs;
-
-	for (i = 0; i < num_cs; i++) {
-		csgpio = of_get_named_gpio(np, "cs-gpios", i);
-		if (csgpio < 0) {
-			dev_err(&pdev->dev, "failed to get csgpio#%u\n", i);
-			goto out_disable_clk;
-		}
-		dev_dbg(&pdev->dev, "csgpio#%u = %d\n", i, csgpio);
-		ret = devm_gpio_request_one(&pdev->dev, csgpio,
-					    GPIOF_OUT_INIT_HIGH, DRIVER_NAME);
-		if (ret < 0) {
-			dev_err(&pdev->dev,
-				"failed to configure csgpio#%u %d\n"
-				, i, csgpio);
-			goto out_disable_clk;
-		}
-	}
+	master->use_gpio_descriptors = true;
 
 	/* set to default clock rate */
 	npcm_pspi_set_baudrate(priv, NPCM_PSPI_DEFAULT_CLK);
@@ -448,7 +418,7 @@
 	if (ret)
 		goto out_disable_clk;
 
-	pr_info("NPCM Peripheral SPI %d probed\n", pdev->id);
+	pr_info("NPCM Peripheral SPI %d probed\n", master->bus_num);
 
 	return 0;
 
diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c
index 36a44a8..bcc0b5a 100644
--- a/drivers/spi/spi-nxp-fspi.c
+++ b/drivers/spi/spi-nxp-fspi.c
@@ -3,7 +3,8 @@
 /*
  * NXP FlexSPI(FSPI) controller driver.
  *
- * Copyright 2019 NXP.
+ * Copyright 2019-2020 NXP
+ * Copyright 2020 Puresoftware Ltd.
  *
  * FlexSPI is a flexsible SPI host controller which supports two SPI
  * channels and up to 4 external devices. Each channel supports
@@ -30,6 +31,7 @@
  *     Frieder Schrempf <frieder.schrempf@kontron.de>
  */
 
+#include <linux/acpi.h>
 #include <linux/bitops.h>
 #include <linux/clk.h>
 #include <linux/completion.h>
@@ -307,6 +309,7 @@
 
 #define POLL_TOUT		5000
 #define NXP_FSPI_MAX_CHIPSELECT		4
+#define NXP_FSPI_MIN_IOMAP	SZ_4M
 
 struct nxp_fspi_devtype_data {
 	unsigned int rxfifo;
@@ -324,11 +327,29 @@
 	.little_endian = true,  /* little-endian    */
 };
 
+static const struct nxp_fspi_devtype_data imx8mm_data = {
+	.rxfifo = SZ_512,       /* (64  * 64 bits)  */
+	.txfifo = SZ_1K,        /* (128 * 64 bits)  */
+	.ahb_buf_size = SZ_2K,  /* (256 * 64 bits)  */
+	.quirks = 0,
+	.little_endian = true,  /* little-endian    */
+};
+
+static const struct nxp_fspi_devtype_data imx8qxp_data = {
+	.rxfifo = SZ_512,       /* (64  * 64 bits)  */
+	.txfifo = SZ_1K,        /* (128 * 64 bits)  */
+	.ahb_buf_size = SZ_2K,  /* (256 * 64 bits)  */
+	.quirks = 0,
+	.little_endian = true,  /* little-endian    */
+};
+
 struct nxp_fspi {
 	void __iomem *iobase;
 	void __iomem *ahb_addr;
 	u32 memmap_phy;
 	u32 memmap_phy_size;
+	u32 memmap_start;
+	u32 memmap_len;
 	struct clk *clk, *clk_en;
 	struct device *dev;
 	struct completion c;
@@ -544,6 +565,9 @@
 {
 	int ret;
 
+	if (is_acpi_node(f->dev->fwnode))
+		return 0;
+
 	ret = clk_prepare_enable(f->clk_en);
 	if (ret)
 		return ret;
@@ -557,10 +581,15 @@
 	return 0;
 }
 
-static void nxp_fspi_clk_disable_unprep(struct nxp_fspi *f)
+static int nxp_fspi_clk_disable_unprep(struct nxp_fspi *f)
 {
+	if (is_acpi_node(f->dev->fwnode))
+		return 0;
+
 	clk_disable_unprepare(f->clk);
 	clk_disable_unprepare(f->clk_en);
+
+	return 0;
 }
 
 /*
@@ -641,12 +670,35 @@
 	f->selected = spi->chip_select;
 }
 
-static void nxp_fspi_read_ahb(struct nxp_fspi *f, const struct spi_mem_op *op)
+static int nxp_fspi_read_ahb(struct nxp_fspi *f, const struct spi_mem_op *op)
 {
+	u32 start = op->addr.val;
 	u32 len = op->data.nbytes;
 
+	/* if necessary, ioremap before AHB read */
+	if ((!f->ahb_addr) || start < f->memmap_start ||
+	     start + len > f->memmap_start + f->memmap_len) {
+		if (f->ahb_addr)
+			iounmap(f->ahb_addr);
+
+		f->memmap_start = start;
+		f->memmap_len = len > NXP_FSPI_MIN_IOMAP ?
+				len : NXP_FSPI_MIN_IOMAP;
+
+		f->ahb_addr = ioremap_wc(f->memmap_phy + f->memmap_start,
+					 f->memmap_len);
+
+		if (!f->ahb_addr) {
+			dev_err(f->dev, "failed to alloc memory\n");
+			return -ENOMEM;
+		}
+	}
+
 	/* Read out the data directly from the AHB buffer. */
-	memcpy_fromio(op->data.buf.in, (f->ahb_addr + op->addr.val), len);
+	memcpy_fromio(op->data.buf.in,
+		      f->ahb_addr + start - f->memmap_start, len);
+
+	return 0;
 }
 
 static void nxp_fspi_fill_txfifo(struct nxp_fspi *f,
@@ -806,7 +858,7 @@
 	 */
 	if (op->data.nbytes > (f->devtype_data->rxfifo - 4) &&
 	    op->data.dir == SPI_MEM_DATA_IN) {
-		nxp_fspi_read_ahb(f, op);
+		err = nxp_fspi_read_ahb(f, op);
 	} else {
 		if (op->data.nbytes && op->data.dir == SPI_MEM_DATA_OUT)
 			nxp_fspi_fill_txfifo(f, op);
@@ -871,8 +923,9 @@
 	fspi_writel(f, FSPI_DLLBCR_OVRDEN, base + FSPI_DLLBCR);
 
 	/* enable module */
-	fspi_writel(f, FSPI_MCR0_AHB_TIMEOUT(0xFF) | FSPI_MCR0_IP_TIMEOUT(0xFF),
-		 base + FSPI_MCR0);
+	fspi_writel(f, FSPI_MCR0_AHB_TIMEOUT(0xFF) |
+		    FSPI_MCR0_IP_TIMEOUT(0xFF) | (u32) FSPI_MCR0_OCTCOMB_EN,
+		    base + FSPI_MCR0);
 
 	/*
 	 * Disable same device enable bit and configure all slave devices
@@ -959,7 +1012,7 @@
 
 	f = spi_controller_get_devdata(ctlr);
 	f->dev = dev;
-	f->devtype_data = of_device_get_match_data(dev);
+	f->devtype_data = device_get_match_data(dev);
 	if (!f->devtype_data) {
 		ret = -ENODEV;
 		goto err_put_ctrl;
@@ -968,7 +1021,12 @@
 	platform_set_drvdata(pdev, f);
 
 	/* find the resources - configuration register address space */
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "fspi_base");
+	if (is_acpi_node(f->dev->fwnode))
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	else
+		res = platform_get_resource_byname(pdev,
+				IORESOURCE_MEM, "fspi_base");
+
 	f->iobase = devm_ioremap_resource(dev, res);
 	if (IS_ERR(f->iobase)) {
 		ret = PTR_ERR(f->iobase);
@@ -976,10 +1034,14 @@
 	}
 
 	/* find the resources - controller memory mapped space */
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "fspi_mmap");
-	f->ahb_addr = devm_ioremap_resource(dev, res);
-	if (IS_ERR(f->ahb_addr)) {
-		ret = PTR_ERR(f->ahb_addr);
+	if (is_acpi_node(f->dev->fwnode))
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	else
+		res = platform_get_resource_byname(pdev,
+				IORESOURCE_MEM, "fspi_mmap");
+
+	if (!res) {
+		ret = -ENODEV;
 		goto err_put_ctrl;
 	}
 
@@ -988,22 +1050,24 @@
 	f->memmap_phy_size = resource_size(res);
 
 	/* find the clocks */
-	f->clk_en = devm_clk_get(dev, "fspi_en");
-	if (IS_ERR(f->clk_en)) {
-		ret = PTR_ERR(f->clk_en);
-		goto err_put_ctrl;
-	}
+	if (dev_of_node(&pdev->dev)) {
+		f->clk_en = devm_clk_get(dev, "fspi_en");
+		if (IS_ERR(f->clk_en)) {
+			ret = PTR_ERR(f->clk_en);
+			goto err_put_ctrl;
+		}
 
-	f->clk = devm_clk_get(dev, "fspi");
-	if (IS_ERR(f->clk)) {
-		ret = PTR_ERR(f->clk);
-		goto err_put_ctrl;
-	}
+		f->clk = devm_clk_get(dev, "fspi");
+		if (IS_ERR(f->clk)) {
+			ret = PTR_ERR(f->clk);
+			goto err_put_ctrl;
+		}
 
-	ret = nxp_fspi_clk_prep_enable(f);
-	if (ret) {
-		dev_err(dev, "can not enable the clock\n");
-		goto err_put_ctrl;
+		ret = nxp_fspi_clk_prep_enable(f);
+		if (ret) {
+			dev_err(dev, "can not enable the clock\n");
+			goto err_put_ctrl;
+		}
 	}
 
 	/* Clear potential interrupts */
@@ -1033,7 +1097,7 @@
 
 	ctlr->dev.of_node = np;
 
-	ret = spi_register_controller(ctlr);
+	ret = devm_spi_register_controller(&pdev->dev, ctlr);
 	if (ret)
 		goto err_destroy_mutex;
 
@@ -1063,6 +1127,9 @@
 
 	mutex_destroy(&f->lock);
 
+	if (f->ahb_addr)
+		iounmap(f->ahb_addr);
+
 	return 0;
 }
 
@@ -1082,10 +1149,20 @@
 
 static const struct of_device_id nxp_fspi_dt_ids[] = {
 	{ .compatible = "nxp,lx2160a-fspi", .data = (void *)&lx2160a_data, },
+	{ .compatible = "nxp,imx8mm-fspi", .data = (void *)&imx8mm_data, },
+	{ .compatible = "nxp,imx8qxp-fspi", .data = (void *)&imx8qxp_data, },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, nxp_fspi_dt_ids);
 
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id nxp_fspi_acpi_ids[] = {
+	{ "NXP0009", .driver_data = (kernel_ulong_t)&lx2160a_data, },
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, nxp_fspi_acpi_ids);
+#endif
+
 static const struct dev_pm_ops nxp_fspi_pm_ops = {
 	.suspend	= nxp_fspi_suspend,
 	.resume		= nxp_fspi_resume,
@@ -1095,6 +1172,7 @@
 	.driver = {
 		.name	= "nxp-fspi",
 		.of_match_table = nxp_fspi_dt_ids,
+		.acpi_match_table = ACPI_PTR(nxp_fspi_acpi_ids),
 		.pm =   &nxp_fspi_pm_ops,
 	},
 	.probe          = nxp_fspi_probe,
diff --git a/drivers/spi/spi-oc-tiny.c b/drivers/spi/spi-oc-tiny.c
index e2331eb..f3843f0 100644
--- a/drivers/spi/spi-oc-tiny.c
+++ b/drivers/spi/spi-oc-tiny.c
@@ -2,7 +2,7 @@
 /*
  * OpenCores tiny SPI master driver
  *
- * http://opencores.org/project,tiny_spi
+ * https://opencores.org/project,tiny_spi
  *
  * Copyright (C) 2011 Thomas Chou <thomas@wytron.com.tw>
  *
@@ -20,7 +20,6 @@
 #include <linux/spi/spi_bitbang.h>
 #include <linux/spi/spi_oc_tiny.h>
 #include <linux/io.h>
-#include <linux/gpio.h>
 #include <linux/of.h>
 
 #define DRV_NAME "spi_oc_tiny"
@@ -50,8 +49,6 @@
 	unsigned int txc, rxc;
 	const u8 *txp;
 	u8 *rxp;
-	int gpio_cs_count;
-	int *gpio_cs;
 };
 
 static inline struct tiny_spi *tiny_spi_to_hw(struct spi_device *sdev)
@@ -66,16 +63,6 @@
 	return min(DIV_ROUND_UP(hw->freq, hz * 2), (1U << hw->baudwidth)) - 1;
 }
 
-static void tiny_spi_chipselect(struct spi_device *spi, int is_active)
-{
-	struct tiny_spi *hw = tiny_spi_to_hw(spi);
-
-	if (hw->gpio_cs_count > 0) {
-		gpio_set_value(hw->gpio_cs[spi->chip_select],
-			(spi->mode & SPI_CS_HIGH) ? is_active : !is_active);
-	}
-}
-
 static int tiny_spi_setup_transfer(struct spi_device *spi,
 				   struct spi_transfer *t)
 {
@@ -203,24 +190,10 @@
 {
 	struct tiny_spi *hw = platform_get_drvdata(pdev);
 	struct device_node *np = pdev->dev.of_node;
-	unsigned int i;
 	u32 val;
 
 	if (!np)
 		return 0;
-	hw->gpio_cs_count = of_gpio_count(np);
-	if (hw->gpio_cs_count > 0) {
-		hw->gpio_cs = devm_kcalloc(&pdev->dev,
-				hw->gpio_cs_count, sizeof(unsigned int),
-				GFP_KERNEL);
-		if (!hw->gpio_cs)
-			return -ENOMEM;
-	}
-	for (i = 0; i < hw->gpio_cs_count; i++) {
-		hw->gpio_cs[i] = of_get_gpio_flags(np, i, NULL);
-		if (hw->gpio_cs[i] < 0)
-			return -ENODEV;
-	}
 	hw->bitbang.master->dev.of_node = pdev->dev.of_node;
 	if (!of_property_read_u32(np, "clock-frequency", &val))
 		hw->freq = val;
@@ -240,7 +213,6 @@
 	struct tiny_spi_platform_data *platp = dev_get_platdata(&pdev->dev);
 	struct tiny_spi *hw;
 	struct spi_master *master;
-	unsigned int i;
 	int err = -ENODEV;
 
 	master = spi_alloc_master(&pdev->dev, sizeof(struct tiny_spi));
@@ -249,9 +221,9 @@
 
 	/* setup the master state. */
 	master->bus_num = pdev->id;
-	master->num_chipselect = 255;
 	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
 	master->setup = tiny_spi_setup;
+	master->use_gpio_descriptors = true;
 
 	hw = spi_master_get_devdata(master);
 	platform_set_drvdata(pdev, hw);
@@ -259,7 +231,6 @@
 	/* setup the state for the bitbang driver */
 	hw->bitbang.master = master;
 	hw->bitbang.setup_transfer = tiny_spi_setup_transfer;
-	hw->bitbang.chipselect = tiny_spi_chipselect;
 	hw->bitbang.txrx_bufs = tiny_spi_txrx_bufs;
 
 	/* find and map our resources */
@@ -279,12 +250,6 @@
 	}
 	/* find platform data */
 	if (platp) {
-		hw->gpio_cs_count = platp->gpio_cs_count;
-		hw->gpio_cs = platp->gpio_cs;
-		if (platp->gpio_cs_count && !platp->gpio_cs) {
-			err = -EBUSY;
-			goto exit;
-		}
 		hw->freq = platp->freq;
 		hw->baudwidth = platp->baudwidth;
 	} else {
@@ -292,13 +257,6 @@
 		if (err)
 			goto exit;
 	}
-	for (i = 0; i < hw->gpio_cs_count; i++) {
-		err = gpio_request(hw->gpio_cs[i], dev_name(&pdev->dev));
-		if (err)
-			goto exit_gpio;
-		gpio_direction_output(hw->gpio_cs[i], 1);
-	}
-	hw->bitbang.master->num_chipselect = max(1, hw->gpio_cs_count);
 
 	/* register our spi controller */
 	err = spi_bitbang_start(&hw->bitbang);
@@ -308,9 +266,6 @@
 
 	return 0;
 
-exit_gpio:
-	while (i-- > 0)
-		gpio_free(hw->gpio_cs[i]);
 exit:
 	spi_master_put(master);
 	return err;
@@ -320,11 +275,8 @@
 {
 	struct tiny_spi *hw = platform_get_drvdata(pdev);
 	struct spi_master *master = hw->bitbang.master;
-	unsigned int i;
 
 	spi_bitbang_stop(&hw->bitbang);
-	for (i = 0; i < hw->gpio_cs_count; i++)
-		gpio_free(hw->gpio_cs[i]);
 	spi_master_put(master);
 	return 0;
 }
diff --git a/drivers/spi/spi-omap-100k.c b/drivers/spi/spi-omap-100k.c
index f64d030..0d0cd06 100644
--- a/drivers/spi/spi-omap-100k.c
+++ b/drivers/spi/spi-omap-100k.c
@@ -19,7 +19,6 @@
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/io.h>
-#include <linux/gpio.h>
 #include <linux/slab.h>
 
 #include <linux/spi/spi.h>
@@ -128,7 +127,7 @@
 
 static int spi100k_read_data(struct spi_master *master, int len)
 {
-	int dataH, dataL;
+	int dataL;
 	struct omap1_spi100k *spi100k = spi_master_get_devdata(master);
 
 	/* Always do at least 16 bits */
@@ -146,7 +145,7 @@
 	udelay(1000);
 
 	dataL = readw(spi100k->base + SPI_RX_LSB);
-	dataH = readw(spi100k->base + SPI_RX_MSB);
+	readw(spi100k->base + SPI_RX_MSB);
 	spi100k_disable_clock(master);
 
 	return dataL;
@@ -321,8 +320,7 @@
 			}
 		}
 
-		if (t->delay_usecs)
-			udelay(t->delay_usecs);
+		spi_transfer_delay_exec(t);
 
 		/* ignore the "leave it on after last xfer" hint */
 
diff --git a/drivers/spi/spi-omap-uwire.c b/drivers/spi/spi-omap-uwire.c
index 85ee907..df28c66 100644
--- a/drivers/spi/spi-omap-uwire.c
+++ b/drivers/spi/spi-omap-uwire.c
@@ -450,7 +450,7 @@
 static void uwire_off(struct uwire_spi *uwire)
 {
 	uwire_write_reg(UWIRE_SR3, 0);
-	clk_disable(uwire->ck);
+	clk_disable_unprepare(uwire->ck);
 	spi_master_put(uwire->bitbang.master);
 }
 
@@ -482,7 +482,7 @@
 		spi_master_put(master);
 		return status;
 	}
-	clk_enable(uwire->ck);
+	clk_prepare_enable(uwire->ck);
 
 	if (cpu_is_omap7xx())
 		uwire_idx_shift = 1;
diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index 38f4be2..3596bbe 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -26,7 +26,6 @@
 #include <linux/gcd.h>
 
 #include <linux/spi/spi.h>
-#include <linux/gpio.h>
 
 #include <linux/platform_data/spi-omap2-mcspi.h>
 
@@ -129,6 +128,7 @@
 	int			fifo_depth;
 	bool			slave_aborted;
 	unsigned int		pin_dir:1;
+	size_t			max_xfer_len;
 };
 
 struct omap2_mcspi_cs {
@@ -406,30 +406,26 @@
 {
 	struct omap2_mcspi	*mcspi;
 	struct omap2_mcspi_dma  *mcspi_dma;
+	struct dma_async_tx_descriptor *tx;
 
 	mcspi = spi_master_get_devdata(spi->master);
 	mcspi_dma = &mcspi->dma_channels[spi->chip_select];
 
-	if (mcspi_dma->dma_tx) {
-		struct dma_async_tx_descriptor *tx;
+	dmaengine_slave_config(mcspi_dma->dma_tx, &cfg);
 
-		dmaengine_slave_config(mcspi_dma->dma_tx, &cfg);
-
-		tx = dmaengine_prep_slave_sg(mcspi_dma->dma_tx, xfer->tx_sg.sgl,
-					     xfer->tx_sg.nents,
-					     DMA_MEM_TO_DEV,
-					     DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
-		if (tx) {
-			tx->callback = omap2_mcspi_tx_callback;
-			tx->callback_param = spi;
-			dmaengine_submit(tx);
-		} else {
-			/* FIXME: fall back to PIO? */
-		}
+	tx = dmaengine_prep_slave_sg(mcspi_dma->dma_tx, xfer->tx_sg.sgl,
+				     xfer->tx_sg.nents,
+				     DMA_MEM_TO_DEV,
+				     DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (tx) {
+		tx->callback = omap2_mcspi_tx_callback;
+		tx->callback_param = spi;
+		dmaengine_submit(tx);
+	} else {
+		/* FIXME: fall back to PIO? */
 	}
 	dma_async_issue_pending(mcspi_dma->dma_tx);
 	omap2_mcspi_set_dma_req(spi, 0, 1);
-
 }
 
 static unsigned
@@ -448,6 +444,7 @@
 	int			word_len, element_count;
 	struct omap2_mcspi_cs	*cs = spi->controller_state;
 	void __iomem		*chstat_reg = cs->base + OMAP2_MCSPI_CHSTAT0;
+	struct dma_async_tx_descriptor *tx;
 
 	mcspi = spi_master_get_devdata(spi->master);
 	mcspi_dma = &mcspi->dma_channels[spi->chip_select];
@@ -471,55 +468,47 @@
 	else /* word_len <= 32 */
 		element_count = count >> 2;
 
-	if (mcspi_dma->dma_rx) {
-		struct dma_async_tx_descriptor *tx;
 
-		dmaengine_slave_config(mcspi_dma->dma_rx, &cfg);
+	dmaengine_slave_config(mcspi_dma->dma_rx, &cfg);
 
+	/*
+	 *  Reduce DMA transfer length by one more if McSPI is
+	 *  configured in turbo mode.
+	 */
+	if ((l & OMAP2_MCSPI_CHCONF_TURBO) && mcspi->fifo_depth == 0)
+		transfer_reduction += es;
+
+	if (transfer_reduction) {
+		/* Split sgl into two. The second sgl won't be used. */
+		sizes[0] = count - transfer_reduction;
+		sizes[1] = transfer_reduction;
+		nb_sizes = 2;
+	} else {
 		/*
-		 *  Reduce DMA transfer length by one more if McSPI is
-		 *  configured in turbo mode.
+		 * Don't bother splitting the sgl. This essentially
+		 * clones the original sgl.
 		 */
-		if ((l & OMAP2_MCSPI_CHCONF_TURBO) && mcspi->fifo_depth == 0)
-			transfer_reduction += es;
+		sizes[0] = count;
+		nb_sizes = 1;
+	}
 
-		if (transfer_reduction) {
-			/* Split sgl into two. The second sgl won't be used. */
-			sizes[0] = count - transfer_reduction;
-			sizes[1] = transfer_reduction;
-			nb_sizes = 2;
-		} else {
-			/*
-			 * Don't bother splitting the sgl. This essentially
-			 * clones the original sgl.
-			 */
-			sizes[0] = count;
-			nb_sizes = 1;
-		}
+	ret = sg_split(xfer->rx_sg.sgl, xfer->rx_sg.nents, 0, nb_sizes,
+		       sizes, sg_out, out_mapped_nents, GFP_KERNEL);
 
-		ret = sg_split(xfer->rx_sg.sgl, xfer->rx_sg.nents,
-			       0, nb_sizes,
-			       sizes,
-			       sg_out, out_mapped_nents,
-			       GFP_KERNEL);
+	if (ret < 0) {
+		dev_err(&spi->dev, "sg_split failed\n");
+		return 0;
+	}
 
-		if (ret < 0) {
-			dev_err(&spi->dev, "sg_split failed\n");
-			return 0;
-		}
-
-		tx = dmaengine_prep_slave_sg(mcspi_dma->dma_rx,
-					     sg_out[0],
-					     out_mapped_nents[0],
-					     DMA_DEV_TO_MEM,
-					     DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
-		if (tx) {
-			tx->callback = omap2_mcspi_rx_callback;
-			tx->callback_param = spi;
-			dmaengine_submit(tx);
-		} else {
-				/* FIXME: fall back to PIO? */
-		}
+	tx = dmaengine_prep_slave_sg(mcspi_dma->dma_rx, sg_out[0],
+				     out_mapped_nents[0], DMA_DEV_TO_MEM,
+				     DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (tx) {
+		tx->callback = omap2_mcspi_rx_callback;
+		tx->callback_param = spi;
+		dmaengine_submit(tx);
+	} else {
+		/* FIXME: fall back to PIO? */
 	}
 
 	dma_async_issue_pending(mcspi_dma->dma_rx);
@@ -1054,9 +1043,6 @@
 
 		kfree(cs);
 	}
-
-	if (gpio_is_valid(spi->cs_gpio))
-		gpio_free(spi->cs_gpio);
 }
 
 static int omap2_mcspi_setup(struct spi_device *spi)
@@ -1080,16 +1066,6 @@
 		/* Link this to context save list */
 		list_add_tail(&cs->node, &ctx->cs);
 		initial_setup = true;
-
-		if (gpio_is_valid(spi->cs_gpio)) {
-			ret = gpio_request(spi->cs_gpio, dev_name(&spi->dev));
-			if (ret) {
-				dev_err(&spi->dev, "failed to request gpio\n");
-				return ret;
-			}
-			gpio_direction_output(spi->cs_gpio,
-					 !(spi->mode & SPI_CS_HIGH));
-		}
 	}
 
 	ret = pm_runtime_get_sync(mcspi->dev);
@@ -1178,7 +1154,7 @@
 
 	omap2_mcspi_set_enable(spi, 0);
 
-	if (gpio_is_valid(spi->cs_gpio))
+	if (spi->cs_gpiod)
 		omap2_mcspi_set_cs(spi, spi->mode & SPI_CS_HIGH);
 
 	if (par_override ||
@@ -1267,7 +1243,7 @@
 
 	omap2_mcspi_set_enable(spi, 0);
 
-	if (gpio_is_valid(spi->cs_gpio))
+	if (spi->cs_gpiod)
 		omap2_mcspi_set_cs(spi, !(spi->mode & SPI_CS_HIGH));
 
 	if (mcspi->fifo_depth > 0 && t)
@@ -1323,6 +1299,18 @@
 	return (xfer->len >= DMA_MIN_BYTES);
 }
 
+static size_t omap2_mcspi_max_xfer_size(struct spi_device *spi)
+{
+	struct omap2_mcspi *mcspi = spi_master_get_devdata(spi->master);
+	struct omap2_mcspi_dma *mcspi_dma =
+		&mcspi->dma_channels[spi->chip_select];
+
+	if (mcspi->max_xfer_len && mcspi_dma->dma_rx)
+		return mcspi->max_xfer_len;
+
+	return SIZE_MAX;
+}
+
 static int omap2_mcspi_controller_setup(struct omap2_mcspi *mcspi)
 {
 	struct spi_master	*master = mcspi->master;
@@ -1391,6 +1379,11 @@
 	.regs_offset = OMAP4_MCSPI_REG_OFFSET,
 };
 
+static struct omap2_mcspi_platform_config am654_pdata = {
+	.regs_offset = OMAP4_MCSPI_REG_OFFSET,
+	.max_xfer_len = SZ_4K - 1,
+};
+
 static const struct of_device_id omap_mcspi_of_match[] = {
 	{
 		.compatible = "ti,omap2-mcspi",
@@ -1400,6 +1393,10 @@
 		.compatible = "ti,omap4-mcspi",
 		.data = &omap4_pdata,
 	},
+	{
+		.compatible = "ti,am654-mcspi",
+		.data = &am654_pdata,
+	},
 	{ },
 };
 MODULE_DEVICE_TABLE(of, omap_mcspi_of_match);
@@ -1436,6 +1433,7 @@
 	master->dev.of_node = node;
 	master->max_speed_hz = OMAP2_MCSPI_MAX_FREQ;
 	master->min_speed_hz = OMAP2_MCSPI_MAX_FREQ >> 15;
+	master->use_gpio_descriptors = true;
 
 	platform_set_drvdata(pdev, master);
 
@@ -1457,6 +1455,10 @@
 		mcspi->pin_dir = pdata->pin_dir;
 	}
 	regs_offset = pdata->regs_offset;
+	if (pdata->max_xfer_len) {
+		mcspi->max_xfer_len = pdata->max_xfer_len;
+		master->max_transfer_size = omap2_mcspi_max_xfer_size;
+	}
 
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	mcspi->base = devm_ioremap_resource(&pdev->dev, r);
diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c
index 6643ccd..b57b8b3 100644
--- a/drivers/spi/spi-orion.c
+++ b/drivers/spi/spi-orion.c
@@ -17,10 +17,8 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
-#include <linux/of_gpio.h>
 #include <linux/clk.h>
 #include <linux/sizes.h>
-#include <linux/gpio.h>
 #include <asm/unaligned.h>
 
 #define DRIVER_NAME			"orion_spi"
@@ -98,7 +96,6 @@
 	struct clk              *clk;
 	struct clk              *axi_clk;
 	const struct orion_spi_dev *devdata;
-	int			unused_hw_gpio;
 
 	struct orion_child_options	child[ORION_NUM_CHIPSELECTS];
 };
@@ -325,20 +322,27 @@
 static void orion_spi_set_cs(struct spi_device *spi, bool enable)
 {
 	struct orion_spi *orion_spi;
-	int cs;
 
 	orion_spi = spi_master_get_devdata(spi->master);
 
-	if (gpio_is_valid(spi->cs_gpio))
-		cs = orion_spi->unused_hw_gpio;
-	else
-		cs = spi->chip_select;
-
+	/*
+	 * If this line is using a GPIO to control chip select, this internal
+	 * .set_cs() function will still be called, so we clear any previous
+	 * chip select. The CS we activate will not have any elecrical effect,
+	 * as it is handled by a GPIO, but that doesn't matter. What we need
+	 * is to deassert the old chip select and assert some other chip select.
+	 */
 	orion_spi_clrbits(orion_spi, ORION_SPI_IF_CTRL_REG, ORION_SPI_CS_MASK);
 	orion_spi_setbits(orion_spi, ORION_SPI_IF_CTRL_REG,
-				ORION_SPI_CS(cs));
+			  ORION_SPI_CS(spi->chip_select));
 
-	/* Chip select logic is inverted from spi_set_cs */
+	/*
+	 * Chip select logic is inverted from spi_set_cs(). For lines using a
+	 * GPIO to do chip select SPI_CS_HIGH is enforced and inversion happens
+	 * in the GPIO library, but we don't care about that, because in those
+	 * cases we are dealing with an unused native CS anyways so the polarity
+	 * doesn't matter.
+	 */
 	if (!enable)
 		orion_spi_setbits(orion_spi, ORION_SPI_IF_CTRL_REG, 0x1);
 	else
@@ -467,8 +471,7 @@
 			if (orion_spi_write_read_8bit(spi, &tx, &rx) < 0)
 				goto out;
 			count--;
-			if (xfer->word_delay_usecs)
-				udelay(xfer->word_delay_usecs);
+			spi_delay_exec(&xfer->word_delay, xfer);
 		} while (count);
 	} else if (word_len == 16) {
 		const u16 *tx = xfer->tx_buf;
@@ -478,8 +481,7 @@
 			if (orion_spi_write_read_16bit(spi, &tx, &rx) < 0)
 				goto out;
 			count -= 2;
-			if (xfer->word_delay_usecs)
-				udelay(xfer->word_delay_usecs);
+			spi_delay_exec(&xfer->word_delay, xfer);
 		} while (count);
 	}
 
@@ -505,9 +507,6 @@
 
 static int orion_spi_setup(struct spi_device *spi)
 {
-	if (gpio_is_valid(spi->cs_gpio)) {
-		gpio_direction_output(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH));
-	}
 	return orion_spi_setup_transfer(spi, NULL);
 }
 
@@ -624,13 +623,13 @@
 	master->setup = orion_spi_setup;
 	master->bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16);
 	master->auto_runtime_pm = true;
+	master->use_gpio_descriptors = true;
 	master->flags = SPI_MASTER_GPIO_SS;
 
 	platform_set_drvdata(pdev, master);
 
 	spi = spi_master_get_devdata(master);
 	spi->master = master;
-	spi->unused_hw_gpio = -1;
 
 	of_id = of_match_device(orion_spi_of_match_table, &pdev->dev);
 	devdata = (of_id) ? of_id->data : &orion_spi_dev_data;
@@ -648,8 +647,7 @@
 
 	/* The following clock is only used by some SoCs */
 	spi->axi_clk = devm_clk_get(&pdev->dev, "axi");
-	if (IS_ERR(spi->axi_clk) &&
-	    PTR_ERR(spi->axi_clk) == -EPROBE_DEFER) {
+	if (PTR_ERR(spi->axi_clk) == -EPROBE_DEFER) {
 		status = -EPROBE_DEFER;
 		goto out_rel_clk;
 	}
@@ -686,7 +684,6 @@
 	for_each_available_child_of_node(pdev->dev.of_node, np) {
 		struct orion_direct_acc *dir_acc;
 		u32 cs;
-		int cs_gpio;
 
 		/* Get chip-select number from the "reg" property */
 		status = of_property_read_u32(np, "reg", &cs);
@@ -698,44 +695,6 @@
 		}
 
 		/*
-		 * Initialize the CS GPIO:
-		 * - properly request the actual GPIO signal
-		 * - de-assert the logical signal so that all GPIO CS lines
-		 *   are inactive when probing for slaves
-		 * - find an unused physical CS which will be driven for any
-		 *   slave which uses a CS GPIO
-		 */
-		cs_gpio = of_get_named_gpio(pdev->dev.of_node, "cs-gpios", cs);
-		if (cs_gpio > 0) {
-			char *gpio_name;
-			int cs_flags;
-
-			if (spi->unused_hw_gpio == -1) {
-				dev_info(&pdev->dev,
-					"Selected unused HW CS#%d for any GPIO CSes\n",
-					cs);
-				spi->unused_hw_gpio = cs;
-			}
-
-			gpio_name = devm_kasprintf(&pdev->dev, GFP_KERNEL,
-					"%s-CS%d", dev_name(&pdev->dev), cs);
-			if (!gpio_name) {
-				status = -ENOMEM;
-				goto out_rel_axi_clk;
-			}
-
-			cs_flags = of_property_read_bool(np, "spi-cs-high") ?
-				GPIOF_OUT_INIT_LOW : GPIOF_OUT_INIT_HIGH;
-			status = devm_gpio_request_one(&pdev->dev, cs_gpio,
-					cs_flags, gpio_name);
-			if (status) {
-				dev_err(&pdev->dev,
-					"Can't request GPIO for CS %d\n", cs);
-				goto out_rel_axi_clk;
-			}
-		}
-
-		/*
 		 * Check if an address is configured for this SPI device. If
 		 * not, the MBus mapping via the 'ranges' property in the 'soc'
 		 * node is not configured and this device should not use the
@@ -749,7 +708,7 @@
 		/*
 		 * Only map one page for direct access. This is enough for the
 		 * simple TX transfer which only writes to the first word.
-		 * This needs to get extended for the direct SPI-NOR / SPI-NAND
+		 * This needs to get extended for the direct SPI NOR / SPI NAND
 		 * support, once this gets implemented.
 		 */
 		dir_acc = &spi->child[cs].direct_access;
@@ -772,9 +731,6 @@
 	if (status < 0)
 		goto out_rel_pm;
 
-	pm_runtime_mark_last_busy(&pdev->dev);
-	pm_runtime_put_autosuspend(&pdev->dev);
-
 	master->dev.of_node = pdev->dev.of_node;
 	status = spi_register_master(master);
 	if (status < 0)
diff --git a/drivers/spi/spi-pic32.c b/drivers/spi/spi-pic32.c
index b5268b0..5eb7b61 100644
--- a/drivers/spi/spi-pic32.c
+++ b/drivers/spi/spi-pic32.c
@@ -607,25 +607,30 @@
 	gpio_direction_output(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH));
 }
 
-static void pic32_spi_dma_prep(struct pic32_spi *pic32s, struct device *dev)
+static int pic32_spi_dma_prep(struct pic32_spi *pic32s, struct device *dev)
 {
 	struct spi_master *master = pic32s->master;
-	dma_cap_mask_t mask;
+	int ret = 0;
 
-	dma_cap_zero(mask);
-	dma_cap_set(DMA_SLAVE, mask);
+	master->dma_rx = dma_request_chan(dev, "spi-rx");
+	if (IS_ERR(master->dma_rx)) {
+		if (PTR_ERR(master->dma_rx) == -EPROBE_DEFER)
+			ret = -EPROBE_DEFER;
+		else
+			dev_warn(dev, "RX channel not found.\n");
 
-	master->dma_rx = dma_request_slave_channel_compat(mask, NULL, NULL,
-							  dev, "spi-rx");
-	if (!master->dma_rx) {
-		dev_warn(dev, "RX channel not found.\n");
+		master->dma_rx = NULL;
 		goto out_err;
 	}
 
-	master->dma_tx = dma_request_slave_channel_compat(mask, NULL, NULL,
-							  dev, "spi-tx");
-	if (!master->dma_tx) {
-		dev_warn(dev, "TX channel not found.\n");
+	master->dma_tx = dma_request_chan(dev, "spi-tx");
+	if (IS_ERR(master->dma_tx)) {
+		if (PTR_ERR(master->dma_tx) == -EPROBE_DEFER)
+			ret = -EPROBE_DEFER;
+		else
+			dev_warn(dev, "TX channel not found.\n");
+
+		master->dma_tx = NULL;
 		goto out_err;
 	}
 
@@ -635,14 +640,20 @@
 	/* DMA chnls allocated and prepared */
 	set_bit(PIC32F_DMA_PREP, &pic32s->flags);
 
-	return;
+	return 0;
 
 out_err:
-	if (master->dma_rx)
+	if (master->dma_rx) {
 		dma_release_channel(master->dma_rx);
+		master->dma_rx = NULL;
+	}
 
-	if (master->dma_tx)
+	if (master->dma_tx) {
 		dma_release_channel(master->dma_tx);
+		master->dma_tx = NULL;
+	}
+
+	return ret;
 }
 
 static void pic32_spi_dma_unprep(struct pic32_spi *pic32s)
@@ -777,7 +788,10 @@
 	master->unprepare_transfer_hardware	= pic32_spi_unprepare_hardware;
 
 	/* optional DMA support */
-	pic32_spi_dma_prep(pic32s, &pdev->dev);
+	ret = pic32_spi_dma_prep(pic32s, &pdev->dev);
+	if (ret)
+		goto err_bailout;
+
 	if (test_bit(PIC32F_DMA_PREP, &pic32s->flags))
 		master->can_dma	= pic32_spi_can_dma;
 
diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c
index 7fedea6..e4ee8b0 100644
--- a/drivers/spi/spi-pl022.c
+++ b/drivers/spi/spi-pl022.c
@@ -298,7 +298,7 @@
 	READING_U32
 };
 
-/**
+/*
  * The type of writing going on on this chip
  */
 enum ssp_writing {
@@ -317,6 +317,7 @@
  * @extended_cr: 32 bit wide control register 0 with extra
  * features and extra features in CR1 as found in the ST variants
  * @pl023: supports a subset of the ST extensions called "PL023"
+ * @loopback: supports loopback mode
  * @internal_cs_ctrl: supports chip select control register
  */
 struct vendor_data {
@@ -353,11 +354,14 @@
  * @read: the type of read currently going on
  * @write: the type of write currently going on
  * @exp_fifo_level: expected FIFO level
+ * @rx_lev_trig: receive FIFO watermark level which triggers IRQ
+ * @tx_lev_trig: transmit FIFO watermark level which triggers IRQ
  * @dma_rx_channel: optional channel for RX DMA
  * @dma_tx_channel: optional channel for TX DMA
  * @sgt_rx: scattertable for the RX transfer
  * @sgt_tx: scattertable for the TX transfer
  * @dummypage: a dummy page used for driving data on the bus with DMA
+ * @dma_running: indicates whether DMA is in operation
  * @cur_cs: current chip select (gpio)
  * @chipselects: list of chipselects (gpios)
  */
@@ -485,12 +489,11 @@
 					struct spi_transfer, transfer_list);
 
 	/* Delay if requested before any change in chip select */
-	if (last_transfer->delay_usecs)
-		/*
-		 * FIXME: This runs in interrupt context.
-		 * Is this really smart?
-		 */
-		udelay(last_transfer->delay_usecs);
+	/*
+	 * FIXME: This runs in interrupt context.
+	 * Is this really smart?
+	 */
+	spi_transfer_delay_exec(last_transfer);
 
 	if (!last_transfer->cs_change) {
 		struct spi_message *next_msg;
@@ -663,7 +666,7 @@
 	writew(CLEAR_ALL_INTERRUPTS, SSP_ICR(pl022->virtbase));
 }
 
-/**
+/*
  * This will write to TX and read from RX according to the parameters
  * set in pl022.
  */
@@ -1159,7 +1162,7 @@
 	int err;
 
 	/* automatically configure DMA channels from platform, normally using DT */
-	chan = dma_request_slave_channel_reason(dev, "rx");
+	chan = dma_request_chan(dev, "rx");
 	if (IS_ERR(chan)) {
 		err = PTR_ERR(chan);
 		goto err_no_rxchan;
@@ -1167,7 +1170,7 @@
 
 	pl022->dma_rx_channel = chan;
 
-	chan = dma_request_slave_channel_reason(dev, "tx");
+	chan = dma_request_chan(dev, "tx");
 	if (IS_ERR(chan)) {
 		err = PTR_ERR(chan);
 		goto err_no_txchan;
@@ -1238,6 +1241,8 @@
 
 /**
  * pl022_interrupt_handler - Interrupt handler for SSP controller
+ * @irq: IRQ number
+ * @dev_id: Local device data
  *
  * This function handles interrupts generated for an interrupt based transfer.
  * If a receive overrun (ROR) interrupt is there then we disable SSP, flag the
@@ -1335,7 +1340,7 @@
 	return IRQ_HANDLED;
 }
 
-/**
+/*
  * This sets up the pointers to memory for the next message to
  * send out on the SPI bus.
  */
@@ -1401,12 +1406,11 @@
 		previous = list_entry(transfer->transfer_list.prev,
 					struct spi_transfer,
 					transfer_list);
-		if (previous->delay_usecs)
-			/*
-			 * FIXME: This runs in interrupt context.
-			 * Is this really smart?
-			 */
-			udelay(previous->delay_usecs);
+		/*
+		 * FIXME: This runs in interrupt context.
+		 * Is this really smart?
+		 */
+		spi_transfer_delay_exec(previous);
 
 		/* Reselect chip select only if cs_change was requested */
 		if (previous->cs_change)
@@ -1520,8 +1524,7 @@
 			previous =
 			    list_entry(transfer->transfer_list.prev,
 				       struct spi_transfer, transfer_list);
-			if (previous->delay_usecs)
-				udelay(previous->delay_usecs);
+			spi_transfer_delay_exec(previous);
 			if (previous->cs_change)
 				pl022_cs_control(pl022, SSP_CHIP_SELECT);
 		} else {
@@ -1720,12 +1723,13 @@
 				return -EINVAL;
 			}
 		} else {
-			if (chip_info->duplex != SSP_MICROWIRE_CHANNEL_FULL_DUPLEX)
+			if (chip_info->duplex != SSP_MICROWIRE_CHANNEL_FULL_DUPLEX) {
 				dev_err(&pl022->adev->dev,
 					"Microwire half duplex mode requested,"
 					" but this is only available in the"
 					" ST version of PL022\n");
-			return -EINVAL;
+				return -EINVAL;
+			}
 		}
 	}
 	return 0;
diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c
index 0ea2d9a..d8ee363 100644
--- a/drivers/spi/spi-ppc4xx.c
+++ b/drivers/spi/spi-ppc4xx.c
@@ -28,11 +28,9 @@
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
-#include <linux/of_gpio.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 
-#include <linux/gpio.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
 
@@ -127,8 +125,6 @@
 	const unsigned char *tx;
 	unsigned char *rx;
 
-	int *gpios;
-
 	struct spi_ppc4xx_regs __iomem *regs; /* pointer to the registers */
 	struct spi_master *master;
 	struct device *dev;
@@ -260,27 +256,6 @@
 	return 0;
 }
 
-static void spi_ppc4xx_chipsel(struct spi_device *spi, int value)
-{
-	struct ppc4xx_spi *hw = spi_master_get_devdata(spi->master);
-	unsigned int cs = spi->chip_select;
-	unsigned int cspol;
-
-	/*
-	 * If there are no chip selects at all, or if this is the special
-	 * case of a non-existent (dummy) chip select, do nothing.
-	 */
-
-	if (!hw->master->num_chipselect || hw->gpios[cs] == -EEXIST)
-		return;
-
-	cspol = spi->mode & SPI_CS_HIGH ? 1 : 0;
-	if (value == BITBANG_CS_INACTIVE)
-		cspol = !cspol;
-
-	gpio_set_value(hw->gpios[cs], cspol);
-}
-
 static irqreturn_t spi_ppc4xx_int(int irq, void *dev_id)
 {
 	struct ppc4xx_spi *hw;
@@ -359,19 +334,6 @@
 	dcri_clrset(SDR0, SDR0_PFC1, 0x80000000 >> 14, 0);
 }
 
-static void free_gpios(struct ppc4xx_spi *hw)
-{
-	if (hw->master->num_chipselect) {
-		int i;
-		for (i = 0; i < hw->master->num_chipselect; i++)
-			if (gpio_is_valid(hw->gpios[i]))
-				gpio_free(hw->gpios[i]);
-
-		kfree(hw->gpios);
-		hw->gpios = NULL;
-	}
-}
-
 /*
  * platform_device layer stuff...
  */
@@ -385,7 +347,6 @@
 	struct device *dev = &op->dev;
 	struct device_node *opbnp;
 	int ret;
-	int num_gpios;
 	const unsigned int *clk;
 
 	master = spi_alloc_master(dev, sizeof *hw);
@@ -399,74 +360,32 @@
 
 	init_completion(&hw->done);
 
-	/*
-	 * A count of zero implies a single SPI device without any chip-select.
-	 * Note that of_gpio_count counts all gpios assigned to this spi master.
-	 * This includes both "null" gpio's and real ones.
-	 */
-	num_gpios = of_gpio_count(np);
-	if (num_gpios > 0) {
-		int i;
-
-		hw->gpios = kcalloc(num_gpios, sizeof(*hw->gpios), GFP_KERNEL);
-		if (!hw->gpios) {
-			ret = -ENOMEM;
-			goto free_master;
-		}
-
-		for (i = 0; i < num_gpios; i++) {
-			int gpio;
-			enum of_gpio_flags flags;
-
-			gpio = of_get_gpio_flags(np, i, &flags);
-			hw->gpios[i] = gpio;
-
-			if (gpio_is_valid(gpio)) {
-				/* Real CS - set the initial state. */
-				ret = gpio_request(gpio, np->name);
-				if (ret < 0) {
-					dev_err(dev,
-						"can't request gpio #%d: %d\n",
-						i, ret);
-					goto free_gpios;
-				}
-
-				gpio_direction_output(gpio,
-						!!(flags & OF_GPIO_ACTIVE_LOW));
-			} else if (gpio == -EEXIST) {
-				; /* No CS, but that's OK. */
-			} else {
-				dev_err(dev, "invalid gpio #%d: %d\n", i, gpio);
-				ret = -EINVAL;
-				goto free_gpios;
-			}
-		}
-	}
-
 	/* Setup the state for the bitbang driver */
 	bbp = &hw->bitbang;
 	bbp->master = hw->master;
 	bbp->setup_transfer = spi_ppc4xx_setupxfer;
-	bbp->chipselect = spi_ppc4xx_chipsel;
 	bbp->txrx_bufs = spi_ppc4xx_txrx;
 	bbp->use_dma = 0;
 	bbp->master->setup = spi_ppc4xx_setup;
 	bbp->master->cleanup = spi_ppc4xx_cleanup;
 	bbp->master->bits_per_word_mask = SPI_BPW_MASK(8);
+	bbp->master->use_gpio_descriptors = true;
+	/*
+	 * The SPI core will count the number of GPIO descriptors to figure
+	 * out the number of chip selects available on the platform.
+	 */
+	bbp->master->num_chipselect = 0;
 
 	/* the spi->mode bits understood by this driver: */
 	bbp->master->mode_bits =
 		SPI_CPHA | SPI_CPOL | SPI_CS_HIGH | SPI_LSB_FIRST;
 
-	/* this many pins in all GPIO controllers */
-	bbp->master->num_chipselect = num_gpios > 0 ? num_gpios : 0;
-
 	/* Get the clock for the OPB */
 	opbnp = of_find_compatible_node(NULL, NULL, "ibm,opb");
 	if (opbnp == NULL) {
 		dev_err(dev, "OPB: cannot find node\n");
 		ret = -ENODEV;
-		goto free_gpios;
+		goto free_master;
 	}
 	/* Get the clock (Hz) for the OPB */
 	clk = of_get_property(opbnp, "clock-frequency", NULL);
@@ -474,7 +393,7 @@
 		dev_err(dev, "OPB: no clock-frequency property set\n");
 		of_node_put(opbnp);
 		ret = -ENODEV;
-		goto free_gpios;
+		goto free_master;
 	}
 	hw->opb_freq = *clk;
 	hw->opb_freq >>= 2;
@@ -483,7 +402,7 @@
 	ret = of_address_to_resource(np, 0, &resource);
 	if (ret) {
 		dev_err(dev, "error while parsing device node resource\n");
-		goto free_gpios;
+		goto free_master;
 	}
 	hw->mapbase = resource.start;
 	hw->mapsize = resource_size(&resource);
@@ -492,7 +411,7 @@
 	if (hw->mapsize < sizeof(struct spi_ppc4xx_regs)) {
 		dev_err(dev, "too small to map registers\n");
 		ret = -EINVAL;
-		goto free_gpios;
+		goto free_master;
 	}
 
 	/* Request IRQ */
@@ -501,7 +420,7 @@
 			  0, "spi_ppc4xx_of", (void *)hw);
 	if (ret) {
 		dev_err(dev, "unable to allocate interrupt\n");
-		goto free_gpios;
+		goto free_master;
 	}
 
 	if (!request_mem_region(hw->mapbase, hw->mapsize, DRIVER_NAME)) {
@@ -538,8 +457,6 @@
 	release_mem_region(hw->mapbase, hw->mapsize);
 request_mem_error:
 	free_irq(hw->irqnum, hw);
-free_gpios:
-	free_gpios(hw);
 free_master:
 	spi_master_put(master);
 
@@ -556,7 +473,6 @@
 	release_mem_region(hw->mapbase, hw->mapsize);
 	free_irq(hw->irqnum, hw);
 	iounmap(hw->regs);
-	free_gpios(hw);
 	spi_master_put(master);
 	return 0;
 }
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index e32d51f..56a6209 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -4,27 +4,29 @@
  * Copyright (C) 2013, Intel Corporation
  */
 
+#include <linux/acpi.h>
 #include <linux/bitops.h>
-#include <linux/init.h>
-#include <linux/module.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
 #include <linux/device.h>
-#include <linux/ioport.h>
-#include <linux/errno.h>
 #include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/gpio/consumer.h>
+#include <linux/gpio.h>
+#include <linux/init.h>
 #include <linux/interrupt.h>
+#include <linux/ioport.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/of.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/property.h>
+#include <linux/slab.h>
 #include <linux/spi/pxa2xx_spi.h>
 #include <linux/spi/spi.h>
-#include <linux/delay.h>
-#include <linux/gpio.h>
-#include <linux/gpio/consumer.h>
-#include <linux/slab.h>
-#include <linux/clk.h>
-#include <linux/pm_runtime.h>
-#include <linux/acpi.h>
-#include <linux/of_device.h>
 
 #include "spi-pxa2xx.h"
 
@@ -191,6 +193,11 @@
 	return drv_data->ssp_type == QUARK_X1000_SSP;
 }
 
+static bool is_mmp2_ssp(const struct driver_data *drv_data)
+{
+	return drv_data->ssp_type == MMP2_SSP;
+}
+
 static u32 pxa2xx_spi_get_ssrc1_change_mask(const struct driver_data *drv_data)
 {
 	switch (drv_data->ssp_type) {
@@ -483,6 +490,16 @@
 	return limit;
 }
 
+static void pxa2xx_spi_off(struct driver_data *drv_data)
+{
+	/* On MMP, disabling SSE seems to corrupt the Rx FIFO */
+	if (is_mmp2_ssp(drv_data))
+		return;
+
+	pxa2xx_spi_write(drv_data, SSCR0,
+			 pxa2xx_spi_read(drv_data, SSCR0) & ~SSCR0_SSE);
+}
+
 static int null_writer(struct driver_data *drv_data)
 {
 	u8 n_bytes = drv_data->n_bytes;
@@ -609,8 +626,7 @@
 	if (!pxa25x_ssp_comp(drv_data))
 		pxa2xx_spi_write(drv_data, SSTO, 0);
 	pxa2xx_spi_flush(drv_data);
-	pxa2xx_spi_write(drv_data, SSCR0,
-			 pxa2xx_spi_read(drv_data, SSCR0) & ~SSCR0_SSE);
+	pxa2xx_spi_off(drv_data);
 
 	dev_err(&drv_data->pdev->dev, "%s\n", msg);
 
@@ -708,8 +724,7 @@
 
 static void handle_bad_msg(struct driver_data *drv_data)
 {
-	pxa2xx_spi_write(drv_data, SSCR0,
-			 pxa2xx_spi_read(drv_data, SSCR0) & ~SSCR0_SSE);
+	pxa2xx_spi_off(drv_data);
 	pxa2xx_spi_write(drv_data, SSCR1,
 			 pxa2xx_spi_read(drv_data, SSCR1) & ~drv_data->int_cr1);
 	if (!pxa25x_ssp_comp(drv_data))
@@ -1084,7 +1099,8 @@
 	    || (pxa2xx_spi_read(drv_data, SSCR1) & change_mask)
 	    != (cr1 & change_mask)) {
 		/* stop the SSP, and update the other bits */
-		pxa2xx_spi_write(drv_data, SSCR0, cr0 & ~SSCR0_SSE);
+		if (!is_mmp2_ssp(drv_data))
+			pxa2xx_spi_write(drv_data, SSCR0, cr0 & ~SSCR0_SSE);
 		if (!pxa25x_ssp_comp(drv_data))
 			pxa2xx_spi_write(drv_data, SSTO, chip->timeout);
 		/* first set CR1 without interrupt and service enables */
@@ -1097,7 +1113,7 @@
 			pxa2xx_spi_write(drv_data, SSTO, chip->timeout);
 	}
 
-	if (drv_data->ssp_type == MMP2_SSP) {
+	if (is_mmp2_ssp(drv_data)) {
 		u8 tx_level = (pxa2xx_spi_read(drv_data, SSSR)
 					& SSSR_TFL_MASK) >> 8;
 
@@ -1140,8 +1156,7 @@
 	if (!pxa25x_ssp_comp(drv_data))
 		pxa2xx_spi_write(drv_data, SSTO, 0);
 	pxa2xx_spi_flush(drv_data);
-	pxa2xx_spi_write(drv_data, SSCR0,
-			 pxa2xx_spi_read(drv_data, SSCR0) & ~SSCR0_SSE);
+	pxa2xx_spi_off(drv_data);
 
 	dev_dbg(&drv_data->pdev->dev, "transfer aborted\n");
 
@@ -1157,8 +1172,7 @@
 	struct driver_data *drv_data = spi_controller_get_devdata(controller);
 
 	/* Disable the SSP */
-	pxa2xx_spi_write(drv_data, SSCR0,
-			 pxa2xx_spi_read(drv_data, SSCR0) & ~SSCR0_SSE);
+	pxa2xx_spi_off(drv_data);
 	/* Clear and disable interrupts and service requests */
 	write_SSSR_CS(drv_data, drv_data->clear_sr);
 	pxa2xx_spi_write(drv_data, SSCR1,
@@ -1183,8 +1197,7 @@
 	struct driver_data *drv_data = spi_controller_get_devdata(controller);
 
 	/* Disable the SSP now */
-	pxa2xx_spi_write(drv_data, SSCR0,
-			 pxa2xx_spi_read(drv_data, SSCR0) & ~SSCR0_SSE);
+	pxa2xx_spi_off(drv_data);
 
 	return 0;
 }
@@ -1426,6 +1439,7 @@
 	kfree(chip);
 }
 
+#ifdef CONFIG_ACPI
 static const struct acpi_device_id pxa2xx_spi_acpi_match[] = {
 	{ "INT33C0", LPSS_LPT_SSP },
 	{ "INT33C1", LPSS_LPT_SSP },
@@ -1436,6 +1450,7 @@
 	{ },
 };
 MODULE_DEVICE_TABLE(acpi, pxa2xx_spi_acpi_match);
+#endif
 
 /*
  * PCI IDs of compound devices that integrate both host controller and private
@@ -1452,6 +1467,9 @@
 	/* KBL-H */
 	{ PCI_VDEVICE(INTEL, 0xa2a9), LPSS_SPT_SSP },
 	{ PCI_VDEVICE(INTEL, 0xa2aa), LPSS_SPT_SSP },
+	/* CML-V */
+	{ PCI_VDEVICE(INTEL, 0xa3a9), LPSS_SPT_SSP },
+	{ PCI_VDEVICE(INTEL, 0xa3aa), LPSS_SPT_SSP },
 	/* BXT A-Step */
 	{ PCI_VDEVICE(INTEL, 0x0ac2), LPSS_BXT_SSP },
 	{ PCI_VDEVICE(INTEL, 0x0ac4), LPSS_BXT_SSP },
@@ -1476,6 +1494,11 @@
 	{ PCI_VDEVICE(INTEL, 0x4daa), LPSS_CNL_SSP },
 	{ PCI_VDEVICE(INTEL, 0x4dab), LPSS_CNL_SSP },
 	{ PCI_VDEVICE(INTEL, 0x4dfb), LPSS_CNL_SSP },
+	/* TGL-H */
+	{ PCI_VDEVICE(INTEL, 0x43aa), LPSS_CNL_SSP },
+	{ PCI_VDEVICE(INTEL, 0x43ab), LPSS_CNL_SSP },
+	{ PCI_VDEVICE(INTEL, 0x43fb), LPSS_CNL_SSP },
+	{ PCI_VDEVICE(INTEL, 0x43fd), LPSS_CNL_SSP },
 	/* APL */
 	{ PCI_VDEVICE(INTEL, 0x5ac2), LPSS_BXT_SSP },
 	{ PCI_VDEVICE(INTEL, 0x5ac4), LPSS_BXT_SSP },
@@ -1515,11 +1538,13 @@
 
 #ifdef CONFIG_ACPI
 
-static int pxa2xx_spi_get_port_id(struct acpi_device *adev)
+static int pxa2xx_spi_get_port_id(struct device *dev)
 {
+	struct acpi_device *adev;
 	unsigned int devid;
 	int port_id = -1;
 
+	adev = ACPI_COMPANION(dev);
 	if (adev && adev->pnp.unique_id &&
 	    !kstrtouint(adev->pnp.unique_id, 0, &devid))
 		port_id = devid;
@@ -1528,7 +1553,7 @@
 
 #else /* !CONFIG_ACPI */
 
-static int pxa2xx_spi_get_port_id(struct acpi_device *adev)
+static int pxa2xx_spi_get_port_id(struct device *dev)
 {
 	return -1;
 }
@@ -1549,73 +1574,59 @@
 pxa2xx_spi_init_pdata(struct platform_device *pdev)
 {
 	struct pxa2xx_spi_controller *pdata;
-	struct acpi_device *adev;
 	struct ssp_device *ssp;
 	struct resource *res;
-	const struct acpi_device_id *adev_id = NULL;
+	struct device *parent = pdev->dev.parent;
+	struct pci_dev *pcidev = dev_is_pci(parent) ? to_pci_dev(parent) : NULL;
 	const struct pci_device_id *pcidev_id = NULL;
-	const struct of_device_id *of_id = NULL;
 	enum pxa_ssp_type type;
+	const void *match;
 
-	adev = ACPI_COMPANION(&pdev->dev);
+	if (pcidev)
+		pcidev_id = pci_match_id(pxa2xx_spi_pci_compound_match, pcidev);
 
-	if (pdev->dev.of_node)
-		of_id = of_match_device(pdev->dev.driver->of_match_table,
-					&pdev->dev);
-	else if (dev_is_pci(pdev->dev.parent))
-		pcidev_id = pci_match_id(pxa2xx_spi_pci_compound_match,
-					 to_pci_dev(pdev->dev.parent));
-	else if (adev)
-		adev_id = acpi_match_device(pdev->dev.driver->acpi_match_table,
-					    &pdev->dev);
-	else
-		return NULL;
-
-	if (adev_id)
-		type = (enum pxa_ssp_type)adev_id->driver_data;
+	match = device_get_match_data(&pdev->dev);
+	if (match)
+		type = (enum pxa_ssp_type)match;
 	else if (pcidev_id)
 		type = (enum pxa_ssp_type)pcidev_id->driver_data;
-	else if (of_id)
-		type = (enum pxa_ssp_type)of_id->data;
 	else
-		return NULL;
+		return ERR_PTR(-EINVAL);
 
 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
-		return NULL;
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	ssp = &pdata->ssp;
 
-	ssp->phys_base = res->start;
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	ssp->mmio_base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(ssp->mmio_base))
-		return NULL;
+		return ERR_CAST(ssp->mmio_base);
+
+	ssp->phys_base = res->start;
 
 #ifdef CONFIG_PCI
 	if (pcidev_id) {
-		pdata->tx_param = pdev->dev.parent;
-		pdata->rx_param = pdev->dev.parent;
+		pdata->tx_param = parent;
+		pdata->rx_param = parent;
 		pdata->dma_filter = pxa2xx_spi_idma_filter;
 	}
 #endif
 
 	ssp->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(ssp->clk))
-		return NULL;
+		return ERR_CAST(ssp->clk);
 
 	ssp->irq = platform_get_irq(pdev, 0);
 	if (ssp->irq < 0)
-		return NULL;
+		return ERR_PTR(ssp->irq);
 
 	ssp->type = type;
-	ssp->pdev = pdev;
-	ssp->port_id = pxa2xx_spi_get_port_id(adev);
+	ssp->dev = &pdev->dev;
+	ssp->port_id = pxa2xx_spi_get_port_id(&pdev->dev);
 
-	pdata->is_slave = of_property_read_bool(pdev->dev.of_node, "spi-slave");
+	pdata->is_slave = device_property_read_bool(&pdev->dev, "spi-slave");
 	pdata->num_chipselect = 1;
 	pdata->enable_dma = true;
 	pdata->dma_burst_size = 1;
@@ -1666,9 +1677,9 @@
 	platform_info = dev_get_platdata(dev);
 	if (!platform_info) {
 		platform_info = pxa2xx_spi_init_pdata(pdev);
-		if (!platform_info) {
+		if (IS_ERR(platform_info)) {
 			dev_err(&pdev->dev, "missing platform data\n");
-			return -ENODEV;
+			return PTR_ERR(platform_info);
 		}
 	}
 
@@ -1914,11 +1925,7 @@
 static int pxa2xx_spi_remove(struct platform_device *pdev)
 {
 	struct driver_data *drv_data = platform_get_drvdata(pdev);
-	struct ssp_device *ssp;
-
-	if (!drv_data)
-		return 0;
-	ssp = drv_data->ssp;
+	struct ssp_device *ssp = drv_data->ssp;
 
 	pm_runtime_get_sync(&pdev->dev);
 
diff --git a/drivers/spi/spi-qcom-qspi.c b/drivers/spi/spi-qcom-qspi.c
index 250fd60..8863be3 100644
--- a/drivers/spi/spi-qcom-qspi.c
+++ b/drivers/spi/spi-qcom-qspi.c
@@ -2,12 +2,14 @@
 // Copyright (c) 2017-2018, The Linux foundation. All rights reserved.
 
 #include <linux/clk.h>
+#include <linux/interconnect.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/pm_runtime.h>
+#include <linux/pm_opp.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/spi-mem.h>
 
@@ -137,9 +139,12 @@
 struct qcom_qspi {
 	void __iomem *base;
 	struct device *dev;
-	struct clk_bulk_data clks[QSPI_NUM_CLKS];
+	struct clk_bulk_data *clks;
 	struct qspi_xfer xfer;
-	/* Lock to protect xfer and IRQ accessed registers */
+	struct icc_path *icc_path_cpu_to_qspi;
+	struct opp_table *opp_table;
+	unsigned long last_speed;
+	/* Lock to protect data accessed by IRQs */
 	spinlock_t lock;
 };
 
@@ -221,6 +226,38 @@
 	spin_unlock_irqrestore(&ctrl->lock, flags);
 }
 
+static int qcom_qspi_set_speed(struct qcom_qspi *ctrl, unsigned long speed_hz)
+{
+	int ret;
+	unsigned int avg_bw_cpu;
+
+	if (speed_hz == ctrl->last_speed)
+		return 0;
+
+	/* In regular operation (SBL_EN=1) core must be 4x transfer clock */
+	ret = dev_pm_opp_set_rate(ctrl->dev, speed_hz * 4);
+	if (ret) {
+		dev_err(ctrl->dev, "Failed to set core clk %d\n", ret);
+		return ret;
+	}
+
+	/*
+	 * Set BW quota for CPU as driver supports FIFO mode only.
+	 * We don't have explicit peak requirement so keep it equal to avg_bw.
+	 */
+	avg_bw_cpu = Bps_to_icc(speed_hz);
+	ret = icc_set_bw(ctrl->icc_path_cpu_to_qspi, avg_bw_cpu, avg_bw_cpu);
+	if (ret) {
+		dev_err(ctrl->dev, "%s: ICC BW voting failed for cpu: %d\n",
+			__func__, ret);
+		return ret;
+	}
+
+	ctrl->last_speed = speed_hz;
+
+	return 0;
+}
+
 static int qcom_qspi_transfer_one(struct spi_master *master,
 				  struct spi_device *slv,
 				  struct spi_transfer *xfer)
@@ -234,12 +271,9 @@
 	if (xfer->speed_hz)
 		speed_hz = xfer->speed_hz;
 
-	/* In regular operation (SBL_EN=1) core must be 4x transfer clock */
-	ret = clk_set_rate(ctrl->clks[QSPI_CLK_CORE].clk, speed_hz * 4);
-	if (ret) {
-		dev_err(ctrl->dev, "Failed to set core clk %d\n", ret);
+	ret = qcom_qspi_set_speed(ctrl, speed_hz);
+	if (ret)
 		return ret;
-	}
 
 	spin_lock_irqsave(&ctrl->lock, flags);
 
@@ -386,9 +420,8 @@
 	u32 int_status;
 	struct qcom_qspi *ctrl = dev_id;
 	irqreturn_t ret = IRQ_NONE;
-	unsigned long flags;
 
-	spin_lock_irqsave(&ctrl->lock, flags);
+	spin_lock(&ctrl->lock);
 
 	int_status = readl(ctrl->base + MSTR_INT_STATUS);
 	writel(int_status, ctrl->base + MSTR_INT_STATUS);
@@ -416,7 +449,7 @@
 		spi_finalize_current_transfer(dev_get_drvdata(ctrl->dev));
 	}
 
-	spin_unlock_irqrestore(&ctrl->lock, flags);
+	spin_unlock(&ctrl->lock);
 	return ret;
 }
 
@@ -429,7 +462,7 @@
 
 	dev = &pdev->dev;
 
-	master = spi_alloc_master(dev, sizeof(*ctrl));
+	master = devm_spi_alloc_master(dev, sizeof(*ctrl));
 	if (!master)
 		return -ENOMEM;
 
@@ -440,25 +473,49 @@
 	spin_lock_init(&ctrl->lock);
 	ctrl->dev = dev;
 	ctrl->base = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(ctrl->base)) {
-		ret = PTR_ERR(ctrl->base);
-		goto exit_probe_master_put;
-	}
+	if (IS_ERR(ctrl->base))
+		return PTR_ERR(ctrl->base);
+
+	ctrl->clks = devm_kcalloc(dev, QSPI_NUM_CLKS,
+				  sizeof(*ctrl->clks), GFP_KERNEL);
+	if (!ctrl->clks)
+		return -ENOMEM;
 
 	ctrl->clks[QSPI_CLK_CORE].id = "core";
 	ctrl->clks[QSPI_CLK_IFACE].id = "iface";
 	ret = devm_clk_bulk_get(dev, QSPI_NUM_CLKS, ctrl->clks);
 	if (ret)
-		goto exit_probe_master_put;
+		return ret;
+
+	ctrl->icc_path_cpu_to_qspi = devm_of_icc_get(dev, "qspi-config");
+	if (IS_ERR(ctrl->icc_path_cpu_to_qspi))
+		return dev_err_probe(dev, PTR_ERR(ctrl->icc_path_cpu_to_qspi),
+				     "Failed to get cpu path\n");
+
+	/* Set BW vote for register access */
+	ret = icc_set_bw(ctrl->icc_path_cpu_to_qspi, Bps_to_icc(1000),
+				Bps_to_icc(1000));
+	if (ret) {
+		dev_err(ctrl->dev, "%s: ICC BW voting failed for cpu: %d\n",
+				__func__, ret);
+		return ret;
+	}
+
+	ret = icc_disable(ctrl->icc_path_cpu_to_qspi);
+	if (ret) {
+		dev_err(ctrl->dev, "%s: ICC disable failed for cpu: %d\n",
+				__func__, ret);
+		return ret;
+	}
 
 	ret = platform_get_irq(pdev, 0);
 	if (ret < 0)
-		goto exit_probe_master_put;
+		return ret;
 	ret = devm_request_irq(dev, ret, qcom_qspi_irq,
 			IRQF_TRIGGER_HIGH, dev_name(dev), ctrl);
 	if (ret) {
 		dev_err(dev, "Failed to request irq %d\n", ret);
-		goto exit_probe_master_put;
+		return ret;
 	}
 
 	master->max_speed_hz = 300000000;
@@ -474,6 +531,18 @@
 	master->handle_err = qcom_qspi_handle_err;
 	master->auto_runtime_pm = true;
 
+	ctrl->opp_table = dev_pm_opp_set_clkname(&pdev->dev, "core");
+	if (IS_ERR(ctrl->opp_table))
+		return PTR_ERR(ctrl->opp_table);
+	/* OPP table is optional */
+	ret = dev_pm_opp_of_add_table(&pdev->dev);
+	if (ret && ret != -ENODEV) {
+		dev_err(&pdev->dev, "invalid OPP table in device tree\n");
+		goto exit_probe_put_clkname;
+	}
+
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, 250);
 	pm_runtime_enable(dev);
 
 	ret = spi_register_master(master);
@@ -481,9 +550,10 @@
 		return 0;
 
 	pm_runtime_disable(dev);
+	dev_pm_opp_of_remove_table(&pdev->dev);
 
-exit_probe_master_put:
-	spi_master_put(master);
+exit_probe_put_clkname:
+	dev_pm_opp_put_clkname(ctrl->opp_table);
 
 	return ret;
 }
@@ -491,11 +561,14 @@
 static int qcom_qspi_remove(struct platform_device *pdev)
 {
 	struct spi_master *master = platform_get_drvdata(pdev);
+	struct qcom_qspi *ctrl = spi_master_get_devdata(master);
 
 	/* Unregister _before_ disabling pm_runtime() so we stop transfers */
 	spi_unregister_master(master);
 
 	pm_runtime_disable(&pdev->dev);
+	dev_pm_opp_of_remove_table(&pdev->dev);
+	dev_pm_opp_put_clkname(ctrl->opp_table);
 
 	return 0;
 }
@@ -504,9 +577,19 @@
 {
 	struct spi_master *master = dev_get_drvdata(dev);
 	struct qcom_qspi *ctrl = spi_master_get_devdata(master);
+	int ret;
 
+	/* Drop the performance state vote */
+	dev_pm_opp_set_rate(dev, 0);
 	clk_bulk_disable_unprepare(QSPI_NUM_CLKS, ctrl->clks);
 
+	ret = icc_disable(ctrl->icc_path_cpu_to_qspi);
+	if (ret) {
+		dev_err_ratelimited(ctrl->dev, "%s: ICC disable failed for cpu: %d\n",
+			__func__, ret);
+		return ret;
+	}
+
 	return 0;
 }
 
@@ -514,8 +597,20 @@
 {
 	struct spi_master *master = dev_get_drvdata(dev);
 	struct qcom_qspi *ctrl = spi_master_get_devdata(master);
+	int ret;
 
-	return clk_bulk_prepare_enable(QSPI_NUM_CLKS, ctrl->clks);
+	ret = icc_enable(ctrl->icc_path_cpu_to_qspi);
+	if (ret) {
+		dev_err_ratelimited(ctrl->dev, "%s: ICC enable failed for cpu: %d\n",
+			__func__, ret);
+		return ret;
+	}
+
+	ret = clk_bulk_prepare_enable(QSPI_NUM_CLKS, ctrl->clks);
+	if (ret)
+		return ret;
+
+	return dev_pm_opp_set_rate(dev, ctrl->last_speed * 4);
 }
 
 static int __maybe_unused qcom_qspi_suspend(struct device *dev)
diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c
index d1dfb52..d39dec6 100644
--- a/drivers/spi/spi-qup.c
+++ b/drivers/spi/spi-qup.c
@@ -848,7 +848,7 @@
 {
 	struct spi_qup *controller = spi_master_get_devdata(master);
 	unsigned long timeout, flags;
-	int ret = -EIO;
+	int ret;
 
 	ret = spi_qup_io_prep(spi, xfer);
 	if (ret)
@@ -932,11 +932,11 @@
 	int ret;
 
 	/* allocate dma resources, if available */
-	master->dma_rx = dma_request_slave_channel_reason(dev, "rx");
+	master->dma_rx = dma_request_chan(dev, "rx");
 	if (IS_ERR(master->dma_rx))
 		return PTR_ERR(master->dma_rx);
 
-	master->dma_tx = dma_request_slave_channel_reason(dev, "tx");
+	master->dma_tx = dma_request_chan(dev, "tx");
 	if (IS_ERR(master->dma_tx)) {
 		ret = PTR_ERR(master->dma_tx);
 		goto err_tx;
diff --git a/drivers/spi/spi-rb4xx.c b/drivers/spi/spi-rb4xx.c
index 1ad3f11..9f97d18 100644
--- a/drivers/spi/spi-rb4xx.c
+++ b/drivers/spi/spi-rb4xx.c
@@ -14,6 +14,7 @@
 #include <linux/platform_device.h>
 #include <linux/clk.h>
 #include <linux/spi/spi.h>
+#include <linux/of.h>
 
 #include <asm/mach-ath79/ar71xx_regs.h>
 
@@ -150,6 +151,7 @@
 	if (IS_ERR(ahb_clk))
 		return PTR_ERR(ahb_clk);
 
+	master->dev.of_node = pdev->dev.of_node;
 	master->bus_num = 0;
 	master->num_chipselect = 3;
 	master->mode_bits = SPI_TX_DUAL;
@@ -158,6 +160,11 @@
 	master->transfer_one = rb4xx_transfer_one;
 	master->set_cs = rb4xx_set_cs;
 
+	rbspi = spi_master_get_devdata(master);
+	rbspi->base = spi_base;
+	rbspi->clk = ahb_clk;
+	platform_set_drvdata(pdev, rbspi);
+
 	err = devm_spi_register_master(&pdev->dev, master);
 	if (err) {
 		dev_err(&pdev->dev, "failed to register SPI master\n");
@@ -168,11 +175,6 @@
 	if (err)
 		return err;
 
-	rbspi = spi_master_get_devdata(master);
-	rbspi->base = spi_base;
-	rbspi->clk = ahb_clk;
-	platform_set_drvdata(pdev, rbspi);
-
 	/* Enable SPI */
 	rb4xx_write(rbspi, AR71XX_SPI_REG_FS, AR71XX_SPI_FS_GPIO);
 
@@ -188,11 +190,18 @@
 	return 0;
 }
 
+static const struct of_device_id rb4xx_spi_dt_match[] = {
+	{ .compatible = "mikrotik,rb4xx-spi" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, rb4xx_spi_dt_match);
+
 static struct platform_driver rb4xx_spi_drv = {
 	.probe = rb4xx_spi_probe,
 	.remove = rb4xx_spi_remove,
 	.driver = {
 		.name = "rb4xx-spi",
+		.of_match_table = of_match_ptr(rb4xx_spi_dt_match),
 	},
 };
 
diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 008b64f..a9f9702 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c
@@ -39,8 +39,9 @@
 #define ROCKCHIP_SPI_RISR			0x0034
 #define ROCKCHIP_SPI_ICR			0x0038
 #define ROCKCHIP_SPI_DMACR			0x003c
-#define ROCKCHIP_SPI_DMATDLR		0x0040
-#define ROCKCHIP_SPI_DMARDLR		0x0044
+#define ROCKCHIP_SPI_DMATDLR			0x0040
+#define ROCKCHIP_SPI_DMARDLR			0x0044
+#define ROCKCHIP_SPI_VERSION			0x0048
 #define ROCKCHIP_SPI_TXDR			0x0400
 #define ROCKCHIP_SPI_RXDR			0x0800
 
@@ -156,6 +157,8 @@
 #define ROCKCHIP_SPI_MAX_TRANLEN		0xffff
 
 #define ROCKCHIP_SPI_MAX_CS_NUM			2
+#define ROCKCHIP_SPI_VER2_TYPE1			0x05EC0002
+#define ROCKCHIP_SPI_VER2_TYPE2			0x00110002
 
 struct rockchip_spi {
 	struct device *dev;
@@ -183,6 +186,8 @@
 	u8 rsd;
 
 	bool cs_asserted[ROCKCHIP_SPI_MAX_CS_NUM];
+
+	bool slave_abort;
 };
 
 static inline void spi_enable_chip(struct rockchip_spi *rs, bool enable)
@@ -204,23 +209,23 @@
 
 static u32 get_fifo_len(struct rockchip_spi *rs)
 {
-	u32 fifo;
+	u32 ver;
 
-	for (fifo = 2; fifo < 32; fifo++) {
-		writel_relaxed(fifo, rs->regs + ROCKCHIP_SPI_TXFTLR);
-		if (fifo != readl_relaxed(rs->regs + ROCKCHIP_SPI_TXFTLR))
-			break;
+	ver = readl_relaxed(rs->regs + ROCKCHIP_SPI_VERSION);
+
+	switch (ver) {
+	case ROCKCHIP_SPI_VER2_TYPE1:
+	case ROCKCHIP_SPI_VER2_TYPE2:
+		return 64;
+	default:
+		return 32;
 	}
-
-	writel_relaxed(0, rs->regs + ROCKCHIP_SPI_TXFTLR);
-
-	return (fifo == 31) ? 0 : fifo;
 }
 
 static void rockchip_spi_set_cs(struct spi_device *spi, bool enable)
 {
-	struct spi_master *master = spi->master;
-	struct rockchip_spi *rs = spi_master_get_devdata(master);
+	struct spi_controller *ctlr = spi->controller;
+	struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
 	bool cs_asserted = !enable;
 
 	/* Return immediately for no-op */
@@ -244,10 +249,10 @@
 	rs->cs_asserted[spi->chip_select] = cs_asserted;
 }
 
-static void rockchip_spi_handle_err(struct spi_master *master,
+static void rockchip_spi_handle_err(struct spi_controller *ctlr,
 				    struct spi_message *msg)
 {
-	struct rockchip_spi *rs = spi_master_get_devdata(master);
+	struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
 
 	/* stop running spi transfer
 	 * this also flushes both rx and tx fifos
@@ -258,10 +263,10 @@
 	writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR);
 
 	if (atomic_read(&rs->state) & TXDMA)
-		dmaengine_terminate_async(master->dma_tx);
+		dmaengine_terminate_async(ctlr->dma_tx);
 
 	if (atomic_read(&rs->state) & RXDMA)
-		dmaengine_terminate_async(master->dma_rx);
+		dmaengine_terminate_async(ctlr->dma_rx);
 }
 
 static void rockchip_spi_pio_writer(struct rockchip_spi *rs)
@@ -319,8 +324,8 @@
 
 static irqreturn_t rockchip_spi_isr(int irq, void *dev_id)
 {
-	struct spi_master *master = dev_id;
-	struct rockchip_spi *rs = spi_master_get_devdata(master);
+	struct spi_controller *ctlr = dev_id;
+	struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
 
 	if (rs->tx_left)
 		rockchip_spi_pio_writer(rs);
@@ -329,7 +334,7 @@
 	if (!rs->rx_left) {
 		spi_enable_chip(rs, false);
 		writel_relaxed(0, rs->regs + ROCKCHIP_SPI_IMR);
-		spi_finalize_current_transfer(master);
+		spi_finalize_current_transfer(ctlr);
 	}
 
 	return IRQ_HANDLED;
@@ -355,35 +360,48 @@
 
 static void rockchip_spi_dma_rxcb(void *data)
 {
-	struct spi_master *master = data;
-	struct rockchip_spi *rs = spi_master_get_devdata(master);
+	struct spi_controller *ctlr = data;
+	struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
 	int state = atomic_fetch_andnot(RXDMA, &rs->state);
 
-	if (state & TXDMA)
+	if (state & TXDMA && !rs->slave_abort)
 		return;
 
 	spi_enable_chip(rs, false);
-	spi_finalize_current_transfer(master);
+	spi_finalize_current_transfer(ctlr);
 }
 
 static void rockchip_spi_dma_txcb(void *data)
 {
-	struct spi_master *master = data;
-	struct rockchip_spi *rs = spi_master_get_devdata(master);
+	struct spi_controller *ctlr = data;
+	struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
 	int state = atomic_fetch_andnot(TXDMA, &rs->state);
 
-	if (state & RXDMA)
+	if (state & RXDMA && !rs->slave_abort)
 		return;
 
 	/* Wait until the FIFO data completely. */
 	wait_for_idle(rs);
 
 	spi_enable_chip(rs, false);
-	spi_finalize_current_transfer(master);
+	spi_finalize_current_transfer(ctlr);
+}
+
+static u32 rockchip_spi_calc_burst_size(u32 data_len)
+{
+	u32 i;
+
+	/* burst size: 1, 2, 4, 8 */
+	for (i = 1; i < 8; i <<= 1) {
+		if (data_len & i)
+			break;
+	}
+
+	return i;
 }
 
 static int rockchip_spi_prepare_dma(struct rockchip_spi *rs,
-		struct spi_master *master, struct spi_transfer *xfer)
+		struct spi_controller *ctlr, struct spi_transfer *xfer)
 {
 	struct dma_async_tx_descriptor *rxdesc, *txdesc;
 
@@ -395,20 +413,21 @@
 			.direction = DMA_DEV_TO_MEM,
 			.src_addr = rs->dma_addr_rx,
 			.src_addr_width = rs->n_bytes,
-			.src_maxburst = 1,
+			.src_maxburst = rockchip_spi_calc_burst_size(xfer->len /
+								     rs->n_bytes),
 		};
 
-		dmaengine_slave_config(master->dma_rx, &rxconf);
+		dmaengine_slave_config(ctlr->dma_rx, &rxconf);
 
 		rxdesc = dmaengine_prep_slave_sg(
-				master->dma_rx,
+				ctlr->dma_rx,
 				xfer->rx_sg.sgl, xfer->rx_sg.nents,
 				DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
 		if (!rxdesc)
 			return -EINVAL;
 
 		rxdesc->callback = rockchip_spi_dma_rxcb;
-		rxdesc->callback_param = master;
+		rxdesc->callback_param = ctlr;
 	}
 
 	txdesc = NULL;
@@ -420,27 +439,27 @@
 			.dst_maxburst = rs->fifo_len / 4,
 		};
 
-		dmaengine_slave_config(master->dma_tx, &txconf);
+		dmaengine_slave_config(ctlr->dma_tx, &txconf);
 
 		txdesc = dmaengine_prep_slave_sg(
-				master->dma_tx,
+				ctlr->dma_tx,
 				xfer->tx_sg.sgl, xfer->tx_sg.nents,
 				DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
 		if (!txdesc) {
 			if (rxdesc)
-				dmaengine_terminate_sync(master->dma_rx);
+				dmaengine_terminate_sync(ctlr->dma_rx);
 			return -EINVAL;
 		}
 
 		txdesc->callback = rockchip_spi_dma_txcb;
-		txdesc->callback_param = master;
+		txdesc->callback_param = ctlr;
 	}
 
 	/* rx must be started before tx due to spi instinct */
 	if (rxdesc) {
 		atomic_or(RXDMA, &rs->state);
 		dmaengine_submit(rxdesc);
-		dma_async_issue_pending(master->dma_rx);
+		dma_async_issue_pending(ctlr->dma_rx);
 	}
 
 	spi_enable_chip(rs, true);
@@ -448,16 +467,16 @@
 	if (txdesc) {
 		atomic_or(TXDMA, &rs->state);
 		dmaengine_submit(txdesc);
-		dma_async_issue_pending(master->dma_tx);
+		dma_async_issue_pending(ctlr->dma_tx);
 	}
 
 	/* 1 means the transfer is in progress */
 	return 1;
 }
 
-static void rockchip_spi_config(struct rockchip_spi *rs,
+static int rockchip_spi_config(struct rockchip_spi *rs,
 		struct spi_device *spi, struct spi_transfer *xfer,
-		bool use_dma)
+		bool use_dma, bool slave_mode)
 {
 	u32 cr0 = CR0_FRF_SPI  << CR0_FRF_OFFSET
 	        | CR0_BHT_8BIT << CR0_BHT_OFFSET
@@ -466,6 +485,10 @@
 	u32 cr1;
 	u32 dmacr = 0;
 
+	if (slave_mode)
+		cr0 |= CR0_OPM_SLAVE << CR0_OPM_OFFSET;
+	rs->slave_abort = false;
+
 	cr0 |= rs->rsd << CR0_RSD_OFFSET;
 	cr0 |= (spi->mode & 0x3U) << CR0_SCPH_OFFSET;
 	if (spi->mode & SPI_LSB_FIRST)
@@ -493,10 +516,12 @@
 		break;
 	default:
 		/* we only whitelist 4, 8 and 16 bit words in
-		 * master->bits_per_word_mask, so this shouldn't
+		 * ctlr->bits_per_word_mask, so this shouldn't
 		 * happen
 		 */
-		unreachable();
+		dev_err(rs->dev, "unknown bits per word: %d\n",
+			xfer->bits_per_word);
+		return -EINVAL;
 	}
 
 	if (use_dma) {
@@ -519,7 +544,8 @@
 		writel_relaxed(rs->fifo_len / 2 - 1, rs->regs + ROCKCHIP_SPI_RXFTLR);
 
 	writel_relaxed(rs->fifo_len / 2, rs->regs + ROCKCHIP_SPI_DMATDLR);
-	writel_relaxed(0, rs->regs + ROCKCHIP_SPI_DMARDLR);
+	writel_relaxed(rockchip_spi_calc_burst_size(xfer->len / rs->n_bytes) - 1,
+		       rs->regs + ROCKCHIP_SPI_DMARDLR);
 	writel_relaxed(dmacr, rs->regs + ROCKCHIP_SPI_DMACR);
 
 	/* the hardware only supports an even clock divisor, so
@@ -528,6 +554,8 @@
 	 */
 	writel_relaxed(2 * DIV_ROUND_UP(rs->freq, 2 * xfer->speed_hz),
 			rs->regs + ROCKCHIP_SPI_BAUDR);
+
+	return 0;
 }
 
 static size_t rockchip_spi_max_transfer_size(struct spi_device *spi)
@@ -535,14 +563,37 @@
 	return ROCKCHIP_SPI_MAX_TRANLEN;
 }
 
+static int rockchip_spi_slave_abort(struct spi_controller *ctlr)
+{
+	struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
+
+	if (atomic_read(&rs->state) & RXDMA)
+		dmaengine_terminate_sync(ctlr->dma_rx);
+	if (atomic_read(&rs->state) & TXDMA)
+		dmaengine_terminate_sync(ctlr->dma_tx);
+	atomic_set(&rs->state, 0);
+	spi_enable_chip(rs, false);
+	rs->slave_abort = true;
+	complete(&ctlr->xfer_completion);
+
+	return 0;
+}
+
 static int rockchip_spi_transfer_one(
-		struct spi_master *master,
+		struct spi_controller *ctlr,
 		struct spi_device *spi,
 		struct spi_transfer *xfer)
 {
-	struct rockchip_spi *rs = spi_master_get_devdata(master);
+	struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
+	int ret;
 	bool use_dma;
 
+	/* Zero length transfers won't trigger an interrupt on completion */
+	if (!xfer->len) {
+		spi_finalize_current_transfer(ctlr);
+		return 1;
+	}
+
 	WARN_ON(readl_relaxed(rs->regs + ROCKCHIP_SPI_SSIENR) &&
 		(readl_relaxed(rs->regs + ROCKCHIP_SPI_SR) & SR_BUSY));
 
@@ -558,21 +609,23 @@
 
 	rs->n_bytes = xfer->bits_per_word <= 8 ? 1 : 2;
 
-	use_dma = master->can_dma ? master->can_dma(master, spi, xfer) : false;
+	use_dma = ctlr->can_dma ? ctlr->can_dma(ctlr, spi, xfer) : false;
 
-	rockchip_spi_config(rs, spi, xfer, use_dma);
+	ret = rockchip_spi_config(rs, spi, xfer, use_dma, ctlr->slave);
+	if (ret)
+		return ret;
 
 	if (use_dma)
-		return rockchip_spi_prepare_dma(rs, master, xfer);
+		return rockchip_spi_prepare_dma(rs, ctlr, xfer);
 
 	return rockchip_spi_prepare_irq(rs, xfer);
 }
 
-static bool rockchip_spi_can_dma(struct spi_master *master,
+static bool rockchip_spi_can_dma(struct spi_controller *ctlr,
 				 struct spi_device *spi,
 				 struct spi_transfer *xfer)
 {
-	struct rockchip_spi *rs = spi_master_get_devdata(master);
+	struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
 	unsigned int bytes_per_word = xfer->bits_per_word <= 8 ? 1 : 2;
 
 	/* if the numbor of spi words to transfer is less than the fifo
@@ -586,44 +639,55 @@
 {
 	int ret;
 	struct rockchip_spi *rs;
-	struct spi_master *master;
+	struct spi_controller *ctlr;
 	struct resource *mem;
-	u32 rsd_nsecs;
+	struct device_node *np = pdev->dev.of_node;
+	u32 rsd_nsecs, num_cs;
+	bool slave_mode;
 
-	master = spi_alloc_master(&pdev->dev, sizeof(struct rockchip_spi));
-	if (!master)
+	slave_mode = of_property_read_bool(np, "spi-slave");
+
+	if (slave_mode)
+		ctlr = spi_alloc_slave(&pdev->dev,
+				sizeof(struct rockchip_spi));
+	else
+		ctlr = spi_alloc_master(&pdev->dev,
+				sizeof(struct rockchip_spi));
+
+	if (!ctlr)
 		return -ENOMEM;
 
-	platform_set_drvdata(pdev, master);
+	platform_set_drvdata(pdev, ctlr);
 
-	rs = spi_master_get_devdata(master);
+	rs = spi_controller_get_devdata(ctlr);
+	ctlr->slave = slave_mode;
 
 	/* Get basic io resource and map it */
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	rs->regs = devm_ioremap_resource(&pdev->dev, mem);
 	if (IS_ERR(rs->regs)) {
 		ret =  PTR_ERR(rs->regs);
-		goto err_put_master;
+		goto err_put_ctlr;
 	}
 
 	rs->apb_pclk = devm_clk_get(&pdev->dev, "apb_pclk");
 	if (IS_ERR(rs->apb_pclk)) {
 		dev_err(&pdev->dev, "Failed to get apb_pclk\n");
 		ret = PTR_ERR(rs->apb_pclk);
-		goto err_put_master;
+		goto err_put_ctlr;
 	}
 
 	rs->spiclk = devm_clk_get(&pdev->dev, "spiclk");
 	if (IS_ERR(rs->spiclk)) {
 		dev_err(&pdev->dev, "Failed to get spi_pclk\n");
 		ret = PTR_ERR(rs->spiclk);
-		goto err_put_master;
+		goto err_put_ctlr;
 	}
 
 	ret = clk_prepare_enable(rs->apb_pclk);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Failed to enable apb_pclk\n");
-		goto err_put_master;
+		goto err_put_ctlr;
 	}
 
 	ret = clk_prepare_enable(rs->spiclk);
@@ -639,7 +703,7 @@
 		goto err_disable_spiclk;
 
 	ret = devm_request_threaded_irq(&pdev->dev, ret, rockchip_spi_isr, NULL,
-			IRQF_ONESHOT, dev_name(&pdev->dev), master);
+			IRQF_ONESHOT, dev_name(&pdev->dev), ctlr);
 	if (ret)
 		goto err_disable_spiclk;
 
@@ -673,78 +737,91 @@
 	pm_runtime_set_active(&pdev->dev);
 	pm_runtime_enable(&pdev->dev);
 
-	master->auto_runtime_pm = true;
-	master->bus_num = pdev->id;
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LOOP | SPI_LSB_FIRST;
-	master->num_chipselect = ROCKCHIP_SPI_MAX_CS_NUM;
-	master->dev.of_node = pdev->dev.of_node;
-	master->bits_per_word_mask = SPI_BPW_MASK(16) | SPI_BPW_MASK(8) | SPI_BPW_MASK(4);
-	master->min_speed_hz = rs->freq / BAUDR_SCKDV_MAX;
-	master->max_speed_hz = min(rs->freq / BAUDR_SCKDV_MIN, MAX_SCLK_OUT);
+	ctlr->auto_runtime_pm = true;
+	ctlr->bus_num = pdev->id;
+	ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LOOP | SPI_LSB_FIRST;
+	if (slave_mode) {
+		ctlr->mode_bits |= SPI_NO_CS;
+		ctlr->slave_abort = rockchip_spi_slave_abort;
+	} else {
+		ctlr->flags = SPI_MASTER_GPIO_SS;
+		ctlr->max_native_cs = ROCKCHIP_SPI_MAX_CS_NUM;
+		/*
+		 * rk spi0 has two native cs, spi1..5 one cs only
+		 * if num-cs is missing in the dts, default to 1
+		 */
+		if (of_property_read_u32(np, "num-cs", &num_cs))
+			num_cs = 1;
+		ctlr->num_chipselect = num_cs;
+		ctlr->use_gpio_descriptors = true;
+	}
+	ctlr->dev.of_node = pdev->dev.of_node;
+	ctlr->bits_per_word_mask = SPI_BPW_MASK(16) | SPI_BPW_MASK(8) | SPI_BPW_MASK(4);
+	ctlr->min_speed_hz = rs->freq / BAUDR_SCKDV_MAX;
+	ctlr->max_speed_hz = min(rs->freq / BAUDR_SCKDV_MIN, MAX_SCLK_OUT);
 
-	master->set_cs = rockchip_spi_set_cs;
-	master->transfer_one = rockchip_spi_transfer_one;
-	master->max_transfer_size = rockchip_spi_max_transfer_size;
-	master->handle_err = rockchip_spi_handle_err;
-	master->flags = SPI_MASTER_GPIO_SS;
+	ctlr->set_cs = rockchip_spi_set_cs;
+	ctlr->transfer_one = rockchip_spi_transfer_one;
+	ctlr->max_transfer_size = rockchip_spi_max_transfer_size;
+	ctlr->handle_err = rockchip_spi_handle_err;
 
-	master->dma_tx = dma_request_chan(rs->dev, "tx");
-	if (IS_ERR(master->dma_tx)) {
+	ctlr->dma_tx = dma_request_chan(rs->dev, "tx");
+	if (IS_ERR(ctlr->dma_tx)) {
 		/* Check tx to see if we need defer probing driver */
-		if (PTR_ERR(master->dma_tx) == -EPROBE_DEFER) {
+		if (PTR_ERR(ctlr->dma_tx) == -EPROBE_DEFER) {
 			ret = -EPROBE_DEFER;
 			goto err_disable_pm_runtime;
 		}
 		dev_warn(rs->dev, "Failed to request TX DMA channel\n");
-		master->dma_tx = NULL;
+		ctlr->dma_tx = NULL;
 	}
 
-	master->dma_rx = dma_request_chan(rs->dev, "rx");
-	if (IS_ERR(master->dma_rx)) {
-		if (PTR_ERR(master->dma_rx) == -EPROBE_DEFER) {
+	ctlr->dma_rx = dma_request_chan(rs->dev, "rx");
+	if (IS_ERR(ctlr->dma_rx)) {
+		if (PTR_ERR(ctlr->dma_rx) == -EPROBE_DEFER) {
 			ret = -EPROBE_DEFER;
 			goto err_free_dma_tx;
 		}
 		dev_warn(rs->dev, "Failed to request RX DMA channel\n");
-		master->dma_rx = NULL;
+		ctlr->dma_rx = NULL;
 	}
 
-	if (master->dma_tx && master->dma_rx) {
+	if (ctlr->dma_tx && ctlr->dma_rx) {
 		rs->dma_addr_tx = mem->start + ROCKCHIP_SPI_TXDR;
 		rs->dma_addr_rx = mem->start + ROCKCHIP_SPI_RXDR;
-		master->can_dma = rockchip_spi_can_dma;
+		ctlr->can_dma = rockchip_spi_can_dma;
 	}
 
-	ret = devm_spi_register_master(&pdev->dev, master);
+	ret = devm_spi_register_controller(&pdev->dev, ctlr);
 	if (ret < 0) {
-		dev_err(&pdev->dev, "Failed to register master\n");
+		dev_err(&pdev->dev, "Failed to register controller\n");
 		goto err_free_dma_rx;
 	}
 
 	return 0;
 
 err_free_dma_rx:
-	if (master->dma_rx)
-		dma_release_channel(master->dma_rx);
+	if (ctlr->dma_rx)
+		dma_release_channel(ctlr->dma_rx);
 err_free_dma_tx:
-	if (master->dma_tx)
-		dma_release_channel(master->dma_tx);
+	if (ctlr->dma_tx)
+		dma_release_channel(ctlr->dma_tx);
 err_disable_pm_runtime:
 	pm_runtime_disable(&pdev->dev);
 err_disable_spiclk:
 	clk_disable_unprepare(rs->spiclk);
 err_disable_apbclk:
 	clk_disable_unprepare(rs->apb_pclk);
-err_put_master:
-	spi_master_put(master);
+err_put_ctlr:
+	spi_controller_put(ctlr);
 
 	return ret;
 }
 
 static int rockchip_spi_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = spi_master_get(platform_get_drvdata(pdev));
-	struct rockchip_spi *rs = spi_master_get_devdata(master);
+	struct spi_controller *ctlr = spi_controller_get(platform_get_drvdata(pdev));
+	struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
 
 	pm_runtime_get_sync(&pdev->dev);
 
@@ -755,12 +832,12 @@
 	pm_runtime_disable(&pdev->dev);
 	pm_runtime_set_suspended(&pdev->dev);
 
-	if (master->dma_tx)
-		dma_release_channel(master->dma_tx);
-	if (master->dma_rx)
-		dma_release_channel(master->dma_rx);
+	if (ctlr->dma_tx)
+		dma_release_channel(ctlr->dma_tx);
+	if (ctlr->dma_rx)
+		dma_release_channel(ctlr->dma_rx);
 
-	spi_master_put(master);
+	spi_controller_put(ctlr);
 
 	return 0;
 }
@@ -769,9 +846,9 @@
 static int rockchip_spi_suspend(struct device *dev)
 {
 	int ret;
-	struct spi_master *master = dev_get_drvdata(dev);
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
 
-	ret = spi_master_suspend(master);
+	ret = spi_controller_suspend(ctlr);
 	if (ret < 0)
 		return ret;
 
@@ -787,8 +864,8 @@
 static int rockchip_spi_resume(struct device *dev)
 {
 	int ret;
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct rockchip_spi *rs = spi_master_get_devdata(master);
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
 
 	pinctrl_pm_select_default_state(dev);
 
@@ -796,7 +873,7 @@
 	if (ret < 0)
 		return ret;
 
-	ret = spi_master_resume(master);
+	ret = spi_controller_resume(ctlr);
 	if (ret < 0) {
 		clk_disable_unprepare(rs->spiclk);
 		clk_disable_unprepare(rs->apb_pclk);
@@ -809,8 +886,8 @@
 #ifdef CONFIG_PM
 static int rockchip_spi_runtime_suspend(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct rockchip_spi *rs = spi_master_get_devdata(master);
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
 
 	clk_disable_unprepare(rs->spiclk);
 	clk_disable_unprepare(rs->apb_pclk);
@@ -821,8 +898,8 @@
 static int rockchip_spi_runtime_resume(struct device *dev)
 {
 	int ret;
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct rockchip_spi *rs = spi_master_get_devdata(master);
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+	struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
 
 	ret = clk_prepare_enable(rs->apb_pclk);
 	if (ret < 0)
@@ -843,14 +920,17 @@
 };
 
 static const struct of_device_id rockchip_spi_dt_match[] = {
-	{ .compatible = "rockchip,rv1108-spi", },
+	{ .compatible = "rockchip,px30-spi", },
 	{ .compatible = "rockchip,rk3036-spi", },
 	{ .compatible = "rockchip,rk3066-spi", },
 	{ .compatible = "rockchip,rk3188-spi", },
 	{ .compatible = "rockchip,rk3228-spi", },
 	{ .compatible = "rockchip,rk3288-spi", },
+	{ .compatible = "rockchip,rk3308-spi", },
+	{ .compatible = "rockchip,rk3328-spi", },
 	{ .compatible = "rockchip,rk3368-spi", },
 	{ .compatible = "rockchip,rk3399-spi", },
+	{ .compatible = "rockchip,rv1108-spi", },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, rockchip_spi_dt_match);
diff --git a/drivers/spi/spi-rpc-if.c b/drivers/spi/spi-rpc-if.c
new file mode 100644
index 0000000..727d7cf
--- /dev/null
+++ b/drivers/spi/spi-rpc-if.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// RPC-IF SPI/QSPI/Octa driver
+//
+// Copyright (C) 2018 ~ 2019 Renesas Solutions Corp.
+// Copyright (C) 2019 Macronix International Co., Ltd.
+// Copyright (C) 2019 - 2020 Cogent Embedded, Inc.
+//
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi-mem.h>
+
+#include <memory/renesas-rpc-if.h>
+
+#include <asm/unaligned.h>
+
+static void rpcif_spi_mem_prepare(struct spi_device *spi_dev,
+				  const struct spi_mem_op *spi_op,
+				  u64 *offs, size_t *len)
+{
+	struct rpcif *rpc = spi_controller_get_devdata(spi_dev->controller);
+	struct rpcif_op rpc_op = { };
+
+	rpc_op.cmd.opcode = spi_op->cmd.opcode;
+	rpc_op.cmd.buswidth = spi_op->cmd.buswidth;
+
+	if (spi_op->addr.nbytes) {
+		rpc_op.addr.buswidth = spi_op->addr.buswidth;
+		rpc_op.addr.nbytes = spi_op->addr.nbytes;
+		rpc_op.addr.val = spi_op->addr.val;
+	}
+
+	if (spi_op->dummy.nbytes) {
+		rpc_op.dummy.buswidth = spi_op->dummy.buswidth;
+		rpc_op.dummy.ncycles  = spi_op->dummy.nbytes * 8 /
+					spi_op->dummy.buswidth;
+	}
+
+	if (spi_op->data.nbytes || (offs && len)) {
+		rpc_op.data.buswidth = spi_op->data.buswidth;
+		rpc_op.data.nbytes = spi_op->data.nbytes;
+		switch (spi_op->data.dir) {
+		case SPI_MEM_DATA_IN:
+			rpc_op.data.dir = RPCIF_DATA_IN;
+			rpc_op.data.buf.in = spi_op->data.buf.in;
+			break;
+		case SPI_MEM_DATA_OUT:
+			rpc_op.data.dir = RPCIF_DATA_OUT;
+			rpc_op.data.buf.out = spi_op->data.buf.out;
+			break;
+		case SPI_MEM_NO_DATA:
+			rpc_op.data.dir = RPCIF_NO_DATA;
+			break;
+		}
+	} else	{
+		rpc_op.data.dir = RPCIF_NO_DATA;
+	}
+
+	rpcif_prepare(rpc, &rpc_op, offs, len);
+}
+
+static bool rpcif_spi_mem_supports_op(struct spi_mem *mem,
+				      const struct spi_mem_op *op)
+{
+	if (!spi_mem_default_supports_op(mem, op))
+		return false;
+
+	if (op->data.buswidth > 4 || op->addr.buswidth > 4 ||
+	    op->dummy.buswidth > 4 || op->cmd.buswidth > 4 ||
+	    op->addr.nbytes > 4)
+		return false;
+
+	return true;
+}
+
+static ssize_t rpcif_spi_mem_dirmap_read(struct spi_mem_dirmap_desc *desc,
+					 u64 offs, size_t len, void *buf)
+{
+	struct rpcif *rpc =
+		spi_controller_get_devdata(desc->mem->spi->controller);
+
+	if (offs + desc->info.offset + len > U32_MAX)
+		return -EINVAL;
+
+	rpcif_spi_mem_prepare(desc->mem->spi, &desc->info.op_tmpl, &offs, &len);
+
+	return rpcif_dirmap_read(rpc, offs, len, buf);
+}
+
+static int rpcif_spi_mem_dirmap_create(struct spi_mem_dirmap_desc *desc)
+{
+	struct rpcif *rpc =
+		spi_controller_get_devdata(desc->mem->spi->controller);
+
+	if (desc->info.offset + desc->info.length > U32_MAX)
+		return -ENOTSUPP;
+
+	if (!rpcif_spi_mem_supports_op(desc->mem, &desc->info.op_tmpl))
+		return -ENOTSUPP;
+
+	if (!rpc->dirmap && desc->info.op_tmpl.data.dir == SPI_MEM_DATA_IN)
+		return -ENOTSUPP;
+
+	if (desc->info.op_tmpl.data.dir == SPI_MEM_DATA_OUT)
+		return -ENOTSUPP;
+
+	return 0;
+}
+
+static int rpcif_spi_mem_exec_op(struct spi_mem *mem,
+				 const struct spi_mem_op *op)
+{
+	struct rpcif *rpc =
+		spi_controller_get_devdata(mem->spi->controller);
+
+	rpcif_spi_mem_prepare(mem->spi, op, NULL, NULL);
+
+	return rpcif_manual_xfer(rpc);
+}
+
+static const struct spi_controller_mem_ops rpcif_spi_mem_ops = {
+	.supports_op	= rpcif_spi_mem_supports_op,
+	.exec_op	= rpcif_spi_mem_exec_op,
+	.dirmap_create	= rpcif_spi_mem_dirmap_create,
+	.dirmap_read	= rpcif_spi_mem_dirmap_read,
+};
+
+static int rpcif_spi_probe(struct platform_device *pdev)
+{
+	struct device *parent = pdev->dev.parent;
+	struct spi_controller *ctlr;
+	struct rpcif *rpc;
+	int error;
+
+	ctlr = devm_spi_alloc_master(&pdev->dev, sizeof(*rpc));
+	if (!ctlr)
+		return -ENOMEM;
+
+	rpc = spi_controller_get_devdata(ctlr);
+	error = rpcif_sw_init(rpc, parent);
+	if (error)
+		return error;
+
+	platform_set_drvdata(pdev, ctlr);
+
+	ctlr->dev.of_node = parent->of_node;
+
+	rpcif_enable_rpm(rpc);
+
+	ctlr->num_chipselect = 1;
+	ctlr->mem_ops = &rpcif_spi_mem_ops;
+
+	ctlr->bits_per_word_mask = SPI_BPW_MASK(8);
+	ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_TX_QUAD | SPI_RX_QUAD;
+	ctlr->flags = SPI_CONTROLLER_HALF_DUPLEX;
+
+	rpcif_hw_init(rpc, false);
+
+	error = spi_register_controller(ctlr);
+	if (error) {
+		dev_err(&pdev->dev, "spi_register_controller failed\n");
+		rpcif_disable_rpm(rpc);
+	}
+
+	return error;
+}
+
+static int rpcif_spi_remove(struct platform_device *pdev)
+{
+	struct spi_controller *ctlr = platform_get_drvdata(pdev);
+	struct rpcif *rpc = spi_controller_get_devdata(ctlr);
+
+	spi_unregister_controller(ctlr);
+	rpcif_disable_rpm(rpc);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int rpcif_spi_suspend(struct device *dev)
+{
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+
+	return spi_controller_suspend(ctlr);
+}
+
+static int rpcif_spi_resume(struct device *dev)
+{
+	struct spi_controller *ctlr = dev_get_drvdata(dev);
+
+	return spi_controller_resume(ctlr);
+}
+
+static SIMPLE_DEV_PM_OPS(rpcif_spi_pm_ops, rpcif_spi_suspend, rpcif_spi_resume);
+#define DEV_PM_OPS	(&rpcif_spi_pm_ops)
+#else
+#define DEV_PM_OPS	NULL
+#endif
+
+static struct platform_driver rpcif_spi_driver = {
+	.probe	= rpcif_spi_probe,
+	.remove	= rpcif_spi_remove,
+	.driver = {
+		.name	= "rpc-if-spi",
+		.pm	= DEV_PM_OPS,
+	},
+};
+module_platform_driver(rpcif_spi_driver);
+
+MODULE_DESCRIPTION("Renesas RPC-IF SPI driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
index 7222c76..e39fd38 100644
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -24,6 +24,7 @@
 #include <linux/sh_dma.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/rspi.h>
+#include <linux/spinlock.h>
 
 #define RSPI_SPCR		0x00	/* Control Register */
 #define RSPI_SSLP		0x01	/* Slave Select Polarity Register */
@@ -79,8 +80,7 @@
 #define SPCR_BSWAP		0x01	/* Byte Swap of read-data for DMAC */
 
 /* SSLP - Slave Select Polarity Register */
-#define SSLP_SSL1P		0x02	/* SSL1 Signal Polarity Setting */
-#define SSLP_SSL0P		0x01	/* SSL0 Signal Polarity Setting */
+#define SSLP_SSLP(i)		BIT(i)	/* SSLi Signal Polarity Setting */
 
 /* SPPCR - Pin Control Register */
 #define SPPCR_MOIFE		0x20	/* MOSI Idle Value Fixing Enable */
@@ -159,8 +159,9 @@
 #define SPCMD_SPIMOD_DUAL	SPCMD_SPIMOD0
 #define SPCMD_SPIMOD_QUAD	SPCMD_SPIMOD1
 #define SPCMD_SPRW		0x0010	/* SPI Read/Write Access (Dual/Quad) */
-#define SPCMD_SSLA_MASK		0x0030	/* SSL Assert Signal Setting (RSPI) */
+#define SPCMD_SSLA(i)		((i) << 4)	/* SSL Assert Signal Setting */
 #define SPCMD_BRDV_MASK		0x000c	/* Bit Rate Division Setting */
+#define SPCMD_BRDV(brdv)	((brdv) << 2)
 #define SPCMD_CPOL		0x0002	/* Clock Polarity Setting */
 #define SPCMD_CPHA		0x0001	/* Clock Phase Setting */
 
@@ -179,9 +180,11 @@
 
 struct rspi_data {
 	void __iomem *addr;
-	u32 max_speed_hz;
+	u32 speed_hz;
 	struct spi_controller *ctlr;
+	struct platform_device *pdev;
 	wait_queue_head_t wait;
+	spinlock_t lock;		/* Protects RMW-access to RSPI_SSLP */
 	struct clk *clk;
 	u16 spcmd;
 	u8 spsr;
@@ -239,25 +242,41 @@
 	int (*set_config_register)(struct rspi_data *rspi, int access_size);
 	int (*transfer_one)(struct spi_controller *ctlr,
 			    struct spi_device *spi, struct spi_transfer *xfer);
-	u16 mode_bits;
+	u16 extra_mode_bits;
+	u16 min_div;
+	u16 max_div;
 	u16 flags;
 	u16 fifo_size;
+	u8 num_hw_ss;
 };
 
+static void rspi_set_rate(struct rspi_data *rspi)
+{
+	unsigned long clksrc;
+	int brdv = 0, spbr;
+
+	clksrc = clk_get_rate(rspi->clk);
+	spbr = DIV_ROUND_UP(clksrc, 2 * rspi->speed_hz) - 1;
+	while (spbr > 255 && brdv < 3) {
+		brdv++;
+		spbr = DIV_ROUND_UP(spbr + 1, 2) - 1;
+	}
+
+	rspi_write8(rspi, clamp(spbr, 0, 255), RSPI_SPBR);
+	rspi->spcmd |= SPCMD_BRDV(brdv);
+	rspi->speed_hz = DIV_ROUND_UP(clksrc, (2U << brdv) * (spbr + 1));
+}
+
 /*
  * functions for RSPI on legacy SH
  */
 static int rspi_set_config_register(struct rspi_data *rspi, int access_size)
 {
-	int spbr;
-
 	/* Sets output mode, MOSI signal, and (optionally) loopback */
 	rspi_write8(rspi, rspi->sppcr, RSPI_SPPCR);
 
 	/* Sets transfer bit rate */
-	spbr = DIV_ROUND_UP(clk_get_rate(rspi->clk),
-			    2 * rspi->max_speed_hz) - 1;
-	rspi_write8(rspi, clamp(spbr, 0, 255), RSPI_SPBR);
+	rspi_set_rate(rspi);
 
 	/* Disable dummy transmission, set 16-bit word access, 1 frame */
 	rspi_write8(rspi, 0, RSPI_SPDCR);
@@ -287,25 +306,11 @@
  */
 static int rspi_rz_set_config_register(struct rspi_data *rspi, int access_size)
 {
-	int spbr;
-	int div = 0;
-	unsigned long clksrc;
-
 	/* Sets output mode, MOSI signal, and (optionally) loopback */
 	rspi_write8(rspi, rspi->sppcr, RSPI_SPPCR);
 
-	clksrc = clk_get_rate(rspi->clk);
-	while (div < 3) {
-		if (rspi->max_speed_hz >= clksrc/4) /* 4=(CLK/2)/2 */
-			break;
-		div++;
-		clksrc /= 2;
-	}
-
 	/* Sets transfer bit rate */
-	spbr = DIV_ROUND_UP(clksrc, 2 * rspi->max_speed_hz) - 1;
-	rspi_write8(rspi, clamp(spbr, 0, 255), RSPI_SPBR);
-	rspi->spcmd |= div << 2;
+	rspi_set_rate(rspi);
 
 	/* Disable dummy transmission, set byte access */
 	rspi_write8(rspi, SPDCR_SPLBYTE, RSPI_SPDCR);
@@ -332,14 +337,28 @@
  */
 static int qspi_set_config_register(struct rspi_data *rspi, int access_size)
 {
-	int spbr;
+	unsigned long clksrc;
+	int brdv = 0, spbr;
 
 	/* Sets output mode, MOSI signal, and (optionally) loopback */
 	rspi_write8(rspi, rspi->sppcr, RSPI_SPPCR);
 
 	/* Sets transfer bit rate */
-	spbr = DIV_ROUND_UP(clk_get_rate(rspi->clk), 2 * rspi->max_speed_hz);
-	rspi_write8(rspi, clamp(spbr, 0, 255), RSPI_SPBR);
+	clksrc = clk_get_rate(rspi->clk);
+	if (rspi->speed_hz >= clksrc) {
+		spbr = 0;
+		rspi->speed_hz = clksrc;
+	} else {
+		spbr = DIV_ROUND_UP(clksrc, 2 * rspi->speed_hz);
+		while (spbr > 255 && brdv < 3) {
+			brdv++;
+			spbr = DIV_ROUND_UP(spbr, 2);
+		}
+		spbr = clamp(spbr, 0, 255);
+		rspi->speed_hz = DIV_ROUND_UP(clksrc, (2U << brdv) * spbr);
+	}
+	rspi_write8(rspi, spbr, RSPI_SPBR);
+	rspi->spcmd |= SPCMD_BRDV(brdv);
 
 	/* Disable dummy transmission, set byte access */
 	rspi_write8(rspi, 0, RSPI_SPDCR);
@@ -426,8 +445,6 @@
 	return n;
 }
 
-#define set_config_register(spi, n) spi->ops->set_config_register(spi, n)
-
 static void rspi_enable_irq(const struct rspi_data *rspi, u8 enable)
 {
 	rspi_write8(rspi, rspi_read8(rspi, RSPI_SPCR) | enable, RSPI_SPCR);
@@ -620,9 +637,8 @@
 		dmaengine_terminate_all(rspi->ctlr->dma_rx);
 no_dma_rx:
 	if (ret == -EAGAIN) {
-		pr_warn_once("%s %s: DMA not available, falling back to PIO\n",
-			     dev_driver_string(&rspi->ctlr->dev),
-			     dev_name(&rspi->ctlr->dev));
+		dev_warn_once(&rspi->ctlr->dev,
+			      "DMA not available, falling back to PIO\n");
 	}
 	return ret;
 }
@@ -687,6 +703,8 @@
 {
 	int ret;
 
+	xfer->effective_speed_hz = rspi->speed_hz;
+
 	ret = rspi_dma_check_then_transfer(rspi, xfer);
 	if (ret != -EAGAIN)
 		return ret;
@@ -842,6 +860,7 @@
 {
 	struct rspi_data *rspi = spi_controller_get_devdata(ctlr);
 
+	xfer->effective_speed_hz = rspi->speed_hz;
 	if (spi->mode & SPI_LOOP) {
 		return qspi_transfer_out_in(rspi, xfer);
 	} else if (xfer->tx_nbits > SPI_NBITS_SINGLE) {
@@ -921,27 +940,71 @@
 	return 0;
 }
 
+static int rspi_setup(struct spi_device *spi)
+{
+	struct rspi_data *rspi = spi_controller_get_devdata(spi->controller);
+	u8 sslp;
+
+	if (spi->cs_gpiod)
+		return 0;
+
+	pm_runtime_get_sync(&rspi->pdev->dev);
+	spin_lock_irq(&rspi->lock);
+
+	sslp = rspi_read8(rspi, RSPI_SSLP);
+	if (spi->mode & SPI_CS_HIGH)
+		sslp |= SSLP_SSLP(spi->chip_select);
+	else
+		sslp &= ~SSLP_SSLP(spi->chip_select);
+	rspi_write8(rspi, sslp, RSPI_SSLP);
+
+	spin_unlock_irq(&rspi->lock);
+	pm_runtime_put(&rspi->pdev->dev);
+	return 0;
+}
+
 static int rspi_prepare_message(struct spi_controller *ctlr,
 				struct spi_message *msg)
 {
 	struct rspi_data *rspi = spi_controller_get_devdata(ctlr);
 	struct spi_device *spi = msg->spi;
+	const struct spi_transfer *xfer;
 	int ret;
 
-	rspi->max_speed_hz = spi->max_speed_hz;
+	/*
+	 * As the Bit Rate Register must not be changed while the device is
+	 * active, all transfers in a message must use the same bit rate.
+	 * In theory, the sequencer could be enabled, and each Command Register
+	 * could divide the base bit rate by a different value.
+	 * However, most RSPI variants do not have Transfer Data Length
+	 * Multiplier Setting Registers, so each sequence step would be limited
+	 * to a single word, making this feature unsuitable for large
+	 * transfers, which would gain most from it.
+	 */
+	rspi->speed_hz = spi->max_speed_hz;
+	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+		if (xfer->speed_hz < rspi->speed_hz)
+			rspi->speed_hz = xfer->speed_hz;
+	}
 
 	rspi->spcmd = SPCMD_SSLKP;
 	if (spi->mode & SPI_CPOL)
 		rspi->spcmd |= SPCMD_CPOL;
 	if (spi->mode & SPI_CPHA)
 		rspi->spcmd |= SPCMD_CPHA;
+	if (spi->mode & SPI_LSB_FIRST)
+		rspi->spcmd |= SPCMD_LSBF;
+
+	/* Configure slave signal to assert */
+	rspi->spcmd |= SPCMD_SSLA(spi->cs_gpiod ? rspi->ctlr->unused_native_cs
+						: spi->chip_select);
 
 	/* CMOS output mode and MOSI signal from previous transfer */
 	rspi->sppcr = 0;
 	if (spi->mode & SPI_LOOP)
 		rspi->sppcr |= SPPCR_SPLP;
 
-	set_config_register(rspi, 8);
+	rspi->ops->set_config_register(rspi, 8);
 
 	if (msg->spi->mode &
 	    (SPI_TX_DUAL | SPI_TX_QUAD | SPI_RX_DUAL | SPI_RX_QUAD)) {
@@ -1120,27 +1183,33 @@
 static const struct spi_ops rspi_ops = {
 	.set_config_register =	rspi_set_config_register,
 	.transfer_one =		rspi_transfer_one,
-	.mode_bits =		SPI_CPHA | SPI_CPOL | SPI_LOOP,
+	.min_div =		2,
+	.max_div =		4096,
 	.flags =		SPI_CONTROLLER_MUST_TX,
 	.fifo_size =		8,
+	.num_hw_ss =		2,
 };
 
 static const struct spi_ops rspi_rz_ops = {
 	.set_config_register =	rspi_rz_set_config_register,
 	.transfer_one =		rspi_rz_transfer_one,
-	.mode_bits =		SPI_CPHA | SPI_CPOL | SPI_LOOP,
+	.min_div =		2,
+	.max_div =		4096,
 	.flags =		SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX,
 	.fifo_size =		8,	/* 8 for TX, 32 for RX */
+	.num_hw_ss =		1,
 };
 
 static const struct spi_ops qspi_ops = {
 	.set_config_register =	qspi_set_config_register,
 	.transfer_one =		qspi_transfer_one,
-	.mode_bits =		SPI_CPHA | SPI_CPOL | SPI_LOOP |
-				SPI_TX_DUAL | SPI_TX_QUAD |
+	.extra_mode_bits =	SPI_TX_DUAL | SPI_TX_QUAD |
 				SPI_RX_DUAL | SPI_RX_QUAD,
+	.min_div =		1,
+	.max_div =		4080,
 	.flags =		SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX,
 	.fifo_size =		32,
+	.num_hw_ss =		1,
 };
 
 #ifdef CONFIG_OF
@@ -1199,6 +1268,7 @@
 	int ret;
 	const struct rspi_plat_data *rspi_pd;
 	const struct spi_ops *ops;
+	unsigned long clksrc;
 
 	ctlr = spi_alloc_master(&pdev->dev, sizeof(struct rspi_data));
 	if (ctlr == NULL)
@@ -1218,13 +1288,6 @@
 			ctlr->num_chipselect = 2; /* default */
 	}
 
-	/* ops parameter check */
-	if (!ops->set_config_register) {
-		dev_err(&pdev->dev, "there is no set_config_register\n");
-		ret = -ENODEV;
-		goto error1;
-	}
-
 	rspi = spi_controller_get_devdata(ctlr);
 	platform_set_drvdata(pdev, rspi);
 	rspi->ops = ops;
@@ -1244,18 +1307,27 @@
 		goto error1;
 	}
 
+	rspi->pdev = pdev;
 	pm_runtime_enable(&pdev->dev);
 
 	init_waitqueue_head(&rspi->wait);
+	spin_lock_init(&rspi->lock);
 
 	ctlr->bus_num = pdev->id;
+	ctlr->setup = rspi_setup;
 	ctlr->auto_runtime_pm = true;
 	ctlr->transfer_one = ops->transfer_one;
 	ctlr->prepare_message = rspi_prepare_message;
 	ctlr->unprepare_message = rspi_unprepare_message;
-	ctlr->mode_bits = ops->mode_bits;
+	ctlr->mode_bits = SPI_CPHA | SPI_CPOL | SPI_CS_HIGH | SPI_LSB_FIRST |
+			  SPI_LOOP | ops->extra_mode_bits;
+	clksrc = clk_get_rate(rspi->clk);
+	ctlr->min_speed_hz = DIV_ROUND_UP(clksrc, ops->max_div);
+	ctlr->max_speed_hz = DIV_ROUND_UP(clksrc, ops->min_div);
 	ctlr->flags = ops->flags;
 	ctlr->dev.of_node = pdev->dev.of_node;
+	ctlr->use_gpio_descriptors = true;
+	ctlr->max_native_cs = rspi->ops->num_hw_ss;
 
 	ret = platform_get_irq_byname_optional(pdev, "rx");
 	if (ret < 0) {
@@ -1314,8 +1386,6 @@
 
 static const struct platform_device_id spi_driver_ids[] = {
 	{ "rspi",	(kernel_ulong_t)&rspi_ops },
-	{ "rspi-rz",	(kernel_ulong_t)&rspi_rz_ops },
-	{ "qspi",	(kernel_ulong_t)&qspi_ops },
 	{},
 };
 
diff --git a/drivers/spi/spi-s3c24xx-fiq.S b/drivers/spi/spi-s3c24xx-fiq.S
deleted file mode 100644
index 68ea12b..0000000
--- a/drivers/spi/spi-s3c24xx-fiq.S
+++ /dev/null
@@ -1,110 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/* linux/drivers/spi/spi_s3c24xx_fiq.S
- *
- * Copyright 2009 Simtec Electronics
- *	Ben Dooks <ben@simtec.co.uk>
- *
- * S3C24XX SPI - FIQ pseudo-DMA transfer code
-*/
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
-#include <mach/map.h>
-#include <mach/regs-irq.h>
-#include <plat/regs-spi.h>
-
-#include "spi-s3c24xx-fiq.h"
-
-	.text
-
-	@ entry to these routines is as follows, with the register names
-	@ defined in fiq.h so that they can be shared with the C files which
-	@ setup the calling registers.
-	@
-	@ fiq_rirq	The base of the IRQ registers to find S3C2410_SRCPND
-	@ fiq_rtmp	Temporary register to hold tx/rx data
-	@ fiq_rspi	The base of the SPI register block
-	@ fiq_rtx	The tx buffer pointer
-	@ fiq_rrx	The rx buffer pointer
-	@ fiq_rcount	The number of bytes to move
-
-	@ each entry starts with a word entry of how long it is
-	@ and an offset to the irq acknowledgment word
-
-ENTRY(s3c24xx_spi_fiq_rx)
-	.word	fiq_rx_end - fiq_rx_start
-	.word	fiq_rx_irq_ack - fiq_rx_start
-fiq_rx_start:
-	ldr	fiq_rtmp, fiq_rx_irq_ack
-	str	fiq_rtmp, [ fiq_rirq, # S3C2410_SRCPND - S3C24XX_VA_IRQ ]
-
-	ldrb	fiq_rtmp, [ fiq_rspi, #  S3C2410_SPRDAT ]
-	strb	fiq_rtmp, [ fiq_rrx ], #1
-
-	mov	fiq_rtmp, #0xff
-	strb	fiq_rtmp, [ fiq_rspi, # S3C2410_SPTDAT ]
-
-	subs	fiq_rcount, fiq_rcount, #1
-	subsne	pc, lr, #4		@@ return, still have work to do
-
-	@@ set IRQ controller so that next op will trigger IRQ
-	mov	fiq_rtmp, #0
-	str	fiq_rtmp, [ fiq_rirq, # S3C2410_INTMOD  - S3C24XX_VA_IRQ ]
-	subs	pc, lr, #4
-
-fiq_rx_irq_ack:
-	.word	0
-fiq_rx_end:
-
-ENTRY(s3c24xx_spi_fiq_txrx)
-	.word	fiq_txrx_end - fiq_txrx_start
-	.word	fiq_txrx_irq_ack - fiq_txrx_start
-fiq_txrx_start:
-
-	ldrb	fiq_rtmp, [ fiq_rspi, #  S3C2410_SPRDAT ]
-	strb	fiq_rtmp, [ fiq_rrx ], #1
-
-	ldr	fiq_rtmp, fiq_txrx_irq_ack
-	str	fiq_rtmp, [ fiq_rirq, # S3C2410_SRCPND - S3C24XX_VA_IRQ ]
-
-	ldrb	fiq_rtmp, [ fiq_rtx ], #1
-	strb	fiq_rtmp, [ fiq_rspi, # S3C2410_SPTDAT ]
-
-	subs	fiq_rcount, fiq_rcount, #1
-	subsne	pc, lr, #4		@@ return, still have work to do
-
-	mov	fiq_rtmp, #0
-	str	fiq_rtmp, [ fiq_rirq, # S3C2410_INTMOD  - S3C24XX_VA_IRQ ]
-	subs	pc, lr, #4
-
-fiq_txrx_irq_ack:
-	.word	0
-
-fiq_txrx_end:
-
-ENTRY(s3c24xx_spi_fiq_tx)
-	.word	fiq_tx_end - fiq_tx_start
-	.word	fiq_tx_irq_ack - fiq_tx_start
-fiq_tx_start:
-	ldrb	fiq_rtmp, [ fiq_rspi, #  S3C2410_SPRDAT ]
-
-	ldr	fiq_rtmp, fiq_tx_irq_ack
-	str	fiq_rtmp, [ fiq_rirq, # S3C2410_SRCPND - S3C24XX_VA_IRQ ]
-
-	ldrb	fiq_rtmp, [ fiq_rtx ], #1
-	strb	fiq_rtmp, [ fiq_rspi, # S3C2410_SPTDAT ]
-
-	subs	fiq_rcount, fiq_rcount, #1
-	subsne	pc, lr, #4		@@ return, still have work to do
-
-	mov	fiq_rtmp, #0
-	str	fiq_rtmp, [ fiq_rirq, # S3C2410_INTMOD  - S3C24XX_VA_IRQ ]
-	subs	pc, lr, #4
-
-fiq_tx_irq_ack:
-	.word	0
-
-fiq_tx_end:
-
-	.end
diff --git a/drivers/spi/spi-s3c24xx-fiq.h b/drivers/spi/spi-s3c24xx-fiq.h
deleted file mode 100644
index 7786b0e..0000000
--- a/drivers/spi/spi-s3c24xx-fiq.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/* linux/drivers/spi/spi_s3c24xx_fiq.h
- *
- * Copyright 2009 Simtec Electronics
- *	Ben Dooks <ben@simtec.co.uk>
- *
- * S3C24XX SPI - FIQ pseudo-DMA transfer support
-*/
-
-/* We have R8 through R13 to play with */
-
-#ifdef __ASSEMBLY__
-#define __REG_NR(x)     r##x
-#else
-#define __REG_NR(x)     (x)
-#endif
-
-#define fiq_rspi	__REG_NR(8)
-#define fiq_rtmp	__REG_NR(9)
-#define fiq_rrx		__REG_NR(10)
-#define fiq_rtx		__REG_NR(11)
-#define fiq_rcount	__REG_NR(12)
-#define fiq_rirq	__REG_NR(13)
diff --git a/drivers/spi/spi-s3c24xx-regs.h b/drivers/spi/spi-s3c24xx-regs.h
new file mode 100644
index 0000000..f51464a
--- /dev/null
+++ b/drivers/spi/spi-s3c24xx-regs.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2004 Fetron GmbH
+ *
+ * S3C2410 SPI register definition
+ */
+
+#ifndef __SPI_S3C2410_H
+#define __SPI_S3C2410_H
+
+#define S3C2410_SPCON		(0x00)
+
+#define S3C2410_SPCON_SMOD_DMA	(2 << 5)	/* DMA mode */
+#define S3C2410_SPCON_SMOD_INT	(1 << 5)	/* interrupt mode */
+#define S3C2410_SPCON_SMOD_POLL	(0 << 5)	/* polling mode */
+#define S3C2410_SPCON_ENSCK	(1 << 4)	/* Enable SCK */
+#define S3C2410_SPCON_MSTR	(1 << 3)	/* Master:1, Slave:0 select */
+#define S3C2410_SPCON_CPOL_HIGH	(1 << 2)	/* Clock polarity select */
+#define S3C2410_SPCON_CPOL_LOW	(0 << 2)	/* Clock polarity select */
+
+#define S3C2410_SPCON_CPHA_FMTB	(1 << 1)	/* Clock Phase Select */
+#define S3C2410_SPCON_CPHA_FMTA	(0 << 1)	/* Clock Phase Select */
+
+#define S3C2410_SPSTA		(0x04)
+
+#define S3C2410_SPSTA_DCOL	(1 << 2)	/* Data Collision Error */
+#define S3C2410_SPSTA_MULD	(1 << 1)	/* Multi Master Error */
+#define S3C2410_SPSTA_READY	(1 << 0)	/* Data Tx/Rx ready */
+#define S3C2412_SPSTA_READY_ORG	(1 << 3)
+
+#define S3C2410_SPPIN		(0x08)
+
+#define S3C2410_SPPIN_ENMUL	(1 << 2)	/* Multi Master Error detect */
+#define S3C2410_SPPIN_RESERVED	(1 << 1)
+#define S3C2410_SPPIN_KEEP	(1 << 0)	/* Master Out keep */
+
+#define S3C2410_SPPRE		(0x0C)
+#define S3C2410_SPTDAT		(0x10)
+#define S3C2410_SPRDAT		(0x14)
+
+#endif /* __SPI_S3C2410_H */
diff --git a/drivers/spi/spi-s3c24xx.c b/drivers/spi/spi-s3c24xx.c
index 2d6e37f..d6f5169 100644
--- a/drivers/spi/spi-s3c24xx.c
+++ b/drivers/spi/spi-s3c24xx.c
@@ -19,16 +19,15 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
 #include <linux/spi/s3c24xx.h>
+#include <linux/spi/s3c24xx-fiq.h>
 #include <linux/module.h>
 
-#include <plat/regs-spi.h>
-
 #include <asm/fiq.h>
 
-#include "spi-s3c24xx-fiq.h"
+#include "spi-s3c24xx-regs.h"
 
 /**
- * s3c24xx_spi_devstate - per device data
+ * struct s3c24xx_spi_devstate - per device data
  * @hz: Last frequency calculated for @sppre field.
  * @mode: Last mode setting for the @spcon field.
  * @spcon: Value to write to the SPCON register.
@@ -227,24 +226,9 @@
 struct spi_fiq_code {
 	u32	length;
 	u32	ack_offset;
-	u8	data[0];
+	u8	data[];
 };
 
-extern struct spi_fiq_code s3c24xx_spi_fiq_txrx;
-extern struct spi_fiq_code s3c24xx_spi_fiq_tx;
-extern struct spi_fiq_code s3c24xx_spi_fiq_rx;
-
-/**
- * ack_bit - turn IRQ into IRQ acknowledgement bit
- * @irq: The interrupt number
- *
- * Returns the bit to write to the interrupt acknowledge register.
- */
-static inline u32 ack_bit(unsigned int irq)
-{
-	return 1 << (irq - IRQ_EINT0);
-}
-
 /**
  * s3c24xx_spi_tryfiq - attempt to claim and setup FIQ for transfer
  * @hw: The hardware state.
@@ -261,6 +245,7 @@
 	struct pt_regs regs;
 	enum spi_fiq_mode mode;
 	struct spi_fiq_code *code;
+	u32 *ack_ptr = NULL;
 	int ret;
 
 	if (!hw->fiq_claimed) {
@@ -283,13 +268,10 @@
 	regs.uregs[fiq_rrx]  = (long)hw->rx;
 	regs.uregs[fiq_rtx]  = (long)hw->tx + 1;
 	regs.uregs[fiq_rcount] = hw->len - 1;
-	regs.uregs[fiq_rirq] = (long)S3C24XX_VA_IRQ;
 
 	set_fiq_regs(&regs);
 
 	if (hw->fiq_mode != mode) {
-		u32 *ack_ptr;
-
 		hw->fiq_mode = mode;
 
 		switch (mode) {
@@ -309,12 +291,10 @@
 		BUG_ON(!code);
 
 		ack_ptr = (u32 *)&code->data[code->ack_offset];
-		*ack_ptr = ack_bit(hw->irq);
-
 		set_fiq_handler(&code->data, code->length);
 	}
 
-	s3c24xx_set_fiq(hw->irq, true);
+	s3c24xx_set_fiq(hw->irq, ack_ptr, true);
 
 	hw->fiq_mode = mode;
 	hw->fiq_inuse = 1;
diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index 1d948fe..dfa7c91 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c
@@ -29,7 +29,7 @@
 #define S3C64XX_SPI_CH_CFG		0x00
 #define S3C64XX_SPI_CLK_CFG		0x04
 #define S3C64XX_SPI_MODE_CFG		0x08
-#define S3C64XX_SPI_SLAVE_SEL		0x0C
+#define S3C64XX_SPI_CS_REG		0x0C
 #define S3C64XX_SPI_INT_EN		0x10
 #define S3C64XX_SPI_STATUS		0x14
 #define S3C64XX_SPI_TX_DATA		0x18
@@ -64,9 +64,9 @@
 #define S3C64XX_SPI_MODE_TXDMA_ON		(1<<1)
 #define S3C64XX_SPI_MODE_4BURST			(1<<0)
 
-#define S3C64XX_SPI_SLAVE_AUTO			(1<<1)
-#define S3C64XX_SPI_SLAVE_SIG_INACT		(1<<0)
-#define S3C64XX_SPI_SLAVE_NSC_CNT_2		(2<<4)
+#define S3C64XX_SPI_CS_NSC_CNT_2		(2<<4)
+#define S3C64XX_SPI_CS_AUTO			(1<<1)
+#define S3C64XX_SPI_CS_SIG_INACT		(1<<0)
 
 #define S3C64XX_SPI_INT_TRAILING_EN		(1<<6)
 #define S3C64XX_SPI_INT_RX_OVERRUN_EN		(1<<5)
@@ -131,9 +131,11 @@
  * @fifo_lvl_mask: Bit-mask for {TX|RX}_FIFO_LVL bits in SPI_STATUS register.
  * @rx_lvl_offset: Bit offset of RX_FIFO_LVL bits in SPI_STATUS regiter.
  * @tx_st_done: Bit offset of TX_DONE bit in SPI_STATUS regiter.
+ * @quirks: Bitmask of known quirks
  * @high_speed: True, if the controller supports HIGH_SPEED_EN bit.
  * @clk_from_cmu: True, if the controller does not include a clock mux and
  *	prescaler unit.
+ * @clk_ioclk: True if clock is present on this device
  *
  * The Samsung s3c64xx SPI controller are used on various Samsung SoC's but
  * differ in some aspects such as the size of the fifo and spi bus clock
@@ -155,19 +157,21 @@
  * @clk: Pointer to the spi clock.
  * @src_clk: Pointer to the clock used to generate SPI signals.
  * @ioclk: Pointer to the i/o clock between master and slave
+ * @pdev: Pointer to device's platform device data
  * @master: Pointer to the SPI Protocol master.
  * @cntrlr_info: Platform specific data for the controller this driver manages.
  * @lock: Controller specific lock.
  * @state: Set of FLAGS to indicate status.
- * @rx_dmach: Controller's DMA channel for Rx.
- * @tx_dmach: Controller's DMA channel for Tx.
  * @sfr_start: BUS address of SPI controller regs.
  * @regs: Pointer to ioremap'ed controller registers.
- * @irq: interrupt
  * @xfer_completion: To indicate completion of xfer task.
  * @cur_mode: Stores the active configuration of the controller.
  * @cur_bpw: Stores the active bits per word settings.
- * @cur_speed: Stores the active xfer clock speed.
+ * @cur_speed: Current clock speed
+ * @rx_dma: Local receive DMA data (e.g. chan and direction)
+ * @tx_dma: Local transmit DMA data (e.g. chan and direction)
+ * @port_conf: Local SPI port configuartion data
+ * @port_id: Port identification number
  */
 struct s3c64xx_spi_driver_data {
 	void __iomem                    *regs;
@@ -176,7 +180,7 @@
 	struct clk                      *ioclk;
 	struct platform_device          *pdev;
 	struct spi_master               *master;
-	struct s3c64xx_spi_info  *cntrlr_info;
+	struct s3c64xx_spi_info         *cntrlr_info;
 	spinlock_t                      lock;
 	unsigned long                   sfr_start;
 	struct completion               xfer_completion;
@@ -325,18 +329,18 @@
 
 	if (enable) {
 		if (!(sdd->port_conf->quirks & S3C64XX_SPI_QUIRK_CS_AUTO)) {
-			writel(0, sdd->regs + S3C64XX_SPI_SLAVE_SEL);
+			writel(0, sdd->regs + S3C64XX_SPI_CS_REG);
 		} else {
-			u32 ssel = readl(sdd->regs + S3C64XX_SPI_SLAVE_SEL);
+			u32 ssel = readl(sdd->regs + S3C64XX_SPI_CS_REG);
 
-			ssel |= (S3C64XX_SPI_SLAVE_AUTO |
-						S3C64XX_SPI_SLAVE_NSC_CNT_2);
-			writel(ssel, sdd->regs + S3C64XX_SPI_SLAVE_SEL);
+			ssel |= (S3C64XX_SPI_CS_AUTO |
+						S3C64XX_SPI_CS_NSC_CNT_2);
+			writel(ssel, sdd->regs + S3C64XX_SPI_CS_REG);
 		}
 	} else {
 		if (!(sdd->port_conf->quirks & S3C64XX_SPI_QUIRK_CS_AUTO))
-			writel(S3C64XX_SPI_SLAVE_SIG_INACT,
-			       sdd->regs + S3C64XX_SPI_SLAVE_SEL);
+			writel(S3C64XX_SPI_CS_SIG_INACT,
+			       sdd->regs + S3C64XX_SPI_CS_REG);
 	}
 }
 
@@ -469,7 +473,8 @@
 
 	/* millisecs to xfer 'len' bytes @ 'cur_speed' */
 	ms = xfer->len * 8 * 1000 / sdd->cur_speed;
-	ms += 10; /* some tolerance */
+	ms += 30;               /* some tolerance */
+	ms = max(ms, 100);      /* minimum timeout */
 
 	val = msecs_to_jiffies(ms) + 10;
 	val = wait_for_completion_timeout(&sdd->xfer_completion, val);
@@ -622,6 +627,7 @@
 		ret = clk_set_rate(sdd->src_clk, sdd->cur_speed * 2);
 		if (ret)
 			return ret;
+		sdd->cur_speed = clk_get_rate(sdd->src_clk) / 2;
 	} else {
 		/* Configure Clock */
 		val = readl(regs + S3C64XX_SPI_CLK_CFG);
@@ -724,17 +730,28 @@
 
 		if (status) {
 			dev_err(&spi->dev,
-				"I/O Error: rx-%d tx-%d res:rx-%c tx-%c len-%d\n",
+				"I/O Error: rx-%d tx-%d rx-%c tx-%c len-%d dma-%d res-(%d)\n",
 				xfer->rx_buf ? 1 : 0, xfer->tx_buf ? 1 : 0,
 				(sdd->state & RXBUSY) ? 'f' : 'p',
 				(sdd->state & TXBUSY) ? 'f' : 'p',
-				xfer->len);
+				xfer->len, use_dma ? 1 : 0, status);
 
 			if (use_dma) {
-				if (xfer->tx_buf && (sdd->state & TXBUSY))
+				struct dma_tx_state s;
+
+				if (xfer->tx_buf && (sdd->state & TXBUSY)) {
+					dmaengine_pause(sdd->tx_dma.ch);
+					dmaengine_tx_status(sdd->tx_dma.ch, sdd->tx_dma.cookie, &s);
 					dmaengine_terminate_all(sdd->tx_dma.ch);
-				if (xfer->rx_buf && (sdd->state & RXBUSY))
+					dev_err(&spi->dev, "TX residue: %d\n", s.residue);
+
+				}
+				if (xfer->rx_buf && (sdd->state & RXBUSY)) {
+					dmaengine_pause(sdd->rx_dma.ch);
+					dmaengine_tx_status(sdd->rx_dma.ch, sdd->rx_dma.cookie, &s);
 					dmaengine_terminate_all(sdd->rx_dma.ch);
+					dev_err(&spi->dev, "RX residue: %d\n", s.residue);
+				}
 			}
 		} else {
 			s3c64xx_flush_fifo(sdd);
@@ -964,9 +981,9 @@
 	sdd->cur_speed = 0;
 
 	if (sci->no_cs)
-		writel(0, sdd->regs + S3C64XX_SPI_SLAVE_SEL);
+		writel(0, sdd->regs + S3C64XX_SPI_CS_REG);
 	else if (!(sdd->port_conf->quirks & S3C64XX_SPI_QUIRK_CS_AUTO))
-		writel(S3C64XX_SPI_SLAVE_SIG_INACT, sdd->regs + S3C64XX_SPI_SLAVE_SEL);
+		writel(S3C64XX_SPI_CS_SIG_INACT, sdd->regs + S3C64XX_SPI_CS_REG);
 
 	/* Disable Interrupts - we use Polling if not DMA mode */
 	writel(0, regs + S3C64XX_SPI_INT_EN);
@@ -1186,15 +1203,13 @@
 
 	if (!is_polling(sdd)) {
 		/* Acquire DMA channels */
-		sdd->rx_dma.ch = dma_request_slave_channel_reason(&pdev->dev,
-								  "rx");
+		sdd->rx_dma.ch = dma_request_chan(&pdev->dev, "rx");
 		if (IS_ERR(sdd->rx_dma.ch)) {
 			dev_err(&pdev->dev, "Failed to get RX DMA channel\n");
 			ret = PTR_ERR(sdd->rx_dma.ch);
 			goto err_disable_io_clk;
 		}
-		sdd->tx_dma.ch = dma_request_slave_channel_reason(&pdev->dev,
-								  "tx");
+		sdd->tx_dma.ch = dma_request_chan(&pdev->dev, "tx");
 		if (IS_ERR(sdd->tx_dma.ch)) {
 			dev_err(&pdev->dev, "Failed to get TX DMA channel\n");
 			ret = PTR_ERR(sdd->tx_dma.ch);
@@ -1363,6 +1378,10 @@
 
 	s3c64xx_spi_hwinit(sdd);
 
+	writel(S3C64XX_SPI_INT_RX_OVERRUN_EN | S3C64XX_SPI_INT_RX_UNDERRUN_EN |
+	       S3C64XX_SPI_INT_TX_OVERRUN_EN | S3C64XX_SPI_INT_TX_UNDERRUN_EN,
+	       sdd->regs + S3C64XX_SPI_INT_EN);
+
 	return 0;
 
 err_disable_src_clk:
@@ -1406,6 +1425,7 @@
 	.tx_st_done	= 25,
 	.high_speed	= true,
 	.clk_from_cmu	= true,
+	.quirks		= S3C64XX_SPI_QUIRK_CS_AUTO,
 };
 
 static struct s3c64xx_spi_port_config exynos7_spi_port_config = {
diff --git a/drivers/spi/spi-sc18is602.c b/drivers/spi/spi-sc18is602.c
index ced365e..297c512 100644
--- a/drivers/spi/spi-sc18is602.c
+++ b/drivers/spi/spi-sc18is602.c
@@ -211,8 +211,7 @@
 		}
 		status = 0;
 
-		if (t->delay_usecs)
-			udelay(t->delay_usecs);
+		spi_transfer_delay_exec(t);
 	}
 	m->status = status;
 	spi_finalize_current_message(master);
@@ -337,6 +336,6 @@
 
 module_i2c_driver(sc18is602_driver);
 
-MODULE_DESCRIPTION("SC18IC602/603 SPI Master Driver");
+MODULE_DESCRIPTION("SC18IS602/603 SPI Master Driver");
 MODULE_AUTHOR("Guenter Roeck");
 MODULE_LICENSE("GPL");
diff --git a/drivers/spi/spi-sh-hspi.c b/drivers/spi/spi-sh-hspi.c
index 7f73f91..a62034e 100644
--- a/drivers/spi/spi-sh-hspi.c
+++ b/drivers/spi/spi-sh-hspi.c
@@ -190,8 +190,7 @@
 
 		msg->actual_length += t->len;
 
-		if (t->delay_usecs)
-			udelay(t->delay_usecs);
+		spi_transfer_delay_exec(t);
 
 		if (cs_change) {
 			ndelay(nsecs);
diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c
index 8f13473..b2579af 100644
--- a/drivers/spi/spi-sh-msiof.c
+++ b/drivers/spi/spi-sh-msiof.c
@@ -14,8 +14,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
 #include <linux/err.h>
-#include <linux/gpio.h>
-#include <linux/gpio/consumer.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
@@ -55,7 +53,6 @@
 	void *rx_dma_page;
 	dma_addr_t tx_dma_addr;
 	dma_addr_t rx_dma_addr;
-	unsigned short unused_ss;
 	bool native_cs_inited;
 	bool native_cs_high;
 	bool slave_aborted;
@@ -63,140 +60,140 @@
 
 #define MAX_SS	3	/* Maximum number of native chip selects */
 
-#define TMDR1	0x00	/* Transmit Mode Register 1 */
-#define TMDR2	0x04	/* Transmit Mode Register 2 */
-#define TMDR3	0x08	/* Transmit Mode Register 3 */
-#define RMDR1	0x10	/* Receive Mode Register 1 */
-#define RMDR2	0x14	/* Receive Mode Register 2 */
-#define RMDR3	0x18	/* Receive Mode Register 3 */
-#define TSCR	0x20	/* Transmit Clock Select Register */
-#define RSCR	0x22	/* Receive Clock Select Register (SH, A1, APE6) */
-#define CTR	0x28	/* Control Register */
-#define FCTR	0x30	/* FIFO Control Register */
-#define STR	0x40	/* Status Register */
-#define IER	0x44	/* Interrupt Enable Register */
-#define TDR1	0x48	/* Transmit Control Data Register 1 (SH, A1) */
-#define TDR2	0x4c	/* Transmit Control Data Register 2 (SH, A1) */
-#define TFDR	0x50	/* Transmit FIFO Data Register */
-#define RDR1	0x58	/* Receive Control Data Register 1 (SH, A1) */
-#define RDR2	0x5c	/* Receive Control Data Register 2 (SH, A1) */
-#define RFDR	0x60	/* Receive FIFO Data Register */
+#define SITMDR1	0x00	/* Transmit Mode Register 1 */
+#define SITMDR2	0x04	/* Transmit Mode Register 2 */
+#define SITMDR3	0x08	/* Transmit Mode Register 3 */
+#define SIRMDR1	0x10	/* Receive Mode Register 1 */
+#define SIRMDR2	0x14	/* Receive Mode Register 2 */
+#define SIRMDR3	0x18	/* Receive Mode Register 3 */
+#define SITSCR	0x20	/* Transmit Clock Select Register */
+#define SIRSCR	0x22	/* Receive Clock Select Register (SH, A1, APE6) */
+#define SICTR	0x28	/* Control Register */
+#define SIFCTR	0x30	/* FIFO Control Register */
+#define SISTR	0x40	/* Status Register */
+#define SIIER	0x44	/* Interrupt Enable Register */
+#define SITDR1	0x48	/* Transmit Control Data Register 1 (SH, A1) */
+#define SITDR2	0x4c	/* Transmit Control Data Register 2 (SH, A1) */
+#define SITFDR	0x50	/* Transmit FIFO Data Register */
+#define SIRDR1	0x58	/* Receive Control Data Register 1 (SH, A1) */
+#define SIRDR2	0x5c	/* Receive Control Data Register 2 (SH, A1) */
+#define SIRFDR	0x60	/* Receive FIFO Data Register */
 
-/* TMDR1 and RMDR1 */
-#define MDR1_TRMD	   BIT(31)  /* Transfer Mode (1 = Master mode) */
-#define MDR1_SYNCMD_MASK   GENMASK(29, 28) /* SYNC Mode */
-#define MDR1_SYNCMD_SPI	   (2 << 28)/*   Level mode/SPI */
-#define MDR1_SYNCMD_LR	   (3 << 28)/*   L/R mode */
-#define MDR1_SYNCAC_SHIFT  25       /* Sync Polarity (1 = Active-low) */
-#define MDR1_BITLSB_SHIFT  24       /* MSB/LSB First (1 = LSB first) */
-#define MDR1_DTDL_SHIFT	   20       /* Data Pin Bit Delay for MSIOF_SYNC */
-#define MDR1_SYNCDL_SHIFT  16       /* Frame Sync Signal Timing Delay */
-#define MDR1_FLD_MASK	   GENMASK(3, 2) /* Frame Sync Signal Interval (0-3) */
-#define MDR1_FLD_SHIFT	   2
-#define MDR1_XXSTP	   BIT(0)   /* Transmission/Reception Stop on FIFO */
-/* TMDR1 */
-#define TMDR1_PCON	   BIT(30)  /* Transfer Signal Connection */
-#define TMDR1_SYNCCH_MASK  GENMASK(27, 26) /* Sync Signal Channel Select */
-#define TMDR1_SYNCCH_SHIFT 26       /* 0=MSIOF_SYNC, 1=MSIOF_SS1, 2=MSIOF_SS2 */
+/* SITMDR1 and SIRMDR1 */
+#define SIMDR1_TRMD		BIT(31)		/* Transfer Mode (1 = Master mode) */
+#define SIMDR1_SYNCMD_MASK	GENMASK(29, 28)	/* SYNC Mode */
+#define SIMDR1_SYNCMD_SPI	(2 << 28)	/*   Level mode/SPI */
+#define SIMDR1_SYNCMD_LR	(3 << 28)	/*   L/R mode */
+#define SIMDR1_SYNCAC_SHIFT	25		/* Sync Polarity (1 = Active-low) */
+#define SIMDR1_BITLSB_SHIFT	24		/* MSB/LSB First (1 = LSB first) */
+#define SIMDR1_DTDL_SHIFT	20		/* Data Pin Bit Delay for MSIOF_SYNC */
+#define SIMDR1_SYNCDL_SHIFT	16		/* Frame Sync Signal Timing Delay */
+#define SIMDR1_FLD_MASK		GENMASK(3, 2)	/* Frame Sync Signal Interval (0-3) */
+#define SIMDR1_FLD_SHIFT	2
+#define SIMDR1_XXSTP		BIT(0)		/* Transmission/Reception Stop on FIFO */
+/* SITMDR1 */
+#define SITMDR1_PCON		BIT(30)		/* Transfer Signal Connection */
+#define SITMDR1_SYNCCH_MASK	GENMASK(27, 26)	/* Sync Signal Channel Select */
+#define SITMDR1_SYNCCH_SHIFT	26		/* 0=MSIOF_SYNC, 1=MSIOF_SS1, 2=MSIOF_SS2 */
 
-/* TMDR2 and RMDR2 */
-#define MDR2_BITLEN1(i)	(((i) - 1) << 24) /* Data Size (8-32 bits) */
-#define MDR2_WDLEN1(i)	(((i) - 1) << 16) /* Word Count (1-64/256 (SH, A1))) */
-#define MDR2_GRPMASK1	BIT(0)      /* Group Output Mask 1 (SH, A1) */
+/* SITMDR2 and SIRMDR2 */
+#define SIMDR2_BITLEN1(i)	(((i) - 1) << 24) /* Data Size (8-32 bits) */
+#define SIMDR2_WDLEN1(i)	(((i) - 1) << 16) /* Word Count (1-64/256 (SH, A1))) */
+#define SIMDR2_GRPMASK1		BIT(0)		/* Group Output Mask 1 (SH, A1) */
 
-/* TSCR and RSCR */
-#define SCR_BRPS_MASK	GENMASK(12, 8) /* Prescaler Setting (1-32) */
-#define SCR_BRPS(i)	(((i) - 1) << 8)
-#define SCR_BRDV_MASK	GENMASK(2, 0) /* Baud Rate Generator's Division Ratio */
-#define SCR_BRDV_DIV_2	0
-#define SCR_BRDV_DIV_4	1
-#define SCR_BRDV_DIV_8	2
-#define SCR_BRDV_DIV_16	3
-#define SCR_BRDV_DIV_32	4
-#define SCR_BRDV_DIV_1	7
+/* SITSCR and SIRSCR */
+#define SISCR_BRPS_MASK		GENMASK(12, 8)	/* Prescaler Setting (1-32) */
+#define SISCR_BRPS(i)		(((i) - 1) << 8)
+#define SISCR_BRDV_MASK		GENMASK(2, 0)	/* Baud Rate Generator's Division Ratio */
+#define SISCR_BRDV_DIV_2	0
+#define SISCR_BRDV_DIV_4	1
+#define SISCR_BRDV_DIV_8	2
+#define SISCR_BRDV_DIV_16	3
+#define SISCR_BRDV_DIV_32	4
+#define SISCR_BRDV_DIV_1	7
 
-/* CTR */
-#define CTR_TSCKIZ_MASK	GENMASK(31, 30) /* Transmit Clock I/O Polarity Select */
-#define CTR_TSCKIZ_SCK	BIT(31)   /*   Disable SCK when TX disabled */
-#define CTR_TSCKIZ_POL_SHIFT 30   /*   Transmit Clock Polarity */
-#define CTR_RSCKIZ_MASK	GENMASK(29, 28) /* Receive Clock Polarity Select */
-#define CTR_RSCKIZ_SCK	BIT(29)   /*   Must match CTR_TSCKIZ_SCK */
-#define CTR_RSCKIZ_POL_SHIFT 28   /*   Receive Clock Polarity */
-#define CTR_TEDG_SHIFT	     27   /* Transmit Timing (1 = falling edge) */
-#define CTR_REDG_SHIFT	     26   /* Receive Timing (1 = falling edge) */
-#define CTR_TXDIZ_MASK	GENMASK(23, 22) /* Pin Output When TX is Disabled */
-#define CTR_TXDIZ_LOW	(0 << 22) /*   0 */
-#define CTR_TXDIZ_HIGH	(1 << 22) /*   1 */
-#define CTR_TXDIZ_HIZ	(2 << 22) /*   High-impedance */
-#define CTR_TSCKE	BIT(15)   /* Transmit Serial Clock Output Enable */
-#define CTR_TFSE	BIT(14)   /* Transmit Frame Sync Signal Output Enable */
-#define CTR_TXE		BIT(9)    /* Transmit Enable */
-#define CTR_RXE		BIT(8)    /* Receive Enable */
-#define CTR_TXRST	BIT(1)    /* Transmit Reset */
-#define CTR_RXRST	BIT(0)    /* Receive Reset */
+/* SICTR */
+#define SICTR_TSCKIZ_MASK	GENMASK(31, 30)	/* Transmit Clock I/O Polarity Select */
+#define SICTR_TSCKIZ_SCK	BIT(31)		/*   Disable SCK when TX disabled */
+#define SICTR_TSCKIZ_POL_SHIFT	30		/*   Transmit Clock Polarity */
+#define SICTR_RSCKIZ_MASK	GENMASK(29, 28) /* Receive Clock Polarity Select */
+#define SICTR_RSCKIZ_SCK	BIT(29)		/*   Must match CTR_TSCKIZ_SCK */
+#define SICTR_RSCKIZ_POL_SHIFT	28		/*   Receive Clock Polarity */
+#define SICTR_TEDG_SHIFT	27		/* Transmit Timing (1 = falling edge) */
+#define SICTR_REDG_SHIFT	26		/* Receive Timing (1 = falling edge) */
+#define SICTR_TXDIZ_MASK	GENMASK(23, 22)	/* Pin Output When TX is Disabled */
+#define SICTR_TXDIZ_LOW		(0 << 22)	/*   0 */
+#define SICTR_TXDIZ_HIGH	(1 << 22)	/*   1 */
+#define SICTR_TXDIZ_HIZ		(2 << 22)	/*   High-impedance */
+#define SICTR_TSCKE		BIT(15)		/* Transmit Serial Clock Output Enable */
+#define SICTR_TFSE		BIT(14)		/* Transmit Frame Sync Signal Output Enable */
+#define SICTR_TXE		BIT(9)		/* Transmit Enable */
+#define SICTR_RXE		BIT(8)		/* Receive Enable */
+#define SICTR_TXRST		BIT(1)		/* Transmit Reset */
+#define SICTR_RXRST		BIT(0)		/* Receive Reset */
 
-/* FCTR */
-#define FCTR_TFWM_MASK	GENMASK(31, 29) /* Transmit FIFO Watermark */
-#define FCTR_TFWM_64	(0 << 29) /*  Transfer Request when 64 empty stages */
-#define FCTR_TFWM_32	(1 << 29) /*  Transfer Request when 32 empty stages */
-#define FCTR_TFWM_24	(2 << 29) /*  Transfer Request when 24 empty stages */
-#define FCTR_TFWM_16	(3 << 29) /*  Transfer Request when 16 empty stages */
-#define FCTR_TFWM_12	(4 << 29) /*  Transfer Request when 12 empty stages */
-#define FCTR_TFWM_8	(5 << 29) /*  Transfer Request when 8 empty stages */
-#define FCTR_TFWM_4	(6 << 29) /*  Transfer Request when 4 empty stages */
-#define FCTR_TFWM_1	(7 << 29) /*  Transfer Request when 1 empty stage */
-#define FCTR_TFUA_MASK	GENMASK(26, 20) /* Transmit FIFO Usable Area */
-#define FCTR_TFUA_SHIFT	20
-#define FCTR_TFUA(i)	((i) << FCTR_TFUA_SHIFT)
-#define FCTR_RFWM_MASK	GENMASK(15, 13) /* Receive FIFO Watermark */
-#define FCTR_RFWM_1	(0 << 13) /*  Transfer Request when 1 valid stages */
-#define FCTR_RFWM_4	(1 << 13) /*  Transfer Request when 4 valid stages */
-#define FCTR_RFWM_8	(2 << 13) /*  Transfer Request when 8 valid stages */
-#define FCTR_RFWM_16	(3 << 13) /*  Transfer Request when 16 valid stages */
-#define FCTR_RFWM_32	(4 << 13) /*  Transfer Request when 32 valid stages */
-#define FCTR_RFWM_64	(5 << 13) /*  Transfer Request when 64 valid stages */
-#define FCTR_RFWM_128	(6 << 13) /*  Transfer Request when 128 valid stages */
-#define FCTR_RFWM_256	(7 << 13) /*  Transfer Request when 256 valid stages */
-#define FCTR_RFUA_MASK	GENMASK(12, 4) /* Receive FIFO Usable Area (0x40 = full) */
-#define FCTR_RFUA_SHIFT	4
-#define FCTR_RFUA(i)	((i) << FCTR_RFUA_SHIFT)
+/* SIFCTR */
+#define SIFCTR_TFWM_MASK	GENMASK(31, 29)	/* Transmit FIFO Watermark */
+#define SIFCTR_TFWM_64		(0 << 29)	/*  Transfer Request when 64 empty stages */
+#define SIFCTR_TFWM_32		(1 << 29)	/*  Transfer Request when 32 empty stages */
+#define SIFCTR_TFWM_24		(2 << 29)	/*  Transfer Request when 24 empty stages */
+#define SIFCTR_TFWM_16		(3 << 29)	/*  Transfer Request when 16 empty stages */
+#define SIFCTR_TFWM_12		(4 << 29)	/*  Transfer Request when 12 empty stages */
+#define SIFCTR_TFWM_8		(5 << 29)	/*  Transfer Request when 8 empty stages */
+#define SIFCTR_TFWM_4		(6 << 29)	/*  Transfer Request when 4 empty stages */
+#define SIFCTR_TFWM_1		(7 << 29)	/*  Transfer Request when 1 empty stage */
+#define SIFCTR_TFUA_MASK	GENMASK(26, 20) /* Transmit FIFO Usable Area */
+#define SIFCTR_TFUA_SHIFT	20
+#define SIFCTR_TFUA(i)		((i) << SIFCTR_TFUA_SHIFT)
+#define SIFCTR_RFWM_MASK	GENMASK(15, 13)	/* Receive FIFO Watermark */
+#define SIFCTR_RFWM_1		(0 << 13)	/*  Transfer Request when 1 valid stages */
+#define SIFCTR_RFWM_4		(1 << 13)	/*  Transfer Request when 4 valid stages */
+#define SIFCTR_RFWM_8		(2 << 13)	/*  Transfer Request when 8 valid stages */
+#define SIFCTR_RFWM_16		(3 << 13)	/*  Transfer Request when 16 valid stages */
+#define SIFCTR_RFWM_32		(4 << 13)	/*  Transfer Request when 32 valid stages */
+#define SIFCTR_RFWM_64		(5 << 13)	/*  Transfer Request when 64 valid stages */
+#define SIFCTR_RFWM_128		(6 << 13)	/*  Transfer Request when 128 valid stages */
+#define SIFCTR_RFWM_256		(7 << 13)	/*  Transfer Request when 256 valid stages */
+#define SIFCTR_RFUA_MASK	GENMASK(12, 4)	/* Receive FIFO Usable Area (0x40 = full) */
+#define SIFCTR_RFUA_SHIFT	4
+#define SIFCTR_RFUA(i)		((i) << SIFCTR_RFUA_SHIFT)
 
-/* STR */
-#define STR_TFEMP	BIT(29) /* Transmit FIFO Empty */
-#define STR_TDREQ	BIT(28) /* Transmit Data Transfer Request */
-#define STR_TEOF	BIT(23) /* Frame Transmission End */
-#define STR_TFSERR	BIT(21) /* Transmit Frame Synchronization Error */
-#define STR_TFOVF	BIT(20) /* Transmit FIFO Overflow */
-#define STR_TFUDF	BIT(19) /* Transmit FIFO Underflow */
-#define STR_RFFUL	BIT(13) /* Receive FIFO Full */
-#define STR_RDREQ	BIT(12) /* Receive Data Transfer Request */
-#define STR_REOF	BIT(7)  /* Frame Reception End */
-#define STR_RFSERR	BIT(5)  /* Receive Frame Synchronization Error */
-#define STR_RFUDF	BIT(4)  /* Receive FIFO Underflow */
-#define STR_RFOVF	BIT(3)  /* Receive FIFO Overflow */
+/* SISTR */
+#define SISTR_TFEMP		BIT(29) /* Transmit FIFO Empty */
+#define SISTR_TDREQ		BIT(28) /* Transmit Data Transfer Request */
+#define SISTR_TEOF		BIT(23) /* Frame Transmission End */
+#define SISTR_TFSERR		BIT(21) /* Transmit Frame Synchronization Error */
+#define SISTR_TFOVF		BIT(20) /* Transmit FIFO Overflow */
+#define SISTR_TFUDF		BIT(19) /* Transmit FIFO Underflow */
+#define SISTR_RFFUL		BIT(13) /* Receive FIFO Full */
+#define SISTR_RDREQ		BIT(12) /* Receive Data Transfer Request */
+#define SISTR_REOF		BIT(7)  /* Frame Reception End */
+#define SISTR_RFSERR		BIT(5)  /* Receive Frame Synchronization Error */
+#define SISTR_RFUDF		BIT(4)  /* Receive FIFO Underflow */
+#define SISTR_RFOVF		BIT(3)  /* Receive FIFO Overflow */
 
-/* IER */
-#define IER_TDMAE	BIT(31) /* Transmit Data DMA Transfer Req. Enable */
-#define IER_TFEMPE	BIT(29) /* Transmit FIFO Empty Enable */
-#define IER_TDREQE	BIT(28) /* Transmit Data Transfer Request Enable */
-#define IER_TEOFE	BIT(23) /* Frame Transmission End Enable */
-#define IER_TFSERRE	BIT(21) /* Transmit Frame Sync Error Enable */
-#define IER_TFOVFE	BIT(20) /* Transmit FIFO Overflow Enable */
-#define IER_TFUDFE	BIT(19) /* Transmit FIFO Underflow Enable */
-#define IER_RDMAE	BIT(15) /* Receive Data DMA Transfer Req. Enable */
-#define IER_RFFULE	BIT(13) /* Receive FIFO Full Enable */
-#define IER_RDREQE	BIT(12) /* Receive Data Transfer Request Enable */
-#define IER_REOFE	BIT(7)  /* Frame Reception End Enable */
-#define IER_RFSERRE	BIT(5)  /* Receive Frame Sync Error Enable */
-#define IER_RFUDFE	BIT(4)  /* Receive FIFO Underflow Enable */
-#define IER_RFOVFE	BIT(3)  /* Receive FIFO Overflow Enable */
+/* SIIER */
+#define SIIER_TDMAE		BIT(31) /* Transmit Data DMA Transfer Req. Enable */
+#define SIIER_TFEMPE		BIT(29) /* Transmit FIFO Empty Enable */
+#define SIIER_TDREQE		BIT(28) /* Transmit Data Transfer Request Enable */
+#define SIIER_TEOFE		BIT(23) /* Frame Transmission End Enable */
+#define SIIER_TFSERRE		BIT(21) /* Transmit Frame Sync Error Enable */
+#define SIIER_TFOVFE		BIT(20) /* Transmit FIFO Overflow Enable */
+#define SIIER_TFUDFE		BIT(19) /* Transmit FIFO Underflow Enable */
+#define SIIER_RDMAE		BIT(15) /* Receive Data DMA Transfer Req. Enable */
+#define SIIER_RFFULE		BIT(13) /* Receive FIFO Full Enable */
+#define SIIER_RDREQE		BIT(12) /* Receive Data Transfer Request Enable */
+#define SIIER_REOFE		BIT(7)  /* Frame Reception End Enable */
+#define SIIER_RFSERRE		BIT(5)  /* Receive Frame Sync Error Enable */
+#define SIIER_RFUDFE		BIT(4)  /* Receive FIFO Underflow Enable */
+#define SIIER_RFOVFE		BIT(3)  /* Receive FIFO Overflow Enable */
 
 
 static u32 sh_msiof_read(struct sh_msiof_spi_priv *p, int reg_offs)
 {
 	switch (reg_offs) {
-	case TSCR:
-	case RSCR:
+	case SITSCR:
+	case SIRSCR:
 		return ioread16(p->mapbase + reg_offs);
 	default:
 		return ioread32(p->mapbase + reg_offs);
@@ -207,8 +204,8 @@
 			   u32 value)
 {
 	switch (reg_offs) {
-	case TSCR:
-	case RSCR:
+	case SITSCR:
+	case SIRSCR:
 		iowrite16(value, p->mapbase + reg_offs);
 		break;
 	default:
@@ -223,12 +220,12 @@
 	u32 mask = clr | set;
 	u32 data;
 
-	data = sh_msiof_read(p, CTR);
+	data = sh_msiof_read(p, SICTR);
 	data &= ~clr;
 	data |= set;
-	sh_msiof_write(p, CTR, data);
+	sh_msiof_write(p, SICTR, data);
 
-	return readl_poll_timeout_atomic(p->mapbase + CTR, data,
+	return readl_poll_timeout_atomic(p->mapbase + SICTR, data,
 					 (data & mask) == set, 1, 100);
 }
 
@@ -237,7 +234,7 @@
 	struct sh_msiof_spi_priv *p = data;
 
 	/* just disable the interrupt and wake up */
-	sh_msiof_write(p, IER, 0);
+	sh_msiof_write(p, SIIER, 0);
 	complete(&p->done);
 
 	return IRQ_HANDLED;
@@ -245,20 +242,20 @@
 
 static void sh_msiof_spi_reset_regs(struct sh_msiof_spi_priv *p)
 {
-	u32 mask = CTR_TXRST | CTR_RXRST;
+	u32 mask = SICTR_TXRST | SICTR_RXRST;
 	u32 data;
 
-	data = sh_msiof_read(p, CTR);
+	data = sh_msiof_read(p, SICTR);
 	data |= mask;
-	sh_msiof_write(p, CTR, data);
+	sh_msiof_write(p, SICTR, data);
 
-	readl_poll_timeout_atomic(p->mapbase + CTR, data, !(data & mask), 1,
+	readl_poll_timeout_atomic(p->mapbase + SICTR, data, !(data & mask), 1,
 				  100);
 }
 
 static const u32 sh_msiof_spi_div_array[] = {
-	SCR_BRDV_DIV_1, SCR_BRDV_DIV_2,	 SCR_BRDV_DIV_4,
-	SCR_BRDV_DIV_8,	SCR_BRDV_DIV_16, SCR_BRDV_DIV_32,
+	SISCR_BRDV_DIV_1, SISCR_BRDV_DIV_2, SISCR_BRDV_DIV_4,
+	SISCR_BRDV_DIV_8, SISCR_BRDV_DIV_16, SISCR_BRDV_DIV_32,
 };
 
 static void sh_msiof_spi_set_clk_regs(struct sh_msiof_spi_priv *p,
@@ -276,7 +273,7 @@
 
 	div = DIV_ROUND_UP(parent_rate, spi_hz);
 	if (div <= 1024) {
-		/* SCR_BRDV_DIV_1 is valid only if BRPS is x 1/1 or x 1/2 */
+		/* SISCR_BRDV_DIV_1 is valid only if BRPS is x 1/1 or x 1/2 */
 		if (!div_pow && div <= 32 && div > 2)
 			div_pow = 1;
 
@@ -295,10 +292,10 @@
 		brps = 32;
 	}
 
-	scr = sh_msiof_spi_div_array[div_pow] | SCR_BRPS(brps);
-	sh_msiof_write(p, TSCR, scr);
+	scr = sh_msiof_spi_div_array[div_pow] | SISCR_BRPS(brps);
+	sh_msiof_write(p, SITSCR, scr);
 	if (!(p->ctlr->flags & SPI_CONTROLLER_MUST_TX))
-		sh_msiof_write(p, RSCR, scr);
+		sh_msiof_write(p, SIRSCR, scr);
 }
 
 static u32 sh_msiof_get_delay_bit(u32 dtdl_or_syncdl)
@@ -337,8 +334,8 @@
 		return 0;
 	}
 
-	val = sh_msiof_get_delay_bit(p->info->dtdl) << MDR1_DTDL_SHIFT;
-	val |= sh_msiof_get_delay_bit(p->info->syncdl) << MDR1_SYNCDL_SHIFT;
+	val = sh_msiof_get_delay_bit(p->info->dtdl) << SIMDR1_DTDL_SHIFT;
+	val |= sh_msiof_get_delay_bit(p->info->syncdl) << SIMDR1_SYNCDL_SHIFT;
 
 	return val;
 }
@@ -357,54 +354,54 @@
 	 *    1    0         11     11    0    0
 	 *    1    1         11     11    1    1
 	 */
-	tmp = MDR1_SYNCMD_SPI | 1 << MDR1_FLD_SHIFT | MDR1_XXSTP;
-	tmp |= !cs_high << MDR1_SYNCAC_SHIFT;
-	tmp |= lsb_first << MDR1_BITLSB_SHIFT;
+	tmp = SIMDR1_SYNCMD_SPI | 1 << SIMDR1_FLD_SHIFT | SIMDR1_XXSTP;
+	tmp |= !cs_high << SIMDR1_SYNCAC_SHIFT;
+	tmp |= lsb_first << SIMDR1_BITLSB_SHIFT;
 	tmp |= sh_msiof_spi_get_dtdl_and_syncdl(p);
 	if (spi_controller_is_slave(p->ctlr)) {
-		sh_msiof_write(p, TMDR1, tmp | TMDR1_PCON);
+		sh_msiof_write(p, SITMDR1, tmp | SITMDR1_PCON);
 	} else {
-		sh_msiof_write(p, TMDR1,
-			       tmp | MDR1_TRMD | TMDR1_PCON |
-			       (ss < MAX_SS ? ss : 0) << TMDR1_SYNCCH_SHIFT);
+		sh_msiof_write(p, SITMDR1,
+			       tmp | SIMDR1_TRMD | SITMDR1_PCON |
+			       (ss < MAX_SS ? ss : 0) << SITMDR1_SYNCCH_SHIFT);
 	}
 	if (p->ctlr->flags & SPI_CONTROLLER_MUST_TX) {
 		/* These bits are reserved if RX needs TX */
 		tmp &= ~0x0000ffff;
 	}
-	sh_msiof_write(p, RMDR1, tmp);
+	sh_msiof_write(p, SIRMDR1, tmp);
 
 	tmp = 0;
-	tmp |= CTR_TSCKIZ_SCK | cpol << CTR_TSCKIZ_POL_SHIFT;
-	tmp |= CTR_RSCKIZ_SCK | cpol << CTR_RSCKIZ_POL_SHIFT;
+	tmp |= SICTR_TSCKIZ_SCK | cpol << SICTR_TSCKIZ_POL_SHIFT;
+	tmp |= SICTR_RSCKIZ_SCK | cpol << SICTR_RSCKIZ_POL_SHIFT;
 
 	edge = cpol ^ !cpha;
 
-	tmp |= edge << CTR_TEDG_SHIFT;
-	tmp |= edge << CTR_REDG_SHIFT;
-	tmp |= tx_hi_z ? CTR_TXDIZ_HIZ : CTR_TXDIZ_LOW;
-	sh_msiof_write(p, CTR, tmp);
+	tmp |= edge << SICTR_TEDG_SHIFT;
+	tmp |= edge << SICTR_REDG_SHIFT;
+	tmp |= tx_hi_z ? SICTR_TXDIZ_HIZ : SICTR_TXDIZ_LOW;
+	sh_msiof_write(p, SICTR, tmp);
 }
 
 static void sh_msiof_spi_set_mode_regs(struct sh_msiof_spi_priv *p,
 				       const void *tx_buf, void *rx_buf,
 				       u32 bits, u32 words)
 {
-	u32 dr2 = MDR2_BITLEN1(bits) | MDR2_WDLEN1(words);
+	u32 dr2 = SIMDR2_BITLEN1(bits) | SIMDR2_WDLEN1(words);
 
 	if (tx_buf || (p->ctlr->flags & SPI_CONTROLLER_MUST_TX))
-		sh_msiof_write(p, TMDR2, dr2);
+		sh_msiof_write(p, SITMDR2, dr2);
 	else
-		sh_msiof_write(p, TMDR2, dr2 | MDR2_GRPMASK1);
+		sh_msiof_write(p, SITMDR2, dr2 | SIMDR2_GRPMASK1);
 
 	if (rx_buf)
-		sh_msiof_write(p, RMDR2, dr2);
+		sh_msiof_write(p, SIRMDR2, dr2);
 }
 
 static void sh_msiof_reset_str(struct sh_msiof_spi_priv *p)
 {
-	sh_msiof_write(p, STR,
-		       sh_msiof_read(p, STR) & ~(STR_TDREQ | STR_RDREQ));
+	sh_msiof_write(p, SISTR,
+		       sh_msiof_read(p, SISTR) & ~(SISTR_TDREQ | SISTR_RDREQ));
 }
 
 static void sh_msiof_spi_write_fifo_8(struct sh_msiof_spi_priv *p,
@@ -414,7 +411,7 @@
 	int k;
 
 	for (k = 0; k < words; k++)
-		sh_msiof_write(p, TFDR, buf_8[k] << fs);
+		sh_msiof_write(p, SITFDR, buf_8[k] << fs);
 }
 
 static void sh_msiof_spi_write_fifo_16(struct sh_msiof_spi_priv *p,
@@ -424,7 +421,7 @@
 	int k;
 
 	for (k = 0; k < words; k++)
-		sh_msiof_write(p, TFDR, buf_16[k] << fs);
+		sh_msiof_write(p, SITFDR, buf_16[k] << fs);
 }
 
 static void sh_msiof_spi_write_fifo_16u(struct sh_msiof_spi_priv *p,
@@ -434,7 +431,7 @@
 	int k;
 
 	for (k = 0; k < words; k++)
-		sh_msiof_write(p, TFDR, get_unaligned(&buf_16[k]) << fs);
+		sh_msiof_write(p, SITFDR, get_unaligned(&buf_16[k]) << fs);
 }
 
 static void sh_msiof_spi_write_fifo_32(struct sh_msiof_spi_priv *p,
@@ -444,7 +441,7 @@
 	int k;
 
 	for (k = 0; k < words; k++)
-		sh_msiof_write(p, TFDR, buf_32[k] << fs);
+		sh_msiof_write(p, SITFDR, buf_32[k] << fs);
 }
 
 static void sh_msiof_spi_write_fifo_32u(struct sh_msiof_spi_priv *p,
@@ -454,7 +451,7 @@
 	int k;
 
 	for (k = 0; k < words; k++)
-		sh_msiof_write(p, TFDR, get_unaligned(&buf_32[k]) << fs);
+		sh_msiof_write(p, SITFDR, get_unaligned(&buf_32[k]) << fs);
 }
 
 static void sh_msiof_spi_write_fifo_s32(struct sh_msiof_spi_priv *p,
@@ -464,7 +461,7 @@
 	int k;
 
 	for (k = 0; k < words; k++)
-		sh_msiof_write(p, TFDR, swab32(buf_32[k] << fs));
+		sh_msiof_write(p, SITFDR, swab32(buf_32[k] << fs));
 }
 
 static void sh_msiof_spi_write_fifo_s32u(struct sh_msiof_spi_priv *p,
@@ -474,7 +471,7 @@
 	int k;
 
 	for (k = 0; k < words; k++)
-		sh_msiof_write(p, TFDR, swab32(get_unaligned(&buf_32[k]) << fs));
+		sh_msiof_write(p, SITFDR, swab32(get_unaligned(&buf_32[k]) << fs));
 }
 
 static void sh_msiof_spi_read_fifo_8(struct sh_msiof_spi_priv *p,
@@ -484,7 +481,7 @@
 	int k;
 
 	for (k = 0; k < words; k++)
-		buf_8[k] = sh_msiof_read(p, RFDR) >> fs;
+		buf_8[k] = sh_msiof_read(p, SIRFDR) >> fs;
 }
 
 static void sh_msiof_spi_read_fifo_16(struct sh_msiof_spi_priv *p,
@@ -494,7 +491,7 @@
 	int k;
 
 	for (k = 0; k < words; k++)
-		buf_16[k] = sh_msiof_read(p, RFDR) >> fs;
+		buf_16[k] = sh_msiof_read(p, SIRFDR) >> fs;
 }
 
 static void sh_msiof_spi_read_fifo_16u(struct sh_msiof_spi_priv *p,
@@ -504,7 +501,7 @@
 	int k;
 
 	for (k = 0; k < words; k++)
-		put_unaligned(sh_msiof_read(p, RFDR) >> fs, &buf_16[k]);
+		put_unaligned(sh_msiof_read(p, SIRFDR) >> fs, &buf_16[k]);
 }
 
 static void sh_msiof_spi_read_fifo_32(struct sh_msiof_spi_priv *p,
@@ -514,7 +511,7 @@
 	int k;
 
 	for (k = 0; k < words; k++)
-		buf_32[k] = sh_msiof_read(p, RFDR) >> fs;
+		buf_32[k] = sh_msiof_read(p, SIRFDR) >> fs;
 }
 
 static void sh_msiof_spi_read_fifo_32u(struct sh_msiof_spi_priv *p,
@@ -524,7 +521,7 @@
 	int k;
 
 	for (k = 0; k < words; k++)
-		put_unaligned(sh_msiof_read(p, RFDR) >> fs, &buf_32[k]);
+		put_unaligned(sh_msiof_read(p, SIRFDR) >> fs, &buf_32[k]);
 }
 
 static void sh_msiof_spi_read_fifo_s32(struct sh_msiof_spi_priv *p,
@@ -534,7 +531,7 @@
 	int k;
 
 	for (k = 0; k < words; k++)
-		buf_32[k] = swab32(sh_msiof_read(p, RFDR) >> fs);
+		buf_32[k] = swab32(sh_msiof_read(p, SIRFDR) >> fs);
 }
 
 static void sh_msiof_spi_read_fifo_s32u(struct sh_msiof_spi_priv *p,
@@ -544,7 +541,7 @@
 	int k;
 
 	for (k = 0; k < words; k++)
-		put_unaligned(swab32(sh_msiof_read(p, RFDR) >> fs), &buf_32[k]);
+		put_unaligned(swab32(sh_msiof_read(p, SIRFDR) >> fs), &buf_32[k]);
 }
 
 static int sh_msiof_spi_setup(struct spi_device *spi)
@@ -561,17 +558,17 @@
 		return 0;
 
 	/* Configure native chip select mode/polarity early */
-	clr = MDR1_SYNCMD_MASK;
-	set = MDR1_SYNCMD_SPI;
+	clr = SIMDR1_SYNCMD_MASK;
+	set = SIMDR1_SYNCMD_SPI;
 	if (spi->mode & SPI_CS_HIGH)
-		clr |= BIT(MDR1_SYNCAC_SHIFT);
+		clr |= BIT(SIMDR1_SYNCAC_SHIFT);
 	else
-		set |= BIT(MDR1_SYNCAC_SHIFT);
+		set |= BIT(SIMDR1_SYNCAC_SHIFT);
 	pm_runtime_get_sync(&p->pdev->dev);
-	tmp = sh_msiof_read(p, TMDR1) & ~clr;
-	sh_msiof_write(p, TMDR1, tmp | set | MDR1_TRMD | TMDR1_PCON);
-	tmp = sh_msiof_read(p, RMDR1) & ~clr;
-	sh_msiof_write(p, RMDR1, tmp | set);
+	tmp = sh_msiof_read(p, SITMDR1) & ~clr;
+	sh_msiof_write(p, SITMDR1, tmp | set | SIMDR1_TRMD | SITMDR1_PCON);
+	tmp = sh_msiof_read(p, SIRMDR1) & ~clr;
+	sh_msiof_write(p, SIRMDR1, tmp | set);
 	pm_runtime_put(&p->pdev->dev);
 	p->native_cs_high = spi->mode & SPI_CS_HIGH;
 	p->native_cs_inited = true;
@@ -587,7 +584,7 @@
 
 	/* Configure pins before asserting CS */
 	if (spi->cs_gpiod) {
-		ss = p->unused_ss;
+		ss = ctlr->unused_native_cs;
 		cs_high = p->native_cs_high;
 	} else {
 		ss = spi->chip_select;
@@ -607,15 +604,15 @@
 
 	/* setup clock and rx/tx signals */
 	if (!slave)
-		ret = sh_msiof_modify_ctr_wait(p, 0, CTR_TSCKE);
+		ret = sh_msiof_modify_ctr_wait(p, 0, SICTR_TSCKE);
 	if (rx_buf && !ret)
-		ret = sh_msiof_modify_ctr_wait(p, 0, CTR_RXE);
+		ret = sh_msiof_modify_ctr_wait(p, 0, SICTR_RXE);
 	if (!ret)
-		ret = sh_msiof_modify_ctr_wait(p, 0, CTR_TXE);
+		ret = sh_msiof_modify_ctr_wait(p, 0, SICTR_TXE);
 
 	/* start by setting frame bit */
 	if (!ret && !slave)
-		ret = sh_msiof_modify_ctr_wait(p, 0, CTR_TFSE);
+		ret = sh_msiof_modify_ctr_wait(p, 0, SICTR_TFSE);
 
 	return ret;
 }
@@ -627,13 +624,13 @@
 
 	/* shut down frame, rx/tx and clock signals */
 	if (!slave)
-		ret = sh_msiof_modify_ctr_wait(p, CTR_TFSE, 0);
+		ret = sh_msiof_modify_ctr_wait(p, SICTR_TFSE, 0);
 	if (!ret)
-		ret = sh_msiof_modify_ctr_wait(p, CTR_TXE, 0);
+		ret = sh_msiof_modify_ctr_wait(p, SICTR_TXE, 0);
 	if (rx_buf && !ret)
-		ret = sh_msiof_modify_ctr_wait(p, CTR_RXE, 0);
+		ret = sh_msiof_modify_ctr_wait(p, SICTR_RXE, 0);
 	if (!ret && !slave)
-		ret = sh_msiof_modify_ctr_wait(p, CTR_TSCKE, 0);
+		ret = sh_msiof_modify_ctr_wait(p, SICTR_TSCKE, 0);
 
 	return ret;
 }
@@ -688,11 +685,11 @@
 	fifo_shift = 32 - bits;
 
 	/* default FIFO watermarks for PIO */
-	sh_msiof_write(p, FCTR, 0);
+	sh_msiof_write(p, SIFCTR, 0);
 
 	/* setup msiof transfer mode registers */
 	sh_msiof_spi_set_mode_regs(p, tx_buf, rx_buf, bits, words);
-	sh_msiof_write(p, IER, IER_TEOFE | IER_REOFE);
+	sh_msiof_write(p, SIIER, SIIER_TEOFE | SIIER_REOFE);
 
 	/* write tx fifo */
 	if (tx_buf)
@@ -731,7 +728,7 @@
 	sh_msiof_reset_str(p);
 	sh_msiof_spi_stop(p, rx_buf);
 stop_ier:
-	sh_msiof_write(p, IER, 0);
+	sh_msiof_write(p, SIIER, 0);
 	return ret;
 }
 
@@ -750,7 +747,7 @@
 
 	/* First prepare and submit the DMA request(s), as this may fail */
 	if (rx) {
-		ier_bits |= IER_RDREQE | IER_RDMAE;
+		ier_bits |= SIIER_RDREQE | SIIER_RDMAE;
 		desc_rx = dmaengine_prep_slave_single(p->ctlr->dma_rx,
 					p->rx_dma_addr, len, DMA_DEV_TO_MEM,
 					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
@@ -765,7 +762,7 @@
 	}
 
 	if (tx) {
-		ier_bits |= IER_TDREQE | IER_TDMAE;
+		ier_bits |= SIIER_TDREQE | SIIER_TDMAE;
 		dma_sync_single_for_device(p->ctlr->dma_tx->device->dev,
 					   p->tx_dma_addr, len, DMA_TO_DEVICE);
 		desc_tx = dmaengine_prep_slave_single(p->ctlr->dma_tx,
@@ -786,12 +783,12 @@
 	}
 
 	/* 1 stage FIFO watermarks for DMA */
-	sh_msiof_write(p, FCTR, FCTR_TFWM_1 | FCTR_RFWM_1);
+	sh_msiof_write(p, SIFCTR, SIFCTR_TFWM_1 | SIFCTR_RFWM_1);
 
 	/* setup msiof transfer mode registers (32-bit words) */
 	sh_msiof_spi_set_mode_regs(p, tx, rx, 32, len / 4);
 
-	sh_msiof_write(p, IER, ier_bits);
+	sh_msiof_write(p, SIIER, ier_bits);
 
 	reinit_completion(&p->done);
 	if (tx)
@@ -823,10 +820,10 @@
 		if (ret)
 			goto stop_reset;
 
-		sh_msiof_write(p, IER, 0);
+		sh_msiof_write(p, SIIER, 0);
 	} else {
 		/* wait for tx fifo to be emptied */
-		sh_msiof_write(p, IER, IER_TEOFE);
+		sh_msiof_write(p, SIIER, SIIER_TEOFE);
 		ret = sh_msiof_wait_for_completion(p, &p->done);
 		if (ret)
 			goto stop_reset;
@@ -856,7 +853,7 @@
 no_dma_tx:
 	if (rx)
 		dmaengine_terminate_all(p->ctlr->dma_rx);
-	sh_msiof_write(p, IER, 0);
+	sh_msiof_write(p, SIIER, 0);
 	return ret;
 }
 
@@ -1124,46 +1121,6 @@
 }
 #endif
 
-static int sh_msiof_get_cs_gpios(struct sh_msiof_spi_priv *p)
-{
-	struct device *dev = &p->pdev->dev;
-	unsigned int used_ss_mask = 0;
-	unsigned int cs_gpios = 0;
-	unsigned int num_cs, i;
-	int ret;
-
-	ret = gpiod_count(dev, "cs");
-	if (ret <= 0)
-		return 0;
-
-	num_cs = max_t(unsigned int, ret, p->ctlr->num_chipselect);
-	for (i = 0; i < num_cs; i++) {
-		struct gpio_desc *gpiod;
-
-		gpiod = devm_gpiod_get_index(dev, "cs", i, GPIOD_ASIS);
-		if (!IS_ERR(gpiod)) {
-			devm_gpiod_put(dev, gpiod);
-			cs_gpios++;
-			continue;
-		}
-
-		if (PTR_ERR(gpiod) != -ENOENT)
-			return PTR_ERR(gpiod);
-
-		if (i >= MAX_SS) {
-			dev_err(dev, "Invalid native chip select %d\n", i);
-			return -EINVAL;
-		}
-		used_ss_mask |= BIT(i);
-	}
-	p->unused_ss = ffz(used_ss_mask);
-	if (cs_gpios && p->unused_ss >= MAX_SS) {
-		dev_err(dev, "No unused native chip select available\n");
-		return -EINVAL;
-	}
-	return 0;
-}
-
 static struct dma_chan *sh_msiof_request_dma_chan(struct device *dev,
 	enum dma_transfer_direction dir, unsigned int id, dma_addr_t port_addr)
 {
@@ -1232,12 +1189,12 @@
 
 	ctlr = p->ctlr;
 	ctlr->dma_tx = sh_msiof_request_dma_chan(dev, DMA_MEM_TO_DEV,
-						 dma_tx_id, res->start + TFDR);
+						 dma_tx_id, res->start + SITFDR);
 	if (!ctlr->dma_tx)
 		return -ENODEV;
 
 	ctlr->dma_rx = sh_msiof_request_dma_chan(dev, DMA_DEV_TO_MEM,
-						 dma_rx_id, res->start + RFDR);
+						 dma_rx_id, res->start + SIRFDR);
 	if (!ctlr->dma_rx)
 		goto free_tx_chan;
 
@@ -1373,17 +1330,12 @@
 	if (p->info->rx_fifo_override)
 		p->rx_fifo_size = p->info->rx_fifo_override;
 
-	/* Setup GPIO chip selects */
-	ctlr->num_chipselect = p->info->num_chipselect;
-	ret = sh_msiof_get_cs_gpios(p);
-	if (ret)
-		goto err1;
-
 	/* init controller code */
 	ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
 	ctlr->mode_bits |= SPI_LSB_FIRST | SPI_3WIRE;
 	ctlr->flags = chipdata->ctlr_flags;
 	ctlr->bus_num = pdev->id;
+	ctlr->num_chipselect = p->info->num_chipselect;
 	ctlr->dev.of_node = pdev->dev.of_node;
 	ctlr->setup = sh_msiof_spi_setup;
 	ctlr->prepare_message = sh_msiof_prepare_message;
@@ -1392,6 +1344,7 @@
 	ctlr->auto_runtime_pm = true;
 	ctlr->transfer_one = sh_msiof_transfer_one;
 	ctlr->use_gpio_descriptors = true;
+	ctlr->max_native_cs = MAX_SS;
 
 	ret = sh_msiof_request_dma(p);
 	if (ret < 0)
@@ -1445,7 +1398,7 @@
 
 static SIMPLE_DEV_PM_OPS(sh_msiof_spi_pm_ops, sh_msiof_spi_suspend,
 			 sh_msiof_spi_resume);
-#define DEV_PM_OPS	&sh_msiof_spi_pm_ops
+#define DEV_PM_OPS	(&sh_msiof_spi_pm_ops)
 #else
 #define DEV_PM_OPS	NULL
 #endif /* CONFIG_PM_SLEEP */
diff --git a/drivers/spi/spi-sirf.c b/drivers/spi/spi-sirf.c
index e1e6391..8419e67 100644
--- a/drivers/spi/spi-sirf.c
+++ b/drivers/spi/spi-sirf.c
@@ -1126,16 +1126,16 @@
 	sspi->bitbang.master->dev.of_node = pdev->dev.of_node;
 
 	/* request DMA channels */
-	sspi->rx_chan = dma_request_slave_channel(&pdev->dev, "rx");
-	if (!sspi->rx_chan) {
+	sspi->rx_chan = dma_request_chan(&pdev->dev, "rx");
+	if (IS_ERR(sspi->rx_chan)) {
 		dev_err(&pdev->dev, "can not allocate rx dma channel\n");
-		ret = -ENODEV;
+		ret = PTR_ERR(sspi->rx_chan);
 		goto free_master;
 	}
-	sspi->tx_chan = dma_request_slave_channel(&pdev->dev, "tx");
-	if (!sspi->tx_chan) {
+	sspi->tx_chan = dma_request_chan(&pdev->dev, "tx");
+	if (IS_ERR(sspi->tx_chan)) {
 		dev_err(&pdev->dev, "can not allocate tx dma channel\n");
-		ret = -ENODEV;
+		ret = PTR_ERR(sspi->tx_chan);
 		goto free_rx_dma;
 	}
 
diff --git a/drivers/spi/spi-slave-mt27xx.c b/drivers/spi/spi-slave-mt27xx.c
index 61bc43b..44edaa3 100644
--- a/drivers/spi/spi-slave-mt27xx.c
+++ b/drivers/spi/spi-slave-mt27xx.c
@@ -368,7 +368,6 @@
 {
 	struct spi_controller *ctlr;
 	struct mtk_spi_slave *mdata;
-	struct resource *res;
 	int irq, ret;
 
 	ctlr = spi_alloc_slave(&pdev->dev, sizeof(*mdata));
@@ -392,17 +391,8 @@
 	platform_set_drvdata(pdev, ctlr);
 
 	init_completion(&mdata->xfer_done);
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		ret = -ENODEV;
-		dev_err(&pdev->dev, "failed to determine base address\n");
-		goto err_put_ctlr;
-	}
-
 	mdata->dev = &pdev->dev;
-
-	mdata->base = devm_ioremap_resource(&pdev->dev, res);
+	mdata->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(mdata->base)) {
 		ret = PTR_ERR(mdata->base);
 		goto err_put_ctlr;
diff --git a/drivers/spi/spi-sprd-adi.c b/drivers/spi/spi-sprd-adi.c
index e804a38..307c079 100644
--- a/drivers/spi/spi-sprd-adi.c
+++ b/drivers/spi/spi-sprd-adi.c
@@ -77,6 +77,7 @@
 
 /* Bits definitions for register REG_WDG_CTRL */
 #define BIT_WDG_RUN			BIT(1)
+#define BIT_WDG_NEW			BIT(2)
 #define BIT_WDG_RST			BIT(3)
 
 /* Registers definitions for PMIC */
@@ -318,7 +319,7 @@
 
 static void sprd_adi_set_wdt_rst_mode(struct sprd_adi *sadi)
 {
-#ifdef CONFIG_SPRD_WATCHDOG
+#if IS_ENABLED(CONFIG_SPRD_WATCHDOG)
 	u32 val;
 
 	/* Set default watchdog reboot mode */
@@ -383,6 +384,10 @@
 	/* Unlock the watchdog */
 	sprd_adi_write(sadi, sadi->slave_pbase + REG_WDG_LOCK, WDG_UNLOCK_KEY);
 
+	sprd_adi_read(sadi, sadi->slave_pbase + REG_WDG_CTRL, &val);
+	val |= BIT_WDG_NEW;
+	sprd_adi_write(sadi, sadi->slave_pbase + REG_WDG_CTRL, val);
+
 	/* Load the watchdog timeout value, 50ms is always enough. */
 	sprd_adi_write(sadi, sadi->slave_pbase + REG_WDG_LOAD_HIGH, 0);
 	sprd_adi_write(sadi, sadi->slave_pbase + REG_WDG_LOAD_LOW,
@@ -499,10 +504,7 @@
 			dev_info(&pdev->dev, "no hardware spinlock supplied\n");
 			break;
 		default:
-			dev_err(&pdev->dev,
-				"failed to find hwlock id, %d\n", ret);
-			/* fall-through */
-		case -EPROBE_DEFER:
+			dev_err_probe(&pdev->dev, ret, "failed to find hwlock id\n");
 			goto put_ctlr;
 		}
 	}
diff --git a/drivers/spi/spi-sprd.c b/drivers/spi/spi-sprd.c
index 44dc7b5..28e70db 100644
--- a/drivers/spi/spi-sprd.c
+++ b/drivers/spi/spi-sprd.c
@@ -553,22 +553,15 @@
 static int sprd_spi_dma_request(struct sprd_spi *ss)
 {
 	ss->dma.dma_chan[SPRD_SPI_RX] = dma_request_chan(ss->dev, "rx_chn");
-	if (IS_ERR_OR_NULL(ss->dma.dma_chan[SPRD_SPI_RX])) {
-		if (PTR_ERR(ss->dma.dma_chan[SPRD_SPI_RX]) == -EPROBE_DEFER)
-			return PTR_ERR(ss->dma.dma_chan[SPRD_SPI_RX]);
-
-		dev_err(ss->dev, "request RX DMA channel failed!\n");
-		return PTR_ERR(ss->dma.dma_chan[SPRD_SPI_RX]);
-	}
+	if (IS_ERR_OR_NULL(ss->dma.dma_chan[SPRD_SPI_RX]))
+		return dev_err_probe(ss->dev, PTR_ERR(ss->dma.dma_chan[SPRD_SPI_RX]),
+				     "request RX DMA channel failed!\n");
 
 	ss->dma.dma_chan[SPRD_SPI_TX]  = dma_request_chan(ss->dev, "tx_chn");
 	if (IS_ERR_OR_NULL(ss->dma.dma_chan[SPRD_SPI_TX])) {
 		dma_release_channel(ss->dma.dma_chan[SPRD_SPI_RX]);
-		if (PTR_ERR(ss->dma.dma_chan[SPRD_SPI_TX]) == -EPROBE_DEFER)
-			return PTR_ERR(ss->dma.dma_chan[SPRD_SPI_TX]);
-
-		dev_err(ss->dev, "request TX DMA channel failed!\n");
-		return PTR_ERR(ss->dma.dma_chan[SPRD_SPI_TX]);
+		return dev_err_probe(ss->dev, PTR_ERR(ss->dma.dma_chan[SPRD_SPI_TX]),
+				     "request TX DMA channel failed!\n");
 	}
 
 	return 0;
@@ -669,11 +662,15 @@
 	writel_relaxed(clk_div, ss->base + SPRD_SPI_CLKD);
 }
 
-static void sprd_spi_init_hw(struct sprd_spi *ss, struct spi_transfer *t)
+static int sprd_spi_init_hw(struct sprd_spi *ss, struct spi_transfer *t)
 {
+	struct spi_delay *d = &t->word_delay;
 	u16 word_delay, interval;
 	u32 val;
 
+	if (d->unit != SPI_DELAY_UNIT_SCK)
+		return -EINVAL;
+
 	val = readl_relaxed(ss->base + SPRD_SPI_CTL0);
 	val &= ~(SPRD_SPI_SCK_REV | SPRD_SPI_NG_TX | SPRD_SPI_NG_RX);
 	/* Set default chip selection, clock phase and clock polarity */
@@ -686,7 +683,7 @@
 	 * formula as below per datasheet:
 	 * interval time (source clock cycles) = interval * 4 + 10.
 	 */
-	word_delay = clamp_t(u16, t->word_delay, SPRD_SPI_MIN_DELAY_CYCLE,
+	word_delay = clamp_t(u16, d->value, SPRD_SPI_MIN_DELAY_CYCLE,
 			     SPRD_SPI_MAX_DELAY_CYCLE);
 	interval = DIV_ROUND_UP(word_delay - 10, 4);
 	ss->word_delay = interval * 4 + 10;
@@ -711,6 +708,8 @@
 		val &= ~SPRD_SPI_DATA_LINE2_EN;
 
 	writel_relaxed(val, ss->base + SPRD_SPI_CTL7);
+
+	return 0;
 }
 
 static int sprd_spi_setup_transfer(struct spi_device *sdev,
@@ -719,13 +718,16 @@
 	struct sprd_spi *ss = spi_controller_get_devdata(sdev->controller);
 	u8 bits_per_word = t->bits_per_word;
 	u32 val, mode = 0;
+	int ret;
 
 	ss->len = t->len;
 	ss->tx_buf = t->tx_buf;
 	ss->rx_buf = t->rx_buf;
 
 	ss->hw_mode = sdev->mode;
-	sprd_spi_init_hw(ss, t);
+	ret = sprd_spi_init_hw(ss, t);
+	if (ret)
+		return ret;
 
 	/* Set tansfer speed and valid bits */
 	sprd_spi_set_speed(ss, t->speed_hz);
diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c
index ea77d91..4f24f63 100644
--- a/drivers/spi/spi-stm32-qspi.c
+++ b/drivers/spi/spi-stm32-qspi.c
@@ -16,6 +16,7 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/pinctrl/consumer.h>
+#include <linux/pm_runtime.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
 #include <linux/sizes.h>
@@ -87,6 +88,7 @@
 #define STM32_BUSY_TIMEOUT_US 100000
 #define STM32_ABT_TIMEOUT_US 100000
 #define STM32_COMP_TIMEOUT_MS 1000
+#define STM32_AUTOSUSPEND_DELAY -1
 
 struct stm32_qspi_flash {
 	struct stm32_qspi *qspi;
@@ -434,10 +436,19 @@
 	struct stm32_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
 	int ret;
 
+	ret = pm_runtime_get_sync(qspi->dev);
+	if (ret < 0) {
+		pm_runtime_put_noidle(qspi->dev);
+		return ret;
+	}
+
 	mutex_lock(&qspi->lock);
 	ret = stm32_qspi_send(mem, op);
 	mutex_unlock(&qspi->lock);
 
+	pm_runtime_mark_last_busy(qspi->dev);
+	pm_runtime_put_autosuspend(qspi->dev);
+
 	return ret;
 }
 
@@ -447,6 +458,7 @@
 	struct stm32_qspi *qspi = spi_controller_get_devdata(ctrl);
 	struct stm32_qspi_flash *flash;
 	u32 presc;
+	int ret;
 
 	if (ctrl->busy)
 		return -EBUSY;
@@ -454,6 +466,12 @@
 	if (!spi->max_speed_hz)
 		return -EINVAL;
 
+	ret = pm_runtime_get_sync(qspi->dev);
+	if (ret < 0) {
+		pm_runtime_put_noidle(qspi->dev);
+		return ret;
+	}
+
 	presc = DIV_ROUND_UP(qspi->clk_rate, spi->max_speed_hz) - 1;
 
 	flash = &qspi->flash[spi->chip_select];
@@ -470,13 +488,17 @@
 	writel_relaxed(qspi->dcr_reg, qspi->io_base + QSPI_DCR);
 	mutex_unlock(&qspi->lock);
 
+	pm_runtime_mark_last_busy(qspi->dev);
+	pm_runtime_put_autosuspend(qspi->dev);
+
 	return 0;
 }
 
-static void stm32_qspi_dma_setup(struct stm32_qspi *qspi)
+static int stm32_qspi_dma_setup(struct stm32_qspi *qspi)
 {
 	struct dma_slave_config dma_cfg;
 	struct device *dev = qspi->dev;
+	int ret = 0;
 
 	memset(&dma_cfg, 0, sizeof(dma_cfg));
 
@@ -487,8 +509,13 @@
 	dma_cfg.src_maxburst = 4;
 	dma_cfg.dst_maxburst = 4;
 
-	qspi->dma_chrx = dma_request_slave_channel(dev, "rx");
-	if (qspi->dma_chrx) {
+	qspi->dma_chrx = dma_request_chan(dev, "rx");
+	if (IS_ERR(qspi->dma_chrx)) {
+		ret = PTR_ERR(qspi->dma_chrx);
+		qspi->dma_chrx = NULL;
+		if (ret == -EPROBE_DEFER)
+			goto out;
+	} else {
 		if (dmaengine_slave_config(qspi->dma_chrx, &dma_cfg)) {
 			dev_err(dev, "dma rx config failed\n");
 			dma_release_channel(qspi->dma_chrx);
@@ -496,8 +523,11 @@
 		}
 	}
 
-	qspi->dma_chtx = dma_request_slave_channel(dev, "tx");
-	if (qspi->dma_chtx) {
+	qspi->dma_chtx = dma_request_chan(dev, "tx");
+	if (IS_ERR(qspi->dma_chtx)) {
+		ret = PTR_ERR(qspi->dma_chtx);
+		qspi->dma_chtx = NULL;
+	} else {
 		if (dmaengine_slave_config(qspi->dma_chtx, &dma_cfg)) {
 			dev_err(dev, "dma tx config failed\n");
 			dma_release_channel(qspi->dma_chtx);
@@ -505,7 +535,13 @@
 		}
 	}
 
+out:
 	init_completion(&qspi->dma_completion);
+
+	if (ret != -EPROBE_DEFER)
+		ret = 0;
+
+	return ret;
 }
 
 static void stm32_qspi_dma_free(struct stm32_qspi *qspi)
@@ -524,15 +560,6 @@
 	.exec_op = stm32_qspi_exec_op,
 };
 
-static void stm32_qspi_release(struct stm32_qspi *qspi)
-{
-	/* disable qspi */
-	writel_relaxed(0, qspi->io_base + QSPI_CR);
-	stm32_qspi_dma_free(qspi);
-	mutex_destroy(&qspi->lock);
-	clk_disable_unprepare(qspi->clk);
-}
-
 static int stm32_qspi_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -553,7 +580,7 @@
 	qspi->io_base = devm_ioremap_resource(dev, res);
 	if (IS_ERR(qspi->io_base)) {
 		ret = PTR_ERR(qspi->io_base);
-		goto err;
+		goto err_master_put;
 	}
 
 	qspi->phys_base = res->start;
@@ -562,24 +589,26 @@
 	qspi->mm_base = devm_ioremap_resource(dev, res);
 	if (IS_ERR(qspi->mm_base)) {
 		ret = PTR_ERR(qspi->mm_base);
-		goto err;
+		goto err_master_put;
 	}
 
 	qspi->mm_size = resource_size(res);
 	if (qspi->mm_size > STM32_QSPI_MAX_MMAP_SZ) {
 		ret = -EINVAL;
-		goto err;
+		goto err_master_put;
 	}
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq < 0)
-		return irq;
+	if (irq < 0) {
+		ret = irq;
+		goto err_master_put;
+	}
 
 	ret = devm_request_irq(dev, irq, stm32_qspi_irq, 0,
 			       dev_name(dev), qspi);
 	if (ret) {
 		dev_err(dev, "failed to request irq\n");
-		goto err;
+		goto err_master_put;
 	}
 
 	init_completion(&qspi->data_completion);
@@ -587,23 +616,27 @@
 	qspi->clk = devm_clk_get(dev, NULL);
 	if (IS_ERR(qspi->clk)) {
 		ret = PTR_ERR(qspi->clk);
-		goto err;
+		goto err_master_put;
 	}
 
 	qspi->clk_rate = clk_get_rate(qspi->clk);
 	if (!qspi->clk_rate) {
 		ret = -EINVAL;
-		goto err;
+		goto err_master_put;
 	}
 
 	ret = clk_prepare_enable(qspi->clk);
 	if (ret) {
 		dev_err(dev, "can not enable the clock\n");
-		goto err;
+		goto err_master_put;
 	}
 
 	rstc = devm_reset_control_get_exclusive(dev, NULL);
-	if (!IS_ERR(rstc)) {
+	if (IS_ERR(rstc)) {
+		ret = PTR_ERR(rstc);
+		if (ret == -EPROBE_DEFER)
+			goto err_clk_disable;
+	} else {
 		reset_control_assert(rstc);
 		udelay(2);
 		reset_control_deassert(rstc);
@@ -611,7 +644,10 @@
 
 	qspi->dev = dev;
 	platform_set_drvdata(pdev, qspi);
-	stm32_qspi_dma_setup(qspi);
+	ret = stm32_qspi_dma_setup(qspi);
+	if (ret)
+		goto err_dma_free;
+
 	mutex_init(&qspi->lock);
 
 	ctrl->mode_bits = SPI_RX_DUAL | SPI_RX_QUAD
@@ -622,12 +658,35 @@
 	ctrl->num_chipselect = STM32_QSPI_MAX_NORCHIP;
 	ctrl->dev.of_node = dev->of_node;
 
-	ret = devm_spi_register_master(dev, ctrl);
-	if (!ret)
-		return 0;
+	pm_runtime_set_autosuspend_delay(dev, STM32_AUTOSUSPEND_DELAY);
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_active(dev);
+	pm_runtime_enable(dev);
+	pm_runtime_get_noresume(dev);
 
-err:
-	stm32_qspi_release(qspi);
+	ret = devm_spi_register_master(dev, ctrl);
+	if (ret)
+		goto err_pm_runtime_free;
+
+	pm_runtime_mark_last_busy(dev);
+	pm_runtime_put_autosuspend(dev);
+
+	return 0;
+
+err_pm_runtime_free:
+	pm_runtime_get_sync(qspi->dev);
+	/* disable qspi */
+	writel_relaxed(0, qspi->io_base + QSPI_CR);
+	mutex_destroy(&qspi->lock);
+	pm_runtime_put_noidle(qspi->dev);
+	pm_runtime_disable(qspi->dev);
+	pm_runtime_set_suspended(qspi->dev);
+	pm_runtime_dont_use_autosuspend(qspi->dev);
+err_dma_free:
+	stm32_qspi_dma_free(qspi);
+err_clk_disable:
+	clk_disable_unprepare(qspi->clk);
+err_master_put:
 	spi_master_put(qspi->ctrl);
 
 	return ret;
@@ -637,34 +696,74 @@
 {
 	struct stm32_qspi *qspi = platform_get_drvdata(pdev);
 
-	stm32_qspi_release(qspi);
+	pm_runtime_get_sync(qspi->dev);
+	/* disable qspi */
+	writel_relaxed(0, qspi->io_base + QSPI_CR);
+	stm32_qspi_dma_free(qspi);
+	mutex_destroy(&qspi->lock);
+	pm_runtime_put_noidle(qspi->dev);
+	pm_runtime_disable(qspi->dev);
+	pm_runtime_set_suspended(qspi->dev);
+	pm_runtime_dont_use_autosuspend(qspi->dev);
+	clk_disable_unprepare(qspi->clk);
+
 	return 0;
 }
 
-static int __maybe_unused stm32_qspi_suspend(struct device *dev)
+static int __maybe_unused stm32_qspi_runtime_suspend(struct device *dev)
 {
 	struct stm32_qspi *qspi = dev_get_drvdata(dev);
 
 	clk_disable_unprepare(qspi->clk);
-	pinctrl_pm_select_sleep_state(dev);
 
 	return 0;
 }
 
+static int __maybe_unused stm32_qspi_runtime_resume(struct device *dev)
+{
+	struct stm32_qspi *qspi = dev_get_drvdata(dev);
+
+	return clk_prepare_enable(qspi->clk);
+}
+
+static int __maybe_unused stm32_qspi_suspend(struct device *dev)
+{
+	pinctrl_pm_select_sleep_state(dev);
+
+	return pm_runtime_force_suspend(dev);
+}
+
 static int __maybe_unused stm32_qspi_resume(struct device *dev)
 {
 	struct stm32_qspi *qspi = dev_get_drvdata(dev);
+	int ret;
+
+	ret = pm_runtime_force_resume(dev);
+	if (ret < 0)
+		return ret;
 
 	pinctrl_pm_select_default_state(dev);
-	clk_prepare_enable(qspi->clk);
+
+	ret = pm_runtime_get_sync(dev);
+	if (ret < 0) {
+		pm_runtime_put_noidle(dev);
+		return ret;
+	}
 
 	writel_relaxed(qspi->cr_reg, qspi->io_base + QSPI_CR);
 	writel_relaxed(qspi->dcr_reg, qspi->io_base + QSPI_DCR);
 
+	pm_runtime_mark_last_busy(dev);
+	pm_runtime_put_autosuspend(dev);
+
 	return 0;
 }
 
-static SIMPLE_DEV_PM_OPS(stm32_qspi_pm_ops, stm32_qspi_suspend, stm32_qspi_resume);
+static const struct dev_pm_ops stm32_qspi_pm_ops = {
+	SET_RUNTIME_PM_OPS(stm32_qspi_runtime_suspend,
+			   stm32_qspi_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(stm32_qspi_suspend, stm32_qspi_resume)
+};
 
 static const struct of_device_id stm32_qspi_match[] = {
 	{.compatible = "st,stm32f469-qspi"},
diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index 9ae1609..a6dfc8f 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -9,7 +9,6 @@
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/dmaengine.h>
-#include <linux/gpio.h>
 #include <linux/interrupt.h>
 #include <linux/iopoll.h>
 #include <linux/module.h>
@@ -177,7 +176,7 @@
 #define SPI_DMA_MIN_BYTES	16
 
 /**
- * stm32_spi_reg - stm32 SPI register & bitfield desc
+ * struct stm32_spi_reg - stm32 SPI register & bitfield desc
  * @reg:		register offset
  * @mask:		bitfield mask
  * @shift:		left shift
@@ -189,16 +188,16 @@
 };
 
 /**
- * stm32_spi_regspec - stm32 registers definition, compatible dependent data
- * en: enable register and SPI enable bit
- * dma_rx_en: SPI DMA RX enable register end SPI DMA RX enable bit
- * dma_tx_en: SPI DMA TX enable register end SPI DMA TX enable bit
- * cpol: clock polarity register and polarity bit
- * cpha: clock phase register and phase bit
- * lsb_first: LSB transmitted first register and bit
- * br: baud rate register and bitfields
- * rx: SPI RX data register
- * tx: SPI TX data register
+ * struct stm32_spi_regspec - stm32 registers definition, compatible dependent data
+ * @en: enable register and SPI enable bit
+ * @dma_rx_en: SPI DMA RX enable register end SPI DMA RX enable bit
+ * @dma_tx_en: SPI DMA TX enable register end SPI DMA TX enable bit
+ * @cpol: clock polarity register and polarity bit
+ * @cpha: clock phase register and phase bit
+ * @lsb_first: LSB transmitted first register and bit
+ * @br: baud rate register and bitfields
+ * @rx: SPI RX data register
+ * @tx: SPI TX data register
  */
 struct stm32_spi_regspec {
 	const struct stm32_spi_reg en;
@@ -215,7 +214,7 @@
 struct stm32_spi;
 
 /**
- * stm32_spi_cfg - stm32 compatible configuration data
+ * struct stm32_spi_cfg - stm32 compatible configuration data
  * @regs: registers descriptions
  * @get_fifo_size: routine to get fifo size
  * @get_bpw_mask: routine to get bits per word mask
@@ -225,13 +224,13 @@
  * @set_mode: routine to configure registers to desired mode
  * @set_data_idleness: optional routine to configure registers to desired idle
  * time between frames (if driver has this functionality)
- * set_number_of_data: optional routine to configure registers to desired
+ * @set_number_of_data: optional routine to configure registers to desired
  * number of data (if driver has this functionality)
  * @can_dma: routine to determine if the transfer is eligible for DMA use
  * @transfer_one_dma_start: routine to start transfer a single spi_transfer
  * using DMA
- * @dma_rx cb: routine to call after DMA RX channel operation is complete
- * @dma_tx cb: routine to call after DMA TX channel operation is complete
+ * @dma_rx_cb: routine to call after DMA RX channel operation is complete
+ * @dma_tx_cb: routine to call after DMA TX channel operation is complete
  * @transfer_one_irq: routine to configure interrupts for driver
  * @irq_handler_event: Interrupt handler for SPI controller events
  * @irq_handler_thread: thread of interrupt handler for SPI controller
@@ -600,6 +599,7 @@
 /**
  * stm32h7_spi_read_rxfifo - Read bytes in Receive Data Register
  * @spi: pointer to the spi controller data structure
+ * @flush: boolean indicating that FIFO should be flushed
  *
  * Write in rx_buf depends on remaining bytes to avoid to write beyond
  * rx_buf end.
@@ -769,6 +769,9 @@
 
 /**
  * stm32_spi_can_dma - Determine if the transfer is eligible for DMA use
+ * @master: controller master interface
+ * @spi_dev: pointer to the spi device
+ * @transfer: pointer to spi transfer
  *
  * If driver has fifo and the current transfer size is greater than fifo size,
  * use DMA. Otherwise use DMA for transfer longer than defined DMA min bytes.
@@ -801,10 +804,9 @@
 	struct spi_master *master = dev_id;
 	struct stm32_spi *spi = spi_master_get_devdata(master);
 	u32 sr, mask = 0;
-	unsigned long flags;
 	bool end = false;
 
-	spin_lock_irqsave(&spi->lock, flags);
+	spin_lock(&spi->lock);
 
 	sr = readl_relaxed(spi->base + STM32F4_SPI_SR);
 	/*
@@ -820,7 +822,9 @@
 		mask |= STM32F4_SPI_SR_TXE;
 	}
 
-	if (!spi->cur_usedma && spi->cur_comm == SPI_FULL_DUPLEX) {
+	if (!spi->cur_usedma && (spi->cur_comm == SPI_FULL_DUPLEX ||
+				spi->cur_comm == SPI_SIMPLEX_RX ||
+				spi->cur_comm == SPI_3WIRE_RX)) {
 		/* TXE flag is set and is handled when RXNE flag occurs */
 		sr &= ~STM32F4_SPI_SR_TXE;
 		mask |= STM32F4_SPI_SR_RXNE | STM32F4_SPI_SR_OVR;
@@ -828,7 +832,7 @@
 
 	if (!(sr & mask)) {
 		dev_dbg(spi->dev, "spurious IT (sr=0x%08x)\n", sr);
-		spin_unlock_irqrestore(&spi->lock, flags);
+		spin_unlock(&spi->lock);
 		return IRQ_NONE;
 	}
 
@@ -859,7 +863,7 @@
 		stm32f4_spi_read_rx(spi);
 		if (spi->rx_len == 0)
 			end = true;
-		else /* Load data for discontinuous mode */
+		else if (spi->tx_buf)/* Load data for discontinuous mode */
 			stm32f4_spi_write_tx(spi);
 	}
 
@@ -870,11 +874,11 @@
 					STM32F4_SPI_CR2_TXEIE |
 					STM32F4_SPI_CR2_RXNEIE |
 					STM32F4_SPI_CR2_ERRIE);
-		spin_unlock_irqrestore(&spi->lock, flags);
+		spin_unlock(&spi->lock);
 		return IRQ_WAKE_THREAD;
 	}
 
-	spin_unlock_irqrestore(&spi->lock, flags);
+	spin_unlock(&spi->lock);
 	return IRQ_HANDLED;
 }
 
@@ -986,30 +990,9 @@
 }
 
 /**
- * stm32_spi_setup - setup device chip select
- */
-static int stm32_spi_setup(struct spi_device *spi_dev)
-{
-	int ret = 0;
-
-	if (!gpio_is_valid(spi_dev->cs_gpio)) {
-		dev_err(&spi_dev->dev, "%d is not a valid gpio\n",
-			spi_dev->cs_gpio);
-		return -EINVAL;
-	}
-
-	dev_dbg(&spi_dev->dev, "%s: set gpio%d output %s\n", __func__,
-		spi_dev->cs_gpio,
-		(spi_dev->mode & SPI_CS_HIGH) ? "low" : "high");
-
-	ret = gpio_direction_output(spi_dev->cs_gpio,
-				    !(spi_dev->mode & SPI_CS_HIGH));
-
-	return ret;
-}
-
-/**
  * stm32_spi_prepare_msg - set up the controller to transfer a single message
+ * @master: controller master interface
+ * @msg: pointer to spi message
  */
 static int stm32_spi_prepare_msg(struct spi_master *master,
 				 struct spi_message *msg)
@@ -1062,6 +1045,7 @@
 
 /**
  * stm32f4_spi_dma_tx_cb - dma callback
+ * @data: pointer to the spi controller data structure
  *
  * DMA callback is called when the transfer is complete for DMA TX channel.
  */
@@ -1077,6 +1061,7 @@
 
 /**
  * stm32f4_spi_dma_rx_cb - dma callback
+ * @data: pointer to the spi controller data structure
  *
  * DMA callback is called when the transfer is complete for DMA RX channel.
  */
@@ -1090,6 +1075,7 @@
 
 /**
  * stm32h7_spi_dma_cb - dma callback
+ * @data: pointer to the spi controller data structure
  *
  * DMA callback is called when the transfer is complete or when an error
  * occurs. If the transfer is complete, EOT flag is raised.
@@ -1115,6 +1101,9 @@
 /**
  * stm32_spi_dma_config - configure dma slave channel depending on current
  *			  transfer bits_per_word.
+ * @spi: pointer to the spi controller data structure
+ * @dma_conf: pointer to the dma_slave_config structure
+ * @dir: direction of the dma transfer
  */
 static void stm32_spi_dma_config(struct stm32_spi *spi,
 				 struct dma_slave_config *dma_conf,
@@ -1162,6 +1151,7 @@
 /**
  * stm32f4_spi_transfer_one_irq - transfer a single spi_transfer using
  *				  interrupts
+ * @spi: pointer to the spi controller data structure
  *
  * It must returns 0 if the transfer is finished or 1 if the transfer is still
  * in progress.
@@ -1174,7 +1164,9 @@
 	/* Enable the interrupts relative to the current communication mode */
 	if (spi->cur_comm == SPI_SIMPLEX_TX || spi->cur_comm == SPI_3WIRE_TX) {
 		cr2 |= STM32F4_SPI_CR2_TXEIE;
-	} else if (spi->cur_comm == SPI_FULL_DUPLEX) {
+	} else if (spi->cur_comm == SPI_FULL_DUPLEX ||
+				spi->cur_comm == SPI_SIMPLEX_RX ||
+				spi->cur_comm == SPI_3WIRE_RX) {
 		/* In transmit-only mode, the OVR flag is set in the SR register
 		 * since the received data are never read. Therefore set OVR
 		 * interrupt only when rx buffer is available.
@@ -1202,6 +1194,7 @@
 /**
  * stm32h7_spi_transfer_one_irq - transfer a single spi_transfer using
  *				  interrupts
+ * @spi: pointer to the spi controller data structure
  *
  * It must returns 0 if the transfer is finished or 1 if the transfer is still
  * in progress.
@@ -1243,6 +1236,7 @@
 /**
  * stm32f4_spi_transfer_one_dma_start - Set SPI driver registers to start
  *					transfer using DMA
+ * @spi: pointer to the spi controller data structure
  */
 static void stm32f4_spi_transfer_one_dma_start(struct stm32_spi *spi)
 {
@@ -1263,6 +1257,7 @@
 /**
  * stm32h7_spi_transfer_one_dma_start - Set SPI driver registers to start
  *					transfer using DMA
+ * @spi: pointer to the spi controller data structure
  */
 static void stm32h7_spi_transfer_one_dma_start(struct stm32_spi *spi)
 {
@@ -1279,6 +1274,8 @@
 
 /**
  * stm32_spi_transfer_one_dma - transfer a single spi_transfer using DMA
+ * @spi: pointer to the spi controller data structure
+ * @xfer: pointer to the spi_transfer structure
  *
  * It must returns 0 if the transfer is finished or 1 if the transfer is still
  * in progress.
@@ -1441,7 +1438,7 @@
 /**
  * stm32_spi_communication_type - return transfer communication type
  * @spi_dev: pointer to the spi device
- * transfer: pointer to spi transfer
+ * @transfer: pointer to spi transfer
  */
 static unsigned int stm32_spi_communication_type(struct spi_device *spi_dev,
 						 struct spi_transfer *transfer)
@@ -1480,10 +1477,16 @@
 		stm32_spi_set_bits(spi, STM32F4_SPI_CR1,
 					STM32F4_SPI_CR1_BIDIMODE |
 					STM32F4_SPI_CR1_BIDIOE);
-	} else if (comm_type == SPI_FULL_DUPLEX) {
+	} else if (comm_type == SPI_FULL_DUPLEX ||
+				comm_type == SPI_SIMPLEX_RX) {
 		stm32_spi_clr_bits(spi, STM32F4_SPI_CR1,
 					STM32F4_SPI_CR1_BIDIMODE |
 					STM32F4_SPI_CR1_BIDIOE);
+	} else if (comm_type == SPI_3WIRE_RX) {
+		stm32_spi_set_bits(spi, STM32F4_SPI_CR1,
+					STM32F4_SPI_CR1_BIDIMODE);
+		stm32_spi_clr_bits(spi, STM32F4_SPI_CR1,
+					STM32F4_SPI_CR1_BIDIOE);
 	} else {
 		return -EINVAL;
 	}
@@ -1558,7 +1561,7 @@
 /**
  * stm32h7_spi_number_of_data - configure number of data at current transfer
  * @spi: pointer to the spi controller data structure
- * @len: transfer length
+ * @nb_words: transfer length (in words)
  */
 static int stm32h7_spi_number_of_data(struct stm32_spi *spi, u32 nb_words)
 {
@@ -1582,6 +1585,9 @@
  * stm32_spi_transfer_one_setup - common setup to transfer a single
  *				  spi_transfer either using DMA or
  *				  interrupts.
+ * @spi: pointer to the spi controller data structure
+ * @spi_dev: pointer to the spi device
+ * @transfer: pointer to spi transfer
  */
 static int stm32_spi_transfer_one_setup(struct stm32_spi *spi,
 					struct spi_device *spi_dev,
@@ -1653,6 +1659,9 @@
 
 /**
  * stm32_spi_transfer_one - transfer a single spi_transfer
+ * @master: controller master interface
+ * @spi_dev: pointer to the spi device
+ * @transfer: pointer to spi transfer
  *
  * It must return 0 if the transfer is finished or 1 if the transfer is still
  * in progress.
@@ -1690,6 +1699,8 @@
 
 /**
  * stm32_spi_unprepare_msg - relax the hardware
+ * @master: controller master interface
+ * @msg: pointer to the spi message
  */
 static int stm32_spi_unprepare_msg(struct spi_master *master,
 				   struct spi_message *msg)
@@ -1703,6 +1714,7 @@
 
 /**
  * stm32f4_spi_config - Configure SPI controller as SPI master
+ * @spi: pointer to the spi controller data structure
  */
 static int stm32f4_spi_config(struct stm32_spi *spi)
 {
@@ -1733,6 +1745,7 @@
 
 /**
  * stm32h7_spi_config - Configure SPI controller as SPI master
+ * @spi: pointer to the spi controller data structure
  */
 static int stm32h7_spi_config(struct stm32_spi *spi)
 {
@@ -1818,9 +1831,9 @@
 	struct spi_master *master;
 	struct stm32_spi *spi;
 	struct resource *res;
-	int i, ret;
+	int ret;
 
-	master = spi_alloc_master(&pdev->dev, sizeof(struct stm32_spi));
+	master = devm_spi_alloc_master(&pdev->dev, sizeof(struct stm32_spi));
 	if (!master) {
 		dev_err(&pdev->dev, "spi master allocation failed\n");
 		return -ENOMEM;
@@ -1838,20 +1851,16 @@
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	spi->base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(spi->base)) {
-		ret = PTR_ERR(spi->base);
-		goto err_master_put;
-	}
+	if (IS_ERR(spi->base))
+		return PTR_ERR(spi->base);
 
 	spi->phys_addr = (dma_addr_t)res->start;
 
 	spi->irq = platform_get_irq(pdev, 0);
-	if (spi->irq <= 0) {
-		ret = spi->irq;
-		if (ret != -EPROBE_DEFER)
-			dev_err(&pdev->dev, "failed to get irq: %d\n", ret);
-		goto err_master_put;
-	}
+	if (spi->irq <= 0)
+		return dev_err_probe(&pdev->dev, spi->irq,
+				     "failed to get irq\n");
+
 	ret = devm_request_threaded_irq(&pdev->dev, spi->irq,
 					spi->cfg->irq_handler_event,
 					spi->cfg->irq_handler_thread,
@@ -1859,20 +1868,20 @@
 	if (ret) {
 		dev_err(&pdev->dev, "irq%d request failed: %d\n", spi->irq,
 			ret);
-		goto err_master_put;
+		return ret;
 	}
 
 	spi->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(spi->clk)) {
 		ret = PTR_ERR(spi->clk);
 		dev_err(&pdev->dev, "clk get failed: %d\n", ret);
-		goto err_master_put;
+		return ret;
 	}
 
 	ret = clk_prepare_enable(spi->clk);
 	if (ret) {
 		dev_err(&pdev->dev, "clk enable failed: %d\n", ret);
-		goto err_master_put;
+		return ret;
 	}
 	spi->clk_rate = clk_get_rate(spi->clk);
 	if (!spi->clk_rate) {
@@ -1906,10 +1915,11 @@
 	master->bits_per_word_mask = spi->cfg->get_bpw_mask(spi);
 	master->max_speed_hz = spi->clk_rate / spi->cfg->baud_rate_div_min;
 	master->min_speed_hz = spi->clk_rate / spi->cfg->baud_rate_div_max;
-	master->setup = stm32_spi_setup;
+	master->use_gpio_descriptors = true;
 	master->prepare_message = stm32_spi_prepare_msg;
 	master->transfer_one = stm32_spi_transfer_one;
 	master->unprepare_message = stm32_spi_unprepare_msg;
+	master->flags = SPI_MASTER_MUST_TX;
 
 	spi->dma_tx = dma_request_chan(spi->dev, "tx");
 	if (IS_ERR(spi->dma_tx)) {
@@ -1949,29 +1959,12 @@
 		goto err_pm_disable;
 	}
 
-	if (!master->cs_gpios) {
+	if (!master->cs_gpiods) {
 		dev_err(&pdev->dev, "no CS gpios available\n");
 		ret = -EINVAL;
 		goto err_pm_disable;
 	}
 
-	for (i = 0; i < master->num_chipselect; i++) {
-		if (!gpio_is_valid(master->cs_gpios[i])) {
-			dev_err(&pdev->dev, "%i is not a valid gpio\n",
-				master->cs_gpios[i]);
-			ret = -EINVAL;
-			goto err_dma_release;
-		}
-
-		ret = devm_gpio_request(&pdev->dev, master->cs_gpios[i],
-					DRIVER_NAME);
-		if (ret) {
-			dev_err(&pdev->dev, "can't get CS gpio %i\n",
-				master->cs_gpios[i]);
-			goto err_dma_release;
-		}
-	}
-
 	dev_info(&pdev->dev, "driver initialized\n");
 
 	return 0;
@@ -1987,8 +1980,6 @@
 		dma_release_channel(spi->dma_rx);
 err_clk_disable:
 	clk_disable_unprepare(spi->clk);
-err_master_put:
-	spi_master_put(master);
 
 	return ret;
 }
diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c
index cbfac65..1fdfc6e 100644
--- a/drivers/spi/spi-sun4i.c
+++ b/drivers/spi/spi-sun4i.c
@@ -198,7 +198,7 @@
 
 static size_t sun4i_spi_max_transfer_size(struct spi_device *spi)
 {
-	return SUN4I_FIFO_DEPTH - 1;
+	return SUN4I_MAX_XFER_SIZE - 1;
 }
 
 static int sun4i_spi_transfer_one(struct spi_master *master,
diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c
index 3a8acd7..803d92f 100644
--- a/drivers/spi/spi-sun6i.c
+++ b/drivers/spi/spi-sun6i.c
@@ -7,6 +7,7 @@
  * Maxime Ripard <maxime.ripard@free-electrons.com>
  */
 
+#include <linux/bitfield.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/device.h>
@@ -58,10 +59,8 @@
 #define SUN6I_FIFO_CTL_TF_RST			BIT(31)
 
 #define SUN6I_FIFO_STA_REG		0x1c
-#define SUN6I_FIFO_STA_RF_CNT_MASK		0x7f
-#define SUN6I_FIFO_STA_RF_CNT_BITS		0
-#define SUN6I_FIFO_STA_TF_CNT_MASK		0x7f
-#define SUN6I_FIFO_STA_TF_CNT_BITS		16
+#define SUN6I_FIFO_STA_RF_CNT_MASK		GENMASK(7, 0)
+#define SUN6I_FIFO_STA_TF_CNT_MASK		GENMASK(23, 16)
 
 #define SUN6I_CLK_CTL_REG		0x24
 #define SUN6I_CLK_CTL_CDR2_MASK			0xff
@@ -73,13 +72,10 @@
 #define SUN6I_MAX_XFER_SIZE		0xffffff
 
 #define SUN6I_BURST_CNT_REG		0x30
-#define SUN6I_BURST_CNT(cnt)			((cnt) & SUN6I_MAX_XFER_SIZE)
 
 #define SUN6I_XMIT_CNT_REG		0x34
-#define SUN6I_XMIT_CNT(cnt)			((cnt) & SUN6I_MAX_XFER_SIZE)
 
 #define SUN6I_BURST_CTL_CNT_REG		0x38
-#define SUN6I_BURST_CTL_CNT_STC(cnt)		((cnt) & SUN6I_MAX_XFER_SIZE)
 
 #define SUN6I_TXDATA_REG		0x200
 #define SUN6I_RXDATA_REG		0x300
@@ -109,21 +105,18 @@
 	writel(value, sspi->base_addr + reg);
 }
 
+static inline u32 sun6i_spi_get_rx_fifo_count(struct sun6i_spi *sspi)
+{
+	u32 reg = sun6i_spi_read(sspi, SUN6I_FIFO_STA_REG);
+
+	return FIELD_GET(SUN6I_FIFO_STA_RF_CNT_MASK, reg);
+}
+
 static inline u32 sun6i_spi_get_tx_fifo_count(struct sun6i_spi *sspi)
 {
 	u32 reg = sun6i_spi_read(sspi, SUN6I_FIFO_STA_REG);
 
-	reg >>= SUN6I_FIFO_STA_TF_CNT_BITS;
-
-	return reg & SUN6I_FIFO_STA_TF_CNT_MASK;
-}
-
-static inline void sun6i_spi_enable_interrupt(struct sun6i_spi *sspi, u32 mask)
-{
-	u32 reg = sun6i_spi_read(sspi, SUN6I_INT_CTL_REG);
-
-	reg |= mask;
-	sun6i_spi_write(sspi, SUN6I_INT_CTL_REG, reg);
+	return FIELD_GET(SUN6I_FIFO_STA_TF_CNT_MASK, reg);
 }
 
 static inline void sun6i_spi_disable_interrupt(struct sun6i_spi *sspi, u32 mask)
@@ -134,18 +127,13 @@
 	sun6i_spi_write(sspi, SUN6I_INT_CTL_REG, reg);
 }
 
-static inline void sun6i_spi_drain_fifo(struct sun6i_spi *sspi, int len)
+static inline void sun6i_spi_drain_fifo(struct sun6i_spi *sspi)
 {
-	u32 reg, cnt;
+	u32 len;
 	u8 byte;
 
 	/* See how much data is available */
-	reg = sun6i_spi_read(sspi, SUN6I_FIFO_STA_REG);
-	reg &= SUN6I_FIFO_STA_RF_CNT_MASK;
-	cnt = reg >> SUN6I_FIFO_STA_RF_CNT_BITS;
-
-	if (len > cnt)
-		len = cnt;
+	len = sun6i_spi_get_rx_fifo_count(sspi);
 
 	while (len--) {
 		byte = readb(sspi->base_addr + SUN6I_RXDATA_REG);
@@ -154,15 +142,16 @@
 	}
 }
 
-static inline void sun6i_spi_fill_fifo(struct sun6i_spi *sspi, int len)
+static inline void sun6i_spi_fill_fifo(struct sun6i_spi *sspi)
 {
 	u32 cnt;
+	int len;
 	u8 byte;
 
 	/* See how much data we can fit */
 	cnt = sspi->fifo_depth - sun6i_spi_get_tx_fifo_count(sspi);
 
-	len = min3(len, (int)cnt, sspi->len);
+	len = min((int)cnt, sspi->len);
 
 	while (len--) {
 		byte = sspi->tx_buf ? *sspi->tx_buf++ : 0;
@@ -201,7 +190,7 @@
 	unsigned int mclk_rate, div, div_cdr1, div_cdr2, timeout;
 	unsigned int start, end, tx_time;
 	unsigned int trig_level;
-	unsigned int tx_len = 0;
+	unsigned int tx_len = 0, rx_len = 0;
 	int ret = 0;
 	u32 reg;
 
@@ -256,10 +245,12 @@
 	 * If it's a TX only transfer, we don't want to fill the RX
 	 * FIFO with bogus data
 	 */
-	if (sspi->rx_buf)
+	if (sspi->rx_buf) {
 		reg &= ~SUN6I_TFR_CTL_DHB;
-	else
+		rx_len = tfr->len;
+	} else {
 		reg |= SUN6I_TFR_CTL_DHB;
+	}
 
 	/* We want to control the chip select manually */
 	reg |= SUN6I_TFR_CTL_CS_MANUAL;
@@ -291,9 +282,11 @@
 	div_cdr2 = DIV_ROUND_UP(div_cdr1, 2);
 	if (div_cdr2 <= (SUN6I_CLK_CTL_CDR2_MASK + 1)) {
 		reg = SUN6I_CLK_CTL_CDR2(div_cdr2 - 1) | SUN6I_CLK_CTL_DRS;
+		tfr->effective_speed_hz = mclk_rate / (2 * div_cdr2);
 	} else {
 		div = min(SUN6I_CLK_CTL_CDR1_MASK, order_base_2(div_cdr1));
 		reg = SUN6I_CLK_CTL_CDR1(div);
+		tfr->effective_speed_hz = mclk_rate / (1 << div);
 	}
 
 	sun6i_spi_write(sspi, SUN6I_CLK_CTL_REG, reg);
@@ -307,20 +300,22 @@
 		tx_len = tfr->len;
 
 	/* Setup the counters */
-	sun6i_spi_write(sspi, SUN6I_BURST_CNT_REG, SUN6I_BURST_CNT(tfr->len));
-	sun6i_spi_write(sspi, SUN6I_XMIT_CNT_REG, SUN6I_XMIT_CNT(tx_len));
-	sun6i_spi_write(sspi, SUN6I_BURST_CTL_CNT_REG,
-			SUN6I_BURST_CTL_CNT_STC(tx_len));
+	sun6i_spi_write(sspi, SUN6I_BURST_CNT_REG, tfr->len);
+	sun6i_spi_write(sspi, SUN6I_XMIT_CNT_REG, tx_len);
+	sun6i_spi_write(sspi, SUN6I_BURST_CTL_CNT_REG, tx_len);
 
 	/* Fill the TX FIFO */
-	sun6i_spi_fill_fifo(sspi, sspi->fifo_depth);
+	sun6i_spi_fill_fifo(sspi);
 
 	/* Enable the interrupts */
-	sun6i_spi_write(sspi, SUN6I_INT_CTL_REG, SUN6I_INT_CTL_TC);
-	sun6i_spi_enable_interrupt(sspi, SUN6I_INT_CTL_TC |
-					 SUN6I_INT_CTL_RF_RDY);
+	reg = SUN6I_INT_CTL_TC;
+
+	if (rx_len > sspi->fifo_depth)
+		reg |= SUN6I_INT_CTL_RF_RDY;
 	if (tx_len > sspi->fifo_depth)
-		sun6i_spi_enable_interrupt(sspi, SUN6I_INT_CTL_TF_ERQ);
+		reg |= SUN6I_INT_CTL_TF_ERQ;
+
+	sun6i_spi_write(sspi, SUN6I_INT_CTL_REG, reg);
 
 	/* Start the transfer */
 	reg = sun6i_spi_read(sspi, SUN6I_TFR_CTL_REG);
@@ -337,10 +332,8 @@
 			 dev_name(&spi->dev), tfr->len, tfr->speed_hz,
 			 jiffies_to_msecs(end - start), tx_time);
 		ret = -ETIMEDOUT;
-		goto out;
 	}
 
-out:
 	sun6i_spi_write(sspi, SUN6I_INT_CTL_REG, 0);
 
 	return ret;
@@ -354,14 +347,14 @@
 	/* Transfer complete */
 	if (status & SUN6I_INT_CTL_TC) {
 		sun6i_spi_write(sspi, SUN6I_INT_STA_REG, SUN6I_INT_CTL_TC);
-		sun6i_spi_drain_fifo(sspi, sspi->fifo_depth);
+		sun6i_spi_drain_fifo(sspi);
 		complete(&sspi->done);
 		return IRQ_HANDLED;
 	}
 
 	/* Receive FIFO 3/4 full */
 	if (status & SUN6I_INT_CTL_RF_RDY) {
-		sun6i_spi_drain_fifo(sspi, SUN6I_FIFO_DEPTH);
+		sun6i_spi_drain_fifo(sspi);
 		/* Only clear the interrupt _after_ draining the FIFO */
 		sun6i_spi_write(sspi, SUN6I_INT_STA_REG, SUN6I_INT_CTL_RF_RDY);
 		return IRQ_HANDLED;
@@ -369,7 +362,7 @@
 
 	/* Transmit FIFO 3/4 empty */
 	if (status & SUN6I_INT_CTL_TF_ERQ) {
-		sun6i_spi_fill_fifo(sspi, SUN6I_FIFO_DEPTH);
+		sun6i_spi_fill_fifo(sspi);
 
 		if (!sspi->len)
 			/* nothing left to transmit */
@@ -472,6 +465,7 @@
 
 	master->max_speed_hz = 100 * 1000 * 1000;
 	master->min_speed_hz = 3 * 1000;
+	master->use_gpio_descriptors = true;
 	master->set_cs = sun6i_spi_set_cs;
 	master->transfer_one = sun6i_spi_transfer_one;
 	master->num_chipselect = 4;
diff --git a/drivers/spi/spi-synquacer.c b/drivers/spi/spi-synquacer.c
index 785e7c4..ea706d9 100644
--- a/drivers/spi/spi-synquacer.c
+++ b/drivers/spi/spi-synquacer.c
@@ -644,9 +644,8 @@
 		}
 
 		if (IS_ERR(sspi->clk)) {
-			if (!(PTR_ERR(sspi->clk) == -EPROBE_DEFER))
-				dev_err(&pdev->dev, "clock not found\n");
-			ret = PTR_ERR(sspi->clk);
+			ret = dev_err_probe(&pdev->dev, PTR_ERR(sspi->clk),
+					    "clock not found\n");
 			goto put_spi;
 		}
 
diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c
index 594905b..a2e5907 100644
--- a/drivers/spi/spi-tegra114.c
+++ b/drivers/spi/spi-tegra114.c
@@ -269,10 +269,10 @@
 
 	if ((bits_per_word == 8 || bits_per_word == 16 ||
 	     bits_per_word == 32) && t->len > 3) {
-		tspi->is_packed = 1;
+		tspi->is_packed = true;
 		tspi->words_per_32bit = 32/bits_per_word;
 	} else {
-		tspi->is_packed = 0;
+		tspi->is_packed = false;
 		tspi->words_per_32bit = 1;
 	}
 
@@ -664,17 +664,11 @@
 	struct dma_chan *dma_chan;
 	u32 *dma_buf;
 	dma_addr_t dma_phys;
-	int ret;
 
-	dma_chan = dma_request_slave_channel_reason(tspi->dev,
-					dma_to_memory ? "rx" : "tx");
-	if (IS_ERR(dma_chan)) {
-		ret = PTR_ERR(dma_chan);
-		if (ret != -EPROBE_DEFER)
-			dev_err(tspi->dev,
-				"Dma channel is not available: %d\n", ret);
-		return ret;
-	}
+	dma_chan = dma_request_chan(tspi->dev, dma_to_memory ? "rx" : "tx");
+	if (IS_ERR(dma_chan))
+		return dev_err_probe(tspi->dev, PTR_ERR(dma_chan),
+				     "Dma channel is not available\n");
 
 	dma_buf = dma_alloc_coherent(tspi->dev, tspi->dma_buf_size,
 				&dma_phys, GFP_KERNEL);
@@ -723,15 +717,31 @@
 	dma_release_channel(dma_chan);
 }
 
-static void tegra_spi_set_hw_cs_timing(struct spi_device *spi, u8 setup_dly,
-				       u8 hold_dly, u8 inactive_dly)
+static int tegra_spi_set_hw_cs_timing(struct spi_device *spi,
+				      struct spi_delay *setup,
+				      struct spi_delay *hold,
+				      struct spi_delay *inactive)
 {
 	struct tegra_spi_data *tspi = spi_master_get_devdata(spi->master);
+	u8 setup_dly, hold_dly, inactive_dly;
 	u32 setup_hold;
 	u32 spi_cs_timing;
 	u32 inactive_cycles;
 	u8 cs_state;
 
+	if ((setup && setup->unit != SPI_DELAY_UNIT_SCK) ||
+	    (hold && hold->unit != SPI_DELAY_UNIT_SCK) ||
+	    (inactive && inactive->unit != SPI_DELAY_UNIT_SCK)) {
+		dev_err(&spi->dev,
+			"Invalid delay unit %d, should be SPI_DELAY_UNIT_SCK\n",
+			SPI_DELAY_UNIT_SCK);
+		return -EINVAL;
+	}
+
+	setup_dly = setup ? setup->value : 0;
+	hold_dly = hold ? hold->value : 0;
+	inactive_dly = inactive ? inactive->value : 0;
+
 	setup_dly = min_t(u8, setup_dly, MAX_SETUP_HOLD_CYCLES);
 	hold_dly = min_t(u8, hold_dly, MAX_SETUP_HOLD_CYCLES);
 	if (setup_dly && hold_dly) {
@@ -758,6 +768,8 @@
 		tspi->spi_cs_timing2 = spi_cs_timing;
 		tegra_spi_writel(tspi, spi_cs_timing, SPI_CS_TIMING2);
 	}
+
+	return 0;
 }
 
 static u32 tegra_spi_setup_transfer_one(struct spi_device *spi,
@@ -985,17 +997,6 @@
 	return 0;
 }
 
-static void tegra_spi_transfer_delay(int delay)
-{
-	if (!delay)
-		return;
-
-	if (delay >= 1000)
-		mdelay(delay / 1000);
-
-	udelay(delay % 1000);
-}
-
 static void tegra_spi_transfer_end(struct spi_device *spi)
 {
 	struct tegra_spi_data *tspi = spi_master_get_devdata(spi->master);
@@ -1099,7 +1100,7 @@
 complete_xfer:
 		if (ret < 0 || skip) {
 			tegra_spi_transfer_end(spi);
-			tegra_spi_transfer_delay(xfer->delay_usecs);
+			spi_transfer_delay_exec(xfer);
 			goto exit;
 		} else if (list_is_last(&xfer->transfer_list,
 					&msg->transfers)) {
@@ -1107,11 +1108,11 @@
 				tspi->cs_control = spi;
 			else {
 				tegra_spi_transfer_end(spi);
-				tegra_spi_transfer_delay(xfer->delay_usecs);
+				spi_transfer_delay_exec(xfer);
 			}
 		} else if (xfer->cs_change) {
 			tegra_spi_transfer_end(spi);
-			tegra_spi_transfer_delay(xfer->delay_usecs);
+			spi_transfer_delay_exec(xfer);
 		}
 
 	}
@@ -1393,6 +1394,7 @@
 	ret = pm_runtime_get_sync(&pdev->dev);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "pm runtime get failed, e = %d\n", ret);
+		pm_runtime_put_noidle(&pdev->dev);
 		goto exit_pm_disable;
 	}
 
diff --git a/drivers/spi/spi-tegra20-sflash.c b/drivers/spi/spi-tegra20-sflash.c
index ecb6201..cfb7de7 100644
--- a/drivers/spi/spi-tegra20-sflash.c
+++ b/drivers/spi/spi-tegra20-sflash.c
@@ -341,10 +341,11 @@
 			goto exit;
 		}
 		msg->actual_length += xfer->len;
-		if (xfer->cs_change && xfer->delay_usecs) {
+		if (xfer->cs_change &&
+		    (xfer->delay_usecs || xfer->delay.value)) {
 			tegra_sflash_writel(tsd, tsd->def_command_reg,
 					SPI_COMMAND);
-			udelay(xfer->delay_usecs);
+			spi_transfer_delay_exec(xfer);
 		}
 	}
 	ret = 0;
@@ -358,9 +359,8 @@
 static irqreturn_t handle_cpu_based_xfer(struct tegra_sflash_data *tsd)
 {
 	struct spi_transfer *t = tsd->curr_xfer;
-	unsigned long flags;
 
-	spin_lock_irqsave(&tsd->lock, flags);
+	spin_lock(&tsd->lock);
 	if (tsd->tx_status || tsd->rx_status || (tsd->status_reg & SPI_BSY)) {
 		dev_err(tsd->dev,
 			"CpuXfer ERROR bit set 0x%x\n", tsd->status_reg);
@@ -390,7 +390,7 @@
 	tegra_sflash_calculate_curr_xfer_param(tsd->cur_spi, tsd, t);
 	tegra_sflash_start_cpu_based_transfer(tsd, t);
 exit:
-	spin_unlock_irqrestore(&tsd->lock, flags);
+	spin_unlock(&tsd->lock);
 	return IRQ_HANDLED;
 }
 
@@ -490,6 +490,7 @@
 	ret = pm_runtime_get_sync(&pdev->dev);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "pm runtime get failed, e = %d\n", ret);
+		pm_runtime_put_noidle(&pdev->dev);
 		goto exit_pm_disable;
 	}
 
diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c
index 2a1905c..669fc42 100644
--- a/drivers/spi/spi-tegra20-slink.c
+++ b/drivers/spi/spi-tegra20-slink.c
@@ -599,15 +599,10 @@
 	int ret;
 	struct dma_slave_config dma_sconfig;
 
-	dma_chan = dma_request_slave_channel_reason(tspi->dev,
-						dma_to_memory ? "rx" : "tx");
-	if (IS_ERR(dma_chan)) {
-		ret = PTR_ERR(dma_chan);
-		if (ret != -EPROBE_DEFER)
-			dev_err(tspi->dev,
-				"Dma channel is not available: %d\n", ret);
-		return ret;
-	}
+	dma_chan = dma_request_chan(tspi->dev, dma_to_memory ? "rx" : "tx");
+	if (IS_ERR(dma_chan))
+		return dev_err_probe(tspi->dev, PTR_ERR(dma_chan),
+				     "Dma channel is not available\n");
 
 	dma_buf = dma_alloc_coherent(tspi->dev, tspi->dma_buf_size,
 				&dma_phys, GFP_KERNEL);
@@ -1120,6 +1115,7 @@
 	ret = pm_runtime_get_sync(&pdev->dev);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "pm runtime get failed, e = %d\n", ret);
+		pm_runtime_put_noidle(&pdev->dev);
 		goto exit_pm_disable;
 	}
 	tspi->def_command_reg  = SLINK_M_S;
@@ -1205,7 +1201,7 @@
 }
 #endif
 
-static int tegra_slink_runtime_suspend(struct device *dev)
+static int __maybe_unused tegra_slink_runtime_suspend(struct device *dev)
 {
 	struct spi_master *master = dev_get_drvdata(dev);
 	struct tegra_slink_data *tspi = spi_master_get_devdata(master);
@@ -1217,7 +1213,7 @@
 	return 0;
 }
 
-static int tegra_slink_runtime_resume(struct device *dev)
+static int __maybe_unused tegra_slink_runtime_resume(struct device *dev)
 {
 	struct spi_master *master = dev_get_drvdata(dev);
 	struct tegra_slink_data *tspi = spi_master_get_devdata(master);
diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
index 6b6ef89..e06aafe 100644
--- a/drivers/spi/spi-ti-qspi.c
+++ b/drivers/spi/spi-ti-qspi.c
@@ -2,7 +2,7 @@
 /*
  * TI QSPI driver
  *
- * Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com
+ * Copyright (C) 2013 Texas Instruments Incorporated - https://www.ti.com
  * Author: Sourav Poddar <sourav.poddar@ti.com>
  */
 
@@ -80,8 +80,6 @@
 
 #define QSPI_COMPLETION_TIMEOUT		msecs_to_jiffies(2000)
 
-#define QSPI_FCLK			192000000
-
 /* Clock Control */
 #define QSPI_CLK_EN			(1 << 31)
 #define QSPI_CLK_DIV_MAX		0xffff
@@ -317,6 +315,8 @@
 {
 	int wlen;
 	unsigned int cmd;
+	u32 rx;
+	u8 rxlen, rx_wlen;
 	u8 *rxbuf;
 
 	rxbuf = t->rx_buf;
@@ -333,20 +333,67 @@
 		break;
 	}
 	wlen = t->bits_per_word >> 3;	/* in bytes */
+	rx_wlen = wlen;
 
 	while (count) {
 		dev_dbg(qspi->dev, "rx cmd %08x dc %08x\n", cmd, qspi->dc);
 		if (qspi_is_busy(qspi))
 			return -EBUSY;
 
+		switch (wlen) {
+		case 1:
+			/*
+			 * Optimize the 8-bit words transfers, as used by
+			 * the SPI flash devices.
+			 */
+			if (count >= QSPI_WLEN_MAX_BYTES) {
+				rxlen = QSPI_WLEN_MAX_BYTES;
+			} else {
+				rxlen = min(count, 4);
+			}
+			rx_wlen = rxlen << 3;
+			cmd &= ~QSPI_WLEN_MASK;
+			cmd |= QSPI_WLEN(rx_wlen);
+			break;
+		default:
+			rxlen = wlen;
+			break;
+		}
+
 		ti_qspi_write(qspi, cmd, QSPI_SPI_CMD_REG);
 		if (ti_qspi_poll_wc(qspi)) {
 			dev_err(qspi->dev, "read timed out\n");
 			return -ETIMEDOUT;
 		}
+
 		switch (wlen) {
 		case 1:
-			*rxbuf = readb(qspi->base + QSPI_SPI_DATA_REG);
+			/*
+			 * Optimize the 8-bit words transfers, as used by
+			 * the SPI flash devices.
+			 */
+			if (count >= QSPI_WLEN_MAX_BYTES) {
+				u32 *rxp = (u32 *) rxbuf;
+				rx = readl(qspi->base + QSPI_SPI_DATA_REG_3);
+				*rxp++ = be32_to_cpu(rx);
+				rx = readl(qspi->base + QSPI_SPI_DATA_REG_2);
+				*rxp++ = be32_to_cpu(rx);
+				rx = readl(qspi->base + QSPI_SPI_DATA_REG_1);
+				*rxp++ = be32_to_cpu(rx);
+				rx = readl(qspi->base + QSPI_SPI_DATA_REG);
+				*rxp++ = be32_to_cpu(rx);
+			} else {
+				u8 *rxp = rxbuf;
+				rx = readl(qspi->base + QSPI_SPI_DATA_REG);
+				if (rx_wlen >= 8)
+					*rxp++ = rx >> (rx_wlen - 8);
+				if (rx_wlen >= 16)
+					*rxp++ = rx >> (rx_wlen - 16);
+				if (rx_wlen >= 24)
+					*rxp++ = rx >> (rx_wlen - 24);
+				if (rx_wlen >= 32)
+					*rxp++ = rx;
+			}
 			break;
 		case 2:
 			*((u16 *)rxbuf) = readw(qspi->base + QSPI_SPI_DATA_REG);
@@ -355,8 +402,8 @@
 			*((u32 *)rxbuf) = readl(qspi->base + QSPI_SPI_DATA_REG);
 			break;
 		}
-		rxbuf += wlen;
-		count -= wlen;
+		rxbuf += rxlen;
+		count -= rxlen;
 	}
 
 	return 0;
@@ -528,6 +575,35 @@
 		      QSPI_SPI_SETUP_REG(spi->chip_select));
 }
 
+static int ti_qspi_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op)
+{
+	struct ti_qspi *qspi = spi_controller_get_devdata(mem->spi->master);
+	size_t max_len;
+
+	if (op->data.dir == SPI_MEM_DATA_IN) {
+		if (op->addr.val < qspi->mmap_size) {
+			/* Limit MMIO to the mmaped region */
+			if (op->addr.val + op->data.nbytes > qspi->mmap_size) {
+				max_len = qspi->mmap_size - op->addr.val;
+				op->data.nbytes = min((size_t) op->data.nbytes,
+						      max_len);
+			}
+		} else {
+			/*
+			 * Use fallback mode (SW generated transfers) above the
+			 * mmaped region.
+			 * Adjust size to comply with the QSPI max frame length.
+			 */
+			max_len = QSPI_FRAME;
+			max_len -= 1 + op->addr.nbytes + op->dummy.nbytes;
+			op->data.nbytes = min((size_t) op->data.nbytes,
+					      max_len);
+		}
+	}
+
+	return 0;
+}
+
 static int ti_qspi_exec_mem_op(struct spi_mem *mem,
 			       const struct spi_mem_op *op)
 {
@@ -578,6 +654,7 @@
 
 static const struct spi_controller_mem_ops ti_qspi_mem_ops = {
 	.exec_op = ti_qspi_exec_mem_op,
+	.adjust_op_size = ti_qspi_adjust_op_size,
 };
 
 static int ti_qspi_start_transfer_one(struct spi_master *master,
diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c
index 181ea30..7fb020a 100644
--- a/drivers/spi/spi-topcliff-pch.c
+++ b/drivers/spi/spi-topcliff-pch.c
@@ -122,6 +122,7 @@
 /**
  * struct pch_spi_data - Holds the SPI channel specific details
  * @io_remap_addr:		The remapped PCI base address
+ * @io_base_addr:		Base address
  * @master:			Pointer to the SPI master structure
  * @work:			Reference to work queue handler
  * @wait:			Wait queue for waking up upon receiving an
@@ -138,8 +139,8 @@
  *				transfer
  * @rx_index:			Receive data count; for bookkeeping during
  *				transfer
- * @tx_buff:			Buffer for data to be transmitted
- * @rx_index:			Buffer for Received data
+ * @pkt_tx_buff:		Buffer for data to be transmitted
+ * @pkt_rx_buff:		Buffer for received data
  * @n_curnt_chip:		The chip number that this SPI driver currently
  *				operates on
  * @current_chip:		Reference to the current chip that this SPI
@@ -151,7 +152,10 @@
  * @board_dat:			Reference to the SPI device data structure
  * @plat_dev:			platform_device structure
  * @ch:				SPI channel number
+ * @dma:			Local DMA information
+ * @use_dma:			True if DMA is to be used
  * @irq_reg_sts:		Status of IRQ registration
+ * @save_total_len:		Save length while data is being transferred
  */
 struct pch_spi_data {
 	void __iomem *io_remap_addr;
@@ -865,7 +869,7 @@
 	/* Set Tx DMA */
 	param = &dma->param_tx;
 	param->dma_dev = &dma_dev->dev;
-	param->chan_id = data->ch * 2; /* Tx = 0, 2 */;
+	param->chan_id = data->ch * 2; /* Tx = 0, 2 */
 	param->tx_reg = data->io_base_addr + PCH_SPDWR;
 	param->width = width;
 	chan = dma_request_channel(mask, pch_spi_filter, param);
@@ -880,7 +884,7 @@
 	/* Set Rx DMA */
 	param = &dma->param_rx;
 	param->dma_dev = &dma_dev->dev;
-	param->chan_id = data->ch * 2 + 1; /* Rx = Tx + 1 */;
+	param->chan_id = data->ch * 2 + 1; /* Rx = Tx + 1 */
 	param->rx_reg = data->io_base_addr + PCH_SPDRR;
 	param->width = width;
 	chan = dma_request_channel(mask, pch_spi_filter, param);
@@ -1000,7 +1004,7 @@
 	spin_unlock_irqrestore(&data->lock, flags);
 
 	/* RX */
-	dma->sg_rx_p = kcalloc(num, sizeof(*dma->sg_rx_p), GFP_ATOMIC);
+	dma->sg_rx_p = kmalloc_array(num, sizeof(*dma->sg_rx_p), GFP_ATOMIC);
 	if (!dma->sg_rx_p)
 		return;
 
@@ -1063,7 +1067,7 @@
 		head = 0;
 	}
 
-	dma->sg_tx_p = kcalloc(num, sizeof(*dma->sg_tx_p), GFP_ATOMIC);
+	dma->sg_tx_p = kmalloc_array(num, sizeof(*dma->sg_tx_p), GFP_ATOMIC);
 	if (!dma->sg_tx_p)
 		return;
 
@@ -1231,12 +1235,7 @@
 			"%s:data->current_msg->actual_length=%d\n",
 			__func__, data->current_msg->actual_length);
 
-		/* check for delay */
-		if (data->cur_trans->delay_usecs) {
-			dev_dbg(&data->master->dev, "%s:delay in usec=%d\n",
-				__func__, data->cur_trans->delay_usecs);
-			udelay(data->cur_trans->delay_usecs);
-		}
+		spi_transfer_delay_exec(data->cur_trans);
 
 		spin_lock(&data->lock);
 
@@ -1638,64 +1637,37 @@
 	kfree(pd_dev_save);
 }
 
-#ifdef CONFIG_PM
-static int pch_spi_suspend(struct pci_dev *pdev, pm_message_t state)
+static int __maybe_unused pch_spi_suspend(struct device *dev)
 {
-	int retval;
-	struct pch_pd_dev_save *pd_dev_save = pci_get_drvdata(pdev);
+	struct pch_pd_dev_save *pd_dev_save = dev_get_drvdata(dev);
 
-	dev_dbg(&pdev->dev, "%s ENTRY\n", __func__);
+	dev_dbg(dev, "%s ENTRY\n", __func__);
 
 	pd_dev_save->board_dat->suspend_sts = true;
 
-	/* save config space */
-	retval = pci_save_state(pdev);
-	if (retval == 0) {
-		pci_enable_wake(pdev, PCI_D3hot, 0);
-		pci_disable_device(pdev);
-		pci_set_power_state(pdev, PCI_D3hot);
-	} else {
-		dev_err(&pdev->dev, "%s pci_save_state failed\n", __func__);
-	}
-
-	return retval;
+	return 0;
 }
 
-static int pch_spi_resume(struct pci_dev *pdev)
+static int __maybe_unused pch_spi_resume(struct device *dev)
 {
-	int retval;
-	struct pch_pd_dev_save *pd_dev_save = pci_get_drvdata(pdev);
-	dev_dbg(&pdev->dev, "%s ENTRY\n", __func__);
+	struct pch_pd_dev_save *pd_dev_save = dev_get_drvdata(dev);
 
-	pci_set_power_state(pdev, PCI_D0);
-	pci_restore_state(pdev);
+	dev_dbg(dev, "%s ENTRY\n", __func__);
 
-	retval = pci_enable_device(pdev);
-	if (retval < 0) {
-		dev_err(&pdev->dev,
-			"%s pci_enable_device failed\n", __func__);
-	} else {
-		pci_enable_wake(pdev, PCI_D3hot, 0);
+	/* set suspend status to false */
+	pd_dev_save->board_dat->suspend_sts = false;
 
-		/* set suspend status to false */
-		pd_dev_save->board_dat->suspend_sts = false;
-	}
-
-	return retval;
+	return 0;
 }
-#else
-#define pch_spi_suspend NULL
-#define pch_spi_resume NULL
 
-#endif
+static SIMPLE_DEV_PM_OPS(pch_spi_pm_ops, pch_spi_suspend, pch_spi_resume);
 
 static struct pci_driver pch_spi_pcidev_driver = {
 	.name = "pch_spi",
 	.id_table = pch_spi_pcidev_id,
 	.probe = pch_spi_probe,
 	.remove = pch_spi_remove,
-	.suspend = pch_spi_suspend,
-	.resume = pch_spi_resume,
+	.driver.pm = &pch_spi_pm_ops,
 };
 
 static int __init pch_spi_init(void)
diff --git a/drivers/spi/spi-txx9.c b/drivers/spi/spi-txx9.c
index 51759d3..3606232 100644
--- a/drivers/spi/spi-txx9.c
+++ b/drivers/spi/spi-txx9.c
@@ -26,7 +26,8 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/module.h>
-#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
+#include <linux/gpio/consumer.h>
 
 
 #define SPI_FIFO_SIZE 4
@@ -79,7 +80,7 @@
 	void __iomem *membase;
 	int baseclk;
 	struct clk *clk;
-	int last_chipselect;
+	struct gpio_desc *last_chipselect;
 	int last_chipselect_val;
 };
 
@@ -95,20 +96,22 @@
 static void txx9spi_cs_func(struct spi_device *spi, struct txx9spi *c,
 		int on, unsigned int cs_delay)
 {
-	int val = (spi->mode & SPI_CS_HIGH) ? on : !on;
-
+	/*
+	 * The GPIO descriptor will track polarity inversion inside
+	 * gpiolib.
+	 */
 	if (on) {
 		/* deselect the chip with cs_change hint in last transfer */
-		if (c->last_chipselect >= 0)
-			gpio_set_value(c->last_chipselect,
+		if (c->last_chipselect)
+			gpiod_set_value(c->last_chipselect,
 					!c->last_chipselect_val);
-		c->last_chipselect = spi->chip_select;
-		c->last_chipselect_val = val;
+		c->last_chipselect = spi->cs_gpiod;
+		c->last_chipselect_val = on;
 	} else {
-		c->last_chipselect = -1;
+		c->last_chipselect = NULL;
 		ndelay(cs_delay);	/* CS Hold Time */
 	}
-	gpio_set_value(spi->chip_select, val);
+	gpiod_set_value(spi->cs_gpiod, on);
 	ndelay(cs_delay);	/* CS Setup Time / CS Recovery Time */
 }
 
@@ -119,12 +122,6 @@
 	if (!spi->max_speed_hz)
 		return -EINVAL;
 
-	if (gpio_direction_output(spi->chip_select,
-			!(spi->mode & SPI_CS_HIGH))) {
-		dev_err(&spi->dev, "Cannot setup GPIO for chipselect.\n");
-		return -EINVAL;
-	}
-
 	/* deselect chip */
 	spin_lock(&c->lock);
 	txx9spi_cs_func(spi, c, 0, (NSEC_PER_SEC / 2) / spi->max_speed_hz);
@@ -248,8 +245,7 @@
 			len -= count * wsize;
 		}
 		m->actual_length += t->len;
-		if (t->delay_usecs)
-			udelay(t->delay_usecs);
+		spi_transfer_delay_exec(t);
 
 		if (!cs_change)
 			continue;
@@ -320,6 +316,47 @@
 	return 0;
 }
 
+/*
+ * Chip select uses GPIO only, further the driver is using the chip select
+ * numer (from the device tree "reg" property, and this can only come from
+ * device tree since this i MIPS and there is no way to pass platform data) as
+ * the GPIO number. As the platform has only one GPIO controller (the txx9 GPIO
+ * chip) it is thus using the chip select number as an offset into that chip.
+ * This chip has a maximum of 16 GPIOs 0..15 and this is what all platforms
+ * register.
+ *
+ * We modernized this behaviour by explicitly converting that offset to an
+ * offset on the GPIO chip using a GPIO descriptor machine table of the same
+ * size as the txx9 GPIO chip with a 1-to-1 mapping of chip select to GPIO
+ * offset.
+ *
+ * This is admittedly a hack, but it is countering the hack of using "reg" to
+ * contain a GPIO offset when it should be using "cs-gpios" as the SPI bindings
+ * state.
+ */
+static struct gpiod_lookup_table txx9spi_cs_gpio_table = {
+	.dev_id = "spi0",
+	.table = {
+		GPIO_LOOKUP_IDX("TXx9", 0, "cs", 0, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX("TXx9", 1, "cs", 1, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX("TXx9", 2, "cs", 2, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX("TXx9", 3, "cs", 3, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX("TXx9", 4, "cs", 4, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX("TXx9", 5, "cs", 5, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX("TXx9", 6, "cs", 6, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX("TXx9", 7, "cs", 7, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX("TXx9", 8, "cs", 8, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX("TXx9", 9, "cs", 9, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX("TXx9", 10, "cs", 10, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX("TXx9", 11, "cs", 11, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX("TXx9", 12, "cs", 12, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX("TXx9", 13, "cs", 13, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX("TXx9", 14, "cs", 14, GPIO_ACTIVE_LOW),
+		GPIO_LOOKUP_IDX("TXx9", 15, "cs", 15, GPIO_ACTIVE_LOW),
+		{ },
+	},
+};
+
 static int txx9spi_probe(struct platform_device *dev)
 {
 	struct spi_master *master;
@@ -373,12 +410,14 @@
 	if (ret)
 		goto exit;
 
-	c->last_chipselect = -1;
+	c->last_chipselect = NULL;
 
 	dev_info(&dev->dev, "at %#llx, irq %d, %dMHz\n",
 		 (unsigned long long)res->start, irq,
 		 (c->baseclk + 500000) / 1000000);
 
+	gpiod_add_lookup_table(&txx9spi_cs_gpio_table);
+
 	/* the spi->mode bits understood by this driver: */
 	master->mode_bits = SPI_CS_HIGH | SPI_CPOL | SPI_CPHA;
 
@@ -387,6 +426,7 @@
 	master->transfer = txx9spi_transfer;
 	master->num_chipselect = (u16)UINT_MAX; /* any GPIO numbers */
 	master->bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(16);
+	master->use_gpio_descriptors = true;
 
 	ret = devm_spi_register_master(&dev->dev, master);
 	if (ret)
diff --git a/drivers/spi/spi-uniphier.c b/drivers/spi/spi-uniphier.c
index ce9b301..ad0088e 100644
--- a/drivers/spi/spi-uniphier.c
+++ b/drivers/spi/spi-uniphier.c
@@ -8,6 +8,7 @@
 #include <linux/bitops.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/dmaengine.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
@@ -23,6 +24,7 @@
 
 struct uniphier_spi_priv {
 	void __iomem *base;
+	dma_addr_t base_dma_addr;
 	struct clk *clk;
 	struct spi_master *master;
 	struct completion xfer_done;
@@ -32,6 +34,7 @@
 	unsigned int rx_bytes;
 	const u8 *tx_buf;
 	u8 *rx_buf;
+	atomic_t dma_busy;
 
 	bool is_save_param;
 	u8 bits_per_word;
@@ -61,11 +64,16 @@
 #define   SSI_FPS_FSTRT		BIT(14)
 
 #define SSI_SR			0x14
+#define   SSI_SR_BUSY		BIT(7)
 #define   SSI_SR_RNE		BIT(0)
 
 #define SSI_IE			0x18
+#define   SSI_IE_TCIE		BIT(4)
 #define   SSI_IE_RCIE		BIT(3)
+#define   SSI_IE_TXRE		BIT(2)
+#define   SSI_IE_RXRE		BIT(1)
 #define   SSI_IE_RORIE		BIT(0)
+#define   SSI_IE_ALL_MASK	GENMASK(4, 0)
 
 #define SSI_IS			0x1c
 #define   SSI_IS_RXRS		BIT(9)
@@ -87,15 +95,19 @@
 #define SSI_RXDR		0x24
 
 #define SSI_FIFO_DEPTH		8U
+#define SSI_FIFO_BURST_NUM	1
+
+#define SSI_DMA_RX_BUSY		BIT(1)
+#define SSI_DMA_TX_BUSY		BIT(0)
 
 static inline unsigned int bytes_per_word(unsigned int bits)
 {
 	return bits <= 8 ? 1 : (bits <= 16 ? 2 : 4);
 }
 
-static inline void uniphier_spi_irq_enable(struct spi_device *spi, u32 mask)
+static inline void uniphier_spi_irq_enable(struct uniphier_spi_priv *priv,
+					   u32 mask)
 {
-	struct uniphier_spi_priv *priv = spi_master_get_devdata(spi->master);
 	u32 val;
 
 	val = readl(priv->base + SSI_IE);
@@ -103,9 +115,9 @@
 	writel(val, priv->base + SSI_IE);
 }
 
-static inline void uniphier_spi_irq_disable(struct spi_device *spi, u32 mask)
+static inline void uniphier_spi_irq_disable(struct uniphier_spi_priv *priv,
+					    u32 mask)
 {
-	struct uniphier_spi_priv *priv = spi_master_get_devdata(spi->master);
 	u32 val;
 
 	val = readl(priv->base + SSI_IE);
@@ -334,6 +346,128 @@
 	writel(val, priv->base + SSI_FPS);
 }
 
+static bool uniphier_spi_can_dma(struct spi_master *master,
+				 struct spi_device *spi,
+				 struct spi_transfer *t)
+{
+	struct uniphier_spi_priv *priv = spi_master_get_devdata(master);
+	unsigned int bpw = bytes_per_word(priv->bits_per_word);
+
+	if ((!master->dma_tx && !master->dma_rx)
+	    || (!master->dma_tx && t->tx_buf)
+	    || (!master->dma_rx && t->rx_buf))
+		return false;
+
+	return DIV_ROUND_UP(t->len, bpw) > SSI_FIFO_DEPTH;
+}
+
+static void uniphier_spi_dma_rxcb(void *data)
+{
+	struct spi_master *master = data;
+	struct uniphier_spi_priv *priv = spi_master_get_devdata(master);
+	int state = atomic_fetch_andnot(SSI_DMA_RX_BUSY, &priv->dma_busy);
+
+	uniphier_spi_irq_disable(priv, SSI_IE_RXRE);
+
+	if (!(state & SSI_DMA_TX_BUSY))
+		spi_finalize_current_transfer(master);
+}
+
+static void uniphier_spi_dma_txcb(void *data)
+{
+	struct spi_master *master = data;
+	struct uniphier_spi_priv *priv = spi_master_get_devdata(master);
+	int state = atomic_fetch_andnot(SSI_DMA_TX_BUSY, &priv->dma_busy);
+
+	uniphier_spi_irq_disable(priv, SSI_IE_TXRE);
+
+	if (!(state & SSI_DMA_RX_BUSY))
+		spi_finalize_current_transfer(master);
+}
+
+static int uniphier_spi_transfer_one_dma(struct spi_master *master,
+					 struct spi_device *spi,
+					 struct spi_transfer *t)
+{
+	struct uniphier_spi_priv *priv = spi_master_get_devdata(master);
+	struct dma_async_tx_descriptor *rxdesc = NULL, *txdesc = NULL;
+	int buswidth;
+
+	atomic_set(&priv->dma_busy, 0);
+
+	uniphier_spi_set_fifo_threshold(priv, SSI_FIFO_BURST_NUM);
+
+	if (priv->bits_per_word <= 8)
+		buswidth = DMA_SLAVE_BUSWIDTH_1_BYTE;
+	else if (priv->bits_per_word <= 16)
+		buswidth = DMA_SLAVE_BUSWIDTH_2_BYTES;
+	else
+		buswidth = DMA_SLAVE_BUSWIDTH_4_BYTES;
+
+	if (priv->rx_buf) {
+		struct dma_slave_config rxconf = {
+			.direction = DMA_DEV_TO_MEM,
+			.src_addr = priv->base_dma_addr + SSI_RXDR,
+			.src_addr_width = buswidth,
+			.src_maxburst = SSI_FIFO_BURST_NUM,
+		};
+
+		dmaengine_slave_config(master->dma_rx, &rxconf);
+
+		rxdesc = dmaengine_prep_slave_sg(
+			master->dma_rx,
+			t->rx_sg.sgl, t->rx_sg.nents,
+			DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+		if (!rxdesc)
+			goto out_err_prep;
+
+		rxdesc->callback = uniphier_spi_dma_rxcb;
+		rxdesc->callback_param = master;
+
+		uniphier_spi_irq_enable(priv, SSI_IE_RXRE);
+		atomic_or(SSI_DMA_RX_BUSY, &priv->dma_busy);
+
+		dmaengine_submit(rxdesc);
+		dma_async_issue_pending(master->dma_rx);
+	}
+
+	if (priv->tx_buf) {
+		struct dma_slave_config txconf = {
+			.direction = DMA_MEM_TO_DEV,
+			.dst_addr = priv->base_dma_addr + SSI_TXDR,
+			.dst_addr_width = buswidth,
+			.dst_maxburst = SSI_FIFO_BURST_NUM,
+		};
+
+		dmaengine_slave_config(master->dma_tx, &txconf);
+
+		txdesc = dmaengine_prep_slave_sg(
+			master->dma_tx,
+			t->tx_sg.sgl, t->tx_sg.nents,
+			DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+		if (!txdesc)
+			goto out_err_prep;
+
+		txdesc->callback = uniphier_spi_dma_txcb;
+		txdesc->callback_param = master;
+
+		uniphier_spi_irq_enable(priv, SSI_IE_TXRE);
+		atomic_or(SSI_DMA_TX_BUSY, &priv->dma_busy);
+
+		dmaengine_submit(txdesc);
+		dma_async_issue_pending(master->dma_tx);
+	}
+
+	/* signal that we need to wait for completion */
+	return (priv->tx_buf || priv->rx_buf);
+
+out_err_prep:
+	if (rxdesc)
+		dmaengine_terminate_sync(master->dma_rx);
+
+	return -EINVAL;
+}
+
 static int uniphier_spi_transfer_one_irq(struct spi_master *master,
 					 struct spi_device *spi,
 					 struct spi_transfer *t)
@@ -346,12 +480,12 @@
 
 	uniphier_spi_fill_tx_fifo(priv);
 
-	uniphier_spi_irq_enable(spi, SSI_IE_RCIE | SSI_IE_RORIE);
+	uniphier_spi_irq_enable(priv, SSI_IE_RCIE | SSI_IE_RORIE);
 
 	time_left = wait_for_completion_timeout(&priv->xfer_done,
 					msecs_to_jiffies(SSI_TIMEOUT_MS));
 
-	uniphier_spi_irq_disable(spi, SSI_IE_RCIE | SSI_IE_RORIE);
+	uniphier_spi_irq_disable(priv, SSI_IE_RCIE | SSI_IE_RORIE);
 
 	if (!time_left) {
 		dev_err(dev, "transfer timeout.\n");
@@ -395,6 +529,7 @@
 {
 	struct uniphier_spi_priv *priv = spi_master_get_devdata(master);
 	unsigned long threshold;
+	bool use_dma;
 
 	/* Terminate and return success for 0 byte length transfer */
 	if (!t->len)
@@ -402,6 +537,10 @@
 
 	uniphier_spi_setup_transfer(spi, t);
 
+	use_dma = master->can_dma ? master->can_dma(master, spi, t) : false;
+	if (use_dma)
+		return uniphier_spi_transfer_one_dma(master, spi, t);
+
 	/*
 	 * If the transfer operation will take longer than
 	 * SSI_POLL_TIMEOUT_US, it should use irq.
@@ -432,6 +571,32 @@
 	return 0;
 }
 
+static void uniphier_spi_handle_err(struct spi_master *master,
+				    struct spi_message *msg)
+{
+	struct uniphier_spi_priv *priv = spi_master_get_devdata(master);
+	u32 val;
+
+	/* stop running spi transfer */
+	writel(0, priv->base + SSI_CTL);
+
+	/* reset FIFOs */
+	val = SSI_FC_TXFFL | SSI_FC_RXFFL;
+	writel(val, priv->base + SSI_FC);
+
+	uniphier_spi_irq_disable(priv, SSI_IE_ALL_MASK);
+
+	if (atomic_read(&priv->dma_busy) & SSI_DMA_TX_BUSY) {
+		dmaengine_terminate_async(master->dma_tx);
+		atomic_andnot(SSI_DMA_TX_BUSY, &priv->dma_busy);
+	}
+
+	if (atomic_read(&priv->dma_busy) & SSI_DMA_RX_BUSY) {
+		dmaengine_terminate_async(master->dma_rx);
+		atomic_andnot(SSI_DMA_RX_BUSY, &priv->dma_busy);
+	}
+}
+
 static irqreturn_t uniphier_spi_handler(int irq, void *dev_id)
 {
 	struct uniphier_spi_priv *priv = dev_id;
@@ -477,6 +642,9 @@
 {
 	struct uniphier_spi_priv *priv;
 	struct spi_master *master;
+	struct resource *res;
+	struct dma_slave_caps caps;
+	u32 dma_tx_burst = 0, dma_rx_burst = 0;
 	unsigned long clk_rate;
 	int irq;
 	int ret;
@@ -491,11 +659,12 @@
 	priv->master = master;
 	priv->is_save_param = false;
 
-	priv->base = devm_platform_ioremap_resource(pdev, 0);
+	priv->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(priv->base)) {
 		ret = PTR_ERR(priv->base);
 		goto out_master_put;
 	}
+	priv->base_dma_addr = res->start;
 
 	priv->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(priv->clk)) {
@@ -538,14 +707,66 @@
 				= uniphier_spi_prepare_transfer_hardware;
 	master->unprepare_transfer_hardware
 				= uniphier_spi_unprepare_transfer_hardware;
+	master->handle_err = uniphier_spi_handle_err;
+	master->can_dma = uniphier_spi_can_dma;
+
 	master->num_chipselect = 1;
+	master->flags = SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX;
+
+	master->dma_tx = dma_request_chan(&pdev->dev, "tx");
+	if (IS_ERR_OR_NULL(master->dma_tx)) {
+		if (PTR_ERR(master->dma_tx) == -EPROBE_DEFER) {
+			ret = -EPROBE_DEFER;
+			goto out_disable_clk;
+		}
+		master->dma_tx = NULL;
+		dma_tx_burst = INT_MAX;
+	} else {
+		ret = dma_get_slave_caps(master->dma_tx, &caps);
+		if (ret) {
+			dev_err(&pdev->dev, "failed to get TX DMA capacities: %d\n",
+				ret);
+			goto out_release_dma;
+		}
+		dma_tx_burst = caps.max_burst;
+	}
+
+	master->dma_rx = dma_request_chan(&pdev->dev, "rx");
+	if (IS_ERR_OR_NULL(master->dma_rx)) {
+		if (PTR_ERR(master->dma_rx) == -EPROBE_DEFER) {
+			ret = -EPROBE_DEFER;
+			goto out_release_dma;
+		}
+		master->dma_rx = NULL;
+		dma_rx_burst = INT_MAX;
+	} else {
+		ret = dma_get_slave_caps(master->dma_rx, &caps);
+		if (ret) {
+			dev_err(&pdev->dev, "failed to get RX DMA capacities: %d\n",
+				ret);
+			goto out_release_dma;
+		}
+		dma_rx_burst = caps.max_burst;
+	}
+
+	master->max_dma_len = min(dma_tx_burst, dma_rx_burst);
 
 	ret = devm_spi_register_master(&pdev->dev, master);
 	if (ret)
-		goto out_disable_clk;
+		goto out_release_dma;
 
 	return 0;
 
+out_release_dma:
+	if (!IS_ERR_OR_NULL(master->dma_rx)) {
+		dma_release_channel(master->dma_rx);
+		master->dma_rx = NULL;
+	}
+	if (!IS_ERR_OR_NULL(master->dma_tx)) {
+		dma_release_channel(master->dma_tx);
+		master->dma_tx = NULL;
+	}
+
 out_disable_clk:
 	clk_disable_unprepare(priv->clk);
 
@@ -556,7 +777,13 @@
 
 static int uniphier_spi_remove(struct platform_device *pdev)
 {
-	struct uniphier_spi_priv *priv = platform_get_drvdata(pdev);
+	struct spi_master *master = platform_get_drvdata(pdev);
+	struct uniphier_spi_priv *priv = spi_master_get_devdata(master);
+
+	if (master->dma_tx)
+		dma_release_channel(master->dma_tx);
+	if (master->dma_rx)
+		dma_release_channel(master->dma_rx);
 
 	clk_disable_unprepare(priv->clk);
 
diff --git a/drivers/spi/spi-xcomm.c b/drivers/spi/spi-xcomm.c
index a3496c4..1d9b3f0 100644
--- a/drivers/spi/spi-xcomm.c
+++ b/drivers/spi/spi-xcomm.c
@@ -188,8 +188,7 @@
 		}
 		status = 0;
 
-		if (t->delay_usecs)
-			udelay(t->delay_usecs);
+		spi_transfer_delay_exec(t);
 
 		is_first = false;
 	}
diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c
index d5f9d5f..523edfd 100644
--- a/drivers/spi/spi-xilinx.c
+++ b/drivers/spi/spi-xilinx.c
@@ -391,7 +391,7 @@
 	struct xilinx_spi *xspi;
 	struct xspi_platform_data *pdata;
 	struct resource *res;
-	int ret, num_cs = 0, bits_per_word = 8;
+	int ret, num_cs = 0, bits_per_word;
 	struct spi_master *master;
 	u32 tmp;
 	u8 i;
@@ -403,6 +403,11 @@
 	} else {
 		of_property_read_u32(pdev->dev.of_node, "xlnx,num-ss-bits",
 					  &num_cs);
+		ret = of_property_read_u32(pdev->dev.of_node,
+					   "xlnx,num-transfer-bits",
+					   &bits_per_word);
+		if (ret)
+			bits_per_word = 8;
 	}
 
 	if (!num_cs) {
@@ -486,8 +491,7 @@
 		goto put_master;
 	}
 
-	dev_info(&pdev->dev, "at 0x%08llX mapped to 0x%p, irq=%d\n",
-		(unsigned long long)res->start, xspi->regs, xspi->irq);
+	dev_info(&pdev->dev, "at %pR, irq=%d\n", res, xspi->irq);
 
 	if (pdata) {
 		for (i = 0; i < pdata->num_devices; i++)
diff --git a/drivers/spi/spi-xtensa-xtfpga.c b/drivers/spi/spi-xtensa-xtfpga.c
index 86516eb..fc2b5eb 100644
--- a/drivers/spi/spi-xtensa-xtfpga.c
+++ b/drivers/spi/spi-xtensa-xtfpga.c
@@ -80,7 +80,6 @@
 static int xtfpga_spi_probe(struct platform_device *pdev)
 {
 	struct xtfpga_spi *xspi;
-	struct resource *mem;
 	int ret;
 	struct spi_master *master;
 
@@ -97,14 +96,7 @@
 	xspi->bitbang.master = master;
 	xspi->bitbang.chipselect = xtfpga_spi_chipselect;
 	xspi->bitbang.txrx_word[SPI_MODE_0] = xtfpga_spi_txrx_word;
-
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!mem) {
-		dev_err(&pdev->dev, "No memory resource\n");
-		ret = -ENODEV;
-		goto err;
-	}
-	xspi->regs = devm_ioremap_resource(&pdev->dev, mem);
+	xspi->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(xspi->regs)) {
 		ret = PTR_ERR(xspi->regs);
 		goto err;
diff --git a/drivers/spi/spi-zynq-qspi.c b/drivers/spi/spi-zynq-qspi.c
index 1ced6eb..13c0b15 100644
--- a/drivers/spi/spi-zynq-qspi.c
+++ b/drivers/spi/spi-zynq-qspi.c
@@ -7,7 +7,6 @@
 
 #include <linux/clk.h>
 #include <linux/delay.h>
-#include <linux/gpio.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
@@ -51,7 +50,6 @@
 #define ZYNQ_QSPI_CONFIG_BDRATE_MASK	GENMASK(5, 3) /* Baud Rate Mask */
 #define ZYNQ_QSPI_CONFIG_CPHA_MASK	BIT(2) /* Clock Phase Control */
 #define ZYNQ_QSPI_CONFIG_CPOL_MASK	BIT(1) /* Clock Polarity Control */
-#define ZYNQ_QSPI_CONFIG_SSCTRL_MASK	BIT(10) /* Slave Select Mask */
 #define ZYNQ_QSPI_CONFIG_FWIDTH_MASK	GENMASK(7, 6) /* FIFO width */
 #define ZYNQ_QSPI_CONFIG_MSTREN_MASK	BIT(0) /* Master Mode */
 
@@ -61,9 +59,9 @@
  * These are the values used in the calculation of baud rate divisor and
  * setting the slave select.
  */
-#define ZYNQ_QSPI_BAUD_DIV_MAX		GENMASK(2, 0) /* Baud rate maximum */
-#define ZYNQ_QSPI_BAUD_DIV_SHIFT	3 /* Baud rate divisor shift in CR */
-#define ZYNQ_QSPI_SS_SHIFT		10 /* Slave Select field shift in CR */
+#define ZYNQ_QSPI_CONFIG_BAUD_DIV_MAX	GENMASK(2, 0) /* Baud rate maximum */
+#define ZYNQ_QSPI_CONFIG_BAUD_DIV_SHIFT	3 /* Baud rate divisor shift */
+#define ZYNQ_QSPI_CONFIG_PCS		BIT(10) /* Peripheral Chip Select */
 
 /*
  * QSPI Interrupt Registers bit Masks
@@ -99,9 +97,9 @@
  * It is named Linear Configuration but it controls other modes when not in
  * linear mode also.
  */
-#define ZYNQ_QSPI_LCFG_TWO_MEM_MASK	BIT(30) /* LQSPI Two memories Mask */
-#define ZYNQ_QSPI_LCFG_SEP_BUS_MASK	BIT(29) /* LQSPI Separate bus Mask */
-#define ZYNQ_QSPI_LCFG_U_PAGE_MASK	BIT(28) /* LQSPI Upper Page Mask */
+#define ZYNQ_QSPI_LCFG_TWO_MEM		BIT(30) /* LQSPI Two memories */
+#define ZYNQ_QSPI_LCFG_SEP_BUS		BIT(29) /* LQSPI Separate bus */
+#define ZYNQ_QSPI_LCFG_U_PAGE		BIT(28) /* LQSPI Upper Page */
 
 #define ZYNQ_QSPI_LCFG_DUMMY_SHIFT	8
 
@@ -116,11 +114,12 @@
  */
 #define ZYNQ_QSPI_MODEBITS			(SPI_CPOL | SPI_CPHA)
 
-/* Default number of chip selects */
-#define ZYNQ_QSPI_DEFAULT_NUM_CS	1
+/* Maximum number of chip selects */
+#define ZYNQ_QSPI_MAX_NUM_CS		2
 
 /**
  * struct zynq_qspi - Defines qspi driver instance
+ * @dev:		Pointer to the this device's information
  * @regs:		Virtual address of the QSPI controller registers
  * @refclk:		Pointer to the peripheral clock
  * @pclk:		Pointer to the APB clock
@@ -161,6 +160,7 @@
 /**
  * zynq_qspi_init_hw - Initialize the hardware
  * @xqspi:	Pointer to the zynq_qspi structure
+ * @num_cs:	Number of connected CS (to enable dual memories if needed)
  *
  * The default settings of the QSPI controller's configurable parameters on
  * reset are
@@ -178,7 +178,7 @@
  *	- Set the little endian mode of TX FIFO and
  *	- Enable the QSPI controller
  */
-static void zynq_qspi_init_hw(struct zynq_qspi *xqspi)
+static void zynq_qspi_init_hw(struct zynq_qspi *xqspi, unsigned int num_cs)
 {
 	u32 config_reg;
 
@@ -186,7 +186,12 @@
 	zynq_qspi_write(xqspi, ZYNQ_QSPI_IDIS_OFFSET, ZYNQ_QSPI_IXR_ALL_MASK);
 
 	/* Disable linear mode as the boot loader may have used it */
-	zynq_qspi_write(xqspi, ZYNQ_QSPI_LINEAR_CFG_OFFSET, 0);
+	config_reg = 0;
+	/* At the same time, enable dual mode if more than 1 CS is available */
+	if (num_cs > 1)
+		config_reg |= ZYNQ_QSPI_LCFG_TWO_MEM;
+
+	zynq_qspi_write(xqspi, ZYNQ_QSPI_LINEAR_CFG_OFFSET, config_reg);
 
 	/* Clear the RX FIFO */
 	while (zynq_qspi_read(xqspi, ZYNQ_QSPI_STATUS_OFFSET) &
@@ -284,28 +289,35 @@
  */
 static void zynq_qspi_chipselect(struct spi_device *spi, bool assert)
 {
-	struct spi_controller *ctrl = spi->master;
-	struct zynq_qspi *xqspi = spi_controller_get_devdata(ctrl);
+	struct spi_controller *ctlr = spi->master;
+	struct zynq_qspi *xqspi = spi_controller_get_devdata(ctlr);
 	u32 config_reg;
 
-	config_reg = zynq_qspi_read(xqspi, ZYNQ_QSPI_CONFIG_OFFSET);
-	if (assert) {
-		/* Select the slave */
-		config_reg &= ~ZYNQ_QSPI_CONFIG_SSCTRL_MASK;
-		config_reg |= (((~(BIT(spi->chip_select))) <<
-				ZYNQ_QSPI_SS_SHIFT) &
-				ZYNQ_QSPI_CONFIG_SSCTRL_MASK);
-	} else {
-		config_reg |= ZYNQ_QSPI_CONFIG_SSCTRL_MASK;
+	/* Select the lower (CS0) or upper (CS1) memory */
+	if (ctlr->num_chipselect > 1) {
+		config_reg = zynq_qspi_read(xqspi, ZYNQ_QSPI_LINEAR_CFG_OFFSET);
+		if (!spi->chip_select)
+			config_reg &= ~ZYNQ_QSPI_LCFG_U_PAGE;
+		else
+			config_reg |= ZYNQ_QSPI_LCFG_U_PAGE;
+
+		zynq_qspi_write(xqspi, ZYNQ_QSPI_LINEAR_CFG_OFFSET, config_reg);
 	}
 
+	/* Ground the line to assert the CS */
+	config_reg = zynq_qspi_read(xqspi, ZYNQ_QSPI_CONFIG_OFFSET);
+	if (assert)
+		config_reg &= ~ZYNQ_QSPI_CONFIG_PCS;
+	else
+		config_reg |= ZYNQ_QSPI_CONFIG_PCS;
+
 	zynq_qspi_write(xqspi, ZYNQ_QSPI_CONFIG_OFFSET, config_reg);
 }
 
 /**
  * zynq_qspi_config_op - Configure QSPI controller for specified transfer
  * @xqspi:	Pointer to the zynq_qspi structure
- * @qspi:	Pointer to the spi_device structure
+ * @spi:	Pointer to the spi_device structure
  *
  * Sets the operational mode of QSPI controller for the next QSPI transfer and
  * sets the requested clock frequency.
@@ -332,7 +344,7 @@
 	 *      ----------------
 	 *      111 - divide by 256
 	 */
-	while ((baud_rate_val < ZYNQ_QSPI_BAUD_DIV_MAX)  &&
+	while ((baud_rate_val < ZYNQ_QSPI_CONFIG_BAUD_DIV_MAX)  &&
 	       (clk_get_rate(xqspi->refclk) / (2 << baud_rate_val)) >
 		spi->max_speed_hz)
 		baud_rate_val++;
@@ -348,7 +360,7 @@
 		config_reg |= ZYNQ_QSPI_CONFIG_CPOL_MASK;
 
 	config_reg &= ~ZYNQ_QSPI_CONFIG_BDRATE_MASK;
-	config_reg |= (baud_rate_val << ZYNQ_QSPI_BAUD_DIV_SHIFT);
+	config_reg |= (baud_rate_val << ZYNQ_QSPI_CONFIG_BAUD_DIV_SHIFT);
 	zynq_qspi_write(xqspi, ZYNQ_QSPI_CONFIG_OFFSET, config_reg);
 
 	return 0;
@@ -365,10 +377,10 @@
  */
 static int zynq_qspi_setup_op(struct spi_device *spi)
 {
-	struct spi_controller *ctrl = spi->master;
-	struct zynq_qspi *qspi = spi_controller_get_devdata(ctrl);
+	struct spi_controller *ctlr = spi->master;
+	struct zynq_qspi *qspi = spi_controller_get_devdata(ctlr);
 
-	if (ctrl->busy)
+	if (ctlr->busy)
 		return -EBUSY;
 
 	clk_enable(qspi->refclk);
@@ -528,8 +540,8 @@
 		reinit_completion(&xqspi->data_completion);
 		xqspi->txbuf = (u8 *)&op->cmd.opcode;
 		xqspi->rxbuf = NULL;
-		xqspi->tx_bytes = sizeof(op->cmd.opcode);
-		xqspi->rx_bytes = sizeof(op->cmd.opcode);
+		xqspi->tx_bytes = op->cmd.nbytes;
+		xqspi->rx_bytes = op->cmd.nbytes;
 		zynq_qspi_write_op(xqspi, ZYNQ_QSPI_FIFO_DEPTH, true);
 		zynq_qspi_write(xqspi, ZYNQ_QSPI_IEN_OFFSET,
 				ZYNQ_QSPI_IXR_RXTX_MASK);
@@ -558,6 +570,9 @@
 
 	if (op->dummy.nbytes) {
 		tmpbuf = kzalloc(op->dummy.nbytes, GFP_KERNEL);
+		if (!tmpbuf)
+			return -ENOMEM;
+
 		memset(tmpbuf, 0xff, op->dummy.nbytes);
 		reinit_completion(&xqspi->data_completion);
 		xqspi->txbuf = tmpbuf;
@@ -663,28 +678,30 @@
 		goto clk_dis_pclk;
 	}
 
-	/* QSPI controller initializations */
-	zynq_qspi_init_hw(xqspi);
-
 	xqspi->irq = platform_get_irq(pdev, 0);
 	if (xqspi->irq <= 0) {
 		ret = -ENXIO;
-		goto remove_master;
+		goto clk_dis_all;
 	}
 	ret = devm_request_irq(&pdev->dev, xqspi->irq, zynq_qspi_irq,
 			       0, pdev->name, xqspi);
 	if (ret != 0) {
 		ret = -ENXIO;
 		dev_err(&pdev->dev, "request_irq failed\n");
-		goto remove_master;
+		goto clk_dis_all;
 	}
 
 	ret = of_property_read_u32(np, "num-cs",
 				   &num_cs);
-	if (ret < 0)
-		ctlr->num_chipselect = ZYNQ_QSPI_DEFAULT_NUM_CS;
-	else
+	if (ret < 0) {
+		ctlr->num_chipselect = 1;
+	} else if (num_cs > ZYNQ_QSPI_MAX_NUM_CS) {
+		ret = -EINVAL;
+		dev_err(&pdev->dev, "only 2 chip selects are available\n");
+		goto clk_dis_all;
+	} else {
 		ctlr->num_chipselect = num_cs;
+	}
 
 	ctlr->mode_bits =  SPI_RX_DUAL | SPI_RX_QUAD |
 			    SPI_TX_DUAL | SPI_TX_QUAD;
@@ -692,6 +709,10 @@
 	ctlr->setup = zynq_qspi_setup_op;
 	ctlr->max_speed_hz = clk_get_rate(xqspi->refclk) / 2;
 	ctlr->dev.of_node = np;
+
+	/* QSPI controller initializations */
+	zynq_qspi_init_hw(xqspi, ctlr->num_chipselect);
+
 	ret = devm_spi_register_controller(&pdev->dev, ctlr);
 	if (ret) {
 		dev_err(&pdev->dev, "spi_register_master failed\n");
diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c
index 7412a30..1dd2af9 100644
--- a/drivers/spi/spi-zynqmp-gqspi.c
+++ b/drivers/spi/spi-zynqmp-gqspi.c
@@ -21,6 +21,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
+#include <linux/spi/spi-mem.h>
 
 /* Generic QSPI register offsets */
 #define GQSPI_CONFIG_OFST		0x00000100
@@ -135,7 +136,6 @@
 
 #define SPI_AUTOSUSPEND_TIMEOUT		3000
 enum mode_type {GQSPI_MODE_IO, GQSPI_MODE_DMA};
-static const struct zynqmp_eemi_ops *eemi_ops;
 
 /**
  * struct zynqmp_qspi - Defines qspi driver instance
@@ -154,8 +154,10 @@
  * @dma_addr:		DMA address after mapping the kernel buffer
  * @genfifoentry:	Used for storing the genfifoentry instruction.
  * @mode:		Defines the mode in which QSPI is operating
+ * @data_completion:	completion structure
  */
 struct zynqmp_qspi {
+	struct spi_controller *ctlr;
 	void __iomem *regs;
 	struct clk *refclk;
 	struct clk *pclk;
@@ -171,12 +173,15 @@
 	dma_addr_t dma_addr;
 	u32 genfifoentry;
 	enum mode_type mode;
+	struct completion data_completion;
+	struct mutex op_lock;
 };
 
 /**
- * zynqmp_gqspi_read:	For GQSPI controller read operation
+ * zynqmp_gqspi_read - For GQSPI controller read operation
  * @xqspi:	Pointer to the zynqmp_qspi structure
  * @offset:	Offset from where to read
+ * Return:      Value at the offset
  */
 static u32 zynqmp_gqspi_read(struct zynqmp_qspi *xqspi, u32 offset)
 {
@@ -184,7 +189,7 @@
 }
 
 /**
- * zynqmp_gqspi_write:	For GQSPI controller write operation
+ * zynqmp_gqspi_write - For GQSPI controller write operation
  * @xqspi:	Pointer to the zynqmp_qspi structure
  * @offset:	Offset where to write
  * @val:	Value to be written
@@ -196,10 +201,10 @@
 }
 
 /**
- * zynqmp_gqspi_selectslave:	For selection of slave device
+ * zynqmp_gqspi_selectslave - For selection of slave device
  * @instanceptr:	Pointer to the zynqmp_qspi structure
- * @flashcs:	For chip select
- * @flashbus:	To check which bus is selected- upper or lower
+ * @slavecs:	For chip select
+ * @slavebus:	To check which bus is selected- upper or lower
  */
 static void zynqmp_gqspi_selectslave(struct zynqmp_qspi *instanceptr,
 				     u8 slavecs, u8 slavebus)
@@ -243,7 +248,7 @@
 }
 
 /**
- * zynqmp_qspi_init_hw:	Initialize the hardware
+ * zynqmp_qspi_init_hw - Initialize the hardware
  * @xqspi:	Pointer to the zynqmp_qspi structure
  *
  * The default settings of the QSPI controller's configurable parameters on
@@ -323,15 +328,15 @@
 				 GQSPI_SELECT_FLASH_BUS_LOWER);
 	/* Initialize DMA */
 	zynqmp_gqspi_write(xqspi,
-			GQSPI_QSPIDMA_DST_CTRL_OFST,
-			GQSPI_QSPIDMA_DST_CTRL_RESET_VAL);
+			   GQSPI_QSPIDMA_DST_CTRL_OFST,
+			   GQSPI_QSPIDMA_DST_CTRL_RESET_VAL);
 
 	/* Enable the GQSPI */
 	zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, GQSPI_EN_MASK);
 }
 
 /**
- * zynqmp_qspi_copy_read_data:	Copy data to RX buffer
+ * zynqmp_qspi_copy_read_data - Copy data to RX buffer
  * @xqspi:	Pointer to the zynqmp_qspi structure
  * @data:	The variable where data is stored
  * @size:	Number of bytes to be copied from data to RX buffer
@@ -345,41 +350,7 @@
 }
 
 /**
- * zynqmp_prepare_transfer_hardware:	Prepares hardware for transfer.
- * @master:	Pointer to the spi_master structure which provides
- *		information about the controller.
- *
- * This function enables SPI master controller.
- *
- * Return:	0 on success; error value otherwise
- */
-static int zynqmp_prepare_transfer_hardware(struct spi_master *master)
-{
-	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
-
-	zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, GQSPI_EN_MASK);
-	return 0;
-}
-
-/**
- * zynqmp_unprepare_transfer_hardware:	Relaxes hardware after transfer
- * @master:	Pointer to the spi_master structure which provides
- *		information about the controller.
- *
- * This function disables the SPI master controller.
- *
- * Return:	Always 0
- */
-static int zynqmp_unprepare_transfer_hardware(struct spi_master *master)
-{
-	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
-
-	zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, 0x0);
-	return 0;
-}
-
-/**
- * zynqmp_qspi_chipselect:	Select or deselect the chip select line
+ * zynqmp_qspi_chipselect - Select or deselect the chip select line
  * @qspi:	Pointer to the spi_device structure
  * @is_high:	Select(0) or deselect (1) the chip select line
  */
@@ -387,12 +358,14 @@
 {
 	struct zynqmp_qspi *xqspi = spi_master_get_devdata(qspi->master);
 	ulong timeout;
-	u32 genfifoentry = 0x0, statusreg;
+	u32 genfifoentry = 0, statusreg;
 
 	genfifoentry |= GQSPI_GENFIFO_MODE_SPI;
-	genfifoentry |= xqspi->genfifobus;
 
 	if (!is_high) {
+		xqspi->genfifobus = GQSPI_GENFIFO_BUS_LOWER;
+		xqspi->genfifocs = GQSPI_GENFIFO_CS_LOWER;
+		genfifoentry |= xqspi->genfifobus;
 		genfifoentry |= xqspi->genfifocs;
 		genfifoentry |= GQSPI_GENFIFO_CS_SETUP;
 	} else {
@@ -403,8 +376,8 @@
 
 	/* Manually start the generic FIFO command */
 	zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
-			zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST) |
-			GQSPI_CFG_START_GEN_FIFO_MASK);
+			   zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST) |
+			   GQSPI_CFG_START_GEN_FIFO_MASK);
 
 	timeout = jiffies + msecs_to_jiffies(1000);
 
@@ -413,10 +386,9 @@
 		statusreg = zynqmp_gqspi_read(xqspi, GQSPI_ISR_OFST);
 
 		if ((statusreg & GQSPI_ISR_GENFIFOEMPTY_MASK) &&
-			(statusreg & GQSPI_ISR_TXEMPTY_MASK))
+		    (statusreg & GQSPI_ISR_TXEMPTY_MASK))
 			break;
-		else
-			cpu_relax();
+		cpu_relax();
 	} while (!time_after_eq(jiffies, timeout));
 
 	if (time_after_eq(jiffies, timeout))
@@ -424,244 +396,13 @@
 }
 
 /**
- * zynqmp_qspi_setup_transfer:	Configure QSPI controller for specified
- *				transfer
- * @qspi:	Pointer to the spi_device structure
- * @transfer:	Pointer to the spi_transfer structure which provides
- *		information about next transfer setup parameters
- *
- * Sets the operational mode of QSPI controller for the next QSPI transfer and
- * sets the requested clock frequency.
- *
- * Return:	Always 0
- *
- * Note:
- *	If the requested frequency is not an exact match with what can be
- *	obtained using the pre-scalar value, the driver sets the clock
- *	frequency which is lower than the requested frequency (maximum lower)
- *	for the transfer.
- *
- *	If the requested frequency is higher or lower than that is supported
- *	by the QSPI controller the driver will set the highest or lowest
- *	frequency supported by controller.
- */
-static int zynqmp_qspi_setup_transfer(struct spi_device *qspi,
-				      struct spi_transfer *transfer)
-{
-	struct zynqmp_qspi *xqspi = spi_master_get_devdata(qspi->master);
-	ulong clk_rate;
-	u32 config_reg, req_hz, baud_rate_val = 0;
-
-	if (transfer)
-		req_hz = transfer->speed_hz;
-	else
-		req_hz = qspi->max_speed_hz;
-
-	/* Set the clock frequency */
-	/* If req_hz == 0, default to lowest speed */
-	clk_rate = clk_get_rate(xqspi->refclk);
-
-	while ((baud_rate_val < GQSPI_BAUD_DIV_MAX) &&
-	       (clk_rate /
-		(GQSPI_BAUD_DIV_SHIFT << baud_rate_val)) > req_hz)
-		baud_rate_val++;
-
-	config_reg = zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST);
-
-	/* Set the QSPI clock phase and clock polarity */
-	config_reg &= (~GQSPI_CFG_CLK_PHA_MASK) & (~GQSPI_CFG_CLK_POL_MASK);
-
-	if (qspi->mode & SPI_CPHA)
-		config_reg |= GQSPI_CFG_CLK_PHA_MASK;
-	if (qspi->mode & SPI_CPOL)
-		config_reg |= GQSPI_CFG_CLK_POL_MASK;
-
-	config_reg &= ~GQSPI_CFG_BAUD_RATE_DIV_MASK;
-	config_reg |= (baud_rate_val << GQSPI_CFG_BAUD_RATE_DIV_SHIFT);
-	zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST, config_reg);
-	return 0;
-}
-
-/**
- * zynqmp_qspi_setup:	Configure the QSPI controller
- * @qspi:	Pointer to the spi_device structure
- *
- * Sets the operational mode of QSPI controller for the next QSPI transfer,
- * baud rate and divisor value to setup the requested qspi clock.
- *
- * Return:	0 on success; error value otherwise.
- */
-static int zynqmp_qspi_setup(struct spi_device *qspi)
-{
-	if (qspi->master->busy)
-		return -EBUSY;
-	return 0;
-}
-
-/**
- * zynqmp_qspi_filltxfifo:	Fills the TX FIFO as long as there is room in
- *				the FIFO or the bytes required to be
- *				transmitted.
- * @xqspi:	Pointer to the zynqmp_qspi structure
- * @size:	Number of bytes to be copied from TX buffer to TX FIFO
- */
-static void zynqmp_qspi_filltxfifo(struct zynqmp_qspi *xqspi, int size)
-{
-	u32 count = 0, intermediate;
-
-	while ((xqspi->bytes_to_transfer > 0) && (count < size)) {
-		memcpy(&intermediate, xqspi->txbuf, 4);
-		zynqmp_gqspi_write(xqspi, GQSPI_TXD_OFST, intermediate);
-
-		if (xqspi->bytes_to_transfer >= 4) {
-			xqspi->txbuf += 4;
-			xqspi->bytes_to_transfer -= 4;
-		} else {
-			xqspi->txbuf += xqspi->bytes_to_transfer;
-			xqspi->bytes_to_transfer = 0;
-		}
-		count++;
-	}
-}
-
-/**
- * zynqmp_qspi_readrxfifo:	Fills the RX FIFO as long as there is room in
- *				the FIFO.
- * @xqspi:	Pointer to the zynqmp_qspi structure
- * @size:	Number of bytes to be copied from RX buffer to RX FIFO
- */
-static void zynqmp_qspi_readrxfifo(struct zynqmp_qspi *xqspi, u32 size)
-{
-	ulong data;
-	int count = 0;
-
-	while ((count < size) && (xqspi->bytes_to_receive > 0)) {
-		if (xqspi->bytes_to_receive >= 4) {
-			(*(u32 *) xqspi->rxbuf) =
-			zynqmp_gqspi_read(xqspi, GQSPI_RXD_OFST);
-			xqspi->rxbuf += 4;
-			xqspi->bytes_to_receive -= 4;
-			count += 4;
-		} else {
-			data = zynqmp_gqspi_read(xqspi, GQSPI_RXD_OFST);
-			count += xqspi->bytes_to_receive;
-			zynqmp_qspi_copy_read_data(xqspi, data,
-						   xqspi->bytes_to_receive);
-			xqspi->bytes_to_receive = 0;
-		}
-	}
-}
-
-/**
- * zynqmp_process_dma_irq:	Handler for DMA done interrupt of QSPI
- *				controller
- * @xqspi:	zynqmp_qspi instance pointer
- *
- * This function handles DMA interrupt only.
- */
-static void zynqmp_process_dma_irq(struct zynqmp_qspi *xqspi)
-{
-	u32 config_reg, genfifoentry;
-
-	dma_unmap_single(xqspi->dev, xqspi->dma_addr,
-				xqspi->dma_rx_bytes, DMA_FROM_DEVICE);
-	xqspi->rxbuf += xqspi->dma_rx_bytes;
-	xqspi->bytes_to_receive -= xqspi->dma_rx_bytes;
-	xqspi->dma_rx_bytes = 0;
-
-	/* Disabling the DMA interrupts */
-	zynqmp_gqspi_write(xqspi, GQSPI_QSPIDMA_DST_I_DIS_OFST,
-					GQSPI_QSPIDMA_DST_I_EN_DONE_MASK);
-
-	if (xqspi->bytes_to_receive > 0) {
-		/* Switch to IO mode,for remaining bytes to receive */
-		config_reg = zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST);
-		config_reg &= ~GQSPI_CFG_MODE_EN_MASK;
-		zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST, config_reg);
-
-		/* Initiate the transfer of remaining bytes */
-		genfifoentry = xqspi->genfifoentry;
-		genfifoentry |= xqspi->bytes_to_receive;
-		zynqmp_gqspi_write(xqspi, GQSPI_GEN_FIFO_OFST, genfifoentry);
-
-		/* Dummy generic FIFO entry */
-		zynqmp_gqspi_write(xqspi, GQSPI_GEN_FIFO_OFST, 0x0);
-
-		/* Manual start */
-		zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
-			(zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST) |
-			GQSPI_CFG_START_GEN_FIFO_MASK));
-
-		/* Enable the RX interrupts for IO mode */
-		zynqmp_gqspi_write(xqspi, GQSPI_IER_OFST,
-				GQSPI_IER_GENFIFOEMPTY_MASK |
-				GQSPI_IER_RXNEMPTY_MASK |
-				GQSPI_IER_RXEMPTY_MASK);
-	}
-}
-
-/**
- * zynqmp_qspi_irq:	Interrupt service routine of the QSPI controller
- * @irq:	IRQ number
- * @dev_id:	Pointer to the xqspi structure
- *
- * This function handles TX empty only.
- * On TX empty interrupt this function reads the received data from RX FIFO
- * and fills the TX FIFO if there is any data remaining to be transferred.
- *
- * Return:	IRQ_HANDLED when interrupt is handled
- *		IRQ_NONE otherwise.
- */
-static irqreturn_t zynqmp_qspi_irq(int irq, void *dev_id)
-{
-	struct spi_master *master = dev_id;
-	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
-	int ret = IRQ_NONE;
-	u32 status, mask, dma_status = 0;
-
-	status = zynqmp_gqspi_read(xqspi, GQSPI_ISR_OFST);
-	zynqmp_gqspi_write(xqspi, GQSPI_ISR_OFST, status);
-	mask = (status & ~(zynqmp_gqspi_read(xqspi, GQSPI_IMASK_OFST)));
-
-	/* Read and clear DMA status */
-	if (xqspi->mode == GQSPI_MODE_DMA) {
-		dma_status =
-			zynqmp_gqspi_read(xqspi, GQSPI_QSPIDMA_DST_I_STS_OFST);
-		zynqmp_gqspi_write(xqspi, GQSPI_QSPIDMA_DST_I_STS_OFST,
-								dma_status);
-	}
-
-	if (mask & GQSPI_ISR_TXNOT_FULL_MASK) {
-		zynqmp_qspi_filltxfifo(xqspi, GQSPI_TX_FIFO_FILL);
-		ret = IRQ_HANDLED;
-	}
-
-	if (dma_status & GQSPI_QSPIDMA_DST_I_STS_DONE_MASK) {
-		zynqmp_process_dma_irq(xqspi);
-		ret = IRQ_HANDLED;
-	} else if (!(mask & GQSPI_IER_RXEMPTY_MASK) &&
-			(mask & GQSPI_IER_GENFIFOEMPTY_MASK)) {
-		zynqmp_qspi_readrxfifo(xqspi, GQSPI_RX_FIFO_FILL);
-		ret = IRQ_HANDLED;
-	}
-
-	if ((xqspi->bytes_to_receive == 0) && (xqspi->bytes_to_transfer == 0)
-			&& ((status & GQSPI_IRQ_MASK) == GQSPI_IRQ_MASK)) {
-		zynqmp_gqspi_write(xqspi, GQSPI_IDR_OFST, GQSPI_ISR_IDR_MASK);
-		spi_finalize_current_transfer(master);
-		ret = IRQ_HANDLED;
-	}
-	return ret;
-}
-
-/**
- * zynqmp_qspi_selectspimode:	Selects SPI mode - x1 or x2 or x4.
+ * zynqmp_qspi_selectspimode - Selects SPI mode - x1 or x2 or x4.
  * @xqspi:	xqspi is a pointer to the GQSPI instance
  * @spimode:	spimode - SPI or DUAL or QUAD.
  * Return:	Mask to set desired SPI mode in GENFIFO entry.
  */
 static inline u32 zynqmp_qspi_selectspimode(struct zynqmp_qspi *xqspi,
-						u8 spimode)
+					    u8 spimode)
 {
 	u32 mask = 0;
 
@@ -683,142 +424,166 @@
 }
 
 /**
- * zynq_qspi_setuprxdma:	This function sets up the RX DMA operation
- * @xqspi:	xqspi is a pointer to the GQSPI instance.
- */
-static void zynq_qspi_setuprxdma(struct zynqmp_qspi *xqspi)
-{
-	u32 rx_bytes, rx_rem, config_reg;
-	dma_addr_t addr;
-	u64 dma_align =  (u64)(uintptr_t)xqspi->rxbuf;
-
-	if ((xqspi->bytes_to_receive < 8) ||
-		((dma_align & GQSPI_DMA_UNALIGN) != 0x0)) {
-		/* Setting to IO mode */
-		config_reg = zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST);
-		config_reg &= ~GQSPI_CFG_MODE_EN_MASK;
-		zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST, config_reg);
-		xqspi->mode = GQSPI_MODE_IO;
-		xqspi->dma_rx_bytes = 0;
-		return;
-	}
-
-	rx_rem = xqspi->bytes_to_receive % 4;
-	rx_bytes = (xqspi->bytes_to_receive - rx_rem);
-
-	addr = dma_map_single(xqspi->dev, (void *)xqspi->rxbuf,
-						rx_bytes, DMA_FROM_DEVICE);
-	if (dma_mapping_error(xqspi->dev, addr))
-		dev_err(xqspi->dev, "ERR:rxdma:memory not mapped\n");
-
-	xqspi->dma_rx_bytes = rx_bytes;
-	xqspi->dma_addr = addr;
-	zynqmp_gqspi_write(xqspi, GQSPI_QSPIDMA_DST_ADDR_OFST,
-				(u32)(addr & 0xffffffff));
-	addr = ((addr >> 16) >> 16);
-	zynqmp_gqspi_write(xqspi, GQSPI_QSPIDMA_DST_ADDR_MSB_OFST,
-				((u32)addr) & 0xfff);
-
-	/* Enabling the DMA mode */
-	config_reg = zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST);
-	config_reg &= ~GQSPI_CFG_MODE_EN_MASK;
-	config_reg |= GQSPI_CFG_MODE_EN_DMA_MASK;
-	zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST, config_reg);
-
-	/* Switch to DMA mode */
-	xqspi->mode = GQSPI_MODE_DMA;
-
-	/* Write the number of bytes to transfer */
-	zynqmp_gqspi_write(xqspi, GQSPI_QSPIDMA_DST_SIZE_OFST, rx_bytes);
-}
-
-/**
- * zynqmp_qspi_txrxsetup:	This function checks the TX/RX buffers in
- *				the transfer and sets up the GENFIFO entries,
- *				TX FIFO as required.
- * @xqspi:	xqspi is a pointer to the GQSPI instance.
- * @transfer:	It is a pointer to the structure containing transfer data.
- * @genfifoentry:	genfifoentry is pointer to the variable in which
- *			GENFIFO	mask is returned to calling function
- */
-static void zynqmp_qspi_txrxsetup(struct zynqmp_qspi *xqspi,
-				  struct spi_transfer *transfer,
-				  u32 *genfifoentry)
-{
-	u32 config_reg;
-
-	/* Transmit */
-	if ((xqspi->txbuf != NULL) && (xqspi->rxbuf == NULL)) {
-		/* Setup data to be TXed */
-		*genfifoentry &= ~GQSPI_GENFIFO_RX;
-		*genfifoentry |= GQSPI_GENFIFO_DATA_XFER;
-		*genfifoentry |= GQSPI_GENFIFO_TX;
-		*genfifoentry |=
-			zynqmp_qspi_selectspimode(xqspi, transfer->tx_nbits);
-		xqspi->bytes_to_transfer = transfer->len;
-		if (xqspi->mode == GQSPI_MODE_DMA) {
-			config_reg = zynqmp_gqspi_read(xqspi,
-							GQSPI_CONFIG_OFST);
-			config_reg &= ~GQSPI_CFG_MODE_EN_MASK;
-			zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
-								config_reg);
-			xqspi->mode = GQSPI_MODE_IO;
-		}
-		zynqmp_qspi_filltxfifo(xqspi, GQSPI_TXD_DEPTH);
-		/* Discard RX data */
-		xqspi->bytes_to_receive = 0;
-	} else if ((xqspi->txbuf == NULL) && (xqspi->rxbuf != NULL)) {
-		/* Receive */
-
-		/* TX auto fill */
-		*genfifoentry &= ~GQSPI_GENFIFO_TX;
-		/* Setup RX */
-		*genfifoentry |= GQSPI_GENFIFO_DATA_XFER;
-		*genfifoentry |= GQSPI_GENFIFO_RX;
-		*genfifoentry |=
-			zynqmp_qspi_selectspimode(xqspi, transfer->rx_nbits);
-		xqspi->bytes_to_transfer = 0;
-		xqspi->bytes_to_receive = transfer->len;
-		zynq_qspi_setuprxdma(xqspi);
-	}
-}
-
-/**
- * zynqmp_qspi_start_transfer:	Initiates the QSPI transfer
- * @master:	Pointer to the spi_master structure which provides
- *		information about the controller.
+ * zynqmp_qspi_config_op - Configure QSPI controller for specified
+ *				transfer
+ * @xqspi:	Pointer to the zynqmp_qspi structure
  * @qspi:	Pointer to the spi_device structure
- * @transfer:	Pointer to the spi_transfer structure which provide information
- *		about next transfer parameters
  *
- * This function fills the TX FIFO, starts the QSPI transfer, and waits for the
- * transfer to be completed.
+ * Sets the operational mode of QSPI controller for the next QSPI transfer and
+ * sets the requested clock frequency.
  *
- * Return:	Number of bytes transferred in the last transfer
+ * Return:	Always 0
+ *
+ * Note:
+ *	If the requested frequency is not an exact match with what can be
+ *	obtained using the pre-scalar value, the driver sets the clock
+ *	frequency which is lower than the requested frequency (maximum lower)
+ *	for the transfer.
+ *
+ *	If the requested frequency is higher or lower than that is supported
+ *	by the QSPI controller the driver will set the highest or lowest
+ *	frequency supported by controller.
  */
-static int zynqmp_qspi_start_transfer(struct spi_master *master,
-				      struct spi_device *qspi,
-				      struct spi_transfer *transfer)
+static int zynqmp_qspi_config_op(struct zynqmp_qspi *xqspi,
+				 struct spi_device *qspi)
 {
-	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
-	u32 genfifoentry = 0x0, transfer_len;
+	ulong clk_rate;
+	u32 config_reg, baud_rate_val = 0;
 
-	xqspi->txbuf = transfer->tx_buf;
-	xqspi->rxbuf = transfer->rx_buf;
+	/* Set the clock frequency */
+	/* If req_hz == 0, default to lowest speed */
+	clk_rate = clk_get_rate(xqspi->refclk);
 
-	zynqmp_qspi_setup_transfer(qspi, transfer);
+	while ((baud_rate_val < GQSPI_BAUD_DIV_MAX) &&
+	       (clk_rate /
+		(GQSPI_BAUD_DIV_SHIFT << baud_rate_val)) > qspi->max_speed_hz)
+		baud_rate_val++;
 
-	genfifoentry |= xqspi->genfifocs;
-	genfifoentry |= xqspi->genfifobus;
+	config_reg = zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST);
 
-	zynqmp_qspi_txrxsetup(xqspi, transfer, &genfifoentry);
+	/* Set the QSPI clock phase and clock polarity */
+	config_reg &= (~GQSPI_CFG_CLK_PHA_MASK) & (~GQSPI_CFG_CLK_POL_MASK);
 
-	if (xqspi->mode == GQSPI_MODE_DMA)
-		transfer_len = xqspi->dma_rx_bytes;
-	else
-		transfer_len = transfer->len;
+	if (qspi->mode & SPI_CPHA)
+		config_reg |= GQSPI_CFG_CLK_PHA_MASK;
+	if (qspi->mode & SPI_CPOL)
+		config_reg |= GQSPI_CFG_CLK_POL_MASK;
 
+	config_reg &= ~GQSPI_CFG_BAUD_RATE_DIV_MASK;
+	config_reg |= (baud_rate_val << GQSPI_CFG_BAUD_RATE_DIV_SHIFT);
+	zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST, config_reg);
+	return 0;
+}
+
+/**
+ * zynqmp_qspi_setup_op - Configure the QSPI controller
+ * @qspi:	Pointer to the spi_device structure
+ *
+ * Sets the operational mode of QSPI controller for the next QSPI transfer,
+ * baud rate and divisor value to setup the requested qspi clock.
+ *
+ * Return:	0 on success; error value otherwise.
+ */
+static int zynqmp_qspi_setup_op(struct spi_device *qspi)
+{
+	struct spi_controller *ctlr = qspi->master;
+	struct zynqmp_qspi *xqspi = spi_controller_get_devdata(ctlr);
+
+	if (ctlr->busy)
+		return -EBUSY;
+
+	zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, GQSPI_EN_MASK);
+
+	return 0;
+}
+
+/**
+ * zynqmp_qspi_filltxfifo - Fills the TX FIFO as long as there is room in
+ *				the FIFO or the bytes required to be
+ *				transmitted.
+ * @xqspi:	Pointer to the zynqmp_qspi structure
+ * @size:	Number of bytes to be copied from TX buffer to TX FIFO
+ */
+static void zynqmp_qspi_filltxfifo(struct zynqmp_qspi *xqspi, int size)
+{
+	u32 count = 0, intermediate;
+
+	while ((xqspi->bytes_to_transfer > 0) && (count < size) && (xqspi->txbuf)) {
+		memcpy(&intermediate, xqspi->txbuf, 4);
+		zynqmp_gqspi_write(xqspi, GQSPI_TXD_OFST, intermediate);
+
+		if (xqspi->bytes_to_transfer >= 4) {
+			xqspi->txbuf += 4;
+			xqspi->bytes_to_transfer -= 4;
+		} else {
+			xqspi->txbuf += xqspi->bytes_to_transfer;
+			xqspi->bytes_to_transfer = 0;
+		}
+		count++;
+	}
+}
+
+/**
+ * zynqmp_qspi_readrxfifo - Fills the RX FIFO as long as there is room in
+ *				the FIFO.
+ * @xqspi:	Pointer to the zynqmp_qspi structure
+ * @size:	Number of bytes to be copied from RX buffer to RX FIFO
+ */
+static void zynqmp_qspi_readrxfifo(struct zynqmp_qspi *xqspi, u32 size)
+{
+	ulong data;
+	int count = 0;
+
+	while ((count < size) && (xqspi->bytes_to_receive > 0)) {
+		if (xqspi->bytes_to_receive >= 4) {
+			(*(u32 *)xqspi->rxbuf) =
+			zynqmp_gqspi_read(xqspi, GQSPI_RXD_OFST);
+			xqspi->rxbuf += 4;
+			xqspi->bytes_to_receive -= 4;
+			count += 4;
+		} else {
+			data = zynqmp_gqspi_read(xqspi, GQSPI_RXD_OFST);
+			count += xqspi->bytes_to_receive;
+			zynqmp_qspi_copy_read_data(xqspi, data,
+						   xqspi->bytes_to_receive);
+			xqspi->bytes_to_receive = 0;
+		}
+	}
+}
+
+/**
+ * zynqmp_qspi_fillgenfifo - Fills the GENFIFO.
+ * @xqspi:	Pointer to the zynqmp_qspi structure
+ * @nbits:	Transfer/Receive buswidth.
+ * @genfifoentry:       Variable in which GENFIFO mask is saved
+ */
+static void zynqmp_qspi_fillgenfifo(struct zynqmp_qspi *xqspi, u8 nbits,
+				    u32 genfifoentry)
+{
+	u32 transfer_len = 0;
+
+	if (xqspi->txbuf) {
+		genfifoentry &= ~GQSPI_GENFIFO_RX;
+		genfifoentry |= GQSPI_GENFIFO_DATA_XFER;
+		genfifoentry |= GQSPI_GENFIFO_TX;
+		transfer_len = xqspi->bytes_to_transfer;
+	} else if (xqspi->rxbuf) {
+		genfifoentry &= ~GQSPI_GENFIFO_TX;
+		genfifoentry |= GQSPI_GENFIFO_DATA_XFER;
+		genfifoentry |= GQSPI_GENFIFO_RX;
+		if (xqspi->mode == GQSPI_MODE_DMA)
+			transfer_len = xqspi->dma_rx_bytes;
+		else
+			transfer_len = xqspi->bytes_to_receive;
+	} else {
+		/* Sending dummy circles here */
+		genfifoentry &= ~(GQSPI_GENFIFO_TX | GQSPI_GENFIFO_RX);
+		genfifoentry |= GQSPI_GENFIFO_DATA_XFER;
+		transfer_len = xqspi->bytes_to_transfer;
+	}
+	genfifoentry |= zynqmp_qspi_selectspimode(xqspi, nbits);
 	xqspi->genfifoentry = genfifoentry;
+
 	if ((transfer_len) < GQSPI_GENFIFO_IMM_DATA_MASK) {
 		genfifoentry &= ~GQSPI_GENFIFO_IMM_DATA_MASK;
 		genfifoentry |= transfer_len;
@@ -836,7 +601,7 @@
 			while (tempcount != 0) {
 				if (tempcount & GQSPI_GENFIFO_EXP_START) {
 					genfifoentry &=
-					    ~GQSPI_GENFIFO_IMM_DATA_MASK;
+						~GQSPI_GENFIFO_IMM_DATA_MASK;
 					genfifoentry |= exponent;
 					zynqmp_gqspi_write(xqspi,
 							   GQSPI_GEN_FIFO_OFST,
@@ -849,51 +614,223 @@
 		if (imm_data != 0) {
 			genfifoentry &= ~GQSPI_GENFIFO_EXP;
 			genfifoentry &= ~GQSPI_GENFIFO_IMM_DATA_MASK;
-			genfifoentry |= (u8) (imm_data & 0xFF);
-			zynqmp_gqspi_write(xqspi,
-					   GQSPI_GEN_FIFO_OFST, genfifoentry);
+			genfifoentry |= (u8)(imm_data & 0xFF);
+			zynqmp_gqspi_write(xqspi, GQSPI_GEN_FIFO_OFST,
+					   genfifoentry);
 		}
 	}
-
-	if ((xqspi->mode == GQSPI_MODE_IO) &&
-			(xqspi->rxbuf != NULL)) {
+	if (xqspi->mode == GQSPI_MODE_IO && xqspi->rxbuf) {
 		/* Dummy generic FIFO entry */
 		zynqmp_gqspi_write(xqspi, GQSPI_GEN_FIFO_OFST, 0x0);
 	}
-
-	/* Since we are using manual mode */
-	zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
-			   zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST) |
-			   GQSPI_CFG_START_GEN_FIFO_MASK);
-
-	if (xqspi->txbuf != NULL)
-		/* Enable interrupts for TX */
-		zynqmp_gqspi_write(xqspi, GQSPI_IER_OFST,
-				   GQSPI_IER_TXEMPTY_MASK |
-					GQSPI_IER_GENFIFOEMPTY_MASK |
-					GQSPI_IER_TXNOT_FULL_MASK);
-
-	if (xqspi->rxbuf != NULL) {
-		/* Enable interrupts for RX */
-		if (xqspi->mode == GQSPI_MODE_DMA) {
-			/* Enable DMA interrupts */
-			zynqmp_gqspi_write(xqspi,
-					GQSPI_QSPIDMA_DST_I_EN_OFST,
-					GQSPI_QSPIDMA_DST_I_EN_DONE_MASK);
-		} else {
-			zynqmp_gqspi_write(xqspi, GQSPI_IER_OFST,
-					GQSPI_IER_GENFIFOEMPTY_MASK |
-					GQSPI_IER_RXNEMPTY_MASK |
-					GQSPI_IER_RXEMPTY_MASK);
-		}
-	}
-
-	return transfer->len;
 }
 
 /**
- * zynqmp_qspi_suspend:	Suspend method for the QSPI driver
- * @_dev:	Address of the platform_device structure
+ * zynqmp_process_dma_irq - Handler for DMA done interrupt of QSPI
+ *				controller
+ * @xqspi:	zynqmp_qspi instance pointer
+ *
+ * This function handles DMA interrupt only.
+ */
+static void zynqmp_process_dma_irq(struct zynqmp_qspi *xqspi)
+{
+	u32 config_reg, genfifoentry;
+
+	dma_unmap_single(xqspi->dev, xqspi->dma_addr,
+			 xqspi->dma_rx_bytes, DMA_FROM_DEVICE);
+	xqspi->rxbuf += xqspi->dma_rx_bytes;
+	xqspi->bytes_to_receive -= xqspi->dma_rx_bytes;
+	xqspi->dma_rx_bytes = 0;
+
+	/* Disabling the DMA interrupts */
+	zynqmp_gqspi_write(xqspi, GQSPI_QSPIDMA_DST_I_DIS_OFST,
+			   GQSPI_QSPIDMA_DST_I_EN_DONE_MASK);
+
+	if (xqspi->bytes_to_receive > 0) {
+		/* Switch to IO mode,for remaining bytes to receive */
+		config_reg = zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST);
+		config_reg &= ~GQSPI_CFG_MODE_EN_MASK;
+		zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST, config_reg);
+
+		/* Initiate the transfer of remaining bytes */
+		genfifoentry = xqspi->genfifoentry;
+		genfifoentry |= xqspi->bytes_to_receive;
+		zynqmp_gqspi_write(xqspi, GQSPI_GEN_FIFO_OFST, genfifoentry);
+
+		/* Dummy generic FIFO entry */
+		zynqmp_gqspi_write(xqspi, GQSPI_GEN_FIFO_OFST, 0x0);
+
+		/* Manual start */
+		zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
+				   (zynqmp_gqspi_read(xqspi,
+						      GQSPI_CONFIG_OFST) |
+				   GQSPI_CFG_START_GEN_FIFO_MASK));
+
+		/* Enable the RX interrupts for IO mode */
+		zynqmp_gqspi_write(xqspi, GQSPI_IER_OFST,
+				   GQSPI_IER_GENFIFOEMPTY_MASK |
+				   GQSPI_IER_RXNEMPTY_MASK |
+				   GQSPI_IER_RXEMPTY_MASK);
+	}
+}
+
+/**
+ * zynqmp_qspi_irq - Interrupt service routine of the QSPI controller
+ * @irq:	IRQ number
+ * @dev_id:	Pointer to the xqspi structure
+ *
+ * This function handles TX empty only.
+ * On TX empty interrupt this function reads the received data from RX FIFO
+ * and fills the TX FIFO if there is any data remaining to be transferred.
+ *
+ * Return:	IRQ_HANDLED when interrupt is handled
+ *		IRQ_NONE otherwise.
+ */
+static irqreturn_t zynqmp_qspi_irq(int irq, void *dev_id)
+{
+	struct zynqmp_qspi *xqspi = (struct zynqmp_qspi *)dev_id;
+	irqreturn_t ret = IRQ_NONE;
+	u32 status, mask, dma_status = 0;
+
+	status = zynqmp_gqspi_read(xqspi, GQSPI_ISR_OFST);
+	zynqmp_gqspi_write(xqspi, GQSPI_ISR_OFST, status);
+	mask = (status & ~(zynqmp_gqspi_read(xqspi, GQSPI_IMASK_OFST)));
+
+	/* Read and clear DMA status */
+	if (xqspi->mode == GQSPI_MODE_DMA) {
+		dma_status =
+			zynqmp_gqspi_read(xqspi, GQSPI_QSPIDMA_DST_I_STS_OFST);
+		zynqmp_gqspi_write(xqspi, GQSPI_QSPIDMA_DST_I_STS_OFST,
+				   dma_status);
+	}
+
+	if (mask & GQSPI_ISR_TXNOT_FULL_MASK) {
+		zynqmp_qspi_filltxfifo(xqspi, GQSPI_TX_FIFO_FILL);
+		ret = IRQ_HANDLED;
+	}
+
+	if (dma_status & GQSPI_QSPIDMA_DST_I_STS_DONE_MASK) {
+		zynqmp_process_dma_irq(xqspi);
+		ret = IRQ_HANDLED;
+	} else if (!(mask & GQSPI_IER_RXEMPTY_MASK) &&
+			(mask & GQSPI_IER_GENFIFOEMPTY_MASK)) {
+		zynqmp_qspi_readrxfifo(xqspi, GQSPI_RX_FIFO_FILL);
+		ret = IRQ_HANDLED;
+	}
+
+	if (xqspi->bytes_to_receive == 0 && xqspi->bytes_to_transfer == 0 &&
+	    ((status & GQSPI_IRQ_MASK) == GQSPI_IRQ_MASK)) {
+		zynqmp_gqspi_write(xqspi, GQSPI_IDR_OFST, GQSPI_ISR_IDR_MASK);
+		complete(&xqspi->data_completion);
+		ret = IRQ_HANDLED;
+	}
+	return ret;
+}
+
+/**
+ * zynqmp_qspi_setuprxdma - This function sets up the RX DMA operation
+ * @xqspi:	xqspi is a pointer to the GQSPI instance.
+ */
+static int zynqmp_qspi_setuprxdma(struct zynqmp_qspi *xqspi)
+{
+	u32 rx_bytes, rx_rem, config_reg;
+	dma_addr_t addr;
+	u64 dma_align =  (u64)(uintptr_t)xqspi->rxbuf;
+
+	if (xqspi->bytes_to_receive < 8 ||
+	    ((dma_align & GQSPI_DMA_UNALIGN) != 0x0)) {
+		/* Setting to IO mode */
+		config_reg = zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST);
+		config_reg &= ~GQSPI_CFG_MODE_EN_MASK;
+		zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST, config_reg);
+		xqspi->mode = GQSPI_MODE_IO;
+		xqspi->dma_rx_bytes = 0;
+		return 0;
+	}
+
+	rx_rem = xqspi->bytes_to_receive % 4;
+	rx_bytes = (xqspi->bytes_to_receive - rx_rem);
+
+	addr = dma_map_single(xqspi->dev, (void *)xqspi->rxbuf,
+			      rx_bytes, DMA_FROM_DEVICE);
+	if (dma_mapping_error(xqspi->dev, addr)) {
+		dev_err(xqspi->dev, "ERR:rxdma:memory not mapped\n");
+		return -ENOMEM;
+	}
+
+	xqspi->dma_rx_bytes = rx_bytes;
+	xqspi->dma_addr = addr;
+	zynqmp_gqspi_write(xqspi, GQSPI_QSPIDMA_DST_ADDR_OFST,
+			   (u32)(addr & 0xffffffff));
+	addr = ((addr >> 16) >> 16);
+	zynqmp_gqspi_write(xqspi, GQSPI_QSPIDMA_DST_ADDR_MSB_OFST,
+			   ((u32)addr) & 0xfff);
+
+	/* Enabling the DMA mode */
+	config_reg = zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST);
+	config_reg &= ~GQSPI_CFG_MODE_EN_MASK;
+	config_reg |= GQSPI_CFG_MODE_EN_DMA_MASK;
+	zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST, config_reg);
+
+	/* Switch to DMA mode */
+	xqspi->mode = GQSPI_MODE_DMA;
+
+	/* Write the number of bytes to transfer */
+	zynqmp_gqspi_write(xqspi, GQSPI_QSPIDMA_DST_SIZE_OFST, rx_bytes);
+
+	return 0;
+}
+
+/**
+ * zynqmp_qspi_write_op - This function sets up the GENFIFO entries,
+ *			TX FIFO, and fills the TX FIFO with as many
+ *			bytes as possible.
+ * @xqspi:	Pointer to the GQSPI instance.
+ * @tx_nbits:	Transfer buswidth.
+ * @genfifoentry:	Variable in which GENFIFO mask is returned
+ *			to calling function
+ */
+static void zynqmp_qspi_write_op(struct zynqmp_qspi *xqspi, u8 tx_nbits,
+				 u32 genfifoentry)
+{
+	u32 config_reg;
+
+	zynqmp_qspi_fillgenfifo(xqspi, tx_nbits, genfifoentry);
+	zynqmp_qspi_filltxfifo(xqspi, GQSPI_TXD_DEPTH);
+	if (xqspi->mode == GQSPI_MODE_DMA) {
+		config_reg = zynqmp_gqspi_read(xqspi,
+					       GQSPI_CONFIG_OFST);
+		config_reg &= ~GQSPI_CFG_MODE_EN_MASK;
+		zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
+				   config_reg);
+		xqspi->mode = GQSPI_MODE_IO;
+	}
+}
+
+/**
+ * zynqmp_qspi_read_op - This function sets up the GENFIFO entries and
+ *				RX DMA operation.
+ * @xqspi:	xqspi is a pointer to the GQSPI instance.
+ * @rx_nbits:	Receive buswidth.
+ * @genfifoentry:	genfifoentry is pointer to the variable in which
+ *			GENFIFO	mask is returned to calling function
+ */
+static int zynqmp_qspi_read_op(struct zynqmp_qspi *xqspi, u8 rx_nbits,
+				u32 genfifoentry)
+{
+	int ret;
+
+	ret = zynqmp_qspi_setuprxdma(xqspi);
+	if (ret)
+		return ret;
+	zynqmp_qspi_fillgenfifo(xqspi, rx_nbits, genfifoentry);
+
+	return 0;
+}
+
+/**
+ * zynqmp_qspi_suspend - Suspend method for the QSPI driver
+ * @dev:	Address of the platform_device structure
  *
  * This function stops the QSPI driver queue and disables the QSPI controller
  *
@@ -901,17 +838,21 @@
  */
 static int __maybe_unused zynqmp_qspi_suspend(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
+	struct zynqmp_qspi *xqspi = dev_get_drvdata(dev);
+	struct spi_controller *ctlr = xqspi->ctlr;
+	int ret;
 
-	spi_master_suspend(master);
+	ret = spi_controller_suspend(ctlr);
+	if (ret)
+		return ret;
 
-	zynqmp_unprepare_transfer_hardware(master);
+	zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, 0x0);
 
 	return 0;
 }
 
 /**
- * zynqmp_qspi_resume:	Resume method for the QSPI driver
+ * zynqmp_qspi_resume - Resume method for the QSPI driver
  * @dev:	Address of the platform_device structure
  *
  * The function starts the QSPI driver queue and initializes the QSPI
@@ -921,27 +862,13 @@
  */
 static int __maybe_unused zynqmp_qspi_resume(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
-	int ret = 0;
+	struct zynqmp_qspi *xqspi = dev_get_drvdata(dev);
+	struct spi_controller *ctlr = xqspi->ctlr;
 
-	ret = clk_enable(xqspi->pclk);
-	if (ret) {
-		dev_err(dev, "Cannot enable APB clock.\n");
-		return ret;
-	}
+	zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, GQSPI_EN_MASK);
 
-	ret = clk_enable(xqspi->refclk);
-	if (ret) {
-		dev_err(dev, "Cannot enable device clock.\n");
-		clk_disable(xqspi->pclk);
-		return ret;
-	}
+	spi_controller_resume(ctlr);
 
-	spi_master_resume(master);
-
-	clk_disable(xqspi->refclk);
-	clk_disable(xqspi->pclk);
 	return 0;
 }
 
@@ -955,11 +882,10 @@
  */
 static int __maybe_unused zynqmp_runtime_suspend(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
+	struct zynqmp_qspi *xqspi = dev_get_drvdata(dev);
 
-	clk_disable(xqspi->refclk);
-	clk_disable(xqspi->pclk);
+	clk_disable_unprepare(xqspi->refclk);
+	clk_disable_unprepare(xqspi->pclk);
 
 	return 0;
 }
@@ -974,34 +900,190 @@
  */
 static int __maybe_unused zynqmp_runtime_resume(struct device *dev)
 {
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
+	struct zynqmp_qspi *xqspi = dev_get_drvdata(dev);
 	int ret;
 
-	ret = clk_enable(xqspi->pclk);
+	ret = clk_prepare_enable(xqspi->pclk);
 	if (ret) {
 		dev_err(dev, "Cannot enable APB clock.\n");
 		return ret;
 	}
 
-	ret = clk_enable(xqspi->refclk);
+	ret = clk_prepare_enable(xqspi->refclk);
 	if (ret) {
 		dev_err(dev, "Cannot enable device clock.\n");
-		clk_disable(xqspi->pclk);
+		clk_disable_unprepare(xqspi->pclk);
 		return ret;
 	}
 
 	return 0;
 }
 
+/**
+ * zynqmp_qspi_exec_op() - Initiates the QSPI transfer
+ * @mem: The SPI memory
+ * @op: The memory operation to execute
+ *
+ * Executes a memory operation.
+ *
+ * This function first selects the chip and starts the memory operation.
+ *
+ * Return: 0 in case of success, a negative error code otherwise.
+ */
+static int zynqmp_qspi_exec_op(struct spi_mem *mem,
+			       const struct spi_mem_op *op)
+{
+	struct zynqmp_qspi *xqspi = spi_controller_get_devdata
+				    (mem->spi->master);
+	int err = 0, i;
+	u32 genfifoentry = 0;
+	u16 opcode = op->cmd.opcode;
+	u64 opaddr;
+
+	dev_dbg(xqspi->dev, "cmd:%#x mode:%d.%d.%d.%d\n",
+		op->cmd.opcode, op->cmd.buswidth, op->addr.buswidth,
+		op->dummy.buswidth, op->data.buswidth);
+
+	mutex_lock(&xqspi->op_lock);
+	zynqmp_qspi_config_op(xqspi, mem->spi);
+	zynqmp_qspi_chipselect(mem->spi, false);
+	genfifoentry |= xqspi->genfifocs;
+	genfifoentry |= xqspi->genfifobus;
+
+	if (op->cmd.opcode) {
+		reinit_completion(&xqspi->data_completion);
+		xqspi->txbuf = &opcode;
+		xqspi->rxbuf = NULL;
+		xqspi->bytes_to_transfer = op->cmd.nbytes;
+		xqspi->bytes_to_receive = 0;
+		zynqmp_qspi_write_op(xqspi, op->cmd.buswidth, genfifoentry);
+		zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
+				   zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST) |
+				   GQSPI_CFG_START_GEN_FIFO_MASK);
+		zynqmp_gqspi_write(xqspi, GQSPI_IER_OFST,
+				   GQSPI_IER_GENFIFOEMPTY_MASK |
+				   GQSPI_IER_TXNOT_FULL_MASK);
+		if (!wait_for_completion_timeout
+		    (&xqspi->data_completion, msecs_to_jiffies(1000))) {
+			err = -ETIMEDOUT;
+			goto return_err;
+		}
+	}
+
+	if (op->addr.nbytes) {
+		xqspi->txbuf = &opaddr;
+		for (i = 0; i < op->addr.nbytes; i++) {
+			*(((u8 *)xqspi->txbuf) + i) = op->addr.val >>
+					(8 * (op->addr.nbytes - i - 1));
+		}
+
+		reinit_completion(&xqspi->data_completion);
+		xqspi->rxbuf = NULL;
+		xqspi->bytes_to_transfer = op->addr.nbytes;
+		xqspi->bytes_to_receive = 0;
+		zynqmp_qspi_write_op(xqspi, op->addr.buswidth, genfifoentry);
+		zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
+				   zynqmp_gqspi_read(xqspi,
+						     GQSPI_CONFIG_OFST) |
+				   GQSPI_CFG_START_GEN_FIFO_MASK);
+		zynqmp_gqspi_write(xqspi, GQSPI_IER_OFST,
+				   GQSPI_IER_TXEMPTY_MASK |
+				   GQSPI_IER_GENFIFOEMPTY_MASK |
+				   GQSPI_IER_TXNOT_FULL_MASK);
+		if (!wait_for_completion_timeout
+		    (&xqspi->data_completion, msecs_to_jiffies(1000))) {
+			err = -ETIMEDOUT;
+			goto return_err;
+		}
+	}
+
+	if (op->dummy.nbytes) {
+		xqspi->txbuf = NULL;
+		xqspi->rxbuf = NULL;
+		/*
+		 * xqspi->bytes_to_transfer here represents the dummy circles
+		 * which need to be sent.
+		 */
+		xqspi->bytes_to_transfer = op->dummy.nbytes * 8 / op->dummy.buswidth;
+		xqspi->bytes_to_receive = 0;
+		/*
+		 * Using op->data.buswidth instead of op->dummy.buswidth here because
+		 * we need to use it to configure the correct SPI mode.
+		 */
+		zynqmp_qspi_write_op(xqspi, op->data.buswidth,
+				     genfifoentry);
+		zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
+				   zynqmp_gqspi_read(xqspi, GQSPI_CONFIG_OFST) |
+				   GQSPI_CFG_START_GEN_FIFO_MASK);
+	}
+
+	if (op->data.nbytes) {
+		reinit_completion(&xqspi->data_completion);
+		if (op->data.dir == SPI_MEM_DATA_OUT) {
+			xqspi->txbuf = (u8 *)op->data.buf.out;
+			xqspi->rxbuf = NULL;
+			xqspi->bytes_to_transfer = op->data.nbytes;
+			xqspi->bytes_to_receive = 0;
+			zynqmp_qspi_write_op(xqspi, op->data.buswidth,
+					     genfifoentry);
+			zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
+					   zynqmp_gqspi_read
+					   (xqspi, GQSPI_CONFIG_OFST) |
+					   GQSPI_CFG_START_GEN_FIFO_MASK);
+			zynqmp_gqspi_write(xqspi, GQSPI_IER_OFST,
+					   GQSPI_IER_TXEMPTY_MASK |
+					   GQSPI_IER_GENFIFOEMPTY_MASK |
+					   GQSPI_IER_TXNOT_FULL_MASK);
+		} else {
+			xqspi->txbuf = NULL;
+			xqspi->rxbuf = (u8 *)op->data.buf.in;
+			xqspi->bytes_to_receive = op->data.nbytes;
+			xqspi->bytes_to_transfer = 0;
+			err = zynqmp_qspi_read_op(xqspi, op->data.buswidth,
+					    genfifoentry);
+			if (err)
+				goto return_err;
+
+			zynqmp_gqspi_write(xqspi, GQSPI_CONFIG_OFST,
+					   zynqmp_gqspi_read
+					   (xqspi, GQSPI_CONFIG_OFST) |
+					   GQSPI_CFG_START_GEN_FIFO_MASK);
+			if (xqspi->mode == GQSPI_MODE_DMA) {
+				zynqmp_gqspi_write
+					(xqspi, GQSPI_QSPIDMA_DST_I_EN_OFST,
+					 GQSPI_QSPIDMA_DST_I_EN_DONE_MASK);
+			} else {
+				zynqmp_gqspi_write(xqspi, GQSPI_IER_OFST,
+						   GQSPI_IER_GENFIFOEMPTY_MASK |
+						   GQSPI_IER_RXNEMPTY_MASK |
+						   GQSPI_IER_RXEMPTY_MASK);
+			}
+		}
+		if (!wait_for_completion_timeout
+		    (&xqspi->data_completion, msecs_to_jiffies(1000)))
+			err = -ETIMEDOUT;
+	}
+
+return_err:
+
+	zynqmp_qspi_chipselect(mem->spi, true);
+	mutex_unlock(&xqspi->op_lock);
+
+	return err;
+}
+
 static const struct dev_pm_ops zynqmp_qspi_dev_pm_ops = {
 	SET_RUNTIME_PM_OPS(zynqmp_runtime_suspend,
 			   zynqmp_runtime_resume, NULL)
 	SET_SYSTEM_SLEEP_PM_OPS(zynqmp_qspi_suspend, zynqmp_qspi_resume)
 };
 
+static const struct spi_controller_mem_ops zynqmp_qspi_mem_ops = {
+	.exec_op = zynqmp_qspi_exec_op,
+};
+
 /**
- * zynqmp_qspi_probe:	Probe method for the QSPI driver
+ * zynqmp_qspi_probe - Probe method for the QSPI driver
  * @pdev:	Pointer to the platform_device structure
  *
  * This function initializes the driver data structures and the hardware.
@@ -1011,21 +1093,19 @@
 static int zynqmp_qspi_probe(struct platform_device *pdev)
 {
 	int ret = 0;
-	struct spi_master *master;
+	struct spi_controller *ctlr;
 	struct zynqmp_qspi *xqspi;
 	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
 
-	eemi_ops = zynqmp_pm_get_eemi_ops();
-	if (IS_ERR(eemi_ops))
-		return PTR_ERR(eemi_ops);
-
-	master = spi_alloc_master(&pdev->dev, sizeof(*xqspi));
-	if (!master)
+	ctlr = spi_alloc_master(&pdev->dev, sizeof(*xqspi));
+	if (!ctlr)
 		return -ENOMEM;
 
-	xqspi = spi_master_get_devdata(master);
-	master->dev.of_node = pdev->dev.of_node;
-	platform_set_drvdata(pdev, master);
+	xqspi = spi_controller_get_devdata(ctlr);
+	xqspi->dev = dev;
+	xqspi->ctlr = ctlr;
+	platform_set_drvdata(pdev, xqspi);
 
 	xqspi->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(xqspi->regs)) {
@@ -1033,7 +1113,6 @@
 		goto remove_master;
 	}
 
-	xqspi->dev = dev;
 	xqspi->pclk = devm_clk_get(&pdev->dev, "pclk");
 	if (IS_ERR(xqspi->pclk)) {
 		dev_err(dev, "pclk clock not found.\n");
@@ -1041,25 +1120,29 @@
 		goto remove_master;
 	}
 
+	xqspi->refclk = devm_clk_get(&pdev->dev, "ref_clk");
+	if (IS_ERR(xqspi->refclk)) {
+		dev_err(dev, "ref_clk clock not found.\n");
+		ret = PTR_ERR(xqspi->refclk);
+		goto remove_master;
+	}
+
 	ret = clk_prepare_enable(xqspi->pclk);
 	if (ret) {
 		dev_err(dev, "Unable to enable APB clock.\n");
 		goto remove_master;
 	}
 
-	xqspi->refclk = devm_clk_get(&pdev->dev, "ref_clk");
-	if (IS_ERR(xqspi->refclk)) {
-		dev_err(dev, "ref_clk clock not found.\n");
-		ret = PTR_ERR(xqspi->refclk);
-		goto clk_dis_pclk;
-	}
-
 	ret = clk_prepare_enable(xqspi->refclk);
 	if (ret) {
 		dev_err(dev, "Unable to enable device clock.\n");
 		goto clk_dis_pclk;
 	}
 
+	init_completion(&xqspi->data_completion);
+
+	mutex_init(&xqspi->op_lock);
+
 	pm_runtime_use_autosuspend(&pdev->dev);
 	pm_runtime_set_autosuspend_delay(&pdev->dev, SPI_AUTOSUSPEND_TIMEOUT);
 	pm_runtime_set_active(&pdev->dev);
@@ -1075,32 +1158,29 @@
 		goto clk_dis_all;
 	}
 	ret = devm_request_irq(&pdev->dev, xqspi->irq, zynqmp_qspi_irq,
-			       0, pdev->name, master);
+			       0, pdev->name, xqspi);
 	if (ret != 0) {
 		ret = -ENXIO;
 		dev_err(dev, "request_irq failed\n");
 		goto clk_dis_all;
 	}
 
-	master->num_chipselect = GQSPI_DEFAULT_NUM_CS;
-
-	master->setup = zynqmp_qspi_setup;
-	master->set_cs = zynqmp_qspi_chipselect;
-	master->transfer_one = zynqmp_qspi_start_transfer;
-	master->prepare_transfer_hardware = zynqmp_prepare_transfer_hardware;
-	master->unprepare_transfer_hardware =
-					zynqmp_unprepare_transfer_hardware;
-	master->max_speed_hz = clk_get_rate(xqspi->refclk) / 2;
-	master->bits_per_word_mask = SPI_BPW_MASK(8);
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_RX_DUAL | SPI_RX_QUAD |
+	dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
+	ctlr->bits_per_word_mask = SPI_BPW_MASK(8);
+	ctlr->num_chipselect = GQSPI_DEFAULT_NUM_CS;
+	ctlr->mem_ops = &zynqmp_qspi_mem_ops;
+	ctlr->setup = zynqmp_qspi_setup_op;
+	ctlr->max_speed_hz = clk_get_rate(xqspi->refclk) / 2;
+	ctlr->bits_per_word_mask = SPI_BPW_MASK(8);
+	ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_RX_DUAL | SPI_RX_QUAD |
 			    SPI_TX_DUAL | SPI_TX_QUAD;
+	ctlr->dev.of_node = np;
 
-	if (master->dev.parent == NULL)
-		master->dev.parent = &master->dev;
-
-	ret = spi_register_master(master);
-	if (ret)
+	ret = devm_spi_register_controller(&pdev->dev, ctlr);
+	if (ret) {
+		dev_err(&pdev->dev, "spi_register_controller failed\n");
 		goto clk_dis_all;
+	}
 
 	return 0;
 
@@ -1111,13 +1191,13 @@
 clk_dis_pclk:
 	clk_disable_unprepare(xqspi->pclk);
 remove_master:
-	spi_master_put(master);
+	spi_controller_put(ctlr);
 
 	return ret;
 }
 
 /**
- * zynqmp_qspi_remove:	Remove method for the QSPI driver
+ * zynqmp_qspi_remove - Remove method for the QSPI driver
  * @pdev:	Pointer to the platform_device structure
  *
  * This function is called if a device is physically removed from the system or
@@ -1128,8 +1208,7 @@
  */
 static int zynqmp_qspi_remove(struct platform_device *pdev)
 {
-	struct spi_master *master = platform_get_drvdata(pdev);
-	struct zynqmp_qspi *xqspi = spi_master_get_devdata(master);
+	struct zynqmp_qspi *xqspi = platform_get_drvdata(pdev);
 
 	zynqmp_gqspi_write(xqspi, GQSPI_EN_OFST, 0x0);
 	clk_disable_unprepare(xqspi->refclk);
@@ -1137,8 +1216,6 @@
 	pm_runtime_set_suspended(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
-	spi_unregister_master(master);
-
 	return 0;
 }
 
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index ac05c9c..8c261ea 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -88,7 +88,7 @@
 	if (len) {
 		spi->driver_override = driver_override;
 	} else {
-		/* Emptry string, disable driver override */
+		/* Empty string, disable driver override */
 		spi->driver_override = NULL;
 		kfree(driver_override);
 	}
@@ -466,7 +466,7 @@
 static LIST_HEAD(spi_controller_list);
 
 /*
- * Used to protect add/del opertion for board_info list and
+ * Used to protect add/del operation for board_info list and
  * spi_controller list, and their matching process
  * also used to protect object of type struct idr
  */
@@ -513,6 +513,7 @@
 	spi->dev.bus = &spi_bus_type;
 	spi->dev.release = spidev_release;
 	spi->cs_gpio = -ENOENT;
+	spi->mode = ctlr->buswidth_override_bits;
 
 	spin_lock_init(&spi->statistics.lock);
 
@@ -792,25 +793,56 @@
 
 /*-------------------------------------------------------------------------*/
 
-static void spi_set_cs(struct spi_device *spi, bool enable)
+static void spi_set_cs(struct spi_device *spi, bool enable, bool force)
 {
+	bool enable1 = enable;
+
+	/*
+	 * Avoid calling into the driver (or doing delays) if the chip select
+	 * isn't actually changing from the last time this was called.
+	 */
+	if (!force && (spi->controller->last_cs_enable == enable) &&
+	    (spi->controller->last_cs_mode_high == (spi->mode & SPI_CS_HIGH)))
+		return;
+
+	spi->controller->last_cs_enable = enable;
+	spi->controller->last_cs_mode_high = spi->mode & SPI_CS_HIGH;
+
+	if (!spi->controller->set_cs_timing) {
+		if (enable1)
+			spi_delay_exec(&spi->controller->cs_setup, NULL);
+		else
+			spi_delay_exec(&spi->controller->cs_hold, NULL);
+	}
+
 	if (spi->mode & SPI_CS_HIGH)
 		enable = !enable;
 
 	if (spi->cs_gpiod || gpio_is_valid(spi->cs_gpio)) {
-		/*
-		 * Honour the SPI_NO_CS flag and invert the enable line, as
-		 * active low is default for SPI. Execution paths that handle
-		 * polarity inversion in gpiolib (such as device tree) will
-		 * enforce active high using the SPI_CS_HIGH resulting in a
-		 * double inversion through the code above.
-		 */
 		if (!(spi->mode & SPI_NO_CS)) {
-			if (spi->cs_gpiod)
-				gpiod_set_value_cansleep(spi->cs_gpiod,
-							 !enable);
-			else
+			if (spi->cs_gpiod) {
+				/*
+				 * Historically ACPI has no means of the GPIO polarity and
+				 * thus the SPISerialBus() resource defines it on the per-chip
+				 * basis. In order to avoid a chain of negations, the GPIO
+				 * polarity is considered being Active High. Even for the cases
+				 * when _DSD() is involved (in the updated versions of ACPI)
+				 * the GPIO CS polarity must be defined Active High to avoid
+				 * ambiguity. That's why we use enable, that takes SPI_CS_HIGH
+				 * into account.
+				 */
+				if (has_acpi_companion(&spi->dev))
+					gpiod_set_value_cansleep(spi->cs_gpiod, !enable);
+				else
+					/* Polarity handled by GPIO library */
+					gpiod_set_value_cansleep(spi->cs_gpiod, enable1);
+			} else {
+				/*
+				 * invert the enable line, as active low is
+				 * default for SPI.
+				 */
 				gpio_set_value_cansleep(spi->cs_gpio, !enable);
+			}
 		}
 		/* Some SPI masters need both GPIO CS & slave_select */
 		if ((spi->controller->flags & SPI_MASTER_GPIO_SS) &&
@@ -819,6 +851,11 @@
 	} else if (spi->controller->set_cs) {
 		spi->controller->set_cs(spi, !enable);
 	}
+
+	if (!spi->controller->set_cs_timing) {
+		if (!enable1)
+			spi_delay_exec(&spi->controller->cs_inactive, NULL);
+	}
 }
 
 #ifdef CONFIG_HAS_DMA
@@ -986,6 +1023,8 @@
 		spi_unmap_buf(ctlr, tx_dev, &xfer->tx_sg, DMA_TO_DEVICE);
 	}
 
+	ctlr->cur_msg_mapped = false;
+
 	return 0;
 }
 #else /* !CONFIG_HAS_DMA */
@@ -1027,7 +1066,8 @@
 	void *tmp;
 	unsigned int max_tx, max_rx;
 
-	if (ctlr->flags & (SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX)) {
+	if ((ctlr->flags & (SPI_CONTROLLER_MUST_RX | SPI_CONTROLLER_MUST_TX))
+		&& !(msg->spi->mode & SPI_3WIRE)) {
 		max_tx = 0;
 		max_rx = 0;
 
@@ -1079,7 +1119,8 @@
 {
 	struct spi_statistics *statm = &ctlr->statistics;
 	struct spi_statistics *stats = &msg->spi->statistics;
-	unsigned long long ms = 1;
+	u32 speed_hz = xfer->speed_hz;
+	unsigned long long ms;
 
 	if (spi_controller_is_slave(ctlr)) {
 		if (wait_for_completion_interruptible(&ctlr->xfer_completion)) {
@@ -1087,8 +1128,11 @@
 			return -EINTR;
 		}
 	} else {
+		if (!speed_hz)
+			speed_hz = 100000;
+
 		ms = 8LL * 1000LL * xfer->len;
-		do_div(ms, xfer->speed_hz);
+		do_div(ms, speed_hz);
 		ms += ms + 200; /* some tolerance */
 
 		if (ms > UINT_MAX)
@@ -1125,42 +1169,81 @@
 	}
 }
 
-static void _spi_transfer_cs_change_delay(struct spi_message *msg,
-					  struct spi_transfer *xfer)
+int spi_delay_to_ns(struct spi_delay *_delay, struct spi_transfer *xfer)
 {
-	u32 delay = xfer->cs_change_delay;
-	u32 unit = xfer->cs_change_delay_unit;
+	u32 delay = _delay->value;
+	u32 unit = _delay->unit;
 	u32 hz;
 
-	/* return early on "fast" mode - for everything but USECS */
-	if (!delay && unit != SPI_DELAY_UNIT_USECS)
-		return;
+	if (!delay)
+		return 0;
 
 	switch (unit) {
 	case SPI_DELAY_UNIT_USECS:
-		/* for compatibility use default of 10us */
-		if (!delay)
-			delay = 10000;
-		else
-			delay *= 1000;
+		delay *= 1000;
 		break;
 	case SPI_DELAY_UNIT_NSECS: /* nothing to do here */
 		break;
 	case SPI_DELAY_UNIT_SCK:
+		/* clock cycles need to be obtained from spi_transfer */
+		if (!xfer)
+			return -EINVAL;
 		/* if there is no effective speed know, then approximate
 		 * by underestimating with half the requested hz
 		 */
 		hz = xfer->effective_speed_hz ?: xfer->speed_hz / 2;
+		if (!hz)
+			return -EINVAL;
 		delay *= DIV_ROUND_UP(1000000000, hz);
 		break;
 	default:
+		return -EINVAL;
+	}
+
+	return delay;
+}
+EXPORT_SYMBOL_GPL(spi_delay_to_ns);
+
+int spi_delay_exec(struct spi_delay *_delay, struct spi_transfer *xfer)
+{
+	int delay;
+
+	might_sleep();
+
+	if (!_delay)
+		return -EINVAL;
+
+	delay = spi_delay_to_ns(_delay, xfer);
+	if (delay < 0)
+		return delay;
+
+	_spi_transfer_delay_ns(delay);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(spi_delay_exec);
+
+static void _spi_transfer_cs_change_delay(struct spi_message *msg,
+					  struct spi_transfer *xfer)
+{
+	u32 delay = xfer->cs_change_delay.value;
+	u32 unit = xfer->cs_change_delay.unit;
+	int ret;
+
+	/* return early on "fast" mode - for everything but USECS */
+	if (!delay) {
+		if (unit == SPI_DELAY_UNIT_USECS)
+			_spi_transfer_delay_ns(10000);
+		return;
+	}
+
+	ret = spi_delay_exec(&xfer->cs_change_delay, xfer);
+	if (ret) {
 		dev_err_once(&msg->spi->dev,
 			     "Use of unsupported delay unit %i, using default of 10us\n",
-			     xfer->cs_change_delay_unit);
-		delay = 10000;
+			     unit);
+		_spi_transfer_delay_ns(10000);
 	}
-	/* now sleep for the requested amount of time */
-	_spi_transfer_delay_ns(delay);
 }
 
 /*
@@ -1179,7 +1262,7 @@
 	struct spi_statistics *statm = &ctlr->statistics;
 	struct spi_statistics *stats = &msg->spi->statistics;
 
-	spi_set_cs(msg->spi, true);
+	spi_set_cs(msg->spi, true, false);
 
 	SPI_STATISTICS_INCREMENT_FIELD(statm, messages);
 	SPI_STATISTICS_INCREMENT_FIELD(stats, messages);
@@ -1190,11 +1273,25 @@
 		spi_statistics_add_transfer_stats(statm, xfer, ctlr);
 		spi_statistics_add_transfer_stats(stats, xfer, ctlr);
 
-		if (xfer->tx_buf || xfer->rx_buf) {
+		if (!ctlr->ptp_sts_supported) {
+			xfer->ptp_sts_word_pre = 0;
+			ptp_read_system_prets(xfer->ptp_sts);
+		}
+
+		if ((xfer->tx_buf || xfer->rx_buf) && xfer->len) {
 			reinit_completion(&ctlr->xfer_completion);
 
+fallback_pio:
 			ret = ctlr->transfer_one(ctlr, msg->spi, xfer);
 			if (ret < 0) {
+				if (ctlr->cur_msg_mapped &&
+				   (xfer->error & SPI_TRANS_FAIL_NO_START)) {
+					__spi_unmap_msg(ctlr, msg);
+					ctlr->fallback = true;
+					xfer->error &= ~SPI_TRANS_FAIL_NO_START;
+					goto fallback_pio;
+				}
+
 				SPI_STATISTICS_INCREMENT_FIELD(statm,
 							       errors);
 				SPI_STATISTICS_INCREMENT_FIELD(stats,
@@ -1216,22 +1313,26 @@
 					xfer->len);
 		}
 
+		if (!ctlr->ptp_sts_supported) {
+			ptp_read_system_postts(xfer->ptp_sts);
+			xfer->ptp_sts_word_post = xfer->len;
+		}
+
 		trace_spi_transfer_stop(msg, xfer);
 
 		if (msg->status != -EINPROGRESS)
 			goto out;
 
-		if (xfer->delay_usecs)
-			_spi_transfer_delay_ns(xfer->delay_usecs * 1000);
+		spi_transfer_delay_exec(xfer);
 
 		if (xfer->cs_change) {
 			if (list_is_last(&xfer->transfer_list,
 					 &msg->transfers)) {
 				keep_cs = true;
 			} else {
-				spi_set_cs(msg->spi, false);
+				spi_set_cs(msg->spi, false, false);
 				_spi_transfer_cs_change_delay(msg, xfer);
-				spi_set_cs(msg->spi, true);
+				spi_set_cs(msg->spi, true, false);
 			}
 		}
 
@@ -1240,7 +1341,7 @@
 
 out:
 	if (ret != 0 || !keep_cs)
-		spi_set_cs(msg->spi, false);
+		spi_set_cs(msg->spi, false, false);
 
 	if (msg->status == -EINPROGRESS)
 		msg->status = ret;
@@ -1267,6 +1368,14 @@
 }
 EXPORT_SYMBOL_GPL(spi_finalize_current_transfer);
 
+static void spi_idle_runtime_pm(struct spi_controller *ctlr)
+{
+	if (ctlr->auto_runtime_pm) {
+		pm_runtime_mark_last_busy(ctlr->dev.parent);
+		pm_runtime_put_autosuspend(ctlr->dev.parent);
+	}
+}
+
 /**
  * __spi_pump_messages - function which processes spi message queue
  * @ctlr: controller to process queue for
@@ -1282,6 +1391,7 @@
  */
 static void __spi_pump_messages(struct spi_controller *ctlr, bool in_kthread)
 {
+	struct spi_transfer *xfer;
 	struct spi_message *msg;
 	bool was_busy = false;
 	unsigned long flags;
@@ -1298,7 +1408,7 @@
 
 	/* If another context is idling the device then defer */
 	if (ctlr->idling) {
-		kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages);
+		kthread_queue_work(ctlr->kworker, &ctlr->pump_messages);
 		spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 		return;
 	}
@@ -1310,10 +1420,17 @@
 			return;
 		}
 
-		/* Only do teardown in the thread */
+		/* Defer any non-atomic teardown to the thread */
 		if (!in_kthread) {
-			kthread_queue_work(&ctlr->kworker,
-					   &ctlr->pump_messages);
+			if (!ctlr->dummy_rx && !ctlr->dummy_tx &&
+			    !ctlr->unprepare_transfer_hardware) {
+				spi_idle_runtime_pm(ctlr);
+				ctlr->busy = false;
+				trace_spi_controller_idle(ctlr);
+			} else {
+				kthread_queue_work(ctlr->kworker,
+						   &ctlr->pump_messages);
+			}
 			spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 			return;
 		}
@@ -1330,10 +1447,7 @@
 		    ctlr->unprepare_transfer_hardware(ctlr))
 			dev_err(&ctlr->dev,
 				"failed to unprepare transfer hardware\n");
-		if (ctlr->auto_runtime_pm) {
-			pm_runtime_mark_last_busy(ctlr->dev.parent);
-			pm_runtime_put_autosuspend(ctlr->dev.parent);
-		}
+		spi_idle_runtime_pm(ctlr);
 		trace_spi_controller_idle(ctlr);
 
 		spin_lock_irqsave(&ctlr->queue_lock, flags);
@@ -1408,6 +1522,13 @@
 		goto out;
 	}
 
+	if (!ctlr->ptp_sts_supported && !ctlr->transfer_one) {
+		list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+			xfer->ptp_sts_word_pre = 0;
+			ptp_read_system_prets(xfer->ptp_sts);
+		}
+	}
+
 	ret = ctlr->transfer_one_message(ctlr, msg);
 	if (ret) {
 		dev_err(&ctlr->dev,
@@ -1436,6 +1557,91 @@
 }
 
 /**
+ * spi_take_timestamp_pre - helper for drivers to collect the beginning of the
+ *			    TX timestamp for the requested byte from the SPI
+ *			    transfer. The frequency with which this function
+ *			    must be called (once per word, once for the whole
+ *			    transfer, once per batch of words etc) is arbitrary
+ *			    as long as the @tx buffer offset is greater than or
+ *			    equal to the requested byte at the time of the
+ *			    call. The timestamp is only taken once, at the
+ *			    first such call. It is assumed that the driver
+ *			    advances its @tx buffer pointer monotonically.
+ * @ctlr: Pointer to the spi_controller structure of the driver
+ * @xfer: Pointer to the transfer being timestamped
+ * @progress: How many words (not bytes) have been transferred so far
+ * @irqs_off: If true, will disable IRQs and preemption for the duration of the
+ *	      transfer, for less jitter in time measurement. Only compatible
+ *	      with PIO drivers. If true, must follow up with
+ *	      spi_take_timestamp_post or otherwise system will crash.
+ *	      WARNING: for fully predictable results, the CPU frequency must
+ *	      also be under control (governor).
+ */
+void spi_take_timestamp_pre(struct spi_controller *ctlr,
+			    struct spi_transfer *xfer,
+			    size_t progress, bool irqs_off)
+{
+	if (!xfer->ptp_sts)
+		return;
+
+	if (xfer->timestamped)
+		return;
+
+	if (progress > xfer->ptp_sts_word_pre)
+		return;
+
+	/* Capture the resolution of the timestamp */
+	xfer->ptp_sts_word_pre = progress;
+
+	if (irqs_off) {
+		local_irq_save(ctlr->irq_flags);
+		preempt_disable();
+	}
+
+	ptp_read_system_prets(xfer->ptp_sts);
+}
+EXPORT_SYMBOL_GPL(spi_take_timestamp_pre);
+
+/**
+ * spi_take_timestamp_post - helper for drivers to collect the end of the
+ *			     TX timestamp for the requested byte from the SPI
+ *			     transfer. Can be called with an arbitrary
+ *			     frequency: only the first call where @tx exceeds
+ *			     or is equal to the requested word will be
+ *			     timestamped.
+ * @ctlr: Pointer to the spi_controller structure of the driver
+ * @xfer: Pointer to the transfer being timestamped
+ * @progress: How many words (not bytes) have been transferred so far
+ * @irqs_off: If true, will re-enable IRQs and preemption for the local CPU.
+ */
+void spi_take_timestamp_post(struct spi_controller *ctlr,
+			     struct spi_transfer *xfer,
+			     size_t progress, bool irqs_off)
+{
+	if (!xfer->ptp_sts)
+		return;
+
+	if (xfer->timestamped)
+		return;
+
+	if (progress < xfer->ptp_sts_word_post)
+		return;
+
+	ptp_read_system_postts(xfer->ptp_sts);
+
+	if (irqs_off) {
+		local_irq_restore(ctlr->irq_flags);
+		preempt_enable();
+	}
+
+	/* Capture the resolution of the timestamp */
+	xfer->ptp_sts_word_post = progress;
+
+	xfer->timestamped = true;
+}
+EXPORT_SYMBOL_GPL(spi_take_timestamp_post);
+
+/**
  * spi_set_thread_rt - set the controller to pump at realtime priority
  * @ctlr: controller to boost priority of
  *
@@ -1452,11 +1658,9 @@
  */
 static void spi_set_thread_rt(struct spi_controller *ctlr)
 {
-	struct sched_param param = { .sched_priority = MAX_RT_PRIO / 2 };
-
 	dev_info(&ctlr->dev,
 		"will run message pump with realtime priority\n");
-	sched_setscheduler(ctlr->kworker_task, SCHED_FIFO, &param);
+	sched_set_fifo(ctlr->kworker->task);
 }
 
 static int spi_init_queue(struct spi_controller *ctlr)
@@ -1464,13 +1668,12 @@
 	ctlr->running = false;
 	ctlr->busy = false;
 
-	kthread_init_worker(&ctlr->kworker);
-	ctlr->kworker_task = kthread_run(kthread_worker_fn, &ctlr->kworker,
-					 "%s", dev_name(&ctlr->dev));
-	if (IS_ERR(ctlr->kworker_task)) {
-		dev_err(&ctlr->dev, "failed to create message pump task\n");
-		return PTR_ERR(ctlr->kworker_task);
+	ctlr->kworker = kthread_create_worker(0, dev_name(&ctlr->dev));
+	if (IS_ERR(ctlr->kworker)) {
+		dev_err(&ctlr->dev, "failed to create message pump kworker\n");
+		return PTR_ERR(ctlr->kworker);
 	}
+
 	kthread_init_work(&ctlr->pump_messages, spi_pump_messages);
 
 	/*
@@ -1520,6 +1723,7 @@
  */
 void spi_finalize_current_message(struct spi_controller *ctlr)
 {
+	struct spi_transfer *xfer;
 	struct spi_message *mesg;
 	unsigned long flags;
 	int ret;
@@ -1528,6 +1732,17 @@
 	mesg = ctlr->cur_msg;
 	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 
+	if (!ctlr->ptp_sts_supported && !ctlr->transfer_one) {
+		list_for_each_entry(xfer, &mesg->transfers, transfer_list) {
+			ptp_read_system_postts(xfer->ptp_sts);
+			xfer->ptp_sts_word_post = xfer->len;
+		}
+	}
+
+	if (unlikely(ctlr->ptp_sts_supported))
+		list_for_each_entry(xfer, &mesg->transfers, transfer_list)
+			WARN_ON_ONCE(xfer->ptp_sts && !xfer->timestamped);
+
 	spi_unmap_msg(ctlr, mesg);
 
 	/* In the prepare_messages callback the spi bus has the opportunity to
@@ -1548,7 +1763,8 @@
 	spin_lock_irqsave(&ctlr->queue_lock, flags);
 	ctlr->cur_msg = NULL;
 	ctlr->cur_msg_prepared = false;
-	kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages);
+	ctlr->fallback = false;
+	kthread_queue_work(ctlr->kworker, &ctlr->pump_messages);
 	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 
 	trace_spi_message_done(mesg);
@@ -1574,7 +1790,7 @@
 	ctlr->cur_msg = NULL;
 	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 
-	kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages);
+	kthread_queue_work(ctlr->kworker, &ctlr->pump_messages);
 
 	return 0;
 }
@@ -1630,8 +1846,7 @@
 		return ret;
 	}
 
-	kthread_flush_worker(&ctlr->kworker);
-	kthread_stop(ctlr->kworker_task);
+	kthread_destroy_worker(ctlr->kworker);
 
 	return 0;
 }
@@ -1654,7 +1869,7 @@
 
 	list_add_tail(&msg->queue, &ctlr->queue);
 	if (!ctlr->busy && need_pump)
-		kthread_queue_work(&ctlr->kworker, &ctlr->pump_messages);
+		kthread_queue_work(ctlr->kworker, &ctlr->pump_messages);
 
 	spin_unlock_irqrestore(&ctlr->queue_lock, flags);
 	return 0;
@@ -1799,23 +2014,9 @@
 	}
 	spi->chip_select = value;
 
-	/*
-	 * For descriptors associated with the device, polarity inversion is
-	 * handled in the gpiolib, so all gpio chip selects are "active high"
-	 * in the logical sense, the gpiolib will invert the line if need be.
-	 */
-	if ((ctlr->use_gpio_descriptors) && ctlr->cs_gpiods &&
-	    ctlr->cs_gpiods[spi->chip_select])
-		spi->mode |= SPI_CS_HIGH;
-
 	/* Device speed */
-	rc = of_property_read_u32(nc, "spi-max-frequency", &value);
-	if (rc) {
-		dev_err(&ctlr->dev,
-			"%pOF has no valid 'spi-max-frequency' property (%d)\n", nc, rc);
-		return rc;
-	}
-	spi->max_speed_hz = value;
+	if (!of_property_read_u32(nc, "spi-max-frequency", &value))
+		spi->max_speed_hz = value;
 
 	return 0;
 }
@@ -2037,9 +2238,10 @@
 		return AE_NO_MEMORY;
 	}
 
+
 	ACPI_COMPANION_SET(&spi->dev, adev);
 	spi->max_speed_hz	= lookup.max_speed_hz;
-	spi->mode		= lookup.mode;
+	spi->mode		|= lookup.mode;
 	spi->irq		= lookup.irq;
 	spi->bits_per_word	= lookup.bits_per_word;
 	spi->chip_select	= lookup.chip_select;
@@ -2358,6 +2560,8 @@
 	int nb, i;
 	struct gpio_desc **cs;
 	struct device *dev = &ctlr->dev;
+	unsigned long native_cs_mask = 0;
+	unsigned int num_cs_gpios = 0;
 
 	nb = gpiod_count(dev, "cs");
 	ctlr->num_chipselect = max_t(int, nb, ctlr->num_chipselect);
@@ -2399,7 +2603,23 @@
 			if (!gpioname)
 				return -ENOMEM;
 			gpiod_set_consumer_name(cs[i], gpioname);
+			num_cs_gpios++;
+			continue;
 		}
+
+		if (ctlr->max_native_cs && i >= ctlr->max_native_cs) {
+			dev_err(dev, "Invalid native chip select %d\n", i);
+			return -EINVAL;
+		}
+		native_cs_mask |= BIT(i);
+	}
+
+	ctlr->unused_native_cs = ffs(~native_cs_mask) - 1;
+
+	if ((ctlr->flags & SPI_MASTER_GPIO_SS) && num_cs_gpios &&
+	    ctlr->max_native_cs && ctlr->unused_native_cs >= ctlr->max_native_cs) {
+		dev_err(dev, "No unused native chip select available\n");
+		return -EINVAL;
 	}
 
 	return 0;
@@ -2955,10 +3175,11 @@
 		/* add to list */
 		list_add(&xfer->transfer_list, rxfer->replaced_after);
 
-		/* clear cs_change and delay_usecs for all but the last */
+		/* clear cs_change and delay for all but the last */
 		if (i) {
 			xfer->cs_change = false;
 			xfer->delay_usecs = 0;
+			xfer->delay.value = 0;
 		}
 	}
 
@@ -3172,10 +3393,37 @@
 	if (!spi->max_speed_hz)
 		spi->max_speed_hz = spi->controller->max_speed_hz;
 
+	mutex_lock(&spi->controller->io_mutex);
+
 	if (spi->controller->setup)
 		status = spi->controller->setup(spi);
 
-	spi_set_cs(spi, false);
+	if (spi->controller->auto_runtime_pm && spi->controller->set_cs) {
+		status = pm_runtime_get_sync(spi->controller->dev.parent);
+		if (status < 0) {
+			mutex_unlock(&spi->controller->io_mutex);
+			pm_runtime_put_noidle(spi->controller->dev.parent);
+			dev_err(&spi->controller->dev, "Failed to power device: %d\n",
+				status);
+			return status;
+		}
+
+		/*
+		 * We do not want to return positive value from pm_runtime_get,
+		 * there are many instances of devices calling spi_setup() and
+		 * checking for a non-zero return value instead of a negative
+		 * return value.
+		 */
+		status = 0;
+
+		spi_set_cs(spi, false, true);
+		pm_runtime_mark_last_busy(spi->controller->dev.parent);
+		pm_runtime_put_autosuspend(spi->controller->dev.parent);
+	} else {
+		spi_set_cs(spi, false, true);
+	}
+
+	mutex_unlock(&spi->controller->io_mutex);
 
 	if (spi->rt && !spi->controller->rt) {
 		spi->controller->rt = true;
@@ -3198,18 +3446,71 @@
 /**
  * spi_set_cs_timing - configure CS setup, hold, and inactive delays
  * @spi: the device that requires specific CS timing configuration
- * @setup: CS setup time in terms of clock count
- * @hold: CS hold time in terms of clock count
- * @inactive_dly: CS inactive delay between transfers in terms of clock count
+ * @setup: CS setup time specified via @spi_delay
+ * @hold: CS hold time specified via @spi_delay
+ * @inactive: CS inactive delay between transfers specified via @spi_delay
+ *
+ * Return: zero on success, else a negative error code.
  */
-void spi_set_cs_timing(struct spi_device *spi, u8 setup, u8 hold,
-		       u8 inactive_dly)
+int spi_set_cs_timing(struct spi_device *spi, struct spi_delay *setup,
+		      struct spi_delay *hold, struct spi_delay *inactive)
 {
+	size_t len;
+
 	if (spi->controller->set_cs_timing)
-		spi->controller->set_cs_timing(spi, setup, hold, inactive_dly);
+		return spi->controller->set_cs_timing(spi, setup, hold,
+						      inactive);
+
+	if ((setup && setup->unit == SPI_DELAY_UNIT_SCK) ||
+	    (hold && hold->unit == SPI_DELAY_UNIT_SCK) ||
+	    (inactive && inactive->unit == SPI_DELAY_UNIT_SCK)) {
+		dev_err(&spi->dev,
+			"Clock-cycle delays for CS not supported in SW mode\n");
+		return -ENOTSUPP;
+	}
+
+	len = sizeof(struct spi_delay);
+
+	/* copy delays to controller */
+	if (setup)
+		memcpy(&spi->controller->cs_setup, setup, len);
+	else
+		memset(&spi->controller->cs_setup, 0, len);
+
+	if (hold)
+		memcpy(&spi->controller->cs_hold, hold, len);
+	else
+		memset(&spi->controller->cs_hold, 0, len);
+
+	if (inactive)
+		memcpy(&spi->controller->cs_inactive, inactive, len);
+	else
+		memset(&spi->controller->cs_inactive, 0, len);
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(spi_set_cs_timing);
 
+static int _spi_xfer_word_delay_update(struct spi_transfer *xfer,
+				       struct spi_device *spi)
+{
+	int delay1, delay2;
+
+	delay1 = spi_delay_to_ns(&xfer->word_delay, xfer);
+	if (delay1 < 0)
+		return delay1;
+
+	delay2 = spi_delay_to_ns(&spi->word_delay, xfer);
+	if (delay2 < 0)
+		return delay2;
+
+	if (delay1 < delay2)
+		memcpy(&xfer->word_delay, &spi->word_delay,
+		       sizeof(xfer->word_delay));
+
+	return 0;
+}
+
 static int __spi_validate(struct spi_device *spi, struct spi_message *message)
 {
 	struct spi_controller *ctlr = spi->controller;
@@ -3345,8 +3646,8 @@
 				return -EINVAL;
 		}
 
-		if (xfer->word_delay_usecs < spi->word_delay_usecs)
-			xfer->word_delay_usecs = spi->word_delay_usecs;
+		if (_spi_xfer_word_delay_update(xfer, spi))
+			return -EINVAL;
 	}
 
 	message->status = -EINPROGRESS;
@@ -3357,6 +3658,7 @@
 static int __spi_async(struct spi_device *spi, struct spi_message *message)
 {
 	struct spi_controller *ctlr = spi->controller;
+	struct spi_transfer *xfer;
 
 	/*
 	 * Some controllers do not support doing regular SPI transfers. Return
@@ -3372,6 +3674,13 @@
 
 	trace_spi_message_submit(message);
 
+	if (!ctlr->ptp_sts_supported) {
+		list_for_each_entry(xfer, &message->transfers, transfer_list) {
+			xfer->ptp_sts_word_pre = 0;
+			ptp_read_system_prets(xfer->ptp_sts);
+		}
+	}
+
 	return ctlr->transfer(spi, message);
 }
 
@@ -3675,8 +3984,7 @@
  * is zero for success, else a negative errno status code.
  * This call may only be used from a context that may sleep.
  *
- * Parameters to this routine are always copied using a small buffer;
- * portable code should never use this for more than 32 bytes.
+ * Parameters to this routine are always copied using a small buffer.
  * Performance-sensitive or bulk transfer code should instead use
  * spi_{async,sync}() calls with dma-safe buffers.
  *
@@ -3848,7 +4156,7 @@
 	struct device *dev;
 
 	dev = bus_find_device_by_acpi_dev(&spi_bus_type, adev);
-	return dev ? to_spi_device(dev) : NULL;
+	return to_spi_device(dev);
 }
 
 static int acpi_spi_notify(struct notifier_block *nb, unsigned long value,
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index be503a0..859910e 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -62,7 +62,8 @@
 #define SPI_MODE_MASK		(SPI_CPHA | SPI_CPOL | SPI_CS_HIGH \
 				| SPI_LSB_FIRST | SPI_3WIRE | SPI_LOOP \
 				| SPI_NO_CS | SPI_READY | SPI_TX_DUAL \
-				| SPI_TX_QUAD | SPI_RX_DUAL | SPI_RX_QUAD)
+				| SPI_TX_QUAD | SPI_TX_OCTAL | SPI_RX_DUAL \
+				| SPI_RX_QUAD | SPI_RX_OCTAL)
 
 struct spidev_data {
 	dev_t			devt;
@@ -145,7 +146,7 @@
 spidev_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos)
 {
 	struct spidev_data	*spidev;
-	ssize_t			status = 0;
+	ssize_t			status;
 
 	/* chipselect only toggles at start or end of operation */
 	if (count > bufsiz)
@@ -175,7 +176,7 @@
 		size_t count, loff_t *f_pos)
 {
 	struct spidev_data	*spidev;
-	ssize_t			status = 0;
+	ssize_t			status;
 	unsigned long		missing;
 
 	/* chipselect only toggles at start or end of operation */
@@ -270,22 +271,24 @@
 		k_tmp->tx_nbits = u_tmp->tx_nbits;
 		k_tmp->rx_nbits = u_tmp->rx_nbits;
 		k_tmp->bits_per_word = u_tmp->bits_per_word;
-		k_tmp->delay_usecs = u_tmp->delay_usecs;
+		k_tmp->delay.value = u_tmp->delay_usecs;
+		k_tmp->delay.unit = SPI_DELAY_UNIT_USECS;
 		k_tmp->speed_hz = u_tmp->speed_hz;
-		k_tmp->word_delay_usecs = u_tmp->word_delay_usecs;
+		k_tmp->word_delay.value = u_tmp->word_delay_usecs;
+		k_tmp->word_delay.unit = SPI_DELAY_UNIT_USECS;
 		if (!k_tmp->speed_hz)
 			k_tmp->speed_hz = spidev->speed_hz;
 #ifdef VERBOSE
 		dev_dbg(&spidev->spi->dev,
 			"  xfer len %u %s%s%s%dbits %u usec %u usec %uHz\n",
-			u_tmp->len,
-			u_tmp->rx_buf ? "rx " : "",
-			u_tmp->tx_buf ? "tx " : "",
-			u_tmp->cs_change ? "cs " : "",
-			u_tmp->bits_per_word ? : spidev->spi->bits_per_word,
-			u_tmp->delay_usecs,
-			u_tmp->word_delay_usecs,
-			u_tmp->speed_hz ? : spidev->spi->max_speed_hz);
+			k_tmp->len,
+			k_tmp->rx_buf ? "rx " : "",
+			k_tmp->tx_buf ? "tx " : "",
+			k_tmp->cs_change ? "cs " : "",
+			k_tmp->bits_per_word ? : spidev->spi->bits_per_word,
+			k_tmp->delay.value,
+			k_tmp->word_delay.value,
+			k_tmp->speed_hz ? : spidev->spi->max_speed_hz);
 #endif
 		spi_message_add_tail(k_tmp, &msg);
 	}
@@ -457,10 +460,11 @@
 
 			spi->max_speed_hz = tmp;
 			retval = spi_setup(spi);
-			if (retval >= 0)
+			if (retval == 0) {
 				spidev->speed_hz = tmp;
-			else
-				dev_dbg(&spi->dev, "%d Hz (max)\n", tmp);
+				dev_dbg(&spi->dev, "%d Hz (max)\n",
+					spidev->speed_hz);
+			}
 			spi->max_speed_hz = save;
 		}
 		break;